
OGGM / oggm / 5392406468 (pending completion)

Pull Request #1600: added more flexibility to dynamic spinup
Merge 2edc8ce4d into 15ff031af (github / web-flow)

28 of 39 new or added lines in 1 file covered. (71.79%)

8 existing lines in 4 files now uncovered.

11309 of 13184 relevant lines covered (85.78%)

3.41 hits per line

Source File

/oggm/utils/_workflow.py (87.29% covered)
1
"""Classes and functions used by the OGGM workflow"""
2

3
# Builtins
4
import glob
9✔
5
import os
9✔
6
import tempfile
9✔
7
import gzip
9✔
8
import json
9✔
9
import time
9✔
10
import random
9✔
11
import shutil
9✔
12
import tarfile
9✔
13
import sys
9✔
14
import signal
9✔
15
import datetime
9✔
16
import logging
9✔
17
import pickle
9✔
18
import warnings
9✔
19
import itertools
9✔
20
from collections import OrderedDict
9✔
21
from functools import partial, wraps
9✔
22
from time import gmtime, strftime
9✔
23
import fnmatch
9✔
24
import platform
9✔
25
import struct
9✔
26
import importlib
9✔
27
import re as regexp
9✔
28

29
# External libs
30
import pandas as pd
9✔
31
import numpy as np
9✔
32
from scipy import stats
9✔
33
import xarray as xr
9✔
34
import shapely.geometry as shpg
9✔
35
import shapely.affinity as shpa
9✔
36
from shapely.ops import transform as shp_trafo
9✔
37
import netCDF4
9✔
38

39
# Optional libs
40
try:
9✔
41
    import geopandas as gpd
9✔
42
except ImportError:
43
    pass
44
try:
9✔
45
    import salem
9✔
46
except ImportError:
47
    pass
48
try:
9✔
49
    from salem import wgs84
9✔
50
    from salem.gis import transform_proj
9✔
51
except ImportError:
52
    pass
53
try:
9✔
54
    import pyproj
9✔
55
except ImportError:
56
    pass
57

58

59
# Locals
60
from oggm import __version__
9✔
61
from oggm.utils._funcs import (calendardate_to_hydrodate, date_to_floatyear,
9✔
62
                               tolist, filter_rgi_name, parse_rgi_meta,
63
                               haversine, multipolygon_to_polygon, clip_scalar)
64
from oggm.utils._downloads import (get_demo_file, get_wgms_files,
9✔
65
                                   get_rgi_glacier_entities)
66
from oggm import cfg
9✔
67
from oggm.exceptions import InvalidParamsError, InvalidWorkflowError
9✔
68

69

70
# Default RGI date (median per region in RGI6)
71
RGI_DATE = {'01': 2009,
9✔
72
            '02': 2004,
73
            '03': 1999,
74
            '04': 2001,
75
            '05': 2001,
76
            '06': 2000,
77
            '07': 2008,
78
            '08': 2002,
79
            '09': 2001,
80
            '10': 2011,
81
            '11': 2003,
82
            '12': 2001,
83
            '13': 2006,
84
            '14': 2001,
85
            '15': 2001,
86
            '16': 2000,
87
            '17': 2000,
88
            '18': 1978,
89
            '19': 1989,
90
            }
91

92
# Module logger
93
log = logging.getLogger('.'.join(__name__.split('.')[:-1]))
9✔
94

95

96
def empty_cache():
9✔
97
    """Empty oggm's cache directory."""
98

99
    if os.path.exists(cfg.CACHE_DIR):
×
100
        shutil.rmtree(cfg.CACHE_DIR)
×
101
    os.makedirs(cfg.CACHE_DIR)
×
102

103

104
def expand_path(p):
9✔
105
    """Helper function for os.path.expanduser and os.path.expandvars"""
106

107
    return os.path.expandvars(os.path.expanduser(p))
×
108

109

110
def gettempdir(dirname='', reset=False, home=False):
9✔
111
    """Get a temporary directory.
112

113
    The default is to locate it in the system's temporary directory as
114
    given by python's `tempfile.gettempdir()/OGGM`. You can set `home=True` for
115
    a directory in the user's `home/tmp` folder instead (this isn't really
116
    a temporary folder but well...)
117

118
    Parameters
119
    ----------
120
    dirname : str
121
        if you want to give it a name
122
    reset : bool
123
        if it has to be emptied first.
124
    home : bool
125
        if True, returns `HOME/tmp/OGGM` instead
126

127
    Returns
128
    -------
129
    the path to the temporary directory
130
    """
131

132
    basedir = (os.path.join(os.path.expanduser('~'), 'tmp') if home
1✔
133
               else tempfile.gettempdir())
134
    return mkdir(os.path.join(basedir, 'OGGM', dirname), reset=reset)
1✔
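# Usage sketch (illustrative, not from the OGGM sources): create a named
# working directory under the system temporary folder; 'demo' is an
# arbitrary example name.
#
#     >>> from oggm.utils import gettempdir
#     >>> wd = gettempdir(dirname='demo', reset=True)
#     >>> # -> '<system tmp>/OGGM/demo' (or '~/tmp/OGGM/demo' with home=True)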
135

136

137
# alias
138
get_temp_dir = gettempdir
9✔
139

140

141
def get_sys_info():
9✔
142
    """Returns system information as a list of tuples"""
143

144
    blob = []
8✔
145
    try:
8✔
146
        (sysname, nodename, release,
8✔
147
         version, machine, processor) = platform.uname()
148
        blob.extend([
8✔
149
            ("python", "%d.%d.%d.%s.%s" % sys.version_info[:]),
150
            ("python-bits", struct.calcsize("P") * 8),
151
            ("OS", "%s" % (sysname)),
152
            ("OS-release", "%s" % (release)),
153
            ("machine", "%s" % (machine)),
154
            ("processor", "%s" % (processor)),
155
        ])
156
    except BaseException:
×
157
        pass
×
158

159
    return blob
8✔
160

161

162
def get_env_info():
9✔
163
    """Returns env information as a list of tuples"""
164

165
    deps = [
8✔
166
        # (MODULE_NAME, f(mod) -> mod version)
167
        ("oggm", lambda mod: mod.__version__),
168
        ("numpy", lambda mod: mod.__version__),
169
        ("scipy", lambda mod: mod.__version__),
170
        ("pandas", lambda mod: mod.__version__),
171
        ("geopandas", lambda mod: mod.__version__),
172
        ("netCDF4", lambda mod: mod.__version__),
173
        ("matplotlib", lambda mod: mod.__version__),
174
        ("rasterio", lambda mod: mod.__version__),
175
        ("fiona", lambda mod: mod.__version__),
176
        ("pyproj", lambda mod: mod.__version__),
177
        ("shapely", lambda mod: mod.__version__),
178
        ("xarray", lambda mod: mod.__version__),
179
        ("dask", lambda mod: mod.__version__),
180
        ("salem", lambda mod: mod.__version__),
181
    ]
182

183
    deps_blob = list()
8✔
184
    for (modname, ver_f) in deps:
8✔
185
        try:
8✔
186
            if modname in sys.modules:
8✔
187
                mod = sys.modules[modname]
8✔
188
            else:
189
                mod = importlib.import_module(modname)
×
190
            ver = ver_f(mod)
8✔
191
            deps_blob.append((modname, ver))
8✔
192
        except BaseException:
×
193
            deps_blob.append((modname, None))
×
194

195
    return deps_blob
8✔
196

197

198
def get_git_ident():
9✔
199
    ident_str = '$Id$'
8✔
200
    if ":" not in ident_str:
8✔
201
        return 'no_git_id'
×
202
    return ident_str.replace("$", "").replace("Id:", "").replace(" ", "")
8✔
203

204

205
def show_versions(logger=None):
9✔
206
    """Prints the OGGM version and other system information.
207

208
    Parameters
209
    ----------
210
    logger : optional
211
        the logger you want to send the printouts to. If None, will use stdout
212

213
    Returns
214
    -------
215
    the output string
216
    """
217

218
    sys_info = get_sys_info()
1✔
219
    deps_blob = get_env_info()
1✔
220

221
    out = ['# OGGM environment: ']
1✔
222
    out.append("## System info:")
1✔
223
    for k, stat in sys_info:
1✔
224
        out.append("    %s: %s" % (k, stat))
1✔
225
    out.append("## Packages info:")
1✔
226
    for k, stat in deps_blob:
1✔
227
        out.append("    %s: %s" % (k, stat))
1✔
228
    out.append("    OGGM git identifier: " + get_git_ident())
1✔
229

230
    if logger is not None:
1✔
231
        logger.workflow('\n'.join(out))
1✔
232

233
    return '\n'.join(out)
1✔
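# Usage sketch (illustrative, not from the OGGM sources): print the
# environment report to stdout instead of sending it to a logger.
#
#     >>> print(show_versions())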
234

235

236
class SuperclassMeta(type):
9✔
237
    """Metaclass for abstract base classes.
238

239
    http://stackoverflow.com/questions/40508492/python-sphinx-inherit-
240
    method-documentation-from-superclass
241
    """
242
    def __new__(mcls, classname, bases, cls_dict):
9✔
243
        cls = super().__new__(mcls, classname, bases, cls_dict)
9✔
244
        for name, member in cls_dict.items():
9✔
245
            if not getattr(member, '__doc__'):
9✔
246
                try:
9✔
247
                    member.__doc__ = getattr(bases[-1], name).__doc__
9✔
248
                except AttributeError:
9✔
249
                    pass
9✔
250
        return cls
9✔
251

252

253
class LRUFileCache():
9✔
254
    """A least recently used cache for temporary files.
255

256
    The files which are no longer used are deleted from the disk.
257
    """
258

259
    def __init__(self, l0=None, maxsize=None):
9✔
260
        """Instantiate.
261

262
        Parameters
263
        ----------
264
        l0 : list
265
            a list of file paths
266
        maxsize : int
267
            the max number of files to keep
268
        """
269
        self.files = [] if l0 is None else l0
6✔
270
        # if no maxsize is specified, use value from configuration
271
        maxsize = cfg.PARAMS['lru_maxsize'] if maxsize is None else maxsize
6✔
272
        self.maxsize = maxsize
6✔
273
        self.purge()
6✔
274

275
    def purge(self):
9✔
276
        """Remove expired entries."""
277
        if len(self.files) > self.maxsize:
6✔
278
            fpath = self.files.pop(0)
1✔
279
            if os.path.exists(fpath):
1✔
280
                os.remove(fpath)
1✔
281

282
    def append(self, fpath):
9✔
283
        """Append a file to the list."""
284
        if fpath not in self.files:
6✔
285
            self.files.append(fpath)
2✔
286
        self.purge()
6✔
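# Usage sketch (illustrative, not from the OGGM sources): keep at most two
# files on disk; the listed paths are hypothetical.
#
#     >>> cache = LRUFileCache(maxsize=2)
#     >>> for f in ['/tmp/a.nc', '/tmp/b.nc', '/tmp/c.nc']:
#     ...     cache.append(f)
#     >>> # '/tmp/a.nc' was the least recently added and is removed from disk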
287

288

289
def lazy_property(fn):
9✔
290
    """Decorator that makes a property lazy-evaluated."""
291

292
    attr_name = '_lazy_' + fn.__name__
9✔
293

294
    @property
9✔
295
    @wraps(fn)
9✔
296
    def _lazy_property(self):
9✔
297
        if not hasattr(self, attr_name):
8✔
298
            setattr(self, attr_name, fn(self))
8✔
299
        return getattr(self, attr_name)
8✔
300

301
    return _lazy_property
9✔
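# Usage sketch (illustrative, not from the OGGM sources): the decorated
# property is computed once and cached on the instance;
# `expensive_computation` is a hypothetical helper.
#
#     >>> class Glacier:
#     ...     @lazy_property
#     ...     def area(self):
#     ...         return expensive_computation(self)
#     >>> # the first access stores the result in '_lazy_area' and reuses it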
302

303

304
def mkdir(path, reset=False):
9✔
305
    """Checks if directory exists and if not, create one.
306

307
    Parameters
308
    ----------
309
    reset: erase the content of the directory if it exists
310

311
    Returns
312
    -------
313
    the path
314
    """
315

316
    if reset and os.path.exists(path):
8✔
317
        shutil.rmtree(path)
2✔
318
        # deleting stuff takes time
319
        while os.path.exists(path):  # check if it still exists
2✔
320
            pass
×
321
    try:
8✔
322
        os.makedirs(path)
8✔
323
    except FileExistsError:
6✔
324
        pass
6✔
325
    return path
8✔
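# Usage sketch (illustrative, not from the OGGM sources): mkdir() returns the
# path, so it can be chained with os.path.join; 'plots' is an arbitrary name.
#
#     >>> plot_dir = mkdir(os.path.join(gettempdir(), 'plots'))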
326

327

328
def include_patterns(*patterns):
9✔
329
    """Factory function that can be used with copytree() ignore parameter.
330

331
    Arguments define a sequence of glob-style patterns
332
    that are used to specify what files to NOT ignore.
333
    Creates and returns a function that determines this for each directory
334
    in the file hierarchy rooted at the source directory when used with
335
    shutil.copytree().
336

337
    https://stackoverflow.com/questions/35155382/copying-specific-files-to-a-
338
    new-folder-while-maintaining-the-original-subdirect
339
    """
340

341
    def _ignore_patterns(path, names):
2✔
342
        # This is our cuisine
343
        bname = os.path.basename(path)
2✔
344
        if 'divide' in bname or 'log' in bname:
2✔
345
            keep = []
×
346
        else:
347
            keep = set(name for pattern in patterns
2✔
348
                       for name in fnmatch.filter(names, pattern))
349
        ignore = set(name for name in names
2✔
350
                     if name not in keep and not
351
                     os.path.isdir(os.path.join(path, name)))
352
        return ignore
2✔
353

354
    return _ignore_patterns
2✔
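# Usage sketch (illustrative, not from the OGGM sources): copy only netCDF and
# JSON files with shutil.copytree; src_dir and dst_dir are hypothetical paths.
#
#     >>> shutil.copytree(src_dir, dst_dir,
#     ...                 ignore=include_patterns('*.nc', '*.json'))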
355

356

357
class ncDataset(netCDF4.Dataset):
9✔
358
    """Wrapper around netCDF4 setting auto_mask to False"""
359

360
    def __init__(self, *args, **kwargs):
9✔
361
        super(ncDataset, self).__init__(*args, **kwargs)
7✔
362
        self.set_auto_mask(False)
7✔
363

364

365
def pipe_log(gdir, task_func_name, err=None):
9✔
366
    """Log the error in a specific directory."""
367

368
    time_str = datetime.datetime.now().strftime('%Y-%m-%dT%H:%M:%S')
6✔
369

370
    # Defaults to working directory: it must be set!
371
    if not cfg.PATHS['working_dir']:
6✔
372
        warnings.warn("Cannot log to file without a valid "
×
373
                      "cfg.PATHS['working_dir']!", RuntimeWarning)
374
        return
×
375

376
    fpath = os.path.join(cfg.PATHS['working_dir'], 'log')
6✔
377
    mkdir(fpath)
6✔
378

379
    fpath = os.path.join(fpath, gdir.rgi_id)
6✔
380

381
    sep = '; '
6✔
382

383
    if err is not None:
6✔
384
        fpath += '.ERROR'
6✔
385
    else:
386
        return  # for now
×
387
        fpath += '.SUCCESS'
388

389
    with open(fpath, 'a') as f:
6✔
390
        f.write(time_str + sep + task_func_name + sep)
6✔
391
        if err is not None:
6✔
392
            f.write(err.__class__.__name__ + sep + '{}\n'.format(err))
6✔
393
        else:
394
            f.write(sep + '\n')
×
395

396

397
class DisableLogger():
9✔
398
    """Context manager to temporarily disable all loggers."""
399

400
    def __enter__(self):
9✔
401
        logging.disable(logging.CRITICAL)
5✔
402

403
    def __exit__(self, a, b, c):
9✔
404
        logging.disable(logging.NOTSET)
5✔
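# Usage sketch (illustrative, not from the OGGM sources): silence all loggers
# around a noisy call; `noisy_call` is a hypothetical function.
#
#     >>> with DisableLogger():
#     ...     noisy_call()
#     >>> # logging is restored when the context exits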
405

406

407
def _timeout_handler(signum, frame):
9✔
408
    raise TimeoutError('This task was killed because of timeout')
×
409

410

411
class entity_task(object):
9✔
412
    """Decorator for common job-controlling logic.
413

414
    All tasks share common operations. This decorator is here to handle them:
415
    exceptions, logging, and (some day) database for job-controlling.
416
    """
417

418
    def __init__(self, log, writes=[], fallback=None):
9✔
419
        """Decorator syntax: ``@entity_task(log, writes=['dem', 'outlines'])``
420

421
        Parameters
422
        ----------
423
        log: logger
424
            module logger
425
        writes: list
426
            list of files that the task will write down to disk (must be
427
            available in ``cfg.BASENAMES``)
428
        fallback: python function
429
            will be executed on gdir if entity_task fails
430
        return_value: bool
431
            whether the return value from the task should be passed over
432
            to the caller or not. In general you will always want this to
433
            be true, but sometimes the task returns things which are not
434
            useful in production and may use a lot of memory, etc.
435
        """
436
        self.log = log
9✔
437
        self.writes = writes
9✔
438
        self.fallback = fallback
9✔
439

440
        cnt = ['    Notes']
9✔
441
        cnt += ['    -----']
9✔
442
        cnt += ['    Files written to the glacier directory:']
9✔
443

444
        for k in sorted(writes):
9✔
445
            cnt += [cfg.BASENAMES.doc_str(k)]
9✔
446
        self.iodoc = '\n'.join(cnt)
9✔
447

448
    def __call__(self, task_func):
9✔
449
        """Decorate."""
450

451
        # Add to the original docstring
452
        if task_func.__doc__ is None:
9✔
453
            raise RuntimeError('Entity tasks should have a docstring!')
×
454

455
        task_func.__doc__ = '\n'.join((task_func.__doc__, self.iodoc))
9✔
456

457
        @wraps(task_func)
9✔
458
        def _entity_task(gdir, *, reset=None, print_log=True,
9✔
459
                         return_value=True, continue_on_error=None,
460
                         add_to_log_file=True, **kwargs):
461

462
            if reset is None:
7✔
463
                reset = not cfg.PARAMS['auto_skip_task']
7✔
464

465
            if continue_on_error is None:
7✔
466
                continue_on_error = cfg.PARAMS['continue_on_error']
7✔
467

468
            task_name = task_func.__name__
7✔
469

470
            # Filesuffixes are typically used to differentiate tasks
471
            fsuffix = (kwargs.get('filesuffix', False) or
7✔
472
                       kwargs.get('output_filesuffix', False))
473
            if fsuffix:
7✔
474
                task_name += fsuffix
3✔
475

476
            # Do we need to run this task?
477
            s = gdir.get_task_status(task_name)
7✔
478
            if not reset and s and ('SUCCESS' in s):
7✔
479
                return
1✔
480

481
            # Log what we are doing
482
            if print_log:
7✔
483
                self.log.info('(%s) %s', gdir.rgi_id, task_name)
7✔
484

485
            # Run the task
486
            try:
7✔
487
                if cfg.PARAMS['task_timeout'] > 0:
7✔
488
                    signal.signal(signal.SIGALRM, _timeout_handler)
×
489
                    signal.alarm(cfg.PARAMS['task_timeout'])
×
490
                ex_t = time.time()
7✔
491
                out = task_func(gdir, **kwargs)
7✔
492
                ex_t = time.time() - ex_t
7✔
493
                if cfg.PARAMS['task_timeout'] > 0:
7✔
494
                    signal.alarm(0)
×
495
                if task_name != 'gdir_to_tar':
7✔
496
                    if add_to_log_file:
7✔
497
                        gdir.log(task_name, task_time=ex_t)
7✔
498
            except Exception as err:
6✔
499
                # Something happened
500
                out = None
6✔
501
                if add_to_log_file:
6✔
502
                    gdir.log(task_name, err=err)
6✔
503
                    pipe_log(gdir, task_name, err=err)
6✔
504
                if print_log:
6✔
505
                    self.log.error('%s occurred during task %s on %s: %s',
6✔
506
                                   type(err).__name__, task_name,
507
                                   gdir.rgi_id, str(err))
508
                if not continue_on_error:
6✔
509
                    raise
6✔
510

511
                if self.fallback is not None:
1✔
512
                    self.fallback(gdir)
×
513
            if return_value:
7✔
514
                return out
7✔
515

516
        _entity_task.__dict__['is_entity_task'] = True
9✔
517
        return _entity_task
9✔
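# Usage sketch (illustrative, not from the OGGM sources): how a task would
# typically use this decorator; 'dem' is assumed to be a key in cfg.BASENAMES.
#
#     >>> @entity_task(log, writes=['dem'])
#     ... def my_task(gdir, some_option=1):
#     ...     """A demo entity task."""
#     ...     pass
#     >>> # calling my_task(gdir) now adds logging, error handling and
#     >>> # per-glacier status tracking around the original function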
518

519

520
class global_task(object):
9✔
521
    """Decorator for common job-controlling logic.
522

523
    Indicates that this task expects a list of all GlacierDirs as parameter
524
    instead of being called once per dir.
525
    """
526

527
    def __init__(self, log):
9✔
528
        """Decorator syntax: ``@global_task(log)``
529

530
        Parameters
531
        ----------
532
        log: logger
533
            module logger
534
        """
535
        self.log = log
9✔
536

537
    def __call__(self, task_func):
9✔
538
        """Decorate."""
539

540
        @wraps(task_func)
9✔
541
        def _global_task(gdirs, **kwargs):
9✔
542

543
            # Should be iterable
544
            gdirs = tolist(gdirs)
7✔
545

546
            self.log.workflow('Applying global task %s on %s glaciers',
7✔
547
                              task_func.__name__, len(gdirs))
548

549
            # Run the task
550
            return task_func(gdirs, **kwargs)
7✔
551

552
        _global_task.__dict__['is_global_task'] = True
9✔
553
        return _global_task
9✔
554

555

556
def get_ref_mb_glaciers_candidates(rgi_version=None):
9✔
557
    """Reads in the WGMS list of glaciers with available MB data.
558

559
    Can be found afterwards (and extended) in cfg.DATA['RGIXX_ref_ids'].
560
    """
561

562
    if rgi_version is None:
1✔
563
        rgi_version = cfg.PARAMS['rgi_version']
1✔
564

565
    if len(rgi_version) == 2:
1✔
566
        # We might change this one day
567
        rgi_version = rgi_version[:1]
1✔
568

569
    key = 'RGI{}0_ref_ids'.format(rgi_version)
1✔
570

571
    if key not in cfg.DATA:
1✔
572
        flink, _ = get_wgms_files()
1✔
573
        cfg.DATA[key] = flink['RGI{}0_ID'.format(rgi_version)].tolist()
1✔
574

575
    return cfg.DATA[key]
1✔
576

577

578
@global_task(log)
9✔
579
def get_ref_mb_glaciers(gdirs, y0=None, y1=None):
9✔
580
    """Get the list of glaciers we have valid mass balance measurements for.
581

582
    To be valid glaciers must have more than 5 years of measurements and
583
    be land terminating. Therefore, the list depends on the time period of the
584
    baseline climate data and this method selects them out of a list
585
    of potential candidates (`gdirs` arg).
586

587
    Parameters
588
    ----------
589
    gdirs : list of :py:class:`oggm.GlacierDirectory` objects
590
        list of glaciers to check for valid reference mass balance data
591
    y0 : int
592
        override the default behavior which is to check the available
593
        climate data (or PARAMS['ref_mb_valid_window']) and decide
594
    y1 : int
595
        override the default behavior which is to check the available
596
        climate data (or PARAMS['ref_mb_valid_window']) and decide
597

598
    Returns
599
    -------
600
    ref_gdirs : list of :py:class:`oggm.GlacierDirectory` objects
601
        list of those glaciers with valid reference mass balance data
602

603
    See Also
604
    --------
605
    get_ref_mb_glaciers_candidates
606
    """
607

608
    # Get the links
609
    ref_ids = get_ref_mb_glaciers_candidates(gdirs[0].rgi_version)
1✔
610

611
    # We remove tidewater glaciers and glaciers with < 5 years
612
    ref_gdirs = []
1✔
613
    for g in gdirs:
1✔
614
        if g.rgi_id not in ref_ids or g.is_tidewater:
1✔
615
            continue
1✔
616
        try:
1✔
617
            mbdf = g.get_ref_mb_data(y0=y0, y1=y1)
1✔
618
            if len(mbdf) >= 5:
1✔
619
                ref_gdirs.append(g)
1✔
620
        except RuntimeError as e:
1✔
621
            if 'Please process some climate data before call' in str(e):
×
622
                raise
×
623
    return ref_gdirs
1✔
624

625

626
def _chaikins_corner_cutting(line, refinements=5):
9✔
627
    """Some magic here.
628

629
    https://stackoverflow.com/questions/47068504/where-to-find-python-
630
    implementation-of-chaikins-corner-cutting-algorithm
631
    """
632
    coords = np.array(line.coords)
1✔
633

634
    for _ in range(refinements):
1✔
635
        L = coords.repeat(2, axis=0)
1✔
636
        R = np.empty_like(L)
1✔
637
        R[0] = L[0]
1✔
638
        R[2::2] = L[1:-1:2]
1✔
639
        R[1:-1:2] = L[2::2]
1✔
640
        R[-1] = L[-1]
1✔
641
        coords = L * 0.75 + R * 0.25
1✔
642

643
    return shpg.LineString(coords)
1✔
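# Usage sketch (illustrative, not from the OGGM sources): smooth a shapely
# LineString; more refinements give a smoother (and denser) geometry.
#
#     >>> line = shpg.LineString([(0, 0), (1, 1), (2, 0)])
#     >>> smooth = _chaikins_corner_cutting(line, refinements=2)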
644

645

646
@entity_task(log)
9✔
647
def get_centerline_lonlat(gdir,
9✔
648
                          keep_main_only=False,
649
                          flowlines_output=False,
650
                          ensure_exterior_match=False,
651
                          geometrical_widths_output=False,
652
                          corrected_widths_output=False,
653
                          to_crs='wgs84',
654
                          simplify_line=0,
655
                          corner_cutting=0):
656
    """Helper task to convert the centerlines to a shapefile
657

658
    Parameters
659
    ----------
660
    gdir : the glacier directory
661
    flowlines_output : create a shapefile for the flowlines
662
    ensure_exterior_match : per design, OGGM centerlines match the underlying
663
    DEM grid. This may imply that they do not "touch" the exterior outlines
664
    of the glacier in vector space. Set this to True to correct for that.
665
    geometrical_widths_output : for the geometrical widths
666
    corrected_widths_output : for the corrected widths
667

668
    Returns
669
    -------
670
    a list of dicts (one per centerline) with the geometry and attributes,
    used by ``write_centerlines_to_shape`` to build the shapefile
671
    """
672
    if flowlines_output or geometrical_widths_output or corrected_widths_output:
1✔
673
        cls = gdir.read_pickle('inversion_flowlines')
1✔
674
    else:
675
        cls = gdir.read_pickle('centerlines')
1✔
676

677
    exterior = None
1✔
678
    if ensure_exterior_match:
1✔
679
        exterior = gdir.read_shapefile('outlines')
1✔
680
        # Transform to grid
681
        tra_func = partial(gdir.grid.transform, crs=exterior.crs)
1✔
682
        exterior = shpg.Polygon(shp_trafo(tra_func, exterior.geometry[0].exterior))
1✔
683

684
    tra_func = partial(gdir.grid.ij_to_crs, crs=to_crs)
1✔
685

686
    olist = []
1✔
687
    for j, cl in enumerate(cls):
1✔
688
        mm = 1 if j == (len(cls)-1) else 0
1✔
689
        if keep_main_only and mm == 0:
1✔
690
            continue
×
691
        if corrected_widths_output:
1✔
692
            le_segment = np.rint(np.max(cl.dis_on_line) * gdir.grid.dx)
1✔
693
            for wi, cur, (n1, n2), wi_m in zip(cl.widths, cl.line.coords,
1✔
694
                                               cl.normals, cl.widths_m):
695
                _l = shpg.LineString([shpg.Point(cur + wi / 2. * n1),
1✔
696
                                      shpg.Point(cur + wi / 2. * n2)])
697
                gs = dict()
1✔
698
                gs['RGIID'] = gdir.rgi_id
1✔
699
                gs['SEGMENT_ID'] = j
1✔
700
                gs['LE_SEGMENT'] = le_segment
1✔
701
                gs['MAIN'] = mm
1✔
702
                gs['WIDTH_m'] = wi_m
1✔
703
                gs['geometry'] = shp_trafo(tra_func, _l)
1✔
704
                olist.append(gs)
1✔
705
        elif geometrical_widths_output:
1✔
706
            le_segment = np.rint(np.max(cl.dis_on_line) * gdir.grid.dx)
1✔
707
            for _l, wi_m in zip(cl.geometrical_widths, cl.widths_m):
1✔
708
                gs = dict()
1✔
709
                gs['RGIID'] = gdir.rgi_id
1✔
710
                gs['SEGMENT_ID'] = j
1✔
711
                gs['LE_SEGMENT'] = le_segment
1✔
712
                gs['MAIN'] = mm
1✔
713
                gs['WIDTH_m'] = wi_m
1✔
714
                gs['geometry'] = shp_trafo(tra_func, _l)
1✔
715
                olist.append(gs)
1✔
716
        else:
717
            gs = dict()
1✔
718
            gs['RGIID'] = gdir.rgi_id
1✔
719
            gs['SEGMENT_ID'] = j
1✔
720
            gs['LE_SEGMENT'] = np.rint(np.max(cl.dis_on_line) * gdir.grid.dx)
1✔
721
            gs['MAIN'] = mm
1✔
722
            line = cl.line
1✔
723
            if ensure_exterior_match:
1✔
724
                # Extend line at the start by 10
725
                fs = shpg.LineString(line.coords[:2])
1✔
726
                # First check if this is necessary - this segment should
727
                # be within the geometry or it's already good to go
728
                if fs.within(exterior):
1✔
729
                    fs = shpa.scale(fs, xfact=3, yfact=3, origin=fs.boundary.geoms[1])
1✔
730
                    line = shpg.LineString([*fs.coords, *line.coords[2:]])
1✔
731
                # If last also extend at the end
732
                if mm == 1:
1✔
733
                    ls = shpg.LineString(line.coords[-2:])
1✔
734
                    if ls.within(exterior):
1✔
735
                        ls = shpa.scale(ls, xfact=3, yfact=3, origin=ls.boundary.geoms[0])
1✔
736
                        line = shpg.LineString([*line.coords[:-2], *ls.coords])
1✔
737

738
                # Simplify and smooth?
739
                if simplify_line:
1✔
740
                    line = line.simplify(simplify_line)
1✔
741
                if corner_cutting:
1✔
742
                    line = _chaikins_corner_cutting(line, corner_cutting)
1✔
743

744
                # Intersect with exterior geom
745
                line = line.intersection(exterior)
1✔
746
                if line.geom_type in ['MultiLineString', 'GeometryCollection']:
1✔
747
                    # Take the longest
748
                    lens = [il.length for il in line.geoms]
×
749
                    line = line.geoms[np.argmax(lens)]
×
750

751
                # Recompute length
752
                gs['LE_SEGMENT'] = np.rint(line.length * gdir.grid.dx)
1✔
753
            gs['geometry'] = shp_trafo(tra_func, line)
1✔
754
            olist.append(gs)
1✔
755

756
    return olist
1✔
757

758

759
def _write_shape_to_disk(gdf, fpath, to_tar=False):
9✔
760
    """Write a shapefile to disk with optional compression
761

762
    Parameters
763
    ----------
764
    gdf : gpd.GeoDataFrame
765
        the data to write
766
    fpath : str
767
        where to write the file - should end in .shp
768
    to_tar : bool
769
        put the files in a .tar file. If cfg.PARAMS['use_compression'],
770
        also compress to .gz
771
    """
772

773
    if '.shp' not in fpath:
7✔
774
        raise ValueError('File ending should be .shp')
×
775

776
    gdf.to_file(fpath)
7✔
777

778
    if not to_tar:
7✔
779
        # Done here
780
        return
3✔
781

782
    # Write them in tar
783
    fpath = fpath.replace('.shp', '.tar')
7✔
784
    mode = 'w'
7✔
785
    if cfg.PARAMS['use_compression']:
7✔
786
        fpath += '.gz'
7✔
787
        mode += ':gz'
7✔
788
    if os.path.exists(fpath):
7✔
789
        os.remove(fpath)
2✔
790

791
    # List all files that were written as shape
792
    fs = glob.glob(fpath.replace('.gz', '').replace('.tar', '.*'))
7✔
793
    # Add them to tar
794
    with tarfile.open(fpath, mode=mode) as tf:
7✔
795
        for ff in fs:
7✔
796
            tf.add(ff, arcname=os.path.basename(ff))
7✔
797

798
    # Delete the old ones
799
    for ff in fs:
7✔
800
        os.remove(ff)
7✔
801

802

803
@global_task(log)
9✔
804
def write_centerlines_to_shape(gdirs, *, path=True, to_tar=False,
9✔
805
                               to_crs='EPSG:4326',
806
                               filesuffix='', flowlines_output=False,
807
                               ensure_exterior_match=False,
808
                               geometrical_widths_output=False,
809
                               corrected_widths_output=False,
810
                               keep_main_only=False,
811
                               simplify_line=0,
812
                               corner_cutting=0):
813
    """Write the centerlines to a shapefile.
814

815
    Parameters
816
    ----------
817
    gdirs:
818
        the list of GlacierDir to process.
819
    path: str or bool
820
        Set to "True" in order  to store the shape in the working directory
821
        Set to a str path to store the file to your chosen location
822
    to_tar : bool
823
        put the files in a .tar file. If cfg.PARAMS['use_compression'],
824
        also compress to .gz
825
    filesuffix : str
826
        add a suffix to the output file
827
    flowlines_output : bool
828
        output the OGGM flowlines instead of the centerlines
829
    geometrical_widths_output : bool
830
        output the geometrical widths instead of the centerlines
831
    corrected_widths_output : bool
832
        output the corrected widths instead of the centerlines
833
    ensure_exterior_match : bool
834
        per design, the centerlines will match the underlying DEM grid.
835
        This may imply that they do not "touch" the exterior outlines of the
836
        glacier in vector space. Set this to True to correct for that.
837
    to_crs : str
838
        write the shape to another coordinate reference system (CRS)
839
    keep_main_only : bool
840
        write only the main flowlines to the output files
841
    simplify_line : float
842
        apply shapely's `simplify` method to the line before writing. It is
843
        a purely cosmetic option, although glacier length will be affected.
844
        All points in the simplified object will be within the tolerance
845
        distance of the original geometry (units: grid points). A good
846
        value to test first is 0.5
847
    corner_cutting : int
848
        apply the Chaikin's corner cutting algorithm to the geometry before
849
        writing. The integer represents the number of refinements to apply.
850
        A good first value to test is 5.
851
    """
852
    from oggm.workflow import execute_entity_task
3✔
853

854
    if path is True:
3✔
855
        path = os.path.join(cfg.PATHS['working_dir'],
2✔
856
                            'glacier_centerlines' + filesuffix + '.shp')
857

858
    _to_crs = salem.check_crs(to_crs)
3✔
859
    if not _to_crs:
3✔
860
        raise InvalidParamsError(f'CRS not understood: {to_crs}')
×
861

862
    log.workflow('write_centerlines_to_shape on {} ...'.format(path))
3✔
863

864
    olist = execute_entity_task(get_centerline_lonlat, gdirs,
3✔
865
                                flowlines_output=flowlines_output,
866
                                ensure_exterior_match=ensure_exterior_match,
867
                                geometrical_widths_output=geometrical_widths_output,
868
                                corrected_widths_output=corrected_widths_output,
869
                                keep_main_only=keep_main_only,
870
                                simplify_line=simplify_line,
871
                                corner_cutting=corner_cutting,
872
                                to_crs=_to_crs)
873
    # filter for none
874
    olist = [o for o in olist if o is not None]
3✔
875
    odf = gpd.GeoDataFrame(itertools.chain.from_iterable(olist))
3✔
876
    odf = odf.sort_values(by='RGIID')
3✔
877
    odf.crs = to_crs
3✔
878
    # Sanity checks to avoid bad surprises
879
    gtype = np.array([g.geom_type for g in odf.geometry])
3✔
880
    if 'GeometryCollection' in gtype:
3✔
881
        errdf = odf.loc[gtype == 'GeometryCollection']
×
882
        with warnings.catch_warnings():
×
883
            # errdf.length warns because of use of wgs84
884
            warnings.filterwarnings("ignore", category=UserWarning)
×
885
            if not np.all(errdf.length == 0):
×
886
                errdf = errdf.loc[errdf.length > 0]
×
887
                raise RuntimeError('Some geometries are non-empty GeometryCollection '
×
888
                                   f'at RGI Ids: {errdf.RGIID.values}')
889
    _write_shape_to_disk(odf, path, to_tar=to_tar)
3✔
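# Usage sketch (illustrative, not from the OGGM sources): write smoothed
# centerlines for a list of prepared glacier directories; the parameter values
# follow the suggestions in the docstring above.
#
#     >>> write_centerlines_to_shape(gdirs, path='centerlines.shp',
#     ...                            ensure_exterior_match=True,
#     ...                            simplify_line=0.5, corner_cutting=5)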
890

891

892
def demo_glacier_id(key):
9✔
893
    """Get the RGI id of a glacier by name or key: None if not found."""
894

895
    df = cfg.DATA['demo_glaciers']
1✔
896

897
    # Is the name in key?
898
    s = df.loc[df.Key.str.lower() == key.lower()]
1✔
899
    if len(s) == 1:
1✔
900
        return s.index[0]
1✔
901

902
    # Is the name in name?
903
    s = df.loc[df.Name.str.lower() == key.lower()]
1✔
904
    if len(s) == 1:
1✔
905
        return s.index[0]
1✔
906

907
    # Is the name in Ids?
908
    try:
1✔
909
        s = df.loc[[key]]
1✔
910
        if len(s) == 1:
1✔
911
            return s.index[0]
1✔
912
    except KeyError:
1✔
913
        pass
1✔
914

915
    return None
1✔
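# Usage sketch (illustrative, not from the OGGM sources): look up a demo
# glacier by key, name or RGI id; returns None when nothing matches.
# 'hintereisferner' is assumed to be a key in cfg.DATA['demo_glaciers'].
#
#     >>> rid = demo_glacier_id('hintereisferner')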
916

917

918
class compile_to_netcdf(object):
9✔
919
    """Decorator for common compiling NetCDF files logic.
920

921
    All compile_* tasks can be optimized the same way, by using temporary
922
    files and merging them afterwards.
923
    """
924

925
    def __init__(self, log):
9✔
926
        """Decorator syntax: ``@compile_to_netcdf(log, n_tmp_files=1000)``
927

928
        Parameters
929
        ----------
930
        log: logger
931
            module logger
932
        tmp_file_size: int
933
            number of glacier directories per temporary file
934
        """
935
        self.log = log
9✔
936

937
    def __call__(self, task_func):
9✔
938
        """Decorate."""
939

940
        @wraps(task_func)
9✔
941
        def _compile_to_netcdf(gdirs, input_filesuffix='',
9✔
942
                               output_filesuffix='',
943
                               path=True,
944
                               tmp_file_size=1000,
945
                               **kwargs):
946

947
            if not output_filesuffix:
5✔
948
                output_filesuffix = input_filesuffix
5✔
949

950
            gdirs = tolist(gdirs)
5✔
951
            task_name = task_func.__name__
5✔
952
            output_base = task_name.replace('compile_', '')
5✔
953

954
            if path is True:
5✔
955
                path = os.path.join(cfg.PATHS['working_dir'],
5✔
956
                                    output_base + output_filesuffix + '.nc')
957

958
            self.log.workflow('Applying %s on %d gdirs.',
5✔
959
                              task_name, len(gdirs))
960

961
            # Run the task
962
            # If small gdir size, no need for temporary files
963
            if len(gdirs) < tmp_file_size or not path:
5✔
964
                return task_func(gdirs, input_filesuffix=input_filesuffix,
5✔
965
                                 path=path, **kwargs)
966

967
            # Otherwise, divide and conquer
968
            sub_gdirs = [gdirs[i: i + tmp_file_size] for i in
1✔
969
                         range(0, len(gdirs), tmp_file_size)]
970

971
            tmp_paths = [os.path.join(cfg.PATHS['working_dir'],
1✔
972
                                      'compile_tmp_{:06d}.nc'.format(i))
973
                         for i in range(len(sub_gdirs))]
974

975
            try:
1✔
976
                for spath, sgdirs in zip(tmp_paths, sub_gdirs):
1✔
977
                    task_func(sgdirs, input_filesuffix=input_filesuffix,
1✔
978
                              path=spath, **kwargs)
979
            except BaseException:
×
980
                # If something wrong, delete the tmp files
981
                for f in tmp_paths:
×
982
                    try:
×
983
                        os.remove(f)
×
984
                    except FileNotFoundError:
×
985
                        pass
×
986
                raise
×
987

988
            # Ok, now merge and return
989
            try:
1✔
990
                with xr.open_mfdataset(tmp_paths, combine='nested',
1✔
991
                                       concat_dim='rgi_id') as ds:
992
                    # the .load() is actually quite uncool here, but it solves
993
                    # an unbelievable stalling problem in multiproc
994
                    ds.load().to_netcdf(path)
1✔
995
            except TypeError:
×
996
                # xr < v 0.13
997
                with xr.open_mfdataset(tmp_paths, concat_dim='rgi_id') as ds:
×
998
                    # the .load() is actually quite uncool here, but it solves
999
                    # an unbelievable stalling problem in multiproc
1000
                    ds.load().to_netcdf(path)
×
1001

1002
            # We can't return the dataset without loading it, so we don't
1003
            return None
1✔
1004

1005
        return _compile_to_netcdf
9✔
1006

1007

1008
@entity_task(log)
9✔
1009
def merge_consecutive_run_outputs(gdir,
9✔
1010
                                  input_filesuffix_1=None,
1011
                                  input_filesuffix_2=None,
1012
                                  output_filesuffix=None,
1013
                                  delete_input=False):
1014
    """Merges the output of two model_diagnostics files into one.
1015

1016
    It assumes that the last time of file1 is equal to the first time of file2.
1017

1018
    Parameters
1019
    ----------
1020
    gdir : the glacier directory
1021
    input_filesuffix_1 : str
1022
        how to recognize the first file
1023
    input_filesuffix_2 : str
1024
        how to recognize the second file
1025
    output_filesuffix : str
1026
        where to write the output (default: no suffix)
    delete_input : bool
        whether to delete the two input files once they are merged
1027

1028
    Returns
1029
    -------
1030
    The merged dataset
1031
    """
1032

1033
    # Read in the input files and check
1034
    fp1 = gdir.get_filepath('model_diagnostics', filesuffix=input_filesuffix_1)
1✔
1035
    with xr.open_dataset(fp1) as ds:
1✔
1036
        ds1 = ds.load()
1✔
1037
    fp2 = gdir.get_filepath('model_diagnostics', filesuffix=input_filesuffix_2)
1✔
1038
    with xr.open_dataset(fp2) as ds:
1✔
1039
        ds2 = ds.load()
1✔
1040
    if ds1.time[-1] != ds2.time[0]:
1✔
1041
        raise InvalidWorkflowError('The two files are incompatible by time')
×
1042

1043
    # Sanity check for all variables as well
1044
    for v in ds1:
1✔
1045
        if not np.all(np.isfinite(ds1[v].data[-1])):
1✔
1046
            # This is the last year of hydro output - we will discard anyway
1047
            continue
1✔
1048
        if np.allclose(ds1[v].data[-1], ds2[v].data[0]):
1✔
1049
            # This means that we're OK - the two match
1050
            continue
1✔
1051

1052
        # This has to be a bucket of some sort, probably snow or calving
1053
        if len(ds2[v].data.shape) == 1:
1✔
1054
            if ds2[v].data[0] != 0:
1✔
1055
                raise InvalidWorkflowError('The two files seem incompatible '
×
1056
                                           f'by data on variable : {v}')
1057
            bucket = ds1[v].data[-1]
1✔
1058
        elif len(ds2[v].data.shape) == 2:
1✔
1059
            if ds2[v].data[0, 0] != 0:
1✔
1060
                raise InvalidWorkflowError('The two files seem incompatible '
×
1061
                                           f'by data on variable : {v}')
1062
            bucket = ds1[v].data[-1, -1]
1✔
1063
        # Carry it to the rest
1064
        ds2[v] = ds2[v] + bucket
1✔
1065

1066
    # Merge by removing the last step of file 1 and delete the files if asked
1067
    out_ds = xr.concat([ds1.isel(time=slice(0, -1)), ds2], dim='time')
1✔
1068
    if delete_input:
1✔
1069
        os.remove(fp1)
×
1070
        os.remove(fp2)
×
1071
    # Write out and return
1072
    fp = gdir.get_filepath('model_diagnostics', filesuffix=output_filesuffix)
1✔
1073
    out_ds.to_netcdf(fp)
1✔
1074
    return out_ds
1✔
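# Usage sketch (illustrative, not from the OGGM sources): merge a spinup run
# with the run that continued from it; the filesuffixes are assumptions about
# how the two runs were written.
#
#     >>> merge_consecutive_run_outputs(gdir,
#     ...                               input_filesuffix_1='_spinup',
#     ...                               input_filesuffix_2='_historical',
#     ...                               output_filesuffix='_merged')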
1075

1076

1077
@global_task(log)
9✔
1078
@compile_to_netcdf(log)
9✔
1079
def compile_run_output(gdirs, path=True, input_filesuffix='',
9✔
1080
                       use_compression=True):
1081
    """Compiles the output of the model runs of several gdirs into one file.
1082

1083
    Parameters
1084
    ----------
1085
    gdirs : list of :py:class:`oggm.GlacierDirectory` objects
1086
        the glacier directories to process
1087
    path : str
1088
        where to store (default is on the working dir).
1089
        Set to `False` to disable disk storage.
1090
    input_filesuffix : str
1091
        the filesuffix of the files to be compiled
1092
    use_compression : bool
1093
        use zlib compression on the output netCDF files
1094

1095
    Returns
1096
    -------
1097
    ds : :py:class:`xarray.Dataset`
1098
        compiled output
1099
    """
1100

1101
    # Get the dimensions of all this
1102
    rgi_ids = [gd.rgi_id for gd in gdirs]
4✔
1103

1104
    # To find the longest time, we unfortunately have to open all files. We
1105
    # also create a list of all data variables (in case not all files contain
1106
    # the same data variables), and finally we decide on the name of "3d"
1107
    # variables in case we have daily output
1108
    time_info = {}
4✔
1109
    time_keys = ['hydro_year', 'hydro_month', 'calendar_year', 'calendar_month']
4✔
1110
    allowed_data_vars = ['volume_m3', 'volume_bsl_m3', 'volume_bwl_m3',
4✔
1111
                         'volume_m3_min_h',  # only here for back compatibility
1112
                         # as it is a variable in gdirs v1.6 2023.1
1113
                         'area_m2', 'area_m2_min_h', 'length_m', 'calving_m3',
1114
                         'calving_rate_myr', 'off_area',
1115
                         'on_area', 'model_mb', 'is_fixed_geometry_spinup']
1116
    for gi in range(10):
4✔
1117
        allowed_data_vars += [f'terminus_thick_{gi}']
4✔
1118
    # these hydro variables can be _monthly or _daily
1119
    hydro_vars = ['melt_off_glacier', 'melt_on_glacier',
4✔
1120
                  'liq_prcp_off_glacier', 'liq_prcp_on_glacier',
1121
                  'snowfall_off_glacier', 'snowfall_on_glacier',
1122
                  'melt_residual_off_glacier', 'melt_residual_on_glacier',
1123
                  'snow_bucket', 'residual_mb']
1124
    for v in hydro_vars:
4✔
1125
        allowed_data_vars += [v]
4✔
1126
        allowed_data_vars += [v + '_monthly']
4✔
1127
        allowed_data_vars += [v + '_daily']
4✔
1128
    data_vars = {}
4✔
1129
    name_2d_dim = 'month_2d'
4✔
1130
    contains_3d_data = False
4✔
1131
    for gd in gdirs:
4✔
1132
        fp = gd.get_filepath('model_diagnostics', filesuffix=input_filesuffix)
4✔
1133
        try:
4✔
1134
            with ncDataset(fp) as ds:
4✔
1135
                time = ds.variables['time'][:]
4✔
1136
                if 'time' not in time_info:
4✔
1137
                    time_info['time'] = time
4✔
1138
                    for cn in time_keys:
4✔
1139
                        time_info[cn] = ds.variables[cn][:]
4✔
1140
                else:
1141
                    # Here we may need to append or add stuff
1142
                    ot = time_info['time']
4✔
1143
                    if time[0] > ot[-1] or time[-1] < ot[0]:
4✔
1144
                        raise InvalidWorkflowError('Trying to compile output '
×
1145
                                                   'without overlap.')
1146
                    if time[-1] > ot[-1]:
4✔
1147
                        p = np.nonzero(time == ot[-1])[0][0] + 1
×
1148
                        time_info['time'] = np.append(ot, time[p:])
×
1149
                        for cn in time_keys:
×
1150
                            time_info[cn] = np.append(time_info[cn],
×
1151
                                                      ds.variables[cn][p:])
1152
                    if time[0] < ot[0]:
4✔
1153
                        p = np.nonzero(time == ot[0])[0][0]
×
1154
                        time_info['time'] = np.append(time[:p], ot)
×
1155
                        for cn in time_keys:
×
1156
                            time_info[cn] = np.append(ds.variables[cn][:p],
×
1157
                                                      time_info[cn])
1158

1159
                # check if there are new data variables and add them
1160
                for vn in ds.variables:
4✔
1161
                    # exclude time variables
1162
                    if vn in ['month_2d', 'calendar_month_2d',
4✔
1163
                              'hydro_month_2d']:
1164
                        name_2d_dim = 'month_2d'
2✔
1165
                        contains_3d_data = True
2✔
1166
                    elif vn in ['day_2d', 'calendar_day_2d', 'hydro_day_2d']:
4✔
1167
                        name_2d_dim = 'day_2d'
×
1168
                        contains_3d_data = True
×
1169
                    elif vn in allowed_data_vars:
4✔
1170
                        # check if data variable is new
1171
                        if vn not in data_vars.keys():
4✔
1172
                            data_vars[vn] = dict()
4✔
1173
                            data_vars[vn]['dims'] = ds.variables[vn].dimensions
4✔
1174
                            data_vars[vn]['attrs'] = dict()
4✔
1175
                            for attr in ds.variables[vn].ncattrs():
4✔
1176
                                if attr not in ['_FillValue', 'coordinates',
4✔
1177
                                                'dtype']:
1178
                                    data_vars[vn]['attrs'][attr] = getattr(
4✔
1179
                                        ds.variables[vn], attr)
1180
                    elif vn not in ['time'] + time_keys:
4✔
1181
                        # This check has future developments in mind.
1182
                        # If you end here it means the current data variable is
1183
                        # not under the allowed_data_vars OR not under the
1184
                        # defined time dimensions. If it is a new data variable
1185
                        # add it to allowed_data_vars above (also add it to
1186
                        # test_compile_run_output). If it is a new dimension
1187
                        # handle it in the if/elif statements.
1188
                        raise InvalidParamsError(f'The data variable "{vn}" '
×
1189
                                                 'is not known. Is it new or '
1190
                                                 'is it a new dimension? '
1191
                                                 'Check comment above this '
1192
                                                 'raise for more info!')
1193

1194
            # If this worked, keep it as template
1195
            ppath = fp
4✔
1196
        except FileNotFoundError:
×
1197
            pass
×
1198

1199
    if 'time' not in time_info:
4✔
1200
        raise RuntimeError('Found no valid glaciers!')
×
1201

1202
    # OK found it, open it and prepare the output
1203
    with xr.open_dataset(ppath) as ds_diag:
4✔
1204

1205
        # Prepare output
1206
        ds = xr.Dataset()
4✔
1207

1208
        # Global attributes
1209
        ds.attrs['description'] = 'OGGM model output'
4✔
1210
        ds.attrs['oggm_version'] = __version__
4✔
1211
        ds.attrs['calendar'] = '365-day no leap'
4✔
1212
        ds.attrs['creation_date'] = strftime("%Y-%m-%d %H:%M:%S", gmtime())
4✔
1213

1214
        # Copy coordinates
1215
        time = time_info['time']
4✔
1216
        ds.coords['time'] = ('time', time)
4✔
1217
        ds['time'].attrs['description'] = 'Floating year'
4✔
1218
        # New coord
1219
        ds.coords['rgi_id'] = ('rgi_id', rgi_ids)
4✔
1220
        ds['rgi_id'].attrs['description'] = 'RGI glacier identifier'
4✔
1221
        # This is just taken from there
1222
        for cn in ['hydro_year', 'hydro_month',
4✔
1223
                   'calendar_year', 'calendar_month']:
1224
            ds.coords[cn] = ('time', time_info[cn])
4✔
1225
            ds[cn].attrs['description'] = ds_diag[cn].attrs['description']
4✔
1226

1227
        # Prepare the 2D variables
1228
        shape = (len(time), len(rgi_ids))
4✔
1229
        out_2d = dict()
4✔
1230
        for vn in data_vars:
4✔
1231
            if name_2d_dim in data_vars[vn]['dims']:
4✔
1232
                continue
2✔
1233
            var = dict()
4✔
1234
            var['data'] = np.full(shape, np.nan)
4✔
1235
            var['attrs'] = data_vars[vn]['attrs']
4✔
1236
            out_2d[vn] = var
4✔
1237

1238
        # 1D Variables
1239
        out_1d = dict()
4✔
1240
        for vn, attrs in [('water_level', {'description': 'Calving water level',
4✔
1241
                                           'units': 'm'}),
1242
                          ('glen_a', {'description': 'Simulation Glen A',
1243
                                      'units': ''}),
1244
                          ('fs', {'description': 'Simulation sliding parameter',
1245
                                  'units': ''}),
1246
                          ]:
1247
            var = dict()
4✔
1248
            var['data'] = np.full(len(rgi_ids), np.nan)
4✔
1249
            var['attrs'] = attrs
4✔
1250
            out_1d[vn] = var
4✔
1251

1252
        # Maybe 3D?
1253
        out_3d = dict()
4✔
1254
        if contains_3d_data:
4✔
1255
            # We have some 3d vars
1256
            month_2d = ds_diag[name_2d_dim]
2✔
1257
            ds.coords[name_2d_dim] = (name_2d_dim, month_2d.data)
2✔
1258
            cn = f'calendar_{name_2d_dim}'
2✔
1259
            ds.coords[cn] = (name_2d_dim, ds_diag[cn].values)
2✔
1260

1261
            shape = (len(time), len(month_2d), len(rgi_ids))
2✔
1262
            for vn in data_vars:
2✔
1263
                if name_2d_dim not in data_vars[vn]['dims']:
2✔
1264
                    continue
2✔
1265
                var = dict()
2✔
1266
                var['data'] = np.full(shape, np.nan)
2✔
1267
                var['attrs'] = data_vars[vn]['attrs']
2✔
1268
                out_3d[vn] = var
2✔
1269

1270
    # Read out
1271
    for i, gdir in enumerate(gdirs):
4✔
1272
        try:
4✔
1273
            ppath = gdir.get_filepath('model_diagnostics',
4✔
1274
                                      filesuffix=input_filesuffix)
1275
            with ncDataset(ppath) as ds_diag:
4✔
1276
                it = ds_diag.variables['time'][:]
4✔
1277
                a = np.nonzero(time == it[0])[0][0]
4✔
1278
                b = np.nonzero(time == it[-1])[0][0] + 1
4✔
1279
                for vn, var in out_2d.items():
4✔
1280
                    # try statement in case some data variables are not in all files
1281
                    try:
4✔
1282
                        var['data'][a:b, i] = ds_diag.variables[vn][:]
4✔
1283
                    except KeyError:
1✔
1284
                        pass
1✔
1285
                for vn, var in out_3d.items():
4✔
1286
                    # try statement in case some data variables are not in all files
1287
                    try:
2✔
1288
                        var['data'][a:b, :, i] = ds_diag.variables[vn][:]
2✔
1289
                    except KeyError:
1✔
1290
                        pass
1✔
1291
                for vn, var in out_1d.items():
4✔
1292
                    var['data'][i] = ds_diag.getncattr(vn)
4✔
1293
        except FileNotFoundError:
×
1294
            pass
×
1295

1296
    # To xarray
1297
    for vn, var in out_2d.items():
4✔
1298
        # Backwards compatibility - to remove one day...
1299
        for r in ['_m3', '_m2', '_myr', '_m']:
4✔
1300
            # Order matters
1301
            vn = regexp.sub(r + '$', '', vn)
4✔
1302
        ds[vn] = (('time', 'rgi_id'), var['data'])
4✔
1303
        ds[vn].attrs = var['attrs']
4✔
1304
    for vn, var in out_3d.items():
4✔
1305
        ds[vn] = (('time', name_2d_dim, 'rgi_id'), var['data'])
2✔
1306
        ds[vn].attrs = var['attrs']
2✔
1307
    for vn, var in out_1d.items():
4✔
1308
        ds[vn] = (('rgi_id', ), var['data'])
4✔
1309
        ds[vn].attrs = var['attrs']
4✔
1310

1311
    # To file?
1312
    if path:
4✔
1313
        enc_var = {'dtype': 'float32'}
4✔
1314
        if use_compression:
4✔
1315
            enc_var['complevel'] = 5
4✔
1316
            enc_var['zlib'] = True
4✔
1317
        encoding = {v: enc_var for v in ds.data_vars}
4✔
1318
        ds.to_netcdf(path, encoding=encoding)
4✔
1319

1320
    return ds
4✔
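# Usage sketch (illustrative, not from the OGGM sources): compile the
# diagnostics of all runs written with a given suffix; '_historical' is an
# assumption about the runs' output_filesuffix.
#
#     >>> ds = compile_run_output(gdirs, input_filesuffix='_historical')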
1321

1322

1323
@global_task(log)
9✔
1324
@compile_to_netcdf(log)
9✔
1325
def compile_climate_input(gdirs, path=True, filename='climate_historical',
9✔
1326
                          input_filesuffix='', use_compression=True):
1327
    """Merge the climate input files in the glacier directories into one file.
1328

1329
    Parameters
1330
    ----------
1331
    gdirs : list of :py:class:`oggm.GlacierDirectory` objects
1332
        the glacier directories to process
1333
    path : str
1334
        where to store (default is on the working dir).
1335
        Set to `False` to disable disk storage.
1336
    filename : str
1337
        BASENAME of the climate input files
1338
    input_filesuffix : str
1339
        the filesuffix of the files to be compiled
1340
    use_compression : bool
1341
        use zlib compression on the output netCDF files
1342

1343
    Returns
1344
    -------
1345
    ds : :py:class:`xarray.Dataset`
1346
        compiled climate data
1347
    """
1348

1349
    # Get the dimensions of all this
1350
    rgi_ids = [gd.rgi_id for gd in gdirs]
1✔
1351

1352
    # The first gdir might have blown up, try some others
1353
    i = 0
1✔
1354
    while True:
1✔
1355
        if i >= len(gdirs):
1✔
1356
            raise RuntimeError('Found no valid glaciers!')
×
1357
        try:
1✔
1358
            pgdir = gdirs[i]
1✔
1359
            ppath = pgdir.get_filepath(filename=filename,
1✔
1360
                                       filesuffix=input_filesuffix)
1361
            with xr.open_dataset(ppath) as ds_clim:
1✔
1362
                ds_clim.time.values
1✔
1363
            # If this worked, we have a valid gdir
1364
            break
1✔
1365
        except BaseException:
×
1366
            i += 1
×
1367

1368
    with xr.open_dataset(ppath) as ds_clim:
1✔
1369
        cyrs = ds_clim['time.year']
1✔
1370
        cmonths = ds_clim['time.month']
1✔
1371
        sm = cfg.PARAMS['hydro_month_' + pgdir.hemisphere]
1✔
1372
        hyrs, hmonths = calendardate_to_hydrodate(cyrs, cmonths, start_month=sm)
1✔
1373
        time = date_to_floatyear(cyrs, cmonths)
1✔
1374

1375
    # Prepare output
1376
    ds = xr.Dataset()
1✔
1377

1378
    # Global attributes
1379
    ds.attrs['description'] = 'OGGM model output'
1✔
1380
    ds.attrs['oggm_version'] = __version__
1✔
1381
    ds.attrs['calendar'] = '365-day no leap'
1✔
1382
    ds.attrs['creation_date'] = strftime("%Y-%m-%d %H:%M:%S", gmtime())
1✔
1383

1384
    # Coordinates
1385
    ds.coords['time'] = ('time', time)
1✔
1386
    ds.coords['rgi_id'] = ('rgi_id', rgi_ids)
1✔
1387
    ds.coords['calendar_year'] = ('time', cyrs.data)
1✔
1388
    ds.coords['calendar_month'] = ('time', cmonths.data)
1✔
1389
    ds.coords['hydro_year'] = ('time', hyrs)
1✔
1390
    ds.coords['hydro_month'] = ('time', hmonths)
1✔
1391
    ds['time'].attrs['description'] = 'Floating year'
1✔
1392
    ds['rgi_id'].attrs['description'] = 'RGI glacier identifier'
1✔
1393
    ds['hydro_year'].attrs['description'] = 'Hydrological year'
1✔
1394
    ds['hydro_month'].attrs['description'] = 'Hydrological month'
1✔
1395
    ds['calendar_year'].attrs['description'] = 'Calendar year'
1✔
1396
    ds['calendar_month'].attrs['description'] = 'Calendar month'
1✔
1397

1398
    shape = (len(time), len(rgi_ids))
1✔
1399
    temp = np.zeros(shape) * np.NaN
1✔
1400
    prcp = np.zeros(shape) * np.NaN
1✔
1401

1402
    ref_hgt = np.zeros(len(rgi_ids)) * np.NaN
1✔
1403
    ref_pix_lon = np.zeros(len(rgi_ids)) * np.NaN
1✔
1404
    ref_pix_lat = np.zeros(len(rgi_ids)) * np.NaN
1✔
1405

1406
    for i, gdir in enumerate(gdirs):
1✔
1407
        try:
1✔
1408
            ppath = gdir.get_filepath(filename=filename,
1✔
1409
                                      filesuffix=input_filesuffix)
1410
            with xr.open_dataset(ppath) as ds_clim:
1✔
1411
                prcp[:, i] = ds_clim.prcp.values
1✔
1412
                temp[:, i] = ds_clim.temp.values
1✔
1413
                ref_hgt[i] = ds_clim.ref_hgt
1✔
1414
                ref_pix_lon[i] = ds_clim.ref_pix_lon
1✔
1415
                ref_pix_lat[i] = ds_clim.ref_pix_lat
1✔
1416
        except BaseException:
×
1417
            pass
×
1418

1419
    ds['temp'] = (('time', 'rgi_id'), temp)
1✔
1420
    ds['temp'].attrs['units'] = 'DegC'
1✔
1421
    ds['temp'].attrs['description'] = '2m Temperature at height ref_hgt'
1✔
1422
    ds['prcp'] = (('time', 'rgi_id'), prcp)
1✔
1423
    ds['prcp'].attrs['units'] = 'kg m-2'
1✔
1424
    ds['prcp'].attrs['description'] = 'total monthly precipitation amount'
1✔
1425
    ds['ref_hgt'] = ('rgi_id', ref_hgt)
1✔
1426
    ds['ref_hgt'].attrs['units'] = 'm'
1✔
1427
    ds['ref_hgt'].attrs['description'] = 'reference height'
1✔
1428
    ds['ref_pix_lon'] = ('rgi_id', ref_pix_lon)
1✔
1429
    ds['ref_pix_lon'].attrs['description'] = 'longitude'
1✔
1430
    ds['ref_pix_lat'] = ('rgi_id', ref_pix_lat)
1✔
1431
    ds['ref_pix_lat'].attrs['description'] = 'latitude'
1✔
1432

1433
    if path:
1✔
1434
        enc_var = {'dtype': 'float32'}
1✔
1435
        if use_compression:
1✔
1436
            enc_var['complevel'] = 5
1✔
1437
            enc_var['zlib'] = True
1✔
1438
        vars = ['temp', 'prcp']
1✔
1439
        encoding = {v: enc_var for v in vars}
1✔
1440
        ds.to_netcdf(path, encoding=encoding)
1✔
1441
    return ds
1✔
1442

1443
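
# Illustrative usage sketch, not part of the original module: how
# `compile_climate_input` is typically called on a list of pre-processed
# glacier directories. The prepro level/border and the RGI id below are
# assumptions made for this example only; `cfg` and `gettempdir` are the
# module-level objects defined above.
def _example_compile_climate_input():  # pragma: no cover
    from oggm import workflow

    cfg.initialize()
    cfg.PATHS['working_dir'] = gettempdir('oggm_climate_demo', reset=True)
    gdirs = workflow.init_glacier_directories(
        ['RGI60-11.00897'],  # Hintereisferner
        from_prepro_level=3, prepro_border=80)

    # path=True (the default) also writes the compiled file to the working
    # directory; the returned dataset has one entry per glacier along 'rgi_id'
    ds = compile_climate_input(gdirs, filename='climate_historical')
    return ds
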

1444
@global_task(log)
9✔
1445
def compile_task_log(gdirs, task_names=[], filesuffix='', path=True,
9✔
1446
                     append=True):
1447
    """Gathers the log output for the selected task(s)
1448

1449
    Parameters
1450
    ----------
1451
    gdirs : list of :py:class:`oggm.GlacierDirectory` objects
1452
        the glacier directories to process
1453
    task_names : list of str
1454
        The tasks to check for
1455
    filesuffix : str
1456
        add suffix to output file
1457
    path : str, bool
1458
        Set to `True` to store the info in the working directory
1459
        Set to a path to store the file at a location of your choice
1460
        Set to `False` to omit disk storage
1461
    append:
1462
        If a task log file already exists in the working directory, the new
1463
        logs will be added to the existing file
1464

1465
    Returns
1466
    -------
1467
    out : :py:class:`pandas.DataFrame`
1468
        log output
1469
    """
1470

1471
    out_df = []
3✔
1472
    for gdir in gdirs:
3✔
1473
        d = OrderedDict()
3✔
1474
        d['rgi_id'] = gdir.rgi_id
3✔
1475
        for task_name in task_names:
3✔
1476
            ts = gdir.get_task_status(task_name)
3✔
1477
            if ts is None:
3✔
1478
                ts = ''
1✔
1479
            d[task_name] = ts.replace(',', ' ')
3✔
1480
        out_df.append(d)
3✔
1481

1482
    out = pd.DataFrame(out_df).set_index('rgi_id')
3✔
1483
    if path:
3✔
1484
        if path is True:
3✔
1485
            path = os.path.join(cfg.PATHS['working_dir'],
3✔
1486
                                'task_log' + filesuffix + '.csv')
1487
        if os.path.exists(path) and append:
3✔
1488
            odf = pd.read_csv(path, index_col=0)
1✔
1489
            out = odf.join(out, rsuffix='_n')
1✔
1490
        out.to_csv(path)
3✔
1491
    return out
3✔
1492

1493
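
# Illustrative usage sketch, not part of the original module: gather the task
# statuses of two standard OGGM tasks. Task names and the filesuffix are
# example choices; statuses are empty strings for tasks that never ran locally.
def _example_compile_task_log():  # pragma: no cover
    from oggm import workflow

    cfg.initialize()
    cfg.PATHS['working_dir'] = gettempdir('oggm_tasklog_demo', reset=True)
    gdirs = workflow.init_glacier_directories(
        ['RGI60-11.00897'], from_prepro_level=3, prepro_border=80)

    # one row per glacier, one column per task; also writes
    # 'task_log_demo.csv' to the working directory
    df = compile_task_log(gdirs,
                          task_names=['glacier_masks', 'compute_centerlines'],
                          filesuffix='_demo')
    return df
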

1494
@global_task(log)
9✔
1495
def compile_task_time(gdirs, task_names=[], filesuffix='', path=True,
9✔
1496
                      append=True):
1497
    """Gathers the time needed for the selected task(s) to run
1498

1499
    Parameters
1500
    ----------
1501
    gdirs : list of :py:class:`oggm.GlacierDirectory` objects
1502
        the glacier directories to process
1503
    task_names : list of str
1504
        The tasks to check for
1505
    filesuffix : str
1506
        add suffix to output file
1507
    path : str, bool
1508
        Set to `True` to store the info in the working directory
1509
        Set to a path to store the file at a location of your choice
1510
        Set to `False` to omit disk storage
1511
    append:
1512
        If a task time file already exists in the working directory, the new
1513
        times will be added to the existing file
1514

1515
    Returns
1516
    -------
1517
    out : :py:class:`pandas.DataFrame`
1518
        log output
1519
    """
1520

1521
    out_df = []
1✔
1522
    for gdir in gdirs:
1✔
1523
        d = OrderedDict()
1✔
1524
        d['rgi_id'] = gdir.rgi_id
1✔
1525
        for task_name in task_names:
1✔
1526
            d[task_name] = gdir.get_task_time(task_name)
1✔
1527
        out_df.append(d)
1✔
1528

1529
    out = pd.DataFrame(out_df).set_index('rgi_id')
1✔
1530
    if path:
1✔
1531
        if path is True:
1✔
1532
            path = os.path.join(cfg.PATHS['working_dir'],
1✔
1533
                                'task_time' + filesuffix + '.csv')
1534
        if os.path.exists(path) and append:
1✔
1535
            odf = pd.read_csv(path, index_col=0)
1✔
1536
            out = odf.join(out, rsuffix='_n')
1✔
1537
        out.to_csv(path)
1✔
1538
    return out
1✔
1539

1540

1541
@entity_task(log)
9✔
1542
def glacier_statistics(gdir, inversion_only=False, apply_func=None):
9✔
1543
    """Gather as much statistics as possible about this glacier.
1544

1545
    It can be used for result diagnostics and other analyses. If the data
1546
    necessary for a statistic is not available (e.g. flowline lengths) it
1547
    will simply be ignored.
1548

1549
    Parameters
1550
    ----------
1551
    inversion_only : bool
1552
        if one wants to summarize the inversion output only (including calving)
1553
    apply_func : function
1554
        if one wants to summarize further information about a glacier, set
1555
        this kwarg to a function that accepts a glacier directory as first
1556
        positional argument, and the dictionary to fill in with data as
1557
        second argument. The dictionary should only store scalar values (strings,
1558
        float, int)
1559
    """
1560

1561
    d = OrderedDict()
4✔
1562

1563
    # Easy stats - this should always be possible
1564
    d['rgi_id'] = gdir.rgi_id
4✔
1565
    d['rgi_region'] = gdir.rgi_region
4✔
1566
    d['rgi_subregion'] = gdir.rgi_subregion
4✔
1567
    d['name'] = gdir.name
4✔
1568
    d['cenlon'] = gdir.cenlon
4✔
1569
    d['cenlat'] = gdir.cenlat
4✔
1570
    d['rgi_area_km2'] = gdir.rgi_area_km2
4✔
1571
    d['rgi_year'] = gdir.rgi_date
4✔
1572
    d['glacier_type'] = gdir.glacier_type
4✔
1573
    d['terminus_type'] = gdir.terminus_type
4✔
1574
    d['is_tidewater'] = gdir.is_tidewater
4✔
1575
    d['status'] = gdir.status
4✔
1576

1577
    # The rest is less certain. We put these in a try block and see
1578
    # We're good with any error - we store the dict anyway below
1579
    # TODO: should be done with more preselected errors
1580
    with warnings.catch_warnings():
4✔
1581
        warnings.filterwarnings("ignore", category=RuntimeWarning)
4✔
1582

1583
        try:
4✔
1584
            # Inversion
1585
            if gdir.has_file('inversion_output'):
4✔
1586
                vol = []
3✔
1587
                vol_bsl = []
3✔
1588
                vol_bwl = []
3✔
1589
                cl = gdir.read_pickle('inversion_output')
3✔
1590
                for c in cl:
3✔
1591
                    vol.extend(c['volume'])
3✔
1592
                    vol_bsl.extend(c.get('volume_bsl', [0]))
3✔
1593
                    vol_bwl.extend(c.get('volume_bwl', [0]))
3✔
1594
                d['inv_volume_km3'] = np.nansum(vol) * 1e-9
3✔
1595
                area = gdir.rgi_area_km2
3✔
1596
                d['vas_volume_km3'] = 0.034 * (area ** 1.375)
3✔
1597
                # BSL / BWL
1598
                d['inv_volume_bsl_km3'] = np.nansum(vol_bsl) * 1e-9
3✔
1599
                d['inv_volume_bwl_km3'] = np.nansum(vol_bwl) * 1e-9
3✔
1600
        except BaseException:
×
1601
            pass
×
1602

1603
        try:
4✔
1604
            # Diagnostics
1605
            diags = gdir.get_diagnostics()
4✔
1606
            for k, v in diags.items():
4✔
1607
                d[k] = v
4✔
1608
        except BaseException:
×
1609
            pass
×
1610

1611
        if inversion_only:
4✔
1612
            return d
×
1613

1614
        try:
4✔
1615
            # Error log
1616
            errlog = gdir.get_error_log()
4✔
1617
            if errlog is not None:
4✔
1618
                d['error_task'] = errlog.split(';')[-2]
1✔
1619
                d['error_msg'] = errlog.split(';')[-1]
1✔
1620
            else:
1621
                d['error_task'] = None
4✔
1622
                d['error_msg'] = None
4✔
1623
        except BaseException:
×
1624
            pass
×
1625

1626
        try:
4✔
1627
            # Masks related stuff
1628
            fpath = gdir.get_filepath('gridded_data')
4✔
1629
            with ncDataset(fpath) as nc:
4✔
1630
                mask = nc.variables['glacier_mask'][:] == 1
4✔
1631
                topo = nc.variables['topo'][:][mask]
4✔
1632
            d['dem_mean_elev'] = np.mean(topo)
4✔
1633
            d['dem_med_elev'] = np.median(topo)
4✔
1634
            d['dem_min_elev'] = np.min(topo)
4✔
1635
            d['dem_max_elev'] = np.max(topo)
4✔
1636
        except BaseException:
2✔
1637
            pass
2✔
1638

1639
        try:
4✔
1640
            # Ext related stuff
1641
            fpath = gdir.get_filepath('gridded_data')
4✔
1642
            with ncDataset(fpath) as nc:
4✔
1643
                ext = nc.variables['glacier_ext'][:] == 1
4✔
1644
                mask = nc.variables['glacier_mask'][:] == 1
4✔
1645
                topo = nc.variables['topo'][:]
4✔
1646
            d['dem_max_elev_on_ext'] = np.max(topo[ext])
4✔
1647
            d['dem_min_elev_on_ext'] = np.min(topo[ext])
4✔
1648
            a = np.sum(mask & (topo > d['dem_max_elev_on_ext']))
4✔
1649
            d['dem_perc_area_above_max_elev_on_ext'] = a / np.sum(mask)
4✔
1650
            # Terminus loc
1651
            j, i = np.nonzero((topo[ext].min() == topo) & ext)
4✔
1652
            lon, lat = gdir.grid.ij_to_crs(i[0], j[0], crs=salem.wgs84)
4✔
1653
            d['terminus_lon'] = lon
4✔
1654
            d['terminus_lat'] = lat
4✔
1655
        except BaseException:
2✔
1656
            pass
2✔
1657

1658
        try:
4✔
1659
            # Centerlines
1660
            cls = gdir.read_pickle('centerlines')
4✔
1661
            longest = 0.
4✔
1662
            for cl in cls:
4✔
1663
                longest = np.max([longest, cl.dis_on_line[-1]])
4✔
1664
            d['n_centerlines'] = len(cls)
4✔
1665
            d['longest_centerline_km'] = longest * gdir.grid.dx / 1000.
4✔
1666
        except BaseException:
2✔
1667
            pass
2✔
1668

1669
        try:
4✔
1670
            # Flowline related stuff
1671
            h = np.array([])
4✔
1672
            widths = np.array([])
4✔
1673
            slope = np.array([])
4✔
1674
            fls = gdir.read_pickle('inversion_flowlines')
4✔
1675
            dx = fls[0].dx * gdir.grid.dx
4✔
1676
            for fl in fls:
4✔
1677
                hgt = fl.surface_h
4✔
1678
                h = np.append(h, hgt)
4✔
1679
                widths = np.append(widths, fl.widths * gdir.grid.dx)
4✔
1680
                slope = np.append(slope, np.arctan(-np.gradient(hgt, dx)))
3✔
1681
                length = len(hgt) * dx
3✔
1682
            d['main_flowline_length'] = length
3✔
1683
            d['inv_flowline_glacier_area'] = np.sum(widths * dx)
3✔
1684
            d['flowline_mean_elev'] = np.average(h, weights=widths)
3✔
1685
            d['flowline_max_elev'] = np.max(h)
3✔
1686
            d['flowline_min_elev'] = np.min(h)
3✔
1687
            d['flowline_avg_slope'] = np.mean(slope)
3✔
1688
            d['flowline_avg_width'] = np.mean(widths)
3✔
1689
            d['flowline_last_width'] = fls[-1].widths[-1] * gdir.grid.dx
3✔
1690
            d['flowline_last_5_widths'] = np.mean(fls[-1].widths[-5:] *
3✔
1691
                                                  gdir.grid.dx)
1692
        except BaseException:
2✔
1693
            pass
2✔
1694

1695
        try:
4✔
1696
            # climate
1697
            info = gdir.get_climate_info()
4✔
1698
            for k, v in info.items():
4✔
1699
                d[k] = v
4✔
1700
        except BaseException:
×
1701
            pass
×
1702

1703
        try:
4✔
1704
            # MB calib
1705
            mb_calib = gdir.read_json('mb_calib')
4✔
1706
            for k, v in mb_calib.items():
3✔
1707
                if np.isscalar(v):
3✔
1708
                    d[k] = v
3✔
1709
                else:
1710
                    for k2, v2 in v.items():
3✔
1711
                        d[k2] = v2
2✔
1712
        except BaseException:
3✔
1713
            pass
3✔
1714

1715
        if apply_func:
4✔
1716
            # User defined statistics
1717
            try:
1✔
1718
                apply_func(gdir, d)
1✔
1719
            except BaseException:
×
1720
                pass
×
1721

1722
    return d
4✔
1723

1724

1725
@global_task(log)
9✔
1726
def compile_glacier_statistics(gdirs, filesuffix='', path=True,
9✔
1727
                               inversion_only=False, apply_func=None):
1728
    """Gather as much statistics as possible about a list of glaciers.
1729

1730
    It can be used for result diagnostics and other analyses. If the data
1731
    necessary for a statistic is not available (e.g. flowline lengths) it
1732
    will simply be ignored.
1733

1734
    Parameters
1735
    ----------
1736
    gdirs : list of :py:class:`oggm.GlacierDirectory` objects
1737
        the glacier directories to process
1738
    filesuffix : str
1739
        add suffix to output file
1740
    path : str, bool
1741
        Set to "True" in order  to store the info in the working directory
1742
        Set to a path to store the file to your chosen location
1743
    inversion_only : bool
1744
        if one wants to summarize the inversion output only (including calving)
1745
    apply_func : function
1746
        if one wants to summarize further information about a glacier, set
1747
        this kwarg to a function that accepts a glacier directory as first
1748
        positional argument, and the dictionary to fill in with data as
1749
        second argument. The dictionary should only store scalar values (strings,
1750
        float, int).
1751
        !Careful! For multiprocessing, the function cannot be defined in your
1752
        main script, i.e. you may need to import it from a module for this to work,
1753
        or from a dummy class (https://stackoverflow.com/questions/8804830)
1754
    """
1755
    from oggm.workflow import execute_entity_task
6✔
1756

1757
    out_df = execute_entity_task(glacier_statistics, gdirs,
6✔
1758
                                 apply_func=apply_func,
1759
                                 inversion_only=inversion_only)
1760

1761
    out = pd.DataFrame(out_df).set_index('rgi_id')
6✔
1762

1763
    if path:
6✔
1764
        if path is True:
6✔
1765
            out.to_csv(os.path.join(cfg.PATHS['working_dir'],
5✔
1766
                                    ('glacier_statistics' +
1767
                                     filesuffix + '.csv')))
1768
        else:
1769
            out.to_csv(path)
2✔
1770
    return out
6✔
1771

1772
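
# Illustrative usage sketch, not part of the original module: a user-defined
# `apply_func` adding one extra scalar per glacier, as described in the
# docstring above. For multiprocessing the helper should live in an importable
# module; it is defined here inline only for readability.
def _example_apply_func(gdir, d):  # pragma: no cover
    # store scalar values only, as requested by the docstring
    with xr.open_dataset(gdir.get_filepath('gridded_data')) as ds:
        d['n_glacier_grid_points'] = int(ds['glacier_mask'].sum())


def _example_compile_glacier_statistics():  # pragma: no cover
    from oggm import workflow

    cfg.initialize()
    cfg.PATHS['working_dir'] = gettempdir('oggm_stats_demo', reset=True)
    gdirs = workflow.init_glacier_directories(
        ['RGI60-11.00897'], from_prepro_level=3, prepro_border=80)

    # writes 'glacier_statistics_demo.csv' to the working directory and
    # returns the same table (one row per glacier, incl. the custom column)
    df = compile_glacier_statistics(gdirs, filesuffix='_demo',
                                    apply_func=_example_apply_func)
    return df
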

1773
@global_task(log)
9✔
1774
def compile_fixed_geometry_mass_balance(gdirs, filesuffix='',
9✔
1775
                                        path=True, csv=False,
1776
                                        use_inversion_flowlines=True,
1777
                                        ys=None, ye=None, years=None,
1778
                                        climate_filename='climate_historical',
1779
                                        climate_input_filesuffix='',
1780
                                        temperature_bias=None,
1781
                                        precipitation_factor=None):
1782

1783
    """Compiles a table of specific mass balance timeseries for all glaciers.
1784

1785
    The file is stored as an HDF file (not CSV) by default. Use pd.read_hdf
1786
    to open it.
1787

1788
    Parameters
1789
    ----------
1790
    gdirs : list of :py:class:`oggm.GlacierDirectory` objects
1791
        the glacier directories to process
1792
    filesuffix : str
1793
        add suffix to output file
1794
    path : str, bool
1795
        Set to "True" in order  to store the info in the working directory
1796
        Set to a path to store the file to your chosen location (file
1797
        extension matters)
1798
    csv : bool
1799
        Set to store the data in csv instead of hdf.
1800
    use_inversion_flowlines : bool
1801
        whether to use the inversion flowlines or the model flowlines
1802
    ys : int
1803
        start year of the model run (default: from the climate file)
1805
    ye : int
1806
        end year of the model run (default: from the climate file)
1807
    years : array of ints
1808
        override ys and ye with the years of your choice
1809
    climate_filename : str
1810
        name of the climate file, e.g. 'climate_historical' (default) or
1811
        'gcm_data'
1812
    climate_input_filesuffix: str
1813
        filesuffix for the input climate file
1814
    temperature_bias : float
1815
        add a bias to the temperature timeseries
1816
    precipitation_factor: float
1817
        multiply the precipitation time series by this factor
1818
        default is None, which means that the precipitation factor from the
1819
        calibration (cfg.PARAMS['prcp_fac']) is applied
1820
    """
1821

1822
    from oggm.workflow import execute_entity_task
3✔
1823
    from oggm.core.massbalance import fixed_geometry_mass_balance
3✔
1824

1825
    out_df = execute_entity_task(fixed_geometry_mass_balance, gdirs,
3✔
1826
                                 use_inversion_flowlines=use_inversion_flowlines,
1827
                                 ys=ys, ye=ye, years=years, climate_filename=climate_filename,
1828
                                 climate_input_filesuffix=climate_input_filesuffix,
1829
                                 temperature_bias=temperature_bias,
1830
                                 precipitation_factor=precipitation_factor)
1831

1832
    for idx, s in enumerate(out_df):
3✔
1833
        if s is None:
3✔
1834
            out_df[idx] = pd.Series(np.NaN)
×
1835

1836
    out = pd.concat(out_df, axis=1, keys=[gd.rgi_id for gd in gdirs])
3✔
1837
    out = out.dropna(axis=0, how='all')
3✔
1838

1839
    if path:
3✔
1840
        if path is True:
3✔
1841
            fpath = os.path.join(cfg.PATHS['working_dir'],
1✔
1842
                                 'fixed_geometry_mass_balance' + filesuffix)
1843
            if csv:
1✔
1844
                out.to_csv(fpath + '.csv')
×
1845
            else:
1846
                out.to_hdf(fpath + '.hdf', key='df')
1✔
1847
        else:
1848
            ext = os.path.splitext(path)[-1]
2✔
1849
            if ext.lower() == '.csv':
2✔
1850
                out.to_csv(path)
2✔
1851
            elif ext.lower() == '.hdf':
×
1852
                out.to_hdf(path, key='df')
×
1853
    return out
3✔
1854

1855
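
# Illustrative usage sketch, not part of the original module: compile the
# fixed-geometry specific mass balance and read the resulting HDF file back
# with pandas, as suggested in the docstring above. Prepro settings and the
# filesuffix are example assumptions.
def _example_compile_fixed_geometry_mass_balance():  # pragma: no cover
    from oggm import workflow

    cfg.initialize()
    cfg.PATHS['working_dir'] = gettempdir('oggm_fixed_mb_demo', reset=True)
    gdirs = workflow.init_glacier_directories(
        ['RGI60-11.00897'], from_prepro_level=3, prepro_border=80)

    compile_fixed_geometry_mass_balance(gdirs, filesuffix='_demo')

    # by default the table is written as an HDF file in the working directory
    fpath = os.path.join(cfg.PATHS['working_dir'],
                         'fixed_geometry_mass_balance_demo.hdf')
    df = pd.read_hdf(fpath)  # one column per glacier, one row per year
    return df
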

1856
@global_task(log)
9✔
1857
def compile_ela(gdirs, filesuffix='', path=True, csv=False, ys=None, ye=None,
9✔
1858
                years=None, climate_filename='climate_historical', temperature_bias=None,
1859
                precipitation_factor=None, climate_input_filesuffix='',
1860
                mb_model_class=None):
1861
    """Compiles a table of ELA timeseries for all glaciers for a given years,
1862
    using the mb_model_class (default MonthlyTIModel).
1863

1864
    The file is stored as an HDF file (not CSV) by default. Use pd.read_hdf
1865
    to open it.
1866

1867
    Parameters
1868
    ----------
1869
    gdirs : list of :py:class:`oggm.GlacierDirectory` objects
1870
        the glacier directories to process
1871
    filesuffix : str
1872
        add suffix to output file
1873
    path : str, bool
1874
        Set to "True" in order  to store the info in the working directory
1875
        Set to a path to store the file to your chosen location (file
1876
        extension matters)
1877
    csv: bool
1878
        Set to store the data in csv instead of hdf.
1879
    ys : int
1880
        start year
1881
    ye : int
1882
        end year
1883
    years : array of ints
1884
        override ys and ye with the years of your choice
1885
    climate_filename : str
1886
        name of the climate file, e.g. 'climate_historical' (default) or
1887
        'gcm_data'
1888
    climate_input_filesuffix : str
1889
        filesuffix for the input climate file
1890
    temperature_bias : float
1891
        add a bias to the temperature timeseries
1892
    precipitation_factor: float
1893
        multiply the precipitation time series by this factor
1894
        default is None, which means that the precipitation factor from the
1895
        calibration (cfg.PARAMS['prcp_fac']) is applied
1896
    mb_model_class : MassBalanceModel class
1897
        the MassBalanceModel class to use, default is MonthlyTIModel
1898
    """
1899
    from oggm.workflow import execute_entity_task
1✔
1900
    from oggm.core.massbalance import compute_ela, MonthlyTIModel
1✔
1901

1902
    if mb_model_class is None:
1✔
1903
        mb_model_class = MonthlyTIModel
1✔
1904

1905
    out_df = execute_entity_task(compute_ela, gdirs, ys=ys, ye=ye, years=years,
1✔
1906
                                 climate_filename=climate_filename,
1907
                                 climate_input_filesuffix=climate_input_filesuffix,
1908
                                 temperature_bias=temperature_bias,
1909
                                 precipitation_factor=precipitation_factor,
1910
                                 mb_model_class=mb_model_class)
1911

1912
    for idx, s in enumerate(out_df):
1✔
1913
        if s is None:
1✔
1914
            out_df[idx] = pd.Series(np.NaN)
×
1915

1916
    out = pd.concat(out_df, axis=1, keys=[gd.rgi_id for gd in gdirs])
1✔
1917
    out = out.dropna(axis=0, how='all')
1✔
1918

1919
    if path:
1✔
1920
        if path is True:
1✔
1921
            fpath = os.path.join(cfg.PATHS['working_dir'],
1✔
1922
                                 'ELA' + filesuffix)
1923
            if csv:
1✔
1924
                out.to_csv(fpath + '.csv')
1✔
1925
            else:
1926
                out.to_hdf(fpath + '.hdf', key='df')
1✔
1927
        else:
1928
            ext = os.path.splitext(path)[-1]
×
1929
            if ext.lower() == '.csv':
×
1930
                out.to_csv(path)
×
1931
            elif ext.lower() == '.hdf':
×
1932
                out.to_hdf(path, key='df')
×
1933
    return out
1✔
1934

1935
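
# Illustrative usage sketch, not part of the original module: yearly ELAs for
# a chosen period, stored as CSV. The default MonthlyTIModel is used since
# mb_model_class is left to None; years and filesuffix are example choices.
def _example_compile_ela():  # pragma: no cover
    from oggm import workflow

    cfg.initialize()
    cfg.PATHS['working_dir'] = gettempdir('oggm_ela_demo', reset=True)
    gdirs = workflow.init_glacier_directories(
        ['RGI60-11.00897'], from_prepro_level=3, prepro_border=80)

    # writes 'ELA_demo.csv' (csv=True) to the working directory
    df = compile_ela(gdirs, filesuffix='_demo', csv=True,
                     years=np.arange(1990, 2020))
    return df
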

1936
@entity_task(log)
9✔
1937
def climate_statistics(gdir, add_climate_period=1995, halfsize=15,
9✔
1938
                       input_filesuffix=''):
1939
    """Gather as much statistics as possible about this glacier.
1940

1941
    It can be used for result diagnostics and other analyses. If the data
1942
    necessary for a statistic is not available (e.g. flowline lengths) it
1943
    will simply be ignored.
1944

1945
    Important note: the climate is extracted from the mass-balance model and
1946
    is therefore "corrected" according to the mass-balance calibration scheme
1947
    (e.g. the precipitation factor and the temp bias correction). For more
1948
    flexible information about the raw climate data, use `compile_climate_input`
1949
    or `raw_climate_statistics`.
1950

1951
    Parameters
1952
    ----------
1953
    add_climate_period : int or list of ints
1954
        compile climate statistics for the halfsize*2 + 1 yrs period
1955
        around the selected date.
1956
    halfsize : int
1957
        the half size of the window
1958
    """
1959
    from oggm.core.massbalance import (ConstantMassBalance,
1✔
1960
                                       MultipleFlowlineMassBalance)
1961

1962
    d = OrderedDict()
1✔
1963

1964
    # Easy stats - this should always be possible
1965
    d['rgi_id'] = gdir.rgi_id
1✔
1966
    d['rgi_region'] = gdir.rgi_region
1✔
1967
    d['rgi_subregion'] = gdir.rgi_subregion
1✔
1968
    d['name'] = gdir.name
1✔
1969
    d['cenlon'] = gdir.cenlon
1✔
1970
    d['cenlat'] = gdir.cenlat
1✔
1971
    d['rgi_area_km2'] = gdir.rgi_area_km2
1✔
1972
    d['glacier_type'] = gdir.glacier_type
1✔
1973
    d['terminus_type'] = gdir.terminus_type
1✔
1974
    d['status'] = gdir.status
1✔
1975

1976
    # The rest is less certain
1977
    with warnings.catch_warnings():
1✔
1978
        warnings.filterwarnings("ignore", category=RuntimeWarning)
1✔
1979
        try:
1✔
1980
            # Flowline related stuff
1981
            h = np.array([])
1✔
1982
            widths = np.array([])
1✔
1983
            fls = gdir.read_pickle('inversion_flowlines')
1✔
1984
            dx = fls[0].dx * gdir.grid.dx
1✔
1985
            for fl in fls:
1✔
1986
                hgt = fl.surface_h
1✔
1987
                h = np.append(h, hgt)
1✔
1988
                widths = np.append(widths, fl.widths * dx)
1✔
1989
            d['flowline_mean_elev'] = np.average(h, weights=widths)
1✔
1990
            d['flowline_max_elev'] = np.max(h)
1✔
1991
            d['flowline_min_elev'] = np.min(h)
1✔
1992
        except BaseException:
1✔
1993
            pass
1✔
1994

1995
        # Climate and MB at specified dates
1996
        add_climate_period = tolist(add_climate_period)
1✔
1997
        for y0 in add_climate_period:
1✔
1998
            try:
1✔
1999
                fs = '{}-{}'.format(y0 - halfsize, y0 + halfsize)
1✔
2000
                mbcl = ConstantMassBalance
1✔
2001
                mbmod = MultipleFlowlineMassBalance(gdir, mb_model_class=mbcl,
1✔
2002
                                                    y0=y0, halfsize=halfsize,
2003
                                                    use_inversion_flowlines=True,
2004
                                                    input_filesuffix=input_filesuffix)
2005
                h, w, mbh = mbmod.get_annual_mb_on_flowlines()
1✔
2006
                mbh = mbh * cfg.SEC_IN_YEAR * cfg.PARAMS['ice_density']
1✔
2007
                pacc = np.where(mbh >= 0)
1✔
2008
                pab = np.where(mbh < 0)
1✔
2009
                d[fs + '_aar'] = np.sum(w[pacc]) / np.sum(w)
1✔
2010
                try:
1✔
2011
                    # Try to get the slope
2012
                    mb_slope, _, _, _, _ = stats.linregress(h[pab], mbh[pab])
1✔
2013
                    d[fs + '_mb_grad'] = mb_slope
1✔
2014
                except BaseException:
×
2015
                    # we don't mind if something goes wrong
2016
                    d[fs + '_mb_grad'] = np.NaN
×
2017
                d[fs + '_ela_h'] = mbmod.get_ela()
1✔
2018
                # Climate
2019
                t, tm, p, ps = mbmod.flowline_mb_models[0].get_annual_climate(
1✔
2020
                    [d[fs + '_ela_h'],
2021
                     d['flowline_mean_elev'],
2022
                     d['flowline_max_elev'],
2023
                     d['flowline_min_elev']])
2024
                for n, v in zip(['temp', 'tempmelt', 'prcpsol'], [t, tm, ps]):
1✔
2025
                    d[fs + '_avg_' + n + '_ela_h'] = v[0]
1✔
2026
                    d[fs + '_avg_' + n + '_mean_elev'] = v[1]
1✔
2027
                    d[fs + '_avg_' + n + '_max_elev'] = v[2]
1✔
2028
                    d[fs + '_avg_' + n + '_min_elev'] = v[3]
1✔
2029
                d[fs + '_avg_prcp'] = p[0]
1✔
2030
            except BaseException:
1✔
2031
                pass
1✔
2032

2033
    return d
1✔
2034

2035

2036
@entity_task(log)
9✔
2037
def raw_climate_statistics(gdir, add_climate_period=1995, halfsize=15,
9✔
2038
                           input_filesuffix=''):
2039
    """Gather as much statistics as possible about this glacier.
2040

2041
    This is like "climate_statistics" but without relying on the
2042
    mass-balance model, i.e. closer to the actual data (uncorrected).
2043

2044
    Parameters
2045
    ----------
2046
    add_climate_period : int or list of ints
2047
        compile climate statistics for the 30 yrs period around the selected
2048
        date.
2049
    """
2050
    d = OrderedDict()
1✔
2051
    # Easy stats - this should always be possible
2052
    d['rgi_id'] = gdir.rgi_id
1✔
2053
    d['rgi_region'] = gdir.rgi_region
1✔
2054
    d['rgi_subregion'] = gdir.rgi_subregion
1✔
2055
    d['name'] = gdir.name
1✔
2056
    d['cenlon'] = gdir.cenlon
1✔
2057
    d['cenlat'] = gdir.cenlat
1✔
2058
    d['rgi_area_km2'] = gdir.rgi_area_km2
1✔
2059
    d['glacier_type'] = gdir.glacier_type
1✔
2060
    d['terminus_type'] = gdir.terminus_type
1✔
2061
    d['status'] = gdir.status
1✔
2062

2063
    # The rest is less certain
2064
    with warnings.catch_warnings():
1✔
2065
        warnings.filterwarnings("ignore", category=RuntimeWarning)
1✔
2066

2067
        # Climate and MB at specified dates
2068
        add_climate_period = tolist(add_climate_period)
1✔
2069

2070
        # get non-corrected winter daily mean prcp (kg m-2 day-1) for
2071
        # the chosen time period
2072
        for y0 in add_climate_period:
1✔
2073
            fs = '{}-{}'.format(y0 - halfsize, y0 + halfsize)
1✔
2074
            try:
1✔
2075
                # get non-corrected winter daily mean prcp (kg m-2 day-1)
2076
                # it is easier to get this directly from the raw climate files
2077
                fp = gdir.get_filepath('climate_historical',
1✔
2078
                                       filesuffix=input_filesuffix)
2079
                with xr.open_dataset(fp).prcp as ds_pr:
1✔
2080
                    # just select winter months
2081
                    if gdir.hemisphere == 'nh':
1✔
2082
                        m_winter = [10, 11, 12, 1, 2, 3, 4]
1✔
2083
                    else:
2084
                        m_winter = [4, 5, 6, 7, 8, 9, 10]
×
2085
                    ds_pr_winter = ds_pr.where(ds_pr['time.month'].isin(m_winter), drop=True)
1✔
2086
                    # select the correct year time period
2087
                    ds_pr_winter = ds_pr_winter.sel(time=slice(f'{fs[:4]}-01-01',
1✔
2088
                                                               f'{fs[-4:]}-12-01'))
2089
                    # check if we have the full time period
2090
                    n_years = int(fs[-4:]) - int(fs[:4]) + 1
1✔
2091
                    assert len(ds_pr_winter.time) == n_years * 7, 'chosen time-span invalid'
1✔
2092
                    ds_d_pr_winter_mean = (ds_pr_winter / ds_pr_winter.time.dt.daysinmonth).mean()
1✔
2093
                    d[f'{fs}_uncorrected_winter_daily_mean_prcp'] = ds_d_pr_winter_mean.values
1✔
2094
            except BaseException:
1✔
2095
                pass
1✔
2096
    return d
1✔
2097

2098

2099
@global_task(log)
9✔
2100
def compile_climate_statistics(gdirs, filesuffix='', path=True,
9✔
2101
                               add_climate_period=1995,
2102
                               halfsize=15,
2103
                               add_raw_climate_statistics=False,
2104
                               input_filesuffix=''):
2105
    """Gather as much statistics as possible about a list of glaciers.
2106

2107
    It can be used for result diagnostics and other analyses. If the data
2108
    necessary for a statistic is not available (e.g. flowline lengths) it
2109
    will simply be ignored.
2110

2111
    Parameters
2112
    ----------
2113
    gdirs : list of :py:class:`oggm.GlacierDirectory` objects
        the glacier directories to process
2114
    filesuffix : str
2115
        add suffix to output file
2116
    path : str, bool
2117
        Set to "True" in order  to store the info in the working directory
2118
        Set to a path to store the file to your chosen location
2119
    add_climate_period : int or list of ints
2120
        compile climate statistics for the 30 yrs period around the selected
2121
        date.
2122
    input_filesuffix : str
2123
        filesuffix of the used climate_historical file, default is no filesuffix
2124
    """
2125
    from oggm.workflow import execute_entity_task
3✔
2126

2127
    out_df = execute_entity_task(climate_statistics, gdirs,
3✔
2128
                                 add_climate_period=add_climate_period,
2129
                                 halfsize=halfsize,
2130
                                 input_filesuffix=input_filesuffix)
2131
    out = pd.DataFrame(out_df).set_index('rgi_id')
3✔
2132

2133
    if add_raw_climate_statistics:
3✔
2134
        out_df = execute_entity_task(raw_climate_statistics, gdirs,
1✔
2135
                                     add_climate_period=add_climate_period,
2136
                                     halfsize=halfsize,
2137
                                     input_filesuffix=input_filesuffix)
2138
        out = out.merge(pd.DataFrame(out_df).set_index('rgi_id'))
1✔
2139

2140
    if path:
3✔
2141
        if path is True:
3✔
2142
            out.to_csv(os.path.join(cfg.PATHS['working_dir'],
3✔
2143
                                    ('climate_statistics' +
2144
                                     filesuffix + '.csv')))
2145
        else:
2146
            out.to_csv(path)
1✔
2147
    return out
3✔
2148

2149
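
# Illustrative usage sketch, not part of the original module: climate
# statistics for two 31-yr periods (1950-1980 and 1980-2010). With
# add_raw_climate_statistics=True, uncorrected columns such as
# '1980-2010_uncorrected_winter_daily_mean_prcp' are added as well.
def _example_compile_climate_statistics():  # pragma: no cover
    from oggm import workflow

    cfg.initialize()
    cfg.PATHS['working_dir'] = gettempdir('oggm_climstats_demo', reset=True)
    gdirs = workflow.init_glacier_directories(
        ['RGI60-11.00897'], from_prepro_level=3, prepro_border=80)

    df = compile_climate_statistics(gdirs, add_climate_period=[1965, 1995],
                                    halfsize=15,
                                    add_raw_climate_statistics=True)
    return df
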

2150
def extend_past_climate_run(past_run_file=None,
9✔
2151
                            fixed_geometry_mb_file=None,
2152
                            glacier_statistics_file=None,
2153
                            path=False,
2154
                            use_compression=True):
2155
    """Utility function to extend past MB runs prior to the RGI date.
2156

2157
    We use a fixed geometry (and a fixed calving rate) for all dates prior
2158
    to the RGI date.
2159

2160
    This is not parallelized, i.e. a bit slow.
2161

2162
    Parameters
2163
    ----------
2164
    past_run_file : str
2165
        path to the historical run (nc)
2166
    fixed_geometry_mb_file : str
2167
        path to the MB file (csv)
2168
    glacier_statistics_file : str
2169
        path to the glacier stats file (csv)
2170
    path : str
2171
        where to store the file
2172
    use_compression : bool
        use zlib compression on the output netCDF file
2173

2174
    Returns
2175
    -------
2176
    the extended dataset
2177
    """
2178

2179
    log.workflow('Applying extend_past_climate_run on '
2✔
2180
                 '{}'.format(past_run_file))
2181

2182
    fixed_geometry_mb_df = pd.read_csv(fixed_geometry_mb_file, index_col=0,
2✔
2183
                                       low_memory=False)
2184
    stats_df = pd.read_csv(glacier_statistics_file, index_col=0,
2✔
2185
                           low_memory=False)
2186

2187
    with xr.open_dataset(past_run_file) as past_ds:
2✔
2188

2189
        # We need at least area and vol to do something
2190
        if 'volume' not in past_ds.data_vars or 'area' not in past_ds.data_vars:
2✔
2191
            raise InvalidWorkflowError('Need both volume and area to proceed')
×
2192

2193
        y0_run = int(past_ds.time[0])
2✔
2194
        y1_run = int(past_ds.time[-1])
2✔
2195
        if (y1_run - y0_run + 1) != len(past_ds.time):
2✔
2196
            raise NotImplementedError('Currently only supports annual outputs')
2197
        y0_clim = int(fixed_geometry_mb_df.index[0])
2✔
2198
        y1_clim = int(fixed_geometry_mb_df.index[-1])
2✔
2199
        if y0_clim > y0_run or y1_clim < y0_run:
2✔
2200
            raise InvalidWorkflowError('Dates do not match.')
×
2201
        if y1_clim != y1_run - 1:
2✔
2202
            raise InvalidWorkflowError('Dates do not match.')
×
2203
        if len(past_ds.rgi_id) != len(fixed_geometry_mb_df.columns):
2✔
2204
            # This might happen if we are testing on new directories
2205
            fixed_geometry_mb_df = fixed_geometry_mb_df[past_ds.rgi_id]
×
2206
        if len(past_ds.rgi_id) != len(stats_df.index):
2✔
2207
            stats_df = stats_df.loc[past_ds.rgi_id]
×
2208

2209
        # Make sure we agree on order
2210
        df = fixed_geometry_mb_df[past_ds.rgi_id]
2✔
2211

2212
        # Output data
2213
        years = np.arange(y0_clim, y1_run+1)
2✔
2214
        ods = past_ds.reindex({'time': years})
2✔
2215

2216
        # Time
2217
        ods['hydro_year'].data[:] = years
2✔
2218
        ods['hydro_month'].data[:] = ods['hydro_month'][-1]
2✔
2219
        ods['calendar_year'].data[:] = years
2✔
2220
        ods['calendar_month'].data[:] = ods['calendar_month'][-1]
2✔
2221
        for vn in ['hydro_year', 'hydro_month', 'calendar_year', 'calendar_month']:
2✔
2222
            ods[vn] = ods[vn].astype(int)
2✔
2223

2224
        # New vars
2225
        for vn in ['volume', 'volume_m3_min_h', 'volume_bsl', 'volume_bwl',
2✔
2226
                   'area', 'area_m2_min_h', 'length', 'calving', 'calving_rate']:
2227
            if vn in ods.data_vars:
2✔
2228
                ods[vn + '_ext'] = ods[vn].copy(deep=True)
2✔
2229
                ods[vn + '_ext'].attrs['description'] += ' (extended with MB data)'
2✔
2230

2231
        vn = 'volume_fixed_geom_ext'
2✔
2232
        ods[vn] = ods['volume'].copy(deep=True)
2✔
2233
        ods[vn].attrs['description'] += ' (replaced with fixed geom data)'
2✔
2234

2235
        rho = cfg.PARAMS['ice_density']
2✔
2236
        # Loop over the ids
2237
        for i, rid in enumerate(ods.rgi_id.data):
2✔
2238
            # Both do not need to be the same length, but they need to start at the same year
2239
            mb_ts = df.values[:, i]
2✔
2240
            orig_vol_ts = ods.volume_ext.data[:, i]
2✔
2241
            if not (np.isfinite(mb_ts[-1]) and np.isfinite(orig_vol_ts[-1])):
2✔
2242
                # Not a valid glacier
2243
                continue
×
2244
            if np.isfinite(orig_vol_ts[0]):
2✔
2245
                # Nothing to extend, really
2246
                continue
×
2247

2248
            # First valid id
2249
            fid = np.argmax(np.isfinite(orig_vol_ts))
2✔
2250

2251
            # Add calving to the mix
2252
            try:
2✔
2253
                calv_flux = stats_df.loc[rid, 'calving_flux'] * 1e9
2✔
2254
                calv_rate = stats_df.loc[rid, 'calving_rate_myr']
×
2255
            except KeyError:
2✔
2256
                calv_flux = 0
2✔
2257
                calv_rate = 0
2✔
2258
            if not np.isfinite(calv_flux):
2✔
2259
                calv_flux = 0
×
2260
            if not np.isfinite(calv_rate):
2✔
2261
                calv_rate = 0
×
2262

2263
            # Fill area and length which stays constant before date
2264
            orig_area_ts = ods.area_ext.data[:, i]
2✔
2265
            orig_area_ts[:fid] = orig_area_ts[fid]
2✔
2266

2267
            # We convert SMB to volume
2268
            mb_vol_ts = (mb_ts / rho * orig_area_ts[fid] - calv_flux).cumsum()
2✔
2269
            calv_ts = (mb_ts * 0 + calv_flux).cumsum()
2✔
2270

2271
            # The -1 is because the volume change is known at end of year
2272
            mb_vol_ts = mb_vol_ts + orig_vol_ts[fid] - mb_vol_ts[fid-1]
2✔
2273

2274
            # Now back to netcdf
2275
            ods.volume_fixed_geom_ext.data[1:, i] = mb_vol_ts
2✔
2276
            ods.volume_ext.data[1:fid, i] = mb_vol_ts[0:fid-1]
2✔
2277
            ods.area_ext.data[:, i] = orig_area_ts
2✔
2278

2279
            # Optional variables
2280
            if 'length' in ods.data_vars:
2✔
2281
                orig_length_ts = ods.length_ext.data[:, i]
2✔
2282
                orig_length_ts[:fid] = orig_length_ts[fid]
2✔
2283
                ods.length_ext.data[:, i] = orig_length_ts
2✔
2284

2285
            if 'calving' in ods.data_vars:
2✔
2286
                orig_calv_ts = ods.calving_ext.data[:, i]
1✔
2287
                # The -1 is because the volume change is known at end of year
2288
                calv_ts = calv_ts + orig_calv_ts[fid] - calv_ts[fid-1]
1✔
2289
                ods.calving_ext.data[1:fid, i] = calv_ts[0:fid-1]
1✔
2290

2291
            if 'calving_rate' in ods.data_vars:
2✔
2292
                orig_calv_rate_ts = ods.calving_rate_ext.data[:, i]
1✔
2293
                # +1 because calving rate at year 0 is unknown from the dyns model
2294
                orig_calv_rate_ts[:fid+1] = calv_rate
1✔
2295
                ods.calving_rate_ext.data[:, i] = orig_calv_rate_ts
1✔
2296

2297
            # Extend vol bsl by assuming that % stays constant
2298
            if 'volume_bsl' in ods.data_vars:
2✔
2299
                bsl = ods.volume_bsl.data[fid, i] / ods.volume.data[fid, i]
1✔
2300
                ods.volume_bsl_ext.data[:fid, i] = bsl * ods.volume_ext.data[:fid, i]
1✔
2301
            if 'volume_bwl' in ods.data_vars:
2✔
2302
                bwl = ods.volume_bwl.data[fid, i] / ods.volume.data[fid, i]
1✔
2303
                ods.volume_bwl_ext.data[:fid, i] = bwl * ods.volume_ext.data[:fid, i]
1✔
2304

2305
        # Remove old vars
2306
        for vn in list(ods.data_vars):
2✔
2307
            if '_ext' not in vn and 'time' in ods[vn].dims:
2✔
2308
                del ods[vn]
2✔
2309

2310
        # Rename vars to their old names
2311
        ods = ods.rename(dict((o, o.replace('_ext', ''))
2✔
2312
                              for o in ods.data_vars))
2313

2314
        # Remove t0 (which is NaN)
2315
        ods = ods.isel(time=slice(1, None))
2✔
2316

2317
        # To file?
2318
        if path:
2✔
2319
            enc_var = {'dtype': 'float32'}
2✔
2320
            if use_compression:
2✔
2321
                enc_var['complevel'] = 5
2✔
2322
                enc_var['zlib'] = True
2✔
2323
            encoding = {v: enc_var for v in ods.data_vars}
2✔
2324
            ods.to_netcdf(path, encoding=encoding)
2✔
2325

2326
    return ods
2✔
2327

2328
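
# Illustrative usage sketch, not part of the original module: extend a
# compiled historical run back to the start of the climate data. The three
# input files are assumed to exist in `working_dir` already (e.g. produced by
# compile_run_output, compile_fixed_geometry_mass_balance with csv=True, and
# compile_glacier_statistics); the file names are assumptions of this example.
def _example_extend_past_climate_run(working_dir):  # pragma: no cover
    ods = extend_past_climate_run(
        past_run_file=os.path.join(working_dir, 'run_output_historical.nc'),
        fixed_geometry_mb_file=os.path.join(working_dir,
                                            'fixed_geometry_mass_balance.csv'),
        glacier_statistics_file=os.path.join(working_dir,
                                             'glacier_statistics.csv'),
        path=os.path.join(working_dir, 'run_output_historical_extended.nc'))
    return ods
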

2329
def idealized_gdir(surface_h, widths_m, map_dx, flowline_dx=1,
9✔
2330
                   base_dir=None, reset=False):
2331
    """Creates a glacier directory with flowline input data only.
2332

2333
    This is useful for testing, or for idealized experiments.
2334

2335
    Parameters
2336
    ----------
2337
    surface_h : ndarray
2338
        the surface elevation of the flowline's grid points (in m).
2339
    widths_m : ndarray
2340
        the widths of the flowline's grid points (in m).
2341
    map_dx : float
2342
        the grid spacing (in m)
2343
    flowline_dx : int
2344
        the flowline grid spacing (in units of map_dx, often it should be 1)
2345
    base_dir : str
2346
        path to the directory in which to create the glacier directory.
2347
        Defaults to `cfg.PATHS['working_dir'] + /per_glacier/`
2348
    reset : bool, default=False
2349
        empties the directory at construction
2350

2351
    Returns
2352
    -------
2353
    a GlacierDirectory instance
2354
    """
2355

2356
    from oggm.core.centerlines import Centerline
1✔
2357

2358
    # Area from geometry
2359
    area_km2 = np.sum(widths_m * map_dx * flowline_dx) * 1e-6
1✔
2360

2361
    # Dummy entity - should probably also change the geometry
2362
    entity = gpd.read_file(get_demo_file('Hintereisferner_RGI5.shp')).iloc[0]
1✔
2363
    entity.Area = area_km2
1✔
2364
    entity.CenLat = 0
1✔
2365
    entity.CenLon = 0
1✔
2366
    entity.Name = ''
1✔
2367
    entity.RGIId = 'RGI50-00.00000'
1✔
2368
    entity.O1Region = '00'
1✔
2369
    entity.O2Region = '0'
1✔
2370
    gdir = GlacierDirectory(entity, base_dir=base_dir, reset=reset)
1✔
2371
    gdir.write_shapefile(gpd.GeoDataFrame([entity]), 'outlines')
1✔
2372

2373
    # Idealized flowline
2374
    coords = np.arange(0, len(surface_h) - 0.5, 1)
1✔
2375
    line = shpg.LineString(np.vstack([coords, coords * 0.]).T)
1✔
2376
    fl = Centerline(line, dx=flowline_dx, surface_h=surface_h, map_dx=map_dx)
1✔
2377
    fl.widths = widths_m / map_dx
1✔
2378
    fl.is_rectangular = np.ones(fl.nx).astype(bool)
1✔
2379
    gdir.write_pickle([fl], 'inversion_flowlines')
1✔
2380

2381
    # Idealized map
2382
    grid = salem.Grid(nxny=(1, 1), dxdy=(map_dx, map_dx), x0y0=(0, 0))
1✔
2383
    grid.to_json(gdir.get_filepath('glacier_grid'))
1✔
2384

2385
    return gdir
1✔
2386

2387
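
# Illustrative usage sketch, not part of the original module: build a
# synthetic glacier directory from a linear bed, e.g. for idealized
# experiments or tests. All numbers below are arbitrary example values.
def _example_idealized_gdir():  # pragma: no cover
    cfg.initialize()
    cfg.PATHS['working_dir'] = gettempdir('oggm_idealized_demo', reset=True)

    # 100 grid points from 3400 m down to 1400 m, constant 300 m width,
    # on a 100 m map resolution
    surface_h = np.linspace(3400., 1400., 100)
    widths_m = np.zeros(100) + 300.
    gdir = idealized_gdir(surface_h, widths_m, map_dx=100.)

    # the flowline written above can be read back like any other gdir file
    fls = gdir.read_pickle('inversion_flowlines')
    return gdir, fls
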

2388
def _back_up_retry(func, exceptions, max_count=5):
9✔
2389
    """Re-Try an action up to max_count times.
2390
    """
2391

2392
    count = 0
2✔
2393
    while count < max_count:
2✔
2394
        try:
2✔
2395
            if count > 0:
2✔
2396
                time.sleep(random.uniform(0.05, 0.1))
×
2397
            return func()
2✔
2398
        except exceptions:
1✔
2399
            count += 1
×
2400
            if count >= max_count:
×
2401
                raise
×
2402

2403

2404
def _robust_extract(to_dir, *args, **kwargs):
9✔
2405
    """For some obscure reason this operation randomly fails.
2406

2407
    Try to make it more robust.
2408
    """
2409

2410
    def func():
2✔
2411
        with tarfile.open(*args, **kwargs) as tf:
2✔
2412
            if not len(tf.getnames()):
2✔
2413
                raise RuntimeError("Empty tarfile")
×
2414
            tf.extractall(os.path.dirname(to_dir))
2✔
2415

2416
    _back_up_retry(func, FileExistsError)
2✔
2417

2418

2419
def robust_tar_extract(from_tar, to_dir, delete_tar=False):
9✔
2420
    """Extract a tar file - also checks for a "tar in tar" situation"""
2421

2422
    if os.path.isfile(from_tar):
2✔
2423
        _robust_extract(to_dir, from_tar, 'r')
1✔
2424
    else:
2425
        # maybe a tar in tar
2426
        base_tar = os.path.dirname(from_tar) + '.tar'
2✔
2427
        if not os.path.isfile(base_tar):
2✔
2428
            raise FileNotFoundError('Could not find a tarfile with path: '
×
2429
                                    '{}'.format(from_tar))
2430
        if delete_tar:
2✔
2431
            raise InvalidParamsError('Cannot delete tar in tar.')
×
2432
        # Open the tar
2433
        bname = os.path.basename(from_tar)
2✔
2434
        dirbname = os.path.basename(os.path.dirname(from_tar))
2✔
2435

2436
        def func():
2✔
2437
            with tarfile.open(base_tar, 'r') as tf:
2✔
2438
                i_from_tar = tf.getmember(os.path.join(dirbname, bname))
2✔
2439
                with tf.extractfile(i_from_tar) as fileobj:
2✔
2440
                    _robust_extract(to_dir, fileobj=fileobj)
2✔
2441

2442
        _back_up_retry(func, RuntimeError)
2✔
2443

2444
    if delete_tar:
2✔
2445
        os.remove(from_tar)
1✔
2446

2447

2448
class GlacierDirectory(object):
9✔
2449
    """Organizes read and write access to the glacier's files.
2450

2451
    It handles a glacier directory created in a base directory (default
2452
    is the "per_glacier" folder in the working directory). The role of a
2453
    GlacierDirectory is to give access to file paths and to I/O operations.
2454
    The user should not care about *where* the files are
2455
    located, but should know their name (see :ref:`basenames`).
2456

2457
    If the directory does not exist, it will be created.
2458

2459
    See :ref:`glacierdir` for more information.
2460

2461
    Attributes
2462
    ----------
2463
    dir : str
2464
        path to the directory
2465
    base_dir : str
2466
        path to the base directory
2467
    rgi_id : str
2468
        The glacier's RGI identifier
2469
    glims_id : str
2470
        The glacier's GLIMS identifier (when available)
2471
    rgi_area_km2 : float
2472
        The glacier's RGI area (km2)
2473
    cenlon, cenlat : float
2474
        The glacier centerpoint's lon/lat
2475
    rgi_date : int
2476
        The RGI's BGNDATE year attribute if available. Otherwise, defaults to
2477
        the median year for the RGI region
2478
    rgi_region : str
2479
        The RGI region ID
2480
    rgi_subregion : str
2481
        The RGI subregion ID
2482
    rgi_version : str
2483
        The RGI version name
2484
    rgi_region_name : str
2485
        The RGI region name
2486
    rgi_subregion_name : str
2487
        The RGI subregion name
2488
    name : str
2489
        The RGI glacier name (if available)
2490
    hemisphere : str
2491
        `nh` or `sh`
2492
    glacier_type : str
2493
        The RGI glacier type ('Glacier', 'Ice cap', 'Perennial snowfield',
2494
        'Seasonal snowfield')
2495
    terminus_type : str
2496
        The RGI terminus type ('Land-terminating', 'Marine-terminating',
2497
        'Lake-terminating', 'Dry calving', 'Regenerated', 'Shelf-terminating')
2498
    is_tidewater : bool
2499
        Is the glacier a calving glacier?
2500
    is_lake_terminating : bool
2501
        Is the glacier a lake terminating glacier?
2502
    is_nominal : bool
2503
        Is the glacier an RGI nominal glacier?
2504
    is_icecap : bool
2505
        Is the glacier an ice cap?
2506
    extent_ll : list
2507
        Extent of the glacier in lon/lat
2508
    logfile : str
2509
        Path to the log file (txt)
2510
    inversion_calving_rate : float
2511
        Calving rate used for the inversion
2512
    grid
2513
    dem_info
2514
    dem_daterange
2515
    intersects_ids
2516
    rgi_area_m2
2517
    rgi_area_km2
2518
    """
2519

2520
    def __init__(self, rgi_entity, base_dir=None, reset=False,
9✔
2521
                 from_tar=False, delete_tar=False):
2522
        """Creates a new directory or opens an existing one.
2523

2524
        Parameters
2525
        ----------
2526
        rgi_entity : a ``geopandas.GeoSeries`` or str
2527
            glacier entity read from the shapefile (or a valid RGI ID if the
2528
            directory exists)
2529
        base_dir : str
2530
            path to the directory where to open the directory.
2531
            Defaults to `cfg.PATHS['working_dir'] + /per_glacier/`
2532
        reset : bool, default=False
2533
            empties the directory at construction (careful!)
2534
        from_tar : str or bool, default=False
2535
            path to a tar file to extract the gdir data from. If set to `True`,
2536
            will check for a tar file at the expected location in `base_dir`.
2537
        delete_tar : bool, default=False
2538
            delete the original tar file after extraction.
2539
        """
2540

2541
        if base_dir is None:
7✔
2542
            if not cfg.PATHS.get('working_dir', None):
7✔
2543
                raise ValueError("Need a valid PATHS['working_dir']!")
×
2544
            base_dir = os.path.join(cfg.PATHS['working_dir'], 'per_glacier')
7✔
2545

2546
        # RGI IDs are also valid entries
2547
        if isinstance(rgi_entity, str):
7✔
2548
            # Get the meta from the shape file directly
2549
            if from_tar:
3✔
2550
                _dir = os.path.join(base_dir, rgi_entity[:-6], rgi_entity[:-3],
2✔
2551
                                    rgi_entity)
2552
                # Avoid bad surprises
2553
                if os.path.exists(_dir):
2✔
2554
                    shutil.rmtree(_dir)
2✔
2555
                if from_tar is True:
2✔
2556
                    from_tar = _dir + '.tar.gz'
×
2557
                robust_tar_extract(from_tar, _dir, delete_tar=delete_tar)
2✔
2558
                from_tar = False  # to not re-unpack later below
2✔
2559
                _shp = os.path.join(_dir, 'outlines.shp')
2✔
2560
            else:
2561
                _shp = os.path.join(base_dir, rgi_entity[:-6], rgi_entity[:-3],
3✔
2562
                                    rgi_entity, 'outlines.shp')
2563
            rgi_entity = self._read_shapefile_from_path(_shp)
3✔
2564
            crs = salem.check_crs(rgi_entity.crs)
3✔
2565
            rgi_entity = rgi_entity.iloc[0]
3✔
2566
            g = rgi_entity['geometry']
3✔
2567
            xx, yy = salem.transform_proj(crs, salem.wgs84,
3✔
2568
                                          [g.bounds[0], g.bounds[2]],
2569
                                          [g.bounds[1], g.bounds[3]])
2570
            write_shp = False
3✔
2571
        else:
2572
            g = rgi_entity['geometry']
7✔
2573
            xx, yy = ([g.bounds[0], g.bounds[2]],
7✔
2574
                      [g.bounds[1], g.bounds[3]])
2575
            write_shp = True
7✔
2576

2577
        # Extent of the glacier in lon/lat
2578
        self.extent_ll = [xx, yy]
7✔
2579

2580
        try:
7✔
2581
            # RGI V4?
2582
            rgi_entity.RGIID
7✔
2583
            raise ValueError('RGI Version 4 is not supported anymore')
×
2584
        except AttributeError:
7✔
2585
            pass
7✔
2586

2587
        try:
7✔
2588
            self.rgi_id = rgi_entity.rgi_id
7✔
2589
            self.glims_id = rgi_entity.glims_id
2✔
2590
        except AttributeError:
5✔
2591
            # RGI V6
2592
            self.rgi_id = rgi_entity.RGIId
5✔
2593
            self.glims_id = rgi_entity.GLIMSId
5✔
2594

2595
        # Do we want to use the RGI center point or ours?
2596
        if cfg.PARAMS['use_rgi_area']:
7✔
2597
            try:
7✔
2598
                self.cenlon = float(rgi_entity.cenlon)
7✔
2599
                self.cenlat = float(rgi_entity.cenlat)
2✔
2600
            except AttributeError:
5✔
2601
                # RGI V6
2602
                self.cenlon = float(rgi_entity.CenLon)
5✔
2603
                self.cenlat = float(rgi_entity.CenLat)
5✔
2604
        else:
2605
            cenlon, cenlat = rgi_entity.geometry.representative_point().xy
1✔
2606
            self.cenlon = float(cenlon[0])
1✔
2607
            self.cenlat = float(cenlat[0])
1✔
2608

2609
        try:
7✔
2610
            self.rgi_region = rgi_entity.o1region
7✔
2611
            self.rgi_subregion = rgi_entity.o2region
2✔
2612
        except AttributeError:
5✔
2613
            # RGI V6
2614
            self.rgi_region = '{:02d}'.format(int(rgi_entity.O1Region))
5✔
2615
            self.rgi_subregion = (self.rgi_region + '-' +
5✔
2616
                                  '{:02d}'.format(int(rgi_entity.O2Region)))
2617

2618
        try:
7✔
2619
            name = str(rgi_entity.name)
7✔
2620
            rgi_datestr = rgi_entity.src_date
7✔
2621
        except AttributeError:
5✔
2622
            # RGI V6
2623
            name = rgi_entity.Name
5✔
2624
            rgi_datestr = rgi_entity.BgnDate
5✔
2625

2626

2627
        try:
7✔
2628
            gtype = rgi_entity.GlacType
7✔
2629
        except AttributeError:
7✔
2630
            try:
7✔
2631
                # RGI V6
2632
                gtype = [str(rgi_entity.Form), str(rgi_entity.TermType)]
7✔
2633
            except AttributeError:
2✔
2634
                # temporary default for RGI V7:
2635
                gtype = ['0', '0']
2✔
2636

2637
        try:
7✔
2638
            gstatus = rgi_entity.RGIFlag[0]
7✔
2639
        except AttributeError:
7✔
2640
            try:
7✔
2641
                # RGI V6
2642
                gstatus = rgi_entity.Status
7✔
2643
            except AttributeError:
2✔
2644
                # temporary default for RGI V7:
2645
                gstatus = '0'
2✔
2646

2647
        # rgi version can be useful
2648
        # RGI2000-v7.0-G-06-00029
2649
        # RGI60-07.00245
2650
        if self.rgi_id.count('-') == 4:
7✔
2651
            self.rgi_version = '70'
2✔
2652
        else:
2653
            rgi_version = self.rgi_id.split('-')[0][-2:]
5✔
2654
            if rgi_version not in ['50', '60', '61']:
5✔
2655
                raise RuntimeError('RGI Version not supported: '
×
2656
                                   '{}'.format(self.rgi_version))
2657
            else:
2658
                self.rgi_version = rgi_version
5✔
2659
        # remove spurious characters and trailing blanks
2660
        self.name = filter_rgi_name(name)
7✔
2661

2662
        # region
2663
        reg_names, subreg_names = parse_rgi_meta(version=self.rgi_version[0])
7✔
2664
        reg_name = reg_names.loc[int(self.rgi_region)]
7✔
2665
        # RGI V6
2666
        if not isinstance(reg_name, str):
7✔
2667
            reg_name = reg_name.values[0]
5✔
2668
        self.rgi_region_name = self.rgi_region + ': ' + reg_name
7✔
2669
        try:
7✔
2670
            subreg_name = subreg_names.loc[self.rgi_subregion]
7✔
2671
            # RGI V6
2672
            if not isinstance(subreg_name, str):
7✔
2673
                subreg_name = subreg_name.values[0]
5✔
2674
            self.rgi_subregion_name = self.rgi_subregion + ': ' + subreg_name
7✔
2675
        except KeyError:
×
2676
            self.rgi_subregion_name = self.rgi_subregion + ': NoName'
×
2677

2678
        # Read glacier attrs
2679
        gtkeys = {'0': 'Glacier',
7✔
2680
                  '1': 'Ice cap',
2681
                  '2': 'Perennial snowfield',
2682
                  '3': 'Seasonal snowfield',
2683
                  '9': 'Not assigned',
2684
                  }
2685
        ttkeys = {'0': 'Land-terminating',
7✔
2686
                  '1': 'Marine-terminating',
2687
                  '2': 'Lake-terminating',
2688
                  '3': 'Dry calving',
2689
                  '4': 'Regenerated',
2690
                  '5': 'Shelf-terminating',
2691
                  '9': 'Not assigned',
2692
                  }
2693
        stkeys = {'0': 'Glacier or ice cap',
7✔
2694
                  '1': 'Glacier complex',
2695
                  '2': 'Nominal glacier',
2696
                  '9': 'Not assigned',
2697
                  }
2698
        self.glacier_type = gtkeys[gtype[0]]
7✔
2699
        self.terminus_type = ttkeys[gtype[1]]
7✔
2700
        self.status = stkeys['{}'.format(gstatus)]
7✔
2701

2702
        # Decide what is a tidewater glacier
2703
        user = cfg.PARAMS['tidewater_type']
7✔
2704
        if user == 1:
7✔
2705
            sel = ['Marine-terminating']
×
2706
        elif user == 2:
7✔
2707
            sel = ['Marine-terminating', 'Shelf-terminating']
7✔
2708
        elif user == 3:
×
2709
            sel = ['Marine-terminating', 'Lake-terminating']
×
2710
        elif user == 4:
×
2711
            sel = ['Marine-terminating', 'Lake-terminating', 'Shelf-terminating']
×
2712
        else:
2713
            raise InvalidParamsError("PARAMS['tidewater_type'] not understood")
×
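        # Illustrative note (not from the original source): with
        # cfg.PARAMS['tidewater_type'] == 2, for example, both marine- and
        # shelf-terminating glaciers are flagged as tidewater below.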
2714
        self.is_tidewater = self.terminus_type in sel
7✔
2715
        self.is_lake_terminating = self.terminus_type == 'Lake-terminating'
7✔
2716
        self.is_marine_terminating = self.terminus_type == 'Marine-terminating'
7✔
2717
        self.is_shelf_terminating = self.terminus_type == 'Shelf-terminating'
7✔
2718
        self.is_nominal = self.status == 'Nominal glacier'
7✔
2719
        self.inversion_calving_rate = 0.
7✔
2720
        self.is_icecap = self.glacier_type == 'Ice cap'
7✔
2721

2722
        # Hemisphere
2723
        if self.cenlat < 0 or self.rgi_region == '16':
7✔
2724
            self.hemisphere = 'sh'
×
2725
        else:
2726
            self.hemisphere = 'nh'
7✔
2727

2728
        # convert the date
2729
        rgi_date = int(rgi_datestr[0:4])
7✔
2730
        if rgi_date < 0:
7✔
2731
            rgi_date = RGI_DATE[self.rgi_region]
1✔
2732
        self.rgi_date = rgi_date
7✔
2733
        # Root directory
2734
        self.base_dir = os.path.normpath(base_dir)
7✔
2735
        self.dir = os.path.join(self.base_dir, self.rgi_id[:-6],
7✔
2736
                                self.rgi_id[:-3], self.rgi_id)
2737

2738
        # Do we have to extract the files first?
2739
        if (reset or from_tar) and os.path.exists(self.dir):
7✔
2740
            shutil.rmtree(self.dir)
4✔
2741

2742
        if from_tar:
7✔
2743
            if from_tar is True:
1✔
2744
                from_tar = self.dir + '.tar.gz'
1✔
2745
            robust_tar_extract(from_tar, self.dir, delete_tar=delete_tar)
1✔
2746
            write_shp = False
1✔
2747
        else:
2748
            mkdir(self.dir)
7✔
2749

2750
        if not os.path.isdir(self.dir):
7✔
2751
            raise RuntimeError('GlacierDirectory %s does not exist!' % self.dir)
×
2752

2753
        # logging file
2754
        self.logfile = os.path.join(self.dir, 'log.txt')
7✔
2755

2756
        if write_shp:
7✔
2757
            # Write shapefile
2758
            self._reproject_and_write_shapefile(rgi_entity)
7✔
2759

2760
        # Optimization
2761
        self._mbdf = None
7✔
2762
        self._mbprofdf = None
7✔
2763
        self._mbprofdf_cte_dh = None
7✔
2764

2765
    def __repr__(self):
2766

2767
        summary = ['<oggm.GlacierDirectory>']
2768
        summary += ['  RGI id: ' + self.rgi_id]
2769
        summary += ['  Region: ' + self.rgi_region_name]
2770
        summary += ['  Subregion: ' + self.rgi_subregion_name]
2771
        if self.name:
2772
            summary += ['  Name: ' + self.name]
2773
        summary += ['  Glacier type: ' + str(self.glacier_type)]
2774
        summary += ['  Terminus type: ' + str(self.terminus_type)]
2775
        summary += ['  Status: ' + str(self.status)]
2776
        summary += ['  Area: ' + str(self.rgi_area_km2) + ' km2']
2777
        summary += ['  Lon, Lat: (' + str(self.cenlon) + ', ' +
2778
                    str(self.cenlat) + ')']
2779
        if os.path.isfile(self.get_filepath('glacier_grid')):
2780
            summary += ['  Grid (nx, ny): (' + str(self.grid.nx) + ', ' +
2781
                        str(self.grid.ny) + ')']
2782
            summary += ['  Grid (dx, dy): (' + str(self.grid.dx) + ', ' +
2783
                        str(self.grid.dy) + ')']
2784
        return '\n'.join(summary) + '\n'
2785

2786
    def _reproject_and_write_shapefile(self, entity):
9✔
2787

2788
        # Make a local glacier map
2789
        if cfg.PARAMS['map_proj'] == 'utm':
7✔
2790
            from pyproj.aoi import AreaOfInterest
×
2791
            from pyproj.database import query_utm_crs_info
×
2792
            utm_crs_list = query_utm_crs_info(
×
2793
                datum_name="WGS 84",
2794
                area_of_interest=AreaOfInterest(
2795
                    west_lon_degree=self.cenlon,
2796
                    south_lat_degree=self.cenlat,
2797
                    east_lon_degree=self.cenlon,
2798
                    north_lat_degree=self.cenlat,
2799
                ),
2800
            )
2801
            proj4_str = utm_crs_list[0].code
×
2802
        elif cfg.PARAMS['map_proj'] == 'tmerc':
7✔
2803
            params = dict(name='tmerc', lat_0=0., lon_0=self.cenlon,
7✔
2804
                          k=0.9996, x_0=0, y_0=0, datum='WGS84')
2805
            proj4_str = ("+proj={name} +lat_0={lat_0} +lon_0={lon_0} +k={k} " 
7✔
2806
                         "+x_0={x_0} +y_0={y_0} +datum={datum}".format(**params))
2807
        else:
2808
            raise InvalidParamsError("cfg.PARAMS['map_proj'] must be one of "
×
2809
                                     "'tmerc', 'utm'.")
2810
        # Reproject
2811
        proj_in = pyproj.Proj("epsg:4326", preserve_units=True)
7✔
2812
        proj_out = pyproj.Proj(proj4_str, preserve_units=True)
7✔
2813

2814
        # transform geometry to map
2815
        project = partial(transform_proj, proj_in, proj_out)
7✔
2816
        geometry = shp_trafo(project, entity['geometry'])
7✔
2817
        geometry = multipolygon_to_polygon(geometry, gdir=self)
7✔
2818

2819
        # Save transformed geometry to disk
2820
        entity = entity.copy()
7✔
2821
        entity['geometry'] = geometry
7✔
2822

2823
        # Do we want to use the RGI area or ours?
2824
        if not cfg.PARAMS['use_rgi_area']:
7✔
2825
            # Update Area
2826
            area = geometry.area * 1e-6
1✔
2827
            entity['Area'] = area
1✔
2828

2829
        # Avoid fiona bug: https://github.com/Toblerity/Fiona/issues/365
2830
        for k, s in entity.items():
7✔
2831
            if type(s) in [np.int32, np.int64]:
7✔
2832
                entity[k] = int(s)
5✔
2833
        towrite = gpd.GeoDataFrame(entity).T.set_geometry('geometry')
7✔
2834
        towrite.crs = proj4_str
7✔
2835

2836
        # Write shapefile
2837
        self.write_shapefile(towrite, 'outlines')
7✔
2838

2839
        # Also transform the intersects if necessary
2840
        gdf = cfg.PARAMS['intersects_gdf']
7✔
2841
        if len(gdf) > 0:
7✔
2842
            gdf = gdf.loc[((gdf.RGIId_1 == self.rgi_id) |
4✔
2843
                           (gdf.RGIId_2 == self.rgi_id))]
2844
            if len(gdf) > 0:
4✔
2845
                gdf = salem.transform_geopandas(gdf, to_crs=proj_out)
4✔
2846
                if hasattr(gdf.crs, 'srs'):
4✔
2847
                    # salem uses pyproj
2848
                    gdf.crs = gdf.crs.srs
4✔
2849
                self.write_shapefile(gdf, 'intersects')
4✔
2850
        else:
2851
            # Sanity check
2852
            if cfg.PARAMS['use_intersects']:
6✔
2853
                raise InvalidParamsError(
×
2854
                    'You seem to have forgotten to set the '
2855
                    'intersects file for this run. OGGM '
2856
                    'works better with such a file. If you '
2857
                    'know what you are doing, set '
2858
                    "cfg.PARAMS['use_intersects'] = False to "
2859
                    "suppress this error.")
2860

2861
    def grid_from_params(self):
9✔
2862
        """If the glacier_grid.json file is lost, reconstruct it."""
2863
        from oggm.core.gis import glacier_grid_params
1✔
2864
        utm_proj, nx, ny, ulx, uly, dx = glacier_grid_params(self)
1✔
2865
        x0y0 = (ulx+dx/2, uly-dx/2)  # To pixel center coordinates
1✔
2866
        return salem.Grid(proj=utm_proj, nxny=(nx, ny), dxdy=(dx, -dx),
1✔
2867
                          x0y0=x0y0)
2868

2869
    @lazy_property
9✔
2870
    def grid(self):
9✔
2871
        """A ``salem.Grid`` handling the georeferencing of the local grid"""
2872
        try:
7✔
2873
            return salem.Grid.from_json(self.get_filepath('glacier_grid'))
7✔
2874
        except FileNotFoundError:
×
2875
            raise InvalidWorkflowError('This glacier directory seems to '
×
2876
                                       'have lost its glacier_grid.json file. '
2877
                                       'Use .grid_from_params(), but make sure '
2878
                                       'that the PARAMS are the ones you '
2879
                                       'want.')
2880

2881
    @lazy_property
9✔
2882
    def rgi_area_km2(self):
9✔
2883
        """The glacier's RGI area (km2)."""
2884
        try:
7✔
2885
            _area = self.read_shapefile('outlines')['Area']
7✔
2886
        except OSError:
3✔
2887
            raise RuntimeError('No outlines available')
×
2888
        except KeyError:
3✔
2889
            # RGI V7
2890
            _area = self.read_shapefile('outlines')['area_km2']
2✔
2891
        return np.round(float(_area.iloc[0]), decimals=3)
7✔
2892

2893
    @lazy_property
9✔
2894
    def intersects_ids(self):
9✔
2895
        """The glacier's intersects RGI ids."""
2896
        try:
1✔
2897
            gdf = self.read_shapefile('intersects')
1✔
2898
            ids = np.append(gdf['RGIId_1'], gdf['RGIId_2'])
1✔
2899
            ids = list(np.unique(np.sort(ids)))
1✔
2900
            ids.remove(self.rgi_id)
1✔
2901
            return ids
1✔
2902
        except OSError:
×
2903
            return []
×
2904

2905
    @lazy_property
9✔
2906
    def dem_daterange(self):
9✔
2907
        """Years in which most of the DEM data was acquired"""
2908
        source_txt = self.get_filepath('dem_source')
1✔
2909
        if os.path.isfile(source_txt):
1✔
2910
            with open(source_txt, 'r') as f:
1✔
2911
                for line in f.readlines():
1✔
2912
                    if 'Date range:' in line:
1✔
2913
                        return tuple(map(int, line.split(':')[1].split('-')))
1✔
2914
        # we did not find the information in the dem_source file
2915
        log.warning('No DEM date range specified in `dem_source.txt`')
1✔
2916
        return None
1✔
2917

2918
    @lazy_property
9✔
2919
    def dem_info(self):
9✔
2920
        """More detailed information on the acquisition of the DEM data"""
2921
        source_file = self.get_filepath('dem_source')
1✔
2922
        source_text = ''
1✔
2923
        if os.path.isfile(source_file):
1✔
2924
            with open(source_file, 'r') as f:
1✔
2925
                for line in f.readlines():
1✔
2926
                    source_text += line
1✔
2927
        else:
2928
            log.warning('No DEM source file found.')
×
2929
        return source_text
1✔
2930

2931
    @property
9✔
2932
    def rgi_area_m2(self):
9✔
2933
        """The glacier's RGI area (m2)."""
2934
        return self.rgi_area_km2 * 10**6
7✔
2935

2936
    def get_filepath(self, filename, delete=False, filesuffix='',
9✔
2937
                     _deprecation_check=True):
2938
        """Absolute path to a specific file.
2939

2940
        Parameters
2941
        ----------
2942
        filename : str
2943
            file name (must be listed in cfg.BASENAME)
2944
        delete : bool
2945
            delete the file if exists
2946
        filesuffix : str
2947
            append a suffix to the filename (useful for model runs). Note
2948
            that the BASENAME remains the same.
2949

2950
        Returns
2951
        -------
2952
        The absolute path to the desired file
2953
        """
2954

2955
        if filename not in cfg.BASENAMES:
7✔
2956
            raise ValueError(filename + ' not in cfg.BASENAMES.')
×
2957

2958
        fname = cfg.BASENAMES[filename]
7✔
2959
        if filesuffix:
7✔
2960
            fname = fname.split('.')
5✔
2961
            assert len(fname) == 2
5✔
2962
            fname = fname[0] + filesuffix + '.' + fname[1]
5✔
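            # Illustrative example (suffix is hypothetical): with
            # filesuffix='_spinup', 'model_diagnostics.nc' becomes
            # 'model_diagnostics_spinup.nc'; the BASENAME itself is unchanged.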
2963

2964
        out = os.path.join(self.dir, fname)
7✔
2965
        if delete and os.path.isfile(out):
7✔
2966
            os.remove(out)
2✔
2967
        return out
7✔
2968

2969
    def has_file(self, filename, filesuffix='', _deprecation_check=True):
9✔
2970
        """Checks if a file exists.
2971

2972
        Parameters
2973
        ----------
2974
        filename : str
2975
            file name (must be listed in cfg.BASENAME)
2976
        filesuffix : str
2977
            append a suffix to the filename (useful for model runs). Note
2978
            that the BASENAME remains the same.
2979
        """
2980
        fp = self.get_filepath(filename, filesuffix=filesuffix,
5✔
2981
                               _deprecation_check=_deprecation_check)
2982
        if '.shp' in fp and cfg.PARAMS['use_tar_shapefiles']:
5✔
2983
            fp = fp.replace('.shp', '.tar')
5✔
2984
            if cfg.PARAMS['use_compression']:
5✔
2985
                fp += '.gz'
5✔
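        # e.g. (illustrative): '<gdir>/outlines.shp' is checked on disk as
        # '<gdir>/outlines.tar.gz' when use_tar_shapefiles and use_compression
        # are both enabled.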
2986
        return os.path.exists(fp)
5✔
2987

2988
    def add_to_diagnostics(self, key, value):
9✔
2989
        """Write a key, value pair to the gdir's runtime diagnostics.
2990

2991
        Parameters
2992
        ----------
2993
        key : str
2994
            dict entry key
2995
        value : str or number
2996
            dict entry value
2997
        """
2998

2999
        d = self.get_diagnostics()
5✔
3000
        d[key] = value
5✔
3001
        with open(self.get_filepath('diagnostics'), 'w') as f:
5✔
3002
            json.dump(d, f)
5✔
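        # Minimal usage sketch (key and value are illustrative, not part of the API):
        #     gdir.add_to_diagnostics('dem_source', 'SRTM')
        #     gdir.get_diagnostics()  # -> {'dem_source': 'SRTM', ...}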
3003

3004
    def get_diagnostics(self):
9✔
3005
        """Read the gdir's runtime diagnostics.
3006

3007
        Returns
3008
        -------
3009
        the diagnostics dict
3010
        """
3011
        # If not there, create an empty one
3012
        if not self.has_file('diagnostics'):
5✔
3013
            with open(self.get_filepath('diagnostics'), 'w') as f:
5✔
3014
                json.dump(dict(), f)
5✔
3015

3016
        # Read and return
3017
        with open(self.get_filepath('diagnostics'), 'r') as f:
5✔
3018
            out = json.load(f)
5✔
3019
        return out
5✔
3020

3021
    def read_pickle(self, filename, use_compression=None, filesuffix=''):
9✔
3022
        """Reads a pickle located in the directory.
3023

3024
        Parameters
3025
        ----------
3026
        filename : str
3027
            file name (must be listed in cfg.BASENAME)
3028
        use_compression : bool
3029
            whether or not the file was compressed. Default is to use
3030
            cfg.PARAMS['use_compression'] for this (recommended)
3031
        filesuffix : str
3032
            append a suffix to the filename (useful for experiments).
3033

3034
        Returns
3035
        -------
3036
        An object read from the pickle
3037
        """
3038

3039
        use_comp = (use_compression if use_compression is not None
7✔
3040
                    else cfg.PARAMS['use_compression'])
3041
        _open = gzip.open if use_comp else open
7✔
3042
        fp = self.get_filepath(filename, filesuffix=filesuffix)
7✔
3043
        with _open(fp, 'rb') as f:
7✔
3044
            try:
7✔
3045
                out = pickle.load(f)
7✔
3046
            except ModuleNotFoundError as err:
×
3047
                if err.name == "shapely.io":
×
3048
                    err.msg = "You need shapely version 2.0 or higher for this to work."
×
3049
                raise
×
3050
                
3051
        # Some new attrs to add to old pre-processed directories
3052
        if filename == 'model_flowlines':
7✔
3053
            if getattr(out[0], 'map_trafo', None) is None:
7✔
3054
                try:
1✔
3055
                    # This may fail for very old gdirs
3056
                    grid = self.grid
1✔
3057
                except InvalidWorkflowError:
×
3058
                    return out
×
3059

3060
                # Add the trafo
3061
                trafo = partial(grid.ij_to_crs, crs=salem.wgs84)
1✔
3062
                for fl in out:
1✔
3063
                    fl.map_trafo = trafo
1✔
3064

3065
        return out
7✔
3066

3067
    def write_pickle(self, var, filename, use_compression=None, filesuffix=''):
9✔
3068
        """ Writes a variable to a pickle on disk.
3069

3070
        Parameters
3071
        ----------
3072
        var : object
3073
            the variable to write to disk
3074
        filename : str
3075
            file name (must be listed in cfg.BASENAME)
3076
        use_compression : bool
3077
            whether or not the file was compressed. Default is to use
3078
            cfg.PARAMS['use_compression'] for this (recommended)
3079
        filesuffix : str
3080
            append a suffix to the filename (useful for experiments).
3081
        """
3082
        use_comp = (use_compression if use_compression is not None
7✔
3083
                    else cfg.PARAMS['use_compression'])
3084
        _open = gzip.open if use_comp else open
7✔
3085
        fp = self.get_filepath(filename, filesuffix=filesuffix)
7✔
3086
        with _open(fp, 'wb') as f:
7✔
3087
            pickle.dump(var, f, protocol=4)
7✔
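        # Round-trip sketch (assuming 'model_flowlines' is a valid basename):
        #     gdir.write_pickle(fls, 'model_flowlines')
        #     fls = gdir.read_pickle('model_flowlines')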
3088

3089
    def read_json(self, filename, filesuffix='', allow_empty=False):
9✔
3090
        """Reads a JSON file located in the directory.
3091

3092
        Parameters
3093
        ----------
3094
        filename : str
3095
            file name (must be listed in cfg.BASENAME)
3096
        filesuffix : str
3097
            append a suffix to the filename (useful for experiments).
3098
        allow_empty : bool
3099
            if True, does not raise an error if the file is not there.
3100

3101
        Returns
3102
        -------
3103
        A dictionary read from the JSON file
3104
        """
3105

3106
        fp = self.get_filepath(filename, filesuffix=filesuffix)
5✔
3107
        if allow_empty:
5✔
3108
            try:
5✔
3109
                with open(fp, 'r') as f:
5✔
3110
                    out = json.load(f)
2✔
3111
            except FileNotFoundError:
5✔
3112
                out = {}
5✔
3113
        else:
3114
            with open(fp, 'r') as f:
5✔
3115
                out = json.load(f)
5✔
3116
        return out
5✔
3117

3118
    def write_json(self, var, filename, filesuffix=''):
9✔
3119
        """ Writes a variable to a pickle on disk.
3120

3121
        Parameters
3122
        ----------
3123
        var : object
3124
            the variable to write to JSON (must be a dictionary)
3125
        filename : str
3126
            file name (must be listed in cfg.BASENAME)
3127
        filesuffix : str
3128
            append a suffix to the filename (useful for experiments).
3129
        """
3130

3131
        def np_convert(o):
5✔
3132
            if isinstance(o, np.int64):
×
3133
                return int(o)
×
3134
            raise TypeError
×
3135

3136
        fp = self.get_filepath(filename, filesuffix=filesuffix)
5✔
3137
        with open(fp, 'w') as f:
5✔
3138
            json.dump(var, f, default=np_convert)
5✔
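        # Usage sketch (dict content and file choice are illustrative):
        #     gdir.write_json({'source': 'WGMS'}, 'mb_calib')
        #     gdir.read_json('mb_calib')  # -> {'source': 'WGMS'}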
3139

3140
    def get_climate_info(self, input_filesuffix=''):
9✔
3141
        """Convenience function to read attributes of the historical climate.
3142

3143
        Parameters
3144
        ----------
3145
        input_filesuffix : str
3146
            input_filesuffix of the climate_historical that should be used.
3147
        """
3148
        out = {}
5✔
3149
        try:
5✔
3150
            f = self.get_filepath('climate_historical',
5✔
3151
                                  filesuffix=input_filesuffix)
3152
            with ncDataset(f) as nc:
5✔
3153
                out['baseline_climate_source'] = nc.climate_source
5✔
3154
                try:
5✔
3155
                    out['baseline_yr_0'] = nc.yr_0
5✔
3156
                except AttributeError:
1✔
3157
                    # needed for back-compatibility before v1.6
3158
                    out['baseline_yr_0'] = nc.hydro_yr_0
1✔
3159
                try:
5✔
3160
                    out['baseline_yr_1'] = nc.yr_1
5✔
3161
                except AttributeError:
1✔
3162
                    # needed for back-compatibility before v1.6
3163
                    out['baseline_yr_1'] = nc.hydro_yr_1
1✔
3164
                out['baseline_climate_ref_hgt'] = nc.ref_hgt
5✔
3165
                out['baseline_climate_ref_pix_lon'] = nc.ref_pix_lon
5✔
3166
                out['baseline_climate_ref_pix_lat'] = nc.ref_pix_lat
5✔
3167
        except FileNotFoundError:
2✔
3168
            pass
2✔
3169

3170
        return out
5✔
3171

3172
    def read_text(self, filename, filesuffix=''):
9✔
3173
        """Reads a text file located in the directory.
3174

3175
        Parameters
3176
        ----------
3177
        filename : str
3178
            file name (must be listed in cfg.BASENAME)
3179
        filesuffix : str
3180
            append a suffix to the filename (useful for experiments).
3181

3182
        Returns
3183
        -------
3184
        the text
3185
        """
3186

3187
        fp = self.get_filepath(filename, filesuffix=filesuffix)
×
3188
        with open(fp, 'r') as f:
×
3189
            out = f.read()
×
3190
        return out
×
3191

3192
    @classmethod
9✔
3193
    def _read_shapefile_from_path(cls, fp):
9✔
3194
        if '.shp' not in fp:
7✔
3195
            raise ValueError('File ending not that of a shapefile')
×
3196

3197
        if cfg.PARAMS['use_tar_shapefiles']:
7✔
3198
            fp = 'tar://' + fp.replace('.shp', '.tar')
7✔
3199
            if cfg.PARAMS['use_compression']:
7✔
3200
                fp += '.gz'
7✔
3201

3202
        shp = gpd.read_file(fp)
7✔
3203

3204
        # .properties file is created for compressed shapefiles. github: #904
3205
        _properties = fp.replace('tar://', '') + '.properties'
7✔
3206
        if os.path.isfile(_properties):
7✔
3207
            # remove it, to keep GDir slim
3208
            os.remove(_properties)
7✔
3209

3210
        return shp
7✔
3211

3212
    def read_shapefile(self, filename, filesuffix=''):
9✔
3213
        """Reads a shapefile located in the directory.
3214

3215
        Parameters
3216
        ----------
3217
        filename : str
3218
            file name (must be listed in cfg.BASENAME)
3219
        filesuffix : str
3220
            append a suffix to the filename (useful for experiments).
3221

3222
        Returns
3223
        -------
3224
        A geopandas.DataFrame
3225
        """
3226
        fp = self.get_filepath(filename, filesuffix=filesuffix)
7✔
3227
        return self._read_shapefile_from_path(fp)
7✔
3228

3229
    def write_shapefile(self, var, filename, filesuffix=''):
9✔
3230
        """ Writes a variable to a shapefile on disk.
3231

3232
        Parameters
3233
        ----------
3234
        var : object
3235
            the variable to write to shapefile (must be a geopandas.DataFrame)
3236
        filename : str
3237
            file name (must be listed in cfg.BASENAME)
3238
        filesuffix : str
3239
            append a suffix to the filename (useful for experiments).
3240
        """
3241
        fp = self.get_filepath(filename, filesuffix=filesuffix)
7✔
3242
        _write_shape_to_disk(var, fp, to_tar=cfg.PARAMS['use_tar_shapefiles'])
7✔
3243

3244
    def write_monthly_climate_file(self, time, prcp, temp,
9✔
3245
                                   ref_pix_hgt, ref_pix_lon, ref_pix_lat, *,
3246
                                   temp_std=None,
3247
                                   time_unit=None,
3248
                                   calendar=None,
3249
                                   source=None,
3250
                                   file_name='climate_historical',
3251
                                   filesuffix=''):
3252
        """Creates a netCDF4 file with climate data timeseries.
3253

3254
        Parameters
3255
        ----------
3256
        time : ndarray
3257
            the time array, in a format understood by netCDF4
3258
        prcp : ndarray
3259
            the precipitation array (unit: 'kg m-2 month-1')
3260
        temp : ndarray
3261
            the temperature array (unit: 'degC')
3262
        ref_pix_hgt : float
3263
            the elevation of the dataset's reference altitude
3264
            (for correction). In practice, it is the same altitude as the
3265
            baseline climate.
3266
        ref_pix_lon : float
3267
            the location of the gridded data's grid point
3268
        ref_pix_lat : float
3269
            the location of the gridded data's grid point
3270
        temp_std : ndarray, optional
3271
            the daily standard deviation of temperature (useful for PyGEM)
3272
        time_unit : str
3273
            the reference time unit for your time array. This should be chosen
3274
            depending on the length of your data. The default is to choose
3275
            it ourselves based on the starting year.
3276
        calendar : str
3277
            If you use an exotic calendar (e.g. 'noleap')
3278
        source : str
3279
            the climate data source (required)
3280
        file_name : str
3281
            How to name the file
3282
        filesuffix : str
3283
            Apply a suffix to the file
3284
        """
3285

3286
        # overwrite as default
3287
        fpath = self.get_filepath(file_name, filesuffix=filesuffix)
5✔
3288
        if os.path.exists(fpath):
5✔
3289
            os.remove(fpath)
2✔
3290

3291
        if source is None:
5✔
3292
            raise InvalidParamsError('`source` kwarg is required')
×
3293

3294
        zlib = cfg.PARAMS['compress_climate_netcdf']
5✔
3295

3296
        try:
5✔
3297
            y0 = time[0].year
5✔
3298
            y1 = time[-1].year
5✔
3299
        except AttributeError:
2✔
3300
            time = pd.DatetimeIndex(time)
2✔
3301
            y0 = time[0].year
2✔
3302
            y1 = time[-1].year
2✔
3303

3304
        if time_unit is None:
5✔
3305
            # http://pandas.pydata.org/pandas-docs/stable/timeseries.html
3306
            # #timestamp-limitations
3307
            if y0 > 1800:
5✔
3308
                time_unit = 'days since 1801-01-01 00:00:00'
5✔
3309
            elif y0 >= 0:
1✔
3310
                time_unit = ('days since {:04d}-01-01 '
1✔
3311
                             '00:00:00'.format(time[0].year))
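                # e.g. a series starting in the year 1750 would get the unit
                # 'days since 1750-01-01 00:00:00' (year chosen for illustration).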
3312
            else:
3313
                raise InvalidParamsError('Time format not supported')
×
3314

3315
        with ncDataset(fpath, 'w', format='NETCDF4') as nc:
5✔
3316
            nc.ref_hgt = ref_pix_hgt
5✔
3317
            nc.ref_pix_lon = ref_pix_lon
5✔
3318
            nc.ref_pix_lat = ref_pix_lat
5✔
3319
            nc.ref_pix_dis = haversine(self.cenlon, self.cenlat,
5✔
3320
                                       ref_pix_lon, ref_pix_lat)
3321
            nc.climate_source = source
5✔
3322

3323
            nc.yr_0 = y0
5✔
3324
            nc.yr_1 = y1
5✔
3325

3326
            nc.createDimension('time', None)
5✔
3327

3328
            nc.author = 'OGGM'
5✔
3329
            nc.author_info = 'Open Global Glacier Model'
5✔
3330

3331
            timev = nc.createVariable('time', 'i4', ('time',))
5✔
3332

3333
            tatts = {'units': time_unit}
5✔
3334
            if calendar is None:
5✔
3335
                calendar = 'standard'
5✔
3336

3337
            tatts['calendar'] = calendar
5✔
3338
            try:
5✔
3339
                numdate = netCDF4.date2num([t for t in time], time_unit,
5✔
3340
                                           calendar=calendar)
3341
            except TypeError:
×
3342
                # numpy's broken datetime only works for us precision
3343
                time = time.astype('M8[us]').astype(datetime.datetime)
×
3344
                numdate = netCDF4.date2num(time, time_unit, calendar=calendar)
×
3345

3346
            timev.setncatts(tatts)
5✔
3347
            timev[:] = numdate
5✔
3348

3349
            v = nc.createVariable('prcp', 'f4', ('time',), zlib=zlib)
5✔
3350
            v.units = 'kg m-2'
5✔
3351
            v.long_name = 'total monthly precipitation amount'
5✔
3352

3353
            v[:] = prcp
5✔
3354

3355
            v = nc.createVariable('temp', 'f4', ('time',), zlib=zlib)
5✔
3356
            v.units = 'degC'
5✔
3357
            v.long_name = '2m temperature at height ref_hgt'
5✔
3358
            v[:] = temp
5✔
3359

3360
            if temp_std is not None:
5✔
3361
                v = nc.createVariable('temp_std', 'f4', ('time',), zlib=zlib)
2✔
3362
                v.units = 'degC'
2✔
3363
                v.long_name = 'standard deviation of daily temperatures'
2✔
3364
                v[:] = temp_std
2✔
3365

3366
    def get_inversion_flowline_hw(self):
9✔
3367
        """ Shortcut function to read the heights and widths of the glacier.
3368

3369
        Parameters
3370
        ----------
3371

3372
        Returns
3373
        -------
3374
        (height, widths) in units of m
3375
        """
3376

3377
        h = np.array([])
4✔
3378
        w = np.array([])
4✔
3379
        fls = self.read_pickle('inversion_flowlines')
4✔
3380
        for fl in fls:
4✔
3381
            w = np.append(w, fl.widths)
4✔
3382
            h = np.append(h, fl.surface_h)
4✔
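        # Note: flowline widths are stored in grid units; multiplying by
        # grid.dx below converts them to metres (heights are already in m).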
3383
        return h, w * self.grid.dx
4✔
3384

3385
    def set_ref_mb_data(self, mb_df=None):
9✔
3386
        """Adds reference mass balance data to this glacier.
3387

3388
        The format should be a dataframe with the years as index and
3389
        'ANNUAL_BALANCE' as values in mm yr-1.
3390
        """
3391

3392
        if self.is_tidewater:
4✔
3393
            log.warning('You are trying to set MB data on a tidewater glacier!'
×
3394
                        ' These data will be ignored by the MB model '
3395
                        'calibration routine.')
3396

3397
        if mb_df is None:
4✔
3398

3399
            flink, mbdatadir = get_wgms_files()
4✔
3400
            c = 'RGI{}0_ID'.format(self.rgi_version[0])
4✔
3401
            wid = flink.loc[flink[c] == self.rgi_id]
4✔
3402
            if len(wid) == 0:
4✔
3403
                raise RuntimeError('Not a reference glacier!')
1✔
3404
            wid = wid.WGMS_ID.values[0]
4✔
3405

3406
            # file
3407
            reff = os.path.join(mbdatadir,
4✔
3408
                                'mbdata_WGMS-{:05d}.csv'.format(wid))
3409
            # list of years
3410
            mb_df = pd.read_csv(reff).set_index('YEAR')
4✔
3411

3412
        # Quality checks
3413
        if 'ANNUAL_BALANCE' not in mb_df:
4✔
3414
            raise InvalidParamsError('Need an "ANNUAL_BALANCE" column in the '
×
3415
                                     'dataframe.')
3416
        mb_df.index.name = 'YEAR'
4✔
3417
        self._mbdf = mb_df
4✔
3418

3419
    def get_ref_mb_data(self, y0=None, y1=None, input_filesuffix=''):
9✔
3420
        """Get the reference mb data from WGMS (for some glaciers only!).
3421

3422
        Raises an Error if it isn't a reference glacier at all.
3423

3424
        Parameters
3425
        ----------
3426
        y0 : int
3427
            override the default behavior which is to check the available
3428
            climate data (or PARAMS['ref_mb_valid_window']) and decide
3429
        y1 : int
3430
            override the default behavior which is to check the available
3431
            climate data (or PARAMS['ref_mb_valid_window']) and decide
3432
        input_filesuffix : str
3433
            input_filesuffix of the climate_historical that should be used
3434
            if y0 and y1 are not given. The default is to take the
3435
            climate_historical without input_filesuffix
3436
        """
3437

3438
        if self._mbdf is None:
4✔
3439
            self.set_ref_mb_data()
4✔
3440

3441
        # logic for period
3442
        t0, t1 = cfg.PARAMS['ref_mb_valid_window']
4✔
3443
        if t0 > 0 and y0 is None:
4✔
3444
            y0 = t0
×
3445
        if t1 > 0 and y1 is None:
4✔
3446
            y1 = t1
×
3447

3448
        if y0 is None or y1 is None:
4✔
3449
            ci = self.get_climate_info(input_filesuffix=input_filesuffix)
4✔
3450
            if 'baseline_yr_0' not in ci:
4✔
3451
                raise InvalidWorkflowError('Please process some climate data '
1✔
3452
                                           'before calling this method')
3453
            y0 = ci['baseline_yr_0'] if y0 is None else y0
4✔
3454
            y1 = ci['baseline_yr_1'] if y1 is None else y1
4✔
3455

3456
        if len(self._mbdf) > 1:
4✔
3457
            out = self._mbdf.loc[y0:y1]
4✔
3458
        else:
3459
            # Some files are just empty
3460
            out = self._mbdf
×
3461
        return out.dropna(subset=['ANNUAL_BALANCE'])
4✔
3462

3463
    def get_ref_mb_profile(self, input_filesuffix='', constant_dh=False, obs_ratio_needed=0):
9✔
3464
        """Get the reference mb profile data from WGMS (if available!).
3465

3466
        Returns None if this glacier has no profile and an Error if it isn't
3467
        a reference glacier at all.
3468

3469
        Parameters
3470
        ----------
3471
        input_filesuffix : str
3472
            input_filesuffix of the climate_historical that should be used. The
3473
            default is to take the climate_historical without input_filesuffix
3474
        constant_dh : boolean
3475
            If set to True, it outputs the MB profiles with a constant step size
3476
            of dh=50m by using interpolation. This can be useful for comparisons
3477
            between years. Default is False which gives the raw
3478
            elevation-dependent point MB
3479
        obs_ratio_needed : float
3480
            necessary relative amount of observations per elevation band in order
3481
            to be included in the MB profile (0<=obs_ratio_needed<=1).
3482
            If obs_ratio_needed set to 0, the output shows all elevation-band
3483
            observations (default is 0).
3484
            When estimating mean MB profiles, it is advisable to set obs_ratio_needed
3485
            to 0.6. E.g. if there are in total 5 years of measurements only those elevation
3486
            bands with at least 3 years of measurements are used. If obs_ratio_needed is not
3487
            0, constant_dh has to be set to True.
3488
        """
3489

3490
        if obs_ratio_needed != 0 and constant_dh is False:
1✔
3491
            raise InvalidParamsError('If a filter is applied, you have to set'
×
3492
                                     ' constant_dh to True')
3493
        if obs_ratio_needed < 0 or obs_ratio_needed > 1:
1✔
3494
            raise InvalidParamsError('obs_ratio_needed is the ratio of necessary relative amount '
×
3495
                                     'of observations per elevation band. It has to be between '
3496
                                     '0 and 1!')
3497

3498
        if self._mbprofdf is None and not constant_dh:
1✔
3499
            flink, mbdatadir = get_wgms_files()
1✔
3500
            c = 'RGI{}0_ID'.format(self.rgi_version[0])
1✔
3501
            wid = flink.loc[flink[c] == self.rgi_id]
1✔
3502
            if len(wid) == 0:
1✔
3503
                raise RuntimeError('Not a reference glacier!')
×
3504
            wid = wid.WGMS_ID.values[0]
1✔
3505

3506
            # file
3507
            mbdatadir = os.path.join(os.path.dirname(mbdatadir), 'mb_profiles')
1✔
3508
            reff = os.path.join(mbdatadir,
1✔
3509
                                'profile_WGMS-{:05d}.csv'.format(wid))
3510
            if not os.path.exists(reff):
1✔
3511
                return None
×
3512
            # list of years
3513
            self._mbprofdf = pd.read_csv(reff, index_col=0)
1✔
3514

3515
        if self._mbprofdf_cte_dh is None and constant_dh:
1✔
3516
            flink, mbdatadir = get_wgms_files()
1✔
3517
            c = 'RGI{}0_ID'.format(self.rgi_version[0])
1✔
3518
            wid = flink.loc[flink[c] == self.rgi_id]
1✔
3519
            if len(wid) == 0:
1✔
3520
                raise RuntimeError('Not a reference glacier!')
×
3521
            wid = wid.WGMS_ID.values[0]
1✔
3522

3523
            # file
3524
            mbdatadir = os.path.join(os.path.dirname(mbdatadir), 'mb_profiles_constant_dh')
1✔
3525
            reff = os.path.join(mbdatadir,
1✔
3526
                                'profile_constant_dh_WGMS-{:05d}.csv'.format(wid))
3527
            if not os.path.exists(reff):
1✔
3528
                return None
×
3529
            # list of years
3530
            self._mbprofdf_cte_dh = pd.read_csv(reff, index_col=0)
1✔
3531

3532
        ci = self.get_climate_info(input_filesuffix=input_filesuffix)
1✔
3533
        if 'baseline_yr_0' not in ci:
1✔
3534
            raise RuntimeError('Please process some climate data before calling this method')
×
3535
        y0 = ci['baseline_yr_0']
1✔
3536
        y1 = ci['baseline_yr_1']
1✔
3537
        if not constant_dh:
1✔
3538
            if len(self._mbprofdf) > 1:
1✔
3539
                out = self._mbprofdf.loc[y0:y1]
1✔
3540
            else:
3541
                # Some files are just empty
3542
                out = self._mbprofdf
×
3543
        else:
3544
            if len(self._mbprofdf_cte_dh) > 1:
1✔
3545
                out = self._mbprofdf_cte_dh.loc[y0:y1]
1✔
3546
                if obs_ratio_needed != 0:
1✔
3547
                    # amount of years with any observation
3548
                    n_obs = len(out.index)
1✔
3549
                    # amount of years with observations for each elevation band
3550
                    n_obs_h = out.describe().loc['count']
1✔
3551
                    # relative amount of observations per elevation band
3552
                    rel_obs_h = n_obs_h / n_obs
1✔
3553
                    # select only those elevation bands with a specific ratio
3554
                    # of years with available measurements
3555
                    out = out[rel_obs_h[rel_obs_h >= obs_ratio_needed].index]
1✔
3556

3557
            else:
3558
                # Some files are just empty
3559
                out = self._mbprofdf_cte_dh
×
3560
        out.columns = [float(c) for c in out.columns]
1✔
3561
        return out.dropna(axis=1, how='all').dropna(axis=0, how='all')
1✔
3562

3563

3564
    def get_ref_length_data(self):
9✔
3565
        """Get the glacier length data from P. Leclercq's data base.
3566

3567
         https://folk.uio.no/paulwl/data.php
3568

3569
         For some glaciers only!
3570
         """
3571

3572
        df = pd.read_csv(get_demo_file('rgi_leclercq_links_2014_RGIV6.csv'))
1✔
3573
        df = df.loc[df.RGI_ID == self.rgi_id]
1✔
3574
        if len(df) == 0:
1✔
3575
            raise RuntimeError('No length data found for this glacier!')
×
3576
        ide = df.LID.values[0]
1✔
3577

3578
        f = get_demo_file('Glacier_Lengths_Leclercq.nc')
1✔
3579
        with xr.open_dataset(f) as dsg:
1✔
3580
            # The database is not sorted by ID. Don't ask me...
3581
            grp_id = np.argwhere(dsg['index'].values == ide)[0][0] + 1
1✔
3582
        with xr.open_dataset(f, group=str(grp_id)) as ds:
1✔
3583
            df = ds.to_dataframe()
1✔
3584
            df.name = ds.glacier_name
1✔
3585
        return df
1✔
3586

3587
    def log(self, task_name, *, err=None, task_time=None):
9✔
3588
        """Logs a message to the glacier directory.
3589

3590
        It is usually called by the :py:class:`entity_task` decorator;
3591
        normally you shouldn't have to worry about it.
3592

3593
        Parameters
3594
        ----------
3595
        task_name : str
3596
            the name of the task to log
3597
        err : Exception
3598
            the exception which has been raised by the task (if no exception was
3599
            raised, a success is logged)
3600
        task_time : float
3601
            the time (in seconds) that the task needed to run
3602
        """
3603

3604
        # a line per function call
3605
        nowsrt = datetime.datetime.now().strftime('%Y-%m-%dT%H:%M:%S')
7✔
3606
        line = nowsrt + ';' + task_name + ';'
7✔
3607

3608
        if task_time is not None:
7✔
3609
            line += 'time:{};'.format(task_time)
7✔
3610

3611
        if err is None:
7✔
3612
            line += 'SUCCESS'
7✔
3613
        else:
3614
            line += err.__class__.__name__ + ': {}'.format(err)
6✔
3615

3616
        line = line.replace('\n', ' ')
7✔
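        # A resulting log line looks like (timestamp and task name illustrative):
        #     2023-01-01T12:00:00;define_glacier_region;time:0.12;SUCCESS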
3617

3618
        count = 0
7✔
3619
        while count < 5:
7✔
3620
            try:
7✔
3621
                with open(self.logfile, 'a') as logfile:
7✔
3622
                    logfile.write(line + '\n')
7✔
3623
                break
7✔
3624
            except FileNotFoundError:
×
3625
                # I really don't know when this error happens
3626
                # In this case sleep and try again
3627
                time.sleep(0.05)
×
3628
                count += 1
×
3629

3630
        if count == 5:
7✔
3631
            log.warning('Could not write to logfile: ' + line)
×
3632

3633
    def get_task_status(self, task_name):
9✔
3634
        """Opens this directory's log file to check for a task's outcome.
3635

3636
        Parameters
3637
        ----------
3638
        task_name : str
3639
            the name of the task which has to be tested for
3640

3641
        Returns
3642
        -------
3643
        The last message for this task (SUCCESS if was successful),
3644
        None if the task was not run yet
3645
        """
3646

3647
        if not os.path.isfile(self.logfile):
7✔
3648
            return None
7✔
3649

3650
        with open(self.logfile) as logfile:
7✔
3651
            lines = logfile.readlines()
7✔
3652

3653
        lines = [l.replace('\n', '') for l in lines
7✔
3654
                 if ';' in l and (task_name == l.split(';')[1])]
3655
        if lines:
7✔
3656
            # keep only the last log
3657
            return lines[-1].split(';')[-1]
7✔
3658
        else:
3659
            return None
5✔
3660

3661
    def get_task_time(self, task_name):
9✔
3662
        """Opens this directory's log file to check for a task's run time.
3663

3664
        Parameters
3665
        ----------
3666
        task_name : str
3667
            the name of the task which has to be tested for
3668

3669
        Returns
3670
        -------
3671
        The timing that the last call of this task needed.
3672
        None if the task was not run yet, or if it errored
3673
        """
3674

3675
        if not os.path.isfile(self.logfile):
1✔
3676
            return None
×
3677

3678
        with open(self.logfile) as logfile:
1✔
3679
            lines = logfile.readlines()
1✔
3680

3681
        lines = [l.replace('\n', '') for l in lines
1✔
3682
                 if ';' in l and (task_name == l.split(';')[1])]
3683
        if lines:
1✔
3684
            line = lines[-1]
1✔
3685
            # Last log is message
3686
            if 'ERROR' in line.split(';')[-1] or 'time:' not in line:
1✔
3687
                return None
1✔
3688
            # Get the time
3689
            return float(line.split('time:')[-1].split(';')[0])
1✔
3690
        else:
3691
            return None
1✔
3692

3693
    def get_error_log(self):
9✔
3694
        """Reads the directory's log file to find the invalid task (if any).
3695

3696
        Returns
3697
        -------
3698
        The first error message in this log, None if all good
3699
        """
3700

3701
        if not os.path.isfile(self.logfile):
4✔
3702
            return None
1✔
3703

3704
        with open(self.logfile) as logfile:
4✔
3705
            lines = logfile.readlines()
4✔
3706

3707
        for l in lines:
4✔
3708
            if 'SUCCESS' in l:
4✔
3709
                continue
4✔
3710
            return l.replace('\n', '')
1✔
3711

3712
        # OK all good
3713
        return None
4✔
3714

3715

3716
@entity_task(log)
9✔
3717
def copy_to_basedir(gdir, base_dir=None, setup='run'):
9✔
3718
    """Copies the glacier directories and their content to a new location.
3719

3720
    This utility function allows you to select certain files only, thus
3721
    saving time when copying.
3722

3723
    Parameters
3724
    ----------
3725
    gdir : :py:class:`oggm.GlacierDirectory`
3726
        the glacier directory to copy
3727
    base_dir : str
3728
        path to the new base directory (should end with "per_glacier"
3729
        most of the time)
3730
    setup : str
3731
        set up you want the copied directory to be useful for. Currently
3732
        supported are 'all' (copy the entire directory), 'inversion'
3733
        (copy the necessary files for the inversion AND the run),
3734
        'run' (copy the necessary files for a dynamical run) or 'run/spinup'
3735
        (copy the necessary files and all already conducted model runs, e.g.
3736
        from a dynamic spinup).
3737

3738
    Returns
3739
    -------
3740
    New glacier directories from the copied folders
3741
    """
3742
    base_dir = os.path.abspath(base_dir)
2✔
3743
    new_dir = os.path.join(base_dir, gdir.rgi_id[:8], gdir.rgi_id[:11],
2✔
3744
                           gdir.rgi_id)
3745
    if setup == 'run':
2✔
3746
        paths = ['model_flowlines', 'inversion_params', 'outlines',
1✔
3747
                 'mb_calib', 'climate_historical', 'glacier_grid',
3748
                 'gcm_data', 'diagnostics', 'log']
3749
        paths = ('*' + p + '*' for p in paths)
1✔
3750
        shutil.copytree(gdir.dir, new_dir,
1✔
3751
                        ignore=include_patterns(*paths))
3752
    elif setup == 'inversion':
2✔
3753
        paths = ['inversion_params', 'downstream_line', 'outlines',
1✔
3754
                 'inversion_flowlines', 'glacier_grid', 'diagnostics',
3755
                 'mb_calib', 'climate_historical', 'gridded_data',
3756
                 'gcm_data', 'log']
3757
        paths = ('*' + p + '*' for p in paths)
1✔
3758
        shutil.copytree(gdir.dir, new_dir,
1✔
3759
                        ignore=include_patterns(*paths))
3760
    elif setup == 'run/spinup':
2✔
3761
        paths = ['model_flowlines', 'inversion_params', 'outlines',
1✔
3762
                 'mb_calib', 'climate_historical', 'glacier_grid',
3763
                 'gcm_data', 'diagnostics', 'log', 'model_run',
3764
                 'model_diagnostics', 'model_geometry']
3765
        paths = ('*' + p + '*' for p in paths)
1✔
3766
        shutil.copytree(gdir.dir, new_dir,
1✔
3767
                        ignore=include_patterns(*paths))
3768
    elif setup == 'all':
2✔
3769
        shutil.copytree(gdir.dir, new_dir)
2✔
3770
    else:
3771
        raise ValueError('setup not understood: {}'.format(setup))
×
3772
    return GlacierDirectory(gdir.rgi_id, base_dir=base_dir)
2✔
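# Hedged usage sketch (paths illustrative); as an entity task this is usually
# applied to many gdirs at once, e.g. via workflow.execute_entity_task:
#     from oggm import workflow
#     new_gdirs = workflow.execute_entity_task(copy_to_basedir, gdirs,
#                                              base_dir='/tmp/per_glacier',
#                                              setup='run')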
3773

3774

3775
def initialize_merged_gdir(main, tribs=[], glcdf=None,
9✔
3776
                           filename='climate_historical',
3777
                           input_filesuffix='',
3778
                           dem_source=None):
3779
    """Creates a new GlacierDirectory if tributaries are merged to a glacier
3780

3781
    This function should be called after centerlines.intersect_downstream_lines
3782
    and before flowline.merge_tributary_flowlines.
3783
    It will create a new GlacierDirectory, with a suitable DEM and reproject
3784
    the flowlines of the main glacier.
3785

3786
    Parameters
3787
    ----------
3788
    main : oggm.GlacierDirectory
3789
        the main glacier
3790
    tribs : list or dictionary containing oggm.GlacierDirectories
3791
        true tributary glaciers to the main glacier
3792
    glcdf: geopandas.GeoDataFrame
3793
        which contains the main glacier, will be downloaded if None
3794
    filename: str
3795
        Baseline climate file
3796
    input_filesuffix: str
3797
        Filesuffix to the climate file
3798
    dem_source: str
3799
        the DEM source to use
3800
    Returns
3801
    -------
3802
    merged : oggm.GlacierDirectory
3803
        the new GDir
3804
    """
3805
    from oggm.core.gis import define_glacier_region, merged_glacier_masks
×
3806

3807
    # If it's a dict, select the relevant ones
3808
    if isinstance(tribs, dict):
×
3809
        tribs = tribs[main.rgi_id]
×
3810
    # make sure tributaries are iterable
3811
    tribs = tolist(tribs)
×
3812

3813
    # read flowlines of the Main glacier
3814
    mfls = main.read_pickle('model_flowlines')
×
3815

3816
    # ------------------------------
3817
    # 0. create the new GlacierDirectory from main glaciers GeoDataFrame
3818
    # Should be passed along, if not download it
3819
    if glcdf is None:
×
3820
        glcdf = get_rgi_glacier_entities([main.rgi_id])
×
3821
    # Get index location of the specific glacier
3822
    idx = glcdf.loc[glcdf.RGIId == main.rgi_id].index
×
3823
    maindf = glcdf.loc[idx].copy()
×
3824

3825
    # add tributary geometries to maindf
3826
    merged_geometry = maindf.loc[idx, 'geometry'].iloc[0].buffer(0)
×
3827
    for trib in tribs:
×
3828
        geom = trib.read_pickle('geometries')['polygon_hr']
×
3829
        geom = salem.transform_geometry(geom, crs=trib.grid)
×
3830
        merged_geometry = merged_geometry.union(geom).buffer(0)
×
3831

3832
    # to get the center point, maximal extensions for DEM and single Polygon:
3833
    new_geometry = merged_geometry.convex_hull
×
3834
    maindf.loc[idx, 'geometry'] = new_geometry
×
3835

3836
    # make some adjustments to the rgi dataframe
3837
    # 1. calculate central point of new glacier
3838
    #    reproject twice to avoid Warning, first to flat projection
3839
    flat_centroid = salem.transform_geometry(new_geometry,
×
3840
                                             to_crs=main.grid).centroid
3841
    #    second reprojection of centroid to wgs84
3842
    new_centroid = salem.transform_geometry(flat_centroid, crs=main.grid)
×
3843
    maindf.loc[idx, 'CenLon'] = new_centroid.x
×
3844
    maindf.loc[idx, 'CenLat'] = new_centroid.y
×
3845
    # 2. update names
3846
    maindf.loc[idx, 'RGIId'] += '_merged'
×
3847
    if maindf.loc[idx, 'Name'].iloc[0] is None:
×
3848
        maindf.loc[idx, 'Name'] = main.name + ' (merged)'
×
3849
    else:
3850
        maindf.loc[idx, 'Name'] += ' (merged)'
×
3851

3852
    # finally create new Glacier Directory
3853
    # 1. set dx spacing to the one used for the flowlines
3854
    dx_method = cfg.PARAMS['grid_dx_method']
×
3855
    dx_spacing = cfg.PARAMS['fixed_dx']
×
3856
    cfg.PARAMS['grid_dx_method'] = 'fixed'
×
3857
    cfg.PARAMS['fixed_dx'] = mfls[-1].map_dx
×
3858
    merged = GlacierDirectory(maindf.loc[idx].iloc[0])
×
3859

3860
    # run define_glacier_region to get a fitting DEM and proper grid
3861
    define_glacier_region(merged, entity=maindf.loc[idx].iloc[0],
×
3862
                          source=dem_source)
3863

3864
    # write gridded data and geometries for visualization
3865
    merged_glacier_masks(merged, merged_geometry)
×
3866

3867
    # reset dx method
3868
    cfg.PARAMS['grid_dx_method'] = dx_method
×
3869
    cfg.PARAMS['fixed_dx'] = dx_spacing
×
3870

3871
    # copy main climate file, climate info and calib to new gdir
3872
    climfilename = filename + '_' + main.rgi_id + input_filesuffix + '.nc'
×
3873
    climfile = os.path.join(merged.dir, climfilename)
×
3874
    shutil.copyfile(main.get_filepath(filename, filesuffix=input_filesuffix),
×
3875
                    climfile)
3876
    _mufile = os.path.basename(merged.get_filepath('mb_calib')).split('.')
×
3877
    mufile = _mufile[0] + '_' + main.rgi_id + '.' + _mufile[1]
×
3878
    shutil.copyfile(main.get_filepath('mb_calib'),
×
3879
                    os.path.join(merged.dir, mufile))
3880

3881
    # reproject the flowlines to the new grid
3882
    for nr, fl in reversed(list(enumerate(mfls))):
×
3883

3884
        # 1. Step: Change projection to the main glaciers grid
3885
        _line = salem.transform_geometry(fl.line,
×
3886
                                         crs=main.grid, to_crs=merged.grid)
3887
        # 2. set new line
3888
        fl.set_line(_line)
×
3889

3890
        # 3. set flow to attributes
3891
        if fl.flows_to is not None:
×
3892
            fl.set_flows_to(fl.flows_to)
×
3893
        # remove inflow points, will be set by other flowlines if need be
3894
        fl.inflow_points = []
×
3895

3896
        # 5. set grid size attributes
3897
        dx = [shpg.Point(fl.line.coords[i]).distance(
×
3898
            shpg.Point(fl.line.coords[i+1]))
3899
            for i, pt in enumerate(fl.line.coords[:-1])]  # get distance
3900
        # and check if equally spaced
3901
        if not np.allclose(dx, np.mean(dx), atol=1e-2):
×
3902
            raise RuntimeError('Flowline is not evenly spaced.')
×
3903
        # dx might very slightly change, but should not
3904
        fl.dx = np.mean(dx).round(2)
×
3905
        # map_dx should stay exactly the same
3906
        # fl.map_dx = mfls[-1].map_dx
3907
        fl.dx_meter = fl.map_dx * fl.dx
×
3908

3909
        # replace flowline within the list
3910
        mfls[nr] = fl
×
3911

3912
    # Write the reprojected flowlines
3913
    merged.write_pickle(mfls, 'model_flowlines')
×
3914

3915
    return merged
×
3916

3917

3918
@entity_task(log)
9✔
3919
def gdir_to_tar(gdir, base_dir=None, delete=True):
9✔
3920
    """Writes the content of a glacier directory to a tar file.
3921

3922
    The tar file is located at the same location of the original directory.
3923
    The glacier directory objects are useless if deleted!
3924

3925
    Parameters
3926
    ----------
3927
    base_dir : str
3928
        path to the basedir where to write the directory (defaults to the
3929
        same location of the original directory)
3930
    delete : bool
3931
        delete the original directory afterwards (default)
3932

3933
    Returns
3934
    -------
3935
    the path to the tar file
3936
    """
3937

3938
    source_dir = os.path.normpath(gdir.dir)
1✔
3939
    opath = source_dir + '.tar.gz'
1✔
3940
    if base_dir is not None:
1✔
3941
        opath = os.path.join(base_dir, os.path.relpath(opath, gdir.base_dir))
1✔
3942
        mkdir(os.path.dirname(opath))
1✔
3943

3944
    with tarfile.open(opath, "w:gz") as tar:
1✔
3945
        tar.add(source_dir, arcname=os.path.basename(source_dir))
1✔
3946

3947
    if delete:
1✔
3948
        shutil.rmtree(source_dir)
1✔
3949

3950
    return opath
1✔
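# e.g. (path illustrative): '<wd>/per_glacier/RGI60-11/RGI60-11.00/RGI60-11.00897'
# is archived to '<wd>/per_glacier/RGI60-11/RGI60-11.00/RGI60-11.00897.tar.gz'
# (or under base_dir if given); the original directory is removed if delete=True.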
3951

3952

3953
def base_dir_to_tar(base_dir=None, delete=True):
9✔
3954
    """Merge the directories into 1000 bundles as tar files.
3955

3956
    The tar file is located at the same location of the original directory.
3957

3958
    Parameters
3959
    ----------
3960
    base_dir : str
3961
        path to the basedir to parse (defaults to the working directory)
3965
    delete : bool
3966
        delete the original directory tars afterwards (default)
3967
    """
3968

3969
    if base_dir is None:
1✔
3970
        if not cfg.PATHS.get('working_dir', None):
1✔
3971
            raise ValueError("Need a valid PATHS['working_dir']!")
×
3972
        base_dir = os.path.join(cfg.PATHS['working_dir'], 'per_glacier')
1✔
3973

3974
    to_delete = []
1✔
3975
    for dirname, subdirlist, filelist in os.walk(base_dir):
1✔
3976
        # RGI60-01.00
3977
        bname = os.path.basename(dirname)
1✔
3978
        # second argument for RGI7 naming convention
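        # e.g. 'RGI60-01.00' (11 chars, '.' at index -3) or
        # 'RGI2000-v7.0-G-06-00' (20 chars, '-' at index -3) both match the
        # bundle-folder pattern checked below (names illustrative).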
3979
        if not ((len(bname) == 11 and bname[-3] == '.') or (len(bname) == 20 and bname[-3] == '-')) :
1✔
3980
            continue
1✔
3981
        opath = dirname + '.tar'
1✔
3982
        with tarfile.open(opath, 'w') as tar:
1✔
3983
            tar.add(dirname, arcname=os.path.basename(dirname))
1✔
3984
        if delete:
1✔
3985
            to_delete.append(dirname)
1✔
3986

3987
    for dirname in to_delete:
1✔
3988
        shutil.rmtree(dirname)
1✔