• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

OGGM / oggm / 3836586642

pending completion
3836586642

Pull #1510

github

GitHub
Merge 2a19b5c72 into 8ab12c504
Pull Request #1510: Fix issue with TANDEM DEM and minor RGI7 issue

2 of 4 new or added lines in 2 files covered. (50.0%)

249 existing lines in 7 files now uncovered.

12099 of 13681 relevant lines covered (88.44%)

3.61 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

90.61
/oggm/utils/_workflow.py
1
"""Classes and functions used by the OGGM workflow"""
2

3
# Builtins
4
import glob
9✔
5
import os
9✔
6
import tempfile
9✔
7
import gzip
9✔
8
import json
9✔
9
import time
9✔
10
import random
9✔
11
import shutil
9✔
12
import tarfile
9✔
13
import sys
9✔
14
import signal
9✔
15
import datetime
9✔
16
import logging
9✔
17
import pickle
9✔
18
import warnings
9✔
19
import itertools
9✔
20
from collections import OrderedDict
9✔
21
from functools import partial, wraps
9✔
22
from time import gmtime, strftime
9✔
23
import fnmatch
9✔
24
import platform
9✔
25
import struct
9✔
26
import importlib
9✔
27
import re as regexp
9✔
28

29
# External libs
30
import pandas as pd
9✔
31
import numpy as np
9✔
32
from scipy import stats
9✔
33
import xarray as xr
9✔
34
import shapely.geometry as shpg
9✔
35
import shapely.affinity as shpa
9✔
36
from shapely.ops import transform as shp_trafo
9✔
37
import netCDF4
9✔
38

39
# Optional libs
40
try:
9✔
41
    import geopandas as gpd
9✔
42
except ImportError:
43
    pass
44
try:
9✔
45
    import salem
9✔
46
except ImportError:
47
    pass
48
try:
9✔
49
    from salem import wgs84
9✔
50
    from salem.gis import transform_proj
8✔
51
except ImportError:
52
    pass
53
try:
9✔
54
    import pyproj
9✔
55
except ImportError:
56
    pass
57

58

59
# Locals
60
from oggm import __version__
9✔
61
from oggm.utils._funcs import (calendardate_to_hydrodate, date_to_floatyear,
9✔
62
                               tolist, filter_rgi_name, parse_rgi_meta,
63
                               haversine, multipolygon_to_polygon, clip_scalar)
64
from oggm.utils._downloads import (get_demo_file, get_wgms_files,
9✔
65
                                   get_rgi_glacier_entities)
66
from oggm import cfg
9✔
67
from oggm.exceptions import InvalidParamsError, InvalidWorkflowError
9✔
68

69

70
# Default RGI date (median per region in RGI6)
71
RGI_DATE = {'01': 2009,
9✔
72
            '02': 2004,
73
            '03': 1999,
74
            '04': 2001,
75
            '05': 2001,
76
            '06': 2000,
77
            '07': 2008,
78
            '08': 2002,
79
            '09': 2001,
80
            '10': 2011,
81
            '11': 2003,
82
            '12': 2001,
83
            '13': 2006,
84
            '14': 2001,
85
            '15': 2001,
86
            '16': 2000,
87
            '17': 2000,
88
            '18': 1978,
89
            '19': 1989,
90
            }
91

92
# Module logger
93
log = logging.getLogger('.'.join(__name__.split('.')[:-1]))
9✔
94

95

96
def empty_cache():
9✔
97
    """Empty oggm's cache directory."""
98

99
    if os.path.exists(cfg.CACHE_DIR):
×
100
        shutil.rmtree(cfg.CACHE_DIR)
×
101
    os.makedirs(cfg.CACHE_DIR)
×
102

103

104
def expand_path(p):
9✔
105
    """Helper function for os.path.expanduser and os.path.expandvars"""
106

107
    return os.path.expandvars(os.path.expanduser(p))
×
108

109

110
def gettempdir(dirname='', reset=False, home=False):
9✔
111
    """Get a temporary directory.
112

113
    The default is to locate it in the system's temporary directory as
114
    given by python's `tempfile.gettempdir()/OGGM'. You can set `home=True` for
115
    a directory in the user's `home/tmp` folder instead (this isn't really
116
    a temporary folder but well...)
117

118
    Parameters
119
    ----------
120
    dirname : str
121
        if you want to give it a name
122
    reset : bool
123
        if it has to be emptied first.
124
    home : bool
125
        if True, returns `HOME/tmp/OGGM` instead
126

127
    Returns
128
    -------
129
    the path to the temporary directory
130
    """
131

132
    basedir = (os.path.join(os.path.expanduser('~'), 'tmp') if home
1✔
133
               else tempfile.gettempdir())
134
    return mkdir(os.path.join(basedir, 'OGGM', dirname), reset=reset)
1✔
135

136

137
# alias
138
get_temp_dir = gettempdir
9✔
139

140

141
def get_sys_info():
9✔
142
    """Returns system information as a list of tuples"""
143

144
    blob = []
8✔
145
    try:
8✔
146
        (sysname, nodename, release,
8✔
147
         version, machine, processor) = platform.uname()
148
        blob.extend([
8✔
149
            ("python", "%d.%d.%d.%s.%s" % sys.version_info[:]),
150
            ("python-bits", struct.calcsize("P") * 8),
151
            ("OS", "%s" % (sysname)),
152
            ("OS-release", "%s" % (release)),
153
            ("machine", "%s" % (machine)),
154
            ("processor", "%s" % (processor)),
155
        ])
156
    except BaseException:
×
157
        pass
×
158

159
    return blob
8✔
160

161

162
def get_env_info():
9✔
163
    """Returns env information as a list of tuples"""
164

165
    deps = [
8✔
166
        # (MODULE_NAME, f(mod) -> mod version)
167
        ("oggm", lambda mod: mod.__version__),
168
        ("numpy", lambda mod: mod.__version__),
169
        ("scipy", lambda mod: mod.__version__),
170
        ("pandas", lambda mod: mod.__version__),
171
        ("geopandas", lambda mod: mod.__version__),
172
        ("netCDF4", lambda mod: mod.__version__),
173
        ("matplotlib", lambda mod: mod.__version__),
174
        ("rasterio", lambda mod: mod.__version__),
175
        ("fiona", lambda mod: mod.__version__),
176
        ("pyproj", lambda mod: mod.__version__),
177
        ("shapely", lambda mod: mod.__version__),
178
        ("xarray", lambda mod: mod.__version__),
179
        ("dask", lambda mod: mod.__version__),
180
        ("salem", lambda mod: mod.__version__),
181
    ]
182

183
    deps_blob = list()
8✔
184
    for (modname, ver_f) in deps:
8✔
185
        try:
8✔
186
            if modname in sys.modules:
8✔
187
                mod = sys.modules[modname]
8✔
188
            else:
189
                mod = importlib.import_module(modname)
×
190
            ver = ver_f(mod)
8✔
191
            deps_blob.append((modname, ver))
8✔
192
        except BaseException:
×
193
            deps_blob.append((modname, None))
×
194

195
    return deps_blob
8✔
196

197

198
def get_git_ident():
9✔
199
    ident_str = '$Id$'
8✔
200
    if ":" not in ident_str:
8✔
201
        return 'no_git_id'
×
202
    return ident_str.replace("$", "").replace("Id:", "").replace(" ", "")
8✔
203

204

205
def show_versions(logger=None):
9✔
206
    """Prints the OGGM version and other system information.
207

208
    Parameters
209
    ----------
210
    logger : optional
211
        the logger you want to send the printouts to. If None, will use stdout
212

213
    Returns
214
    -------
215
    the output string
216
    """
217

218
    sys_info = get_sys_info()
1✔
219
    deps_blob = get_env_info()
1✔
220

221
    out = ['# OGGM environment: ']
1✔
222
    out.append("## System info:")
1✔
223
    for k, stat in sys_info:
1✔
224
        out.append("    %s: %s" % (k, stat))
1✔
225
    out.append("## Packages info:")
1✔
226
    for k, stat in deps_blob:
1✔
227
        out.append("    %s: %s" % (k, stat))
1✔
228
    out.append("    OGGM git identifier: " + get_git_ident())
1✔
229

230
    if logger is not None:
1✔
231
        logger.workflow('\n'.join(out))
1✔
232

233
    return '\n'.join(out)
1✔
234

235

236
class SuperclassMeta(type):
9✔
237
    """Metaclass for abstract base classes.
238

239
    http://stackoverflow.com/questions/40508492/python-sphinx-inherit-
240
    method-documentation-from-superclass
241
    """
242
    def __new__(mcls, classname, bases, cls_dict):
9✔
243
        cls = super().__new__(mcls, classname, bases, cls_dict)
9✔
244
        for name, member in cls_dict.items():
9✔
245
            if not getattr(member, '__doc__'):
9✔
246
                try:
9✔
247
                    member.__doc__ = getattr(bases[-1], name).__doc__
9✔
248
                except AttributeError:
9✔
249
                    pass
9✔
250
        return cls
9✔
251

252

253
class LRUFileCache():
9✔
254
    """A least recently used cache for temporary files.
255

256
    The files which are no longer used are deleted from the disk.
257
    """
258

259
    def __init__(self, l0=None, maxsize=None):
9✔
260
        """Instantiate.
261

262
        Parameters
263
        ----------
264
        l0 : list
265
            a list of file paths
266
        maxsize : int
267
            the max number of files to keep
268
        """
269
        self.files = [] if l0 is None else l0
7✔
270
        # if no maxsize is specified, use value from configuration
271
        maxsize = cfg.PARAMS['lru_maxsize'] if maxsize is None else maxsize
7✔
272
        self.maxsize = maxsize
7✔
273
        self.purge()
7✔
274

275
    def purge(self):
9✔
276
        """Remove expired entries."""
277
        if len(self.files) > self.maxsize:
7✔
278
            fpath = self.files.pop(0)
1✔
279
            if os.path.exists(fpath):
1✔
280
                os.remove(fpath)
1✔
281

282
    def append(self, fpath):
9✔
283
        """Append a file to the list."""
284
        if fpath not in self.files:
7✔
285
            self.files.append(fpath)
4✔
286
        self.purge()
7✔
287

288

289
def lazy_property(fn):
9✔
290
    """Decorator that makes a property lazy-evaluated."""
291

292
    attr_name = '_lazy_' + fn.__name__
9✔
293

294
    @property
9✔
295
    @wraps(fn)
9✔
296
    def _lazy_property(self):
9✔
297
        if not hasattr(self, attr_name):
8✔
298
            setattr(self, attr_name, fn(self))
8✔
299
        return getattr(self, attr_name)
8✔
300

301
    return _lazy_property
9✔
302

303

304
def mkdir(path, reset=False):
9✔
305
    """Checks if directory exists and if not, create one.
306

307
    Parameters
308
    ----------
309
    reset: erase the content of the directory if exists
310

311
    Returns
312
    -------
313
    the path
314
    """
315

316
    if reset and os.path.exists(path):
8✔
317
        shutil.rmtree(path)
2✔
318
        # deleting stuff takes time
319
        while os.path.exists(path):  # check if it still exists
2✔
320
            pass
×
321
    try:
8✔
322
        os.makedirs(path)
8✔
323
    except FileExistsError:
6✔
324
        pass
6✔
325
    return path
8✔
326

327

328
def include_patterns(*patterns):
9✔
329
    """Factory function that can be used with copytree() ignore parameter.
330

331
    Arguments define a sequence of glob-style patterns
332
    that are used to specify what files to NOT ignore.
333
    Creates and returns a function that determines this for each directory
334
    in the file hierarchy rooted at the source directory when used with
335
    shutil.copytree().
336

337
    https://stackoverflow.com/questions/35155382/copying-specific-files-to-a-
338
    new-folder-while-maintaining-the-original-subdirect
339
    """
340

341
    def _ignore_patterns(path, names):
2✔
342
        # This is our cuisine
343
        bname = os.path.basename(path)
2✔
344
        if 'divide' in bname or 'log' in bname:
2✔
345
            keep = []
×
346
        else:
347
            keep = set(name for pattern in patterns
2✔
348
                       for name in fnmatch.filter(names, pattern))
349
        ignore = set(name for name in names
2✔
350
                     if name not in keep and not
351
                     os.path.isdir(os.path.join(path, name)))
352
        return ignore
2✔
353

354
    return _ignore_patterns
2✔
355

356

357
class ncDataset(netCDF4.Dataset):
9✔
358
    """Wrapper around netCDF4 setting auto_mask to False"""
359

360
    def __init__(self, *args, **kwargs):
9✔
361
        super(ncDataset, self).__init__(*args, **kwargs)
7✔
362
        self.set_auto_mask(False)
7✔
363

364

365
def pipe_log(gdir, task_func_name, err=None):
9✔
366
    """Log the error in a specific directory."""
367

368
    time_str = datetime.datetime.now().strftime('%Y-%m-%dT%H:%M:%S')
5✔
369

370
    # Defaults to working directory: it must be set!
371
    if not cfg.PATHS['working_dir']:
5✔
372
        warnings.warn("Cannot log to file without a valid "
×
373
                      "cfg.PATHS['working_dir']!", RuntimeWarning)
374
        return
×
375

376
    fpath = os.path.join(cfg.PATHS['working_dir'], 'log')
5✔
377
    mkdir(fpath)
5✔
378

379
    fpath = os.path.join(fpath, gdir.rgi_id)
5✔
380

381
    sep = '; '
5✔
382

383
    if err is not None:
5✔
384
        fpath += '.ERROR'
5✔
385
    else:
386
        return  # for now
×
387
        fpath += '.SUCCESS'
388

389
    with open(fpath, 'a') as f:
5✔
390
        f.write(time_str + sep + task_func_name + sep)
5✔
391
        if err is not None:
5✔
392
            f.write(err.__class__.__name__ + sep + '{}\n'.format(err))
5✔
393
        else:
394
            f.write(sep + '\n')
×
395

396

397
class DisableLogger():
9✔
398
    """Context manager to temporarily disable all loggers."""
399

400
    def __enter__(self):
9✔
401
        logging.disable(logging.CRITICAL)
6✔
402

403
    def __exit__(self, a, b, c):
9✔
404
        logging.disable(logging.NOTSET)
6✔
405

406

407
def _timeout_handler(signum, frame):
9✔
408
    raise TimeoutError('This task was killed because of timeout')
×
409

410

411
class entity_task(object):
9✔
412
    """Decorator for common job-controlling logic.
413

414
    All tasks share common operations. This decorator is here to handle them:
415
    exceptions, logging, and (some day) database for job-controlling.
416
    """
417

418
    def __init__(self, log, writes=[], fallback=None):
9✔
419
        """Decorator syntax: ``@entity_task(log, writes=['dem', 'outlines'])``
420

421
        Parameters
422
        ----------
423
        log: logger
424
            module logger
425
        writes: list
426
            list of files that the task will write down to disk (must be
427
            available in ``cfg.BASENAMES``)
428
        fallback: python function
429
            will be executed on gdir if entity_task fails
430
        return_value: bool
431
            whether the return value from the task should be passed over
432
            to the caller or not. In general you will always want this to
433
            be true, but sometimes the task return things which are not
434
            useful in production and my use a lot of memory, etc,
435
        """
436
        self.log = log
9✔
437
        self.writes = writes
9✔
438
        self.fallback = fallback
9✔
439

440
        cnt = ['    Notes']
9✔
441
        cnt += ['    -----']
9✔
442
        cnt += ['    Files written to the glacier directory:']
9✔
443

444
        for k in sorted(writes):
9✔
445
            cnt += [cfg.BASENAMES.doc_str(k)]
9✔
446
        self.iodoc = '\n'.join(cnt)
9✔
447

448
    def __call__(self, task_func):
9✔
449
        """Decorate."""
450

451
        # Add to the original docstring
452
        if task_func.__doc__ is None:
9✔
453
            raise RuntimeError('Entity tasks should have a docstring!')
×
454

455
        task_func.__doc__ = '\n'.join((task_func.__doc__, self.iodoc))
9✔
456

457
        @wraps(task_func)
9✔
458
        def _entity_task(gdir, *, reset=None, print_log=True,
9✔
459
                         return_value=True, continue_on_error=None,
460
                         add_to_log_file=True, **kwargs):
461

462
            if reset is None:
7✔
463
                reset = not cfg.PARAMS['auto_skip_task']
7✔
464

465
            if continue_on_error is None:
7✔
466
                continue_on_error = cfg.PARAMS['continue_on_error']
7✔
467

468
            task_name = task_func.__name__
7✔
469

470
            # Filesuffix are typically used to differentiate tasks
471
            fsuffix = (kwargs.get('filesuffix', False) or
7✔
472
                       kwargs.get('output_filesuffix', False))
473
            if fsuffix:
7✔
474
                task_name += fsuffix
4✔
475

476
            # Do we need to run this task?
477
            s = gdir.get_task_status(task_name)
7✔
478
            if not reset and s and ('SUCCESS' in s):
7✔
479
                return
1✔
480

481
            # Log what we are doing
482
            if print_log:
7✔
483
                self.log.info('(%s) %s', gdir.rgi_id, task_name)
7✔
484

485
            # Run the task
486
            try:
7✔
487
                if cfg.PARAMS['task_timeout'] > 0:
7✔
488
                    signal.signal(signal.SIGALRM, _timeout_handler)
×
489
                    signal.alarm(cfg.PARAMS['task_timeout'])
×
490
                ex_t = time.time()
7✔
491
                out = task_func(gdir, **kwargs)
7✔
492
                ex_t = time.time() - ex_t
7✔
493
                if cfg.PARAMS['task_timeout'] > 0:
7✔
494
                    signal.alarm(0)
×
495
                if task_name != 'gdir_to_tar':
7✔
496
                    if add_to_log_file:
7✔
497
                        gdir.log(task_name, task_time=ex_t)
7✔
498
            except Exception as err:
6✔
499
                # Something happened
500
                out = None
6✔
501
                if add_to_log_file:
6✔
502
                    gdir.log(task_name, err=err)
5✔
503
                    pipe_log(gdir, task_name, err=err)
5✔
504
                if print_log:
6✔
505
                    self.log.error('%s occurred during task %s on %s: %s',
6✔
506
                                   type(err).__name__, task_name,
507
                                   gdir.rgi_id, str(err))
508
                if not continue_on_error:
6✔
509
                    raise
6✔
510

511
                if self.fallback is not None:
1✔
512
                    self.fallback(gdir)
1✔
513
            if return_value:
7✔
514
                return out
7✔
515

516
        _entity_task.__dict__['is_entity_task'] = True
9✔
517
        return _entity_task
9✔
518

519

520
class global_task(object):
9✔
521
    """Decorator for common job-controlling logic.
522

523
    Indicates that this task expects a list of all GlacierDirs as parameter
524
    instead of being called once per dir.
525
    """
526

527
    def __init__(self, log):
9✔
528
        """Decorator syntax: ``@global_task(log)``
529

530
        Parameters
531
        ----------
532
        log: logger
533
            module logger
534
        """
535
        self.log = log
9✔
536

537
    def __call__(self, task_func):
9✔
538
        """Decorate."""
539

540
        @wraps(task_func)
9✔
541
        def _global_task(gdirs, **kwargs):
9✔
542

543
            # Should be iterable
544
            gdirs = tolist(gdirs)
7✔
545

546
            self.log.workflow('Applying global task %s on %s glaciers',
7✔
547
                              task_func.__name__, len(gdirs))
548

549
            # Run the task
550
            return task_func(gdirs, **kwargs)
7✔
551

552
        _global_task.__dict__['is_global_task'] = True
9✔
553
        return _global_task
9✔
554

555

556
def get_ref_mb_glaciers_candidates(rgi_version=None):
9✔
557
    """Reads in the WGMS list of glaciers with available MB data.
558

559
    Can be found afterwards (and extended) in cdf.DATA['RGIXX_ref_ids'].
560
    """
561

562
    if rgi_version is None:
2✔
563
        rgi_version = cfg.PARAMS['rgi_version']
1✔
564

565
    if len(rgi_version) == 2:
2✔
566
        # We might change this one day
567
        rgi_version = rgi_version[:1]
2✔
568

569
    key = 'RGI{}0_ref_ids'.format(rgi_version)
2✔
570

571
    if key not in cfg.DATA:
2✔
572
        flink, _ = get_wgms_files()
2✔
573
        cfg.DATA[key] = flink['RGI{}0_ID'.format(rgi_version)].tolist()
2✔
574

575
    return cfg.DATA[key]
2✔
576

577

578
@global_task(log)
9✔
579
def get_ref_mb_glaciers(gdirs, y0=None, y1=None):
9✔
580
    """Get the list of glaciers we have valid mass balance measurements for.
581

582
    To be valid glaciers must have more than 5 years of measurements and
583
    be land terminating. Therefore, the list depends on the time period of the
584
    baseline climate data and this method selects them out of a list
585
    of potential candidates (`gdirs` arg).
586

587
    Parameters
588
    ----------
589
    gdirs : list of :py:class:`oggm.GlacierDirectory` objects
590
        list of glaciers to check for valid reference mass balance data
591
    y0 : int
592
        override the default behavior which is to check the available
593
        climate data (or PARAMS['ref_mb_valid_window']) and decide
594
    y1 : int
595
        override the default behavior which is to check the available
596
        climate data (or PARAMS['ref_mb_valid_window']) and decide
597

598
    Returns
599
    -------
600
    ref_gdirs : list of :py:class:`oggm.GlacierDirectory` objects
601
        list of those glaciers with valid reference mass balance data
602

603
    See Also
604
    --------
605
    get_ref_mb_glaciers_candidates
606
    """
607

608
    # Get the links
609
    ref_ids = get_ref_mb_glaciers_candidates(gdirs[0].rgi_version)
2✔
610

611
    # We remove tidewater glaciers and glaciers with < 5 years
612
    ref_gdirs = []
2✔
613
    for g in gdirs:
2✔
614
        if g.rgi_id not in ref_ids or g.is_tidewater:
2✔
615
            continue
2✔
616
        try:
2✔
617
            mbdf = g.get_ref_mb_data(y0=y0, y1=y1)
2✔
618
            if len(mbdf) >= 5:
2✔
619
                ref_gdirs.append(g)
2✔
620
        except RuntimeError as e:
1✔
621
            if 'Please process some climate data before call' in str(e):
×
622
                raise
×
623
    return ref_gdirs
2✔
624

625

626
def _chaikins_corner_cutting(line, refinements=5):
9✔
627
    """Some magic here.
628

629
    https://stackoverflow.com/questions/47068504/where-to-find-python-
630
    implementation-of-chaikins-corner-cutting-algorithm
631
    """
632
    coords = np.array(line.coords)
1✔
633

634
    for _ in range(refinements):
1✔
635
        L = coords.repeat(2, axis=0)
1✔
636
        R = np.empty_like(L)
1✔
637
        R[0] = L[0]
1✔
638
        R[2::2] = L[1:-1:2]
1✔
639
        R[1:-1:2] = L[2::2]
1✔
640
        R[-1] = L[-1]
1✔
641
        coords = L * 0.75 + R * 0.25
1✔
642

643
    return shpg.LineString(coords)
1✔
644

645

646
@entity_task(log)
9✔
647
def get_centerline_lonlat(gdir,
9✔
648
                          keep_main_only=False,
649
                          flowlines_output=False,
650
                          ensure_exterior_match=False,
651
                          geometrical_widths_output=False,
652
                          corrected_widths_output=False,
653
                          to_crs='wgs84',
654
                          simplify_line=0,
655
                          corner_cutting=0):
656
    """Helper task to convert the centerlines to a shapefile
657

658
    Parameters
659
    ----------
660
    gdir : the glacier directory
661
    flowlines_output : create a shapefile for the flowlines
662
    ensure_exterior_match : per design, OGGM centerlines match the underlying
663
    DEM grid. This may imply that they do not "touch" the exterior outlines
664
    of the glacier in vector space. Set this to True to correct for that.
665
    geometrical_widths_output : for the geometrical widths
666
    corrected_widths_output : for the corrected widths
667

668
    Returns
669
    -------
670
    a shapefile
671
    """
672
    if flowlines_output or geometrical_widths_output or corrected_widths_output:
1✔
673
        cls = gdir.read_pickle('inversion_flowlines')
1✔
674
    else:
675
        cls = gdir.read_pickle('centerlines')
1✔
676

677
    exterior = None
1✔
678
    if ensure_exterior_match:
1✔
679
        exterior = gdir.read_shapefile('outlines')
1✔
680
        # Transform to grid
681
        tra_func = partial(gdir.grid.transform, crs=exterior.crs)
1✔
682
        exterior = shpg.Polygon(shp_trafo(tra_func, exterior.geometry[0].exterior))
1✔
683

684
    tra_func = partial(gdir.grid.ij_to_crs, crs=to_crs)
1✔
685

686
    olist = []
1✔
687
    for j, cl in enumerate(cls):
1✔
688
        mm = 1 if j == (len(cls)-1) else 0
1✔
689
        if keep_main_only and mm == 0:
1✔
690
            continue
×
691
        if corrected_widths_output:
1✔
692
            le_segment = np.rint(np.max(cl.dis_on_line) * gdir.grid.dx)
1✔
693
            for wi, cur, (n1, n2), wi_m in zip(cl.widths, cl.line.coords,
1✔
694
                                               cl.normals, cl.widths_m):
695
                _l = shpg.LineString([shpg.Point(cur + wi / 2. * n1),
1✔
696
                                      shpg.Point(cur + wi / 2. * n2)])
697
                gs = dict()
1✔
698
                gs['RGIID'] = gdir.rgi_id
1✔
699
                gs['SEGMENT_ID'] = j
1✔
700
                gs['LE_SEGMENT'] = le_segment
1✔
701
                gs['MAIN'] = mm
1✔
702
                gs['WIDTH_m'] = wi_m
1✔
703
                gs['geometry'] = shp_trafo(tra_func, _l)
1✔
704
                olist.append(gs)
1✔
705
        elif geometrical_widths_output:
1✔
706
            le_segment = np.rint(np.max(cl.dis_on_line) * gdir.grid.dx)
1✔
707
            for _l, wi_m in zip(cl.geometrical_widths, cl.widths_m):
1✔
708
                gs = dict()
1✔
709
                gs['RGIID'] = gdir.rgi_id
1✔
710
                gs['SEGMENT_ID'] = j
1✔
711
                gs['LE_SEGMENT'] = le_segment
1✔
712
                gs['MAIN'] = mm
1✔
713
                gs['WIDTH_m'] = wi_m
1✔
714
                gs['geometry'] = shp_trafo(tra_func, _l)
1✔
715
                olist.append(gs)
1✔
716
        else:
717
            gs = dict()
1✔
718
            gs['RGIID'] = gdir.rgi_id
1✔
719
            gs['SEGMENT_ID'] = j
1✔
720
            gs['LE_SEGMENT'] = np.rint(np.max(cl.dis_on_line) * gdir.grid.dx)
1✔
721
            gs['MAIN'] = mm
1✔
722
            line = cl.line
1✔
723
            if ensure_exterior_match:
1✔
724
                # Extend line at the start by 10
725
                fs = shpg.LineString(line.coords[:2])
1✔
726
                # First check if this is necessary - this segment should
727
                # be within the geometry or it's already good to go
728
                if fs.within(exterior):
1✔
729
                    fs = shpa.scale(fs, xfact=3, yfact=3, origin=fs.boundary.geoms[1])
1✔
730
                    line = shpg.LineString([*fs.coords, *line.coords[2:]])
1✔
731
                # If last also extend at the end
732
                if mm == 1:
1✔
733
                    ls = shpg.LineString(line.coords[-2:])
1✔
734
                    if ls.within(exterior):
1✔
735
                        ls = shpa.scale(ls, xfact=3, yfact=3, origin=ls.boundary.geoms[0])
1✔
736
                        line = shpg.LineString([*line.coords[:-2], *ls.coords])
1✔
737

738
                # Simplify and smooth?
739
                if simplify_line:
1✔
740
                    line = line.simplify(simplify_line)
1✔
741
                if corner_cutting:
1✔
742
                    line = _chaikins_corner_cutting(line, corner_cutting)
1✔
743

744
                # Intersect with exterior geom
745
                line = line.intersection(exterior)
1✔
746
                if line.type in ['MultiLineString', 'GeometryCollection']:
1✔
747
                    # Take the longest
748
                    lens = [il.length for il in line.geoms]
×
749
                    line = line.geoms[np.argmax(lens)]
×
750

751
                # Recompute length
752
                gs['LE_SEGMENT'] = np.rint(line.length * gdir.grid.dx)
1✔
753
            gs['geometry'] = shp_trafo(tra_func, line)
1✔
754
            olist.append(gs)
1✔
755

756
    return olist
1✔
757

758

759
def _write_shape_to_disk(gdf, fpath, to_tar=False):
9✔
760
    """Write a shapefile to disk with optional compression
761

762
    Parameters
763
    ----------
764
    gdf : gpd.GeoDataFrame
765
        the data to write
766
    fpath : str
767
        where to writ the file - should be ending in shp
768
    to_tar : bool
769
        put the files in a .tar file. If cfg.PARAMS['use_compression'],
770
        also compress to .gz
771
    """
772

773
    if '.shp' not in fpath:
7✔
774
        raise ValueError('File ending should be .shp')
×
775

776
    gdf.to_file(fpath)
7✔
777

778
    if not to_tar:
7✔
779
        # Done here
780
        return
3✔
781

782
    # Write them in tar
783
    fpath = fpath.replace('.shp', '.tar')
7✔
784
    mode = 'w'
7✔
785
    if cfg.PARAMS['use_compression']:
7✔
786
        fpath += '.gz'
7✔
787
        mode += ':gz'
7✔
788
    if os.path.exists(fpath):
7✔
789
        os.remove(fpath)
5✔
790

791
    # List all files that were written as shape
792
    fs = glob.glob(fpath.replace('.gz', '').replace('.tar', '.*'))
7✔
793
    # Add them to tar
794
    with tarfile.open(fpath, mode=mode) as tf:
7✔
795
        for ff in fs:
7✔
796
            tf.add(ff, arcname=os.path.basename(ff))
7✔
797

798
    # Delete the old ones
799
    for ff in fs:
7✔
800
        os.remove(ff)
7✔
801

802

803
@global_task(log)
9✔
804
def write_centerlines_to_shape(gdirs, *, path=True, to_tar=False,
9✔
805
                               to_crs='EPSG:4326',
806
                               filesuffix='', flowlines_output=False,
807
                               ensure_exterior_match=False,
808
                               geometrical_widths_output=False,
809
                               corrected_widths_output=False,
810
                               keep_main_only=False,
811
                               simplify_line=0,
812
                               corner_cutting=0):
813
    """Write the centerlines to a shapefile.
814

815
    Parameters
816
    ----------
817
    gdirs:
818
        the list of GlacierDir to process.
819
    path: str or bool
820
        Set to "True" in order  to store the shape in the working directory
821
        Set to a str path to store the file to your chosen location
822
    to_tar : bool
823
        put the files in a .tar file. If cfg.PARAMS['use_compression'],
824
        also compress to .gz
825
    filesuffix : str
826
        add a suffix to the output file
827
    flowlines_output : bool
828
        output the OGGM flowlines instead of the centerlines
829
    geometrical_widths_output : bool
830
        output the geometrical widths instead of the centerlines
831
    corrected_widths_output : bool
832
        output the corrected widths instead of the centerlines
833
    ensure_exterior_match : bool
834
        per design, the centerlines will match the underlying DEM grid.
835
        This may imply that they do not "touch" the exterior outlines of the
836
        glacier in vector space. Set this to True to correct for that.
837
    to_crs : str
838
        write the shape to another coordinate reference system (CRS)
839
    keep_main_only : bool
840
        write only the main flowlines to the output files
841
    simplify_line : float
842
        apply shapely's `simplify` method to the line before writing. It is
843
        a purely cosmetic option, although glacier length will be affected.
844
        All points in the simplified object will be within the tolerance
845
        distance of the original geometry (units: grid points). A good
846
        value to test first is 0.5
847
    corner_cutting : int
848
        apply the Chaikin's corner cutting algorithm to the geometry before
849
        writing. The integer represents the number of refinements to apply.
850
        A good first value to test is 5.
851
    """
852
    from oggm.workflow import execute_entity_task
3✔
853

854
    if path is True:
3✔
855
        path = os.path.join(cfg.PATHS['working_dir'],
2✔
856
                            'glacier_centerlines' + filesuffix + '.shp')
857

858
    _to_crs = salem.check_crs(to_crs)
3✔
859
    if not _to_crs:
3✔
860
        raise InvalidParamsError(f'CRS not understood: {to_crs}')
×
861

862
    log.workflow('write_centerlines_to_shape on {} ...'.format(path))
3✔
863

864
    olist = execute_entity_task(get_centerline_lonlat, gdirs,
3✔
865
                                flowlines_output=flowlines_output,
866
                                ensure_exterior_match=ensure_exterior_match,
867
                                geometrical_widths_output=geometrical_widths_output,
868
                                corrected_widths_output=corrected_widths_output,
869
                                keep_main_only=keep_main_only,
870
                                simplify_line=simplify_line,
871
                                corner_cutting=corner_cutting,
872
                                to_crs=_to_crs)
873
    # filter for none
874
    olist = [o for o in olist if o is not None]
3✔
875
    odf = gpd.GeoDataFrame(itertools.chain.from_iterable(olist))
3✔
876
    odf = odf.sort_values(by='RGIID')
3✔
877
    odf.crs = to_crs
3✔
878
    # Sanity checks to avoid bad surprises
879
    gtype = np.array([g.type for g in odf.geometry])
3✔
880
    if 'GeometryCollection' in gtype:
3✔
881
        errdf = odf.loc[gtype == 'GeometryCollection']
3✔
882
        with warnings.catch_warnings():
3✔
883
            # errdf.length warns because of use of wgs84
884
            warnings.filterwarnings("ignore", category=UserWarning)
3✔
885
            if not np.all(errdf.length) == 0:
3✔
886
                errdf = errdf.loc[errdf.length > 0]
×
887
                raise RuntimeError('Some geometries are non-empty GeometryCollection '
×
888
                                   f'at RGI Ids: {errdf.RGIID.values}')
889
    _write_shape_to_disk(odf, path, to_tar=to_tar)
3✔
890

891

892
def demo_glacier_id(key):
9✔
893
    """Get the RGI id of a glacier by name or key: None if not found."""
894

895
    df = cfg.DATA['demo_glaciers']
1✔
896

897
    # Is the name in key?
898
    s = df.loc[df.Key.str.lower() == key.lower()]
1✔
899
    if len(s) == 1:
1✔
900
        return s.index[0]
1✔
901

902
    # Is the name in name?
903
    s = df.loc[df.Name.str.lower() == key.lower()]
1✔
904
    if len(s) == 1:
1✔
905
        return s.index[0]
1✔
906

907
    # Is the name in Ids?
908
    try:
1✔
909
        s = df.loc[[key]]
1✔
910
        if len(s) == 1:
1✔
911
            return s.index[0]
1✔
912
    except KeyError:
1✔
913
        pass
1✔
914

915
    return None
1✔
916

917

918
class compile_to_netcdf(object):
9✔
919
    """Decorator for common compiling NetCDF files logic.
920

921
    All compile_* tasks can be optimized the same way, by using temporary
922
    files and merging them afterwards.
923
    """
924

925
    def __init__(self, log):
9✔
926
        """Decorator syntax: ``@compile_to_netcdf(log, n_tmp_files=1000)``
927

928
        Parameters
929
        ----------
930
        log: logger
931
            module logger
932
        tmp_file_size: int
933
            number of glacier directories per temporary files
934
        """
935
        self.log = log
9✔
936

937
    def __call__(self, task_func):
9✔
938
        """Decorate."""
939

940
        @wraps(task_func)
9✔
941
        def _compile_to_netcdf(gdirs, input_filesuffix='',
9✔
942
                               output_filesuffix='',
943
                               path=True,
944
                               tmp_file_size=1000,
945
                               **kwargs):
946

947
            if not output_filesuffix:
5✔
948
                output_filesuffix = input_filesuffix
5✔
949

950
            gdirs = tolist(gdirs)
5✔
951

952
            if cfg.PARAMS['hydro_month_nh'] != cfg.PARAMS['hydro_month_sh']:
5✔
953
                # Check some stuff
954
                hemisphere = [gd.hemisphere for gd in gdirs]
4✔
955
                if len(np.unique(hemisphere)) == 2:
4✔
956
                    if path is not True:
1✔
957
                        raise InvalidParamsError('With glaciers from both '
×
958
                                                 'hemispheres, set `path=True`.')
959
                    self.log.workflow('compile_*: you gave me a list of gdirs from '
1✔
960
                                      'both hemispheres. I am going to write two '
961
                                      'files out of it with _sh and _nh suffixes.')
962
                    _gdirs = [gd for gd in gdirs if gd.hemisphere == 'sh']
1✔
963
                    _compile_to_netcdf(_gdirs,
1✔
964
                                       input_filesuffix=input_filesuffix,
965
                                       output_filesuffix=output_filesuffix + '_sh',
966
                                       path=True,
967
                                       tmp_file_size=tmp_file_size,
968
                                       **kwargs)
969
                    _gdirs = [gd for gd in gdirs if gd.hemisphere == 'nh']
1✔
970
                    _compile_to_netcdf(_gdirs,
1✔
971
                                       input_filesuffix=input_filesuffix,
972
                                       output_filesuffix=output_filesuffix + '_nh',
973
                                       path=True,
974
                                       tmp_file_size=tmp_file_size,
975
                                       **kwargs)
976
                    return
1✔
977

978
            task_name = task_func.__name__
5✔
979
            output_base = task_name.replace('compile_', '')
5✔
980

981
            if path is True:
5✔
982
                path = os.path.join(cfg.PATHS['working_dir'],
5✔
983
                                    output_base + output_filesuffix + '.nc')
984

985
            self.log.workflow('Applying %s on %d gdirs.',
5✔
986
                              task_name, len(gdirs))
987

988
            # Run the task
989
            # If small gdir size, no need for temporary files
990
            if len(gdirs) < tmp_file_size or not path:
5✔
991
                return task_func(gdirs, input_filesuffix=input_filesuffix,
5✔
992
                                 path=path, **kwargs)
993

994
            # Otherwise, divide and conquer
995
            sub_gdirs = [gdirs[i: i + tmp_file_size] for i in
1✔
996
                         range(0, len(gdirs), tmp_file_size)]
997

998
            tmp_paths = [os.path.join(cfg.PATHS['working_dir'],
1✔
999
                                      'compile_tmp_{:06d}.nc'.format(i))
1000
                         for i in range(len(sub_gdirs))]
1001

1002
            try:
1✔
1003
                for spath, sgdirs in zip(tmp_paths, sub_gdirs):
1✔
1004
                    task_func(sgdirs, input_filesuffix=input_filesuffix,
1✔
1005
                              path=spath, **kwargs)
1006
            except BaseException:
×
1007
                # If something wrong, delete the tmp files
1008
                for f in tmp_paths:
×
1009
                    try:
×
1010
                        os.remove(f)
×
1011
                    except FileNotFoundError:
×
1012
                        pass
×
1013
                raise
×
1014

1015
            # Ok, now merge and return
1016
            try:
1✔
1017
                with xr.open_mfdataset(tmp_paths, combine='nested',
1✔
1018
                                       concat_dim='rgi_id') as ds:
1019
                    # the .load() is actually quite uncool here, but it solves
1020
                    # an unbelievable stalling problem in multiproc
1021
                    ds.load().to_netcdf(path)
1✔
1022
            except TypeError:
×
1023
                # xr < v 0.13
1024
                with xr.open_mfdataset(tmp_paths, concat_dim='rgi_id') as ds:
×
1025
                    # the .load() is actually quite uncool here, but it solves
1026
                    # an unbelievable stalling problem in multiproc
1027
                    ds.load().to_netcdf(path)
×
1028

1029
            # We can't return the dataset without loading it, so we don't
1030
            return None
1✔
1031

1032
        return _compile_to_netcdf
9✔
1033

1034

1035
@entity_task(log)
9✔
1036
def merge_consecutive_run_outputs(gdir,
9✔
1037
                                  input_filesuffix_1=None,
1038
                                  input_filesuffix_2=None,
1039
                                  output_filesuffix=None,
1040
                                  delete_input=False):
1041
    """Merges the output of two model_diagnostics files into one.
1042

1043
    It assumes that the last time of file1 is equal to the first time of file2.
1044

1045
    Parameters
1046
    ----------
1047
    gdir : the glacier directory
1048
    input_filesuffix_1 : str
1049
        how to recognize the first file
1050
    input_filesuffix_2 : str
1051
        how to recognize the second file
1052
    output_filesuffix : str
1053
        where to write the output (default: no suffix)
1054

1055
    Returns
1056
    -------
1057
    The merged dataset
1058
    """
1059

1060
    # Read in the input files and check
1061
    fp1 = gdir.get_filepath('model_diagnostics', filesuffix=input_filesuffix_1)
2✔
1062
    with xr.open_dataset(fp1) as ds:
2✔
1063
        ds1 = ds.load()
2✔
1064
    fp2 = gdir.get_filepath('model_diagnostics', filesuffix=input_filesuffix_2)
2✔
1065
    with xr.open_dataset(fp2) as ds:
2✔
1066
        ds2 = ds.load()
2✔
1067
    if ds1.time[-1] != ds2.time[0]:
2✔
1068
        raise InvalidWorkflowError('The two files are incompatible by time')
×
1069

1070
    # Samity check for all variables as well
1071
    for v in ds1:
2✔
1072
        if not np.all(np.isfinite(ds1[v].data[-1])):
2✔
1073
            # This is the last year of hydro output - we will discard anyway
1074
            continue
1✔
1075
        if np.allclose(ds1[v].data[-1], ds2[v].data[0]):
2✔
1076
            # This means that we're OK - the two match
1077
            continue
2✔
1078

1079
        # This has to be a bucket of some sort, probably snow or calving
1080
        if len(ds2[v].data.shape) == 1:
2✔
1081
            if ds2[v].data[0] != 0:
2✔
1082
                raise InvalidWorkflowError('The two files seem incompatible '
×
1083
                                           f'by data on variable : {v}')
1084
            bucket = ds1[v].data[-1]
2✔
1085
        elif len(ds2[v].data.shape) == 2:
1✔
1086
            if ds2[v].data[0, 0] != 0:
1✔
1087
                raise InvalidWorkflowError('The two files seem incompatible '
×
1088
                                           f'by data on variable : {v}')
1089
            bucket = ds1[v].data[-1, -1]
1✔
1090
        # Carry it to the rest
1091
        ds2[v] = ds2[v] + bucket
2✔
1092

1093
    # Merge by removing the last step of file 1 and delete the files if asked
1094
    out_ds = xr.concat([ds1.isel(time=slice(0, -1)), ds2], dim='time')
2✔
1095
    if delete_input:
2✔
1096
        os.remove(fp1)
1✔
1097
        os.remove(fp2)
1✔
1098
    # Write out and return
1099
    fp = gdir.get_filepath('model_diagnostics', filesuffix=output_filesuffix)
2✔
1100
    out_ds.to_netcdf(fp)
2✔
1101
    return out_ds
2✔
1102

1103

1104
@global_task(log)
9✔
1105
@compile_to_netcdf(log)
9✔
1106
def compile_run_output(gdirs, path=True, input_filesuffix='',
9✔
1107
                       use_compression=True):
1108
    """Compiles the output of the model runs of several gdirs into one file.
1109

1110
    Parameters
1111
    ----------
1112
    gdirs : list of :py:class:`oggm.GlacierDirectory` objects
1113
        the glacier directories to process
1114
    path : str
1115
        where to store (default is on the working dir).
1116
        Set to `False` to disable disk storage.
1117
    input_filesuffix : str
1118
        the filesuffix of the files to be compiled
1119
    use_compression : bool
1120
        use zlib compression on the output netCDF files
1121

1122
    Returns
1123
    -------
1124
    ds : :py:class:`xarray.Dataset`
1125
        compiled output
1126
    """
1127

1128
    # Get the dimensions of all this
1129
    rgi_ids = [gd.rgi_id for gd in gdirs]
5✔
1130

1131
    # To find the longest time, we have to open all files unfortunately
1132
    time_info = {}
5✔
1133
    time_keys = ['hydro_year', 'hydro_month', 'calendar_year', 'calendar_month']
5✔
1134
    for gd in gdirs:
5✔
1135
        fp = gd.get_filepath('model_diagnostics', filesuffix=input_filesuffix)
5✔
1136
        try:
5✔
1137
            with ncDataset(fp) as ds:
5✔
1138
                time = ds.variables['time'][:]
5✔
1139
                if 'time' not in time_info:
5✔
1140
                    time_info['time'] = time
5✔
1141
                    for cn in time_keys:
5✔
1142
                        time_info[cn] = ds.variables[cn][:]
5✔
1143
                else:
1144
                    # Here we may need to append or add stuff
1145
                    ot = time_info['time']
4✔
1146
                    if time[0] > ot[-1] or ot[-1] < time[0]:
4✔
1147
                        raise InvalidWorkflowError('Trying to compile output '
×
1148
                                                   'without overlap.')
1149
                    if time[-1] > ot[-1]:
4✔
1150
                        p = np.nonzero(time == ot[-1])[0][0] + 1
×
1151
                        time_info['time'] = np.append(ot, time[p:])
×
1152
                        for cn in time_keys:
×
1153
                            time_info[cn] = np.append(time_info[cn],
×
1154
                                                      ds.variables[cn][p:])
1155
                    if time[0] < ot[0]:
4✔
1156
                        p = np.nonzero(time == ot[0])[0][0]
×
1157
                        time_info['time'] = np.append(time[:p], ot)
×
1158
                        for cn in time_keys:
×
1159
                            time_info[cn] = np.append(ds.variables[cn][:p],
×
1160
                                                      time_info[cn])
1161

1162
            # If this worked, keep it as template
1163
            ppath = fp
5✔
1164
        except FileNotFoundError:
×
1165
            pass
×
1166

1167
    if 'time' not in time_info:
5✔
1168
        raise RuntimeError('Found no valid glaciers!')
×
1169

1170
    # OK found it, open it and prepare the output
1171
    with xr.open_dataset(ppath) as ds_diag:
5✔
1172

1173
        # Prepare output
1174
        ds = xr.Dataset()
5✔
1175

1176
        # Global attributes
1177
        ds.attrs['description'] = 'OGGM model output'
5✔
1178
        ds.attrs['oggm_version'] = __version__
5✔
1179
        ds.attrs['calendar'] = '365-day no leap'
5✔
1180
        ds.attrs['creation_date'] = strftime("%Y-%m-%d %H:%M:%S", gmtime())
5✔
1181

1182
        # Copy coordinates
1183
        time = time_info['time']
5✔
1184
        ds.coords['time'] = ('time', time)
5✔
1185
        ds['time'].attrs['description'] = 'Floating hydrological year'
5✔
1186
        # New coord
1187
        ds.coords['rgi_id'] = ('rgi_id', rgi_ids)
5✔
1188
        ds['rgi_id'].attrs['description'] = 'RGI glacier identifier'
5✔
1189
        # This is just taken from there
1190
        for cn in ['hydro_year', 'hydro_month',
5✔
1191
                   'calendar_year', 'calendar_month']:
1192
            ds.coords[cn] = ('time', time_info[cn])
5✔
1193
            ds[cn].attrs['description'] = ds_diag[cn].attrs['description']
5✔
1194

1195
        # We decide on the name of "3d" variables in case we have daily
1196
        # output
1197
        name_2d_dim = 'day_2d' if 'day_2d' in ds_diag.dims else 'month_2d'
5✔
1198

1199
        # Prepare the 2D variables
1200
        shape = (len(time), len(rgi_ids))
5✔
1201
        out_2d = dict()
5✔
1202
        for vn in ds_diag.data_vars:
5✔
1203
            if name_2d_dim in ds_diag[vn].dims:
5✔
1204
                continue
1✔
1205
            var = dict()
5✔
1206
            var['data'] = np.full(shape, np.nan)
5✔
1207
            var['attrs'] = ds_diag[vn].attrs
5✔
1208
            out_2d[vn] = var
5✔
1209

1210
        # 1D Variables
1211
        out_1d = dict()
5✔
1212
        for vn, attrs in [('water_level', {'description': 'Calving water level',
5✔
1213
                                           'units': 'm'}),
1214
                          ('glen_a', {'description': 'Simulation Glen A',
1215
                                      'units': ''}),
1216
                          ('fs', {'description': 'Simulation sliding parameter',
1217
                                  'units': ''}),
1218
                          ]:
1219
            var = dict()
5✔
1220
            var['data'] = np.full(len(rgi_ids), np.nan)
5✔
1221
            var['attrs'] = attrs
5✔
1222
            out_1d[vn] = var
5✔
1223

1224
        # Maybe 3D?
1225
        out_3d = dict()
5✔
1226
        if name_2d_dim in ds_diag.dims:
5✔
1227
            # We have some 3d vars
1228
            month_2d = ds_diag[name_2d_dim]
1✔
1229
            ds.coords[name_2d_dim] = (name_2d_dim, month_2d.data)
1✔
1230
            cn = f'calendar_{name_2d_dim}'
1✔
1231
            ds.coords[cn] = (name_2d_dim, ds_diag[cn].values)
1✔
1232

1233
            shape = (len(time), len(month_2d), len(rgi_ids))
1✔
1234
            for vn in ds_diag.data_vars:
1✔
1235
                if name_2d_dim not in ds_diag[vn].dims:
1✔
1236
                    continue
1✔
1237
                var = dict()
1✔
1238
                var['data'] = np.full(shape, np.nan)
1✔
1239
                var['attrs'] = ds_diag[vn].attrs
1✔
1240
                out_3d[vn] = var
1✔
1241

1242
    # Read out
1243
    for i, gdir in enumerate(gdirs):
5✔
1244
        try:
5✔
1245
            ppath = gdir.get_filepath('model_diagnostics',
5✔
1246
                                      filesuffix=input_filesuffix)
1247
            with ncDataset(ppath) as ds_diag:
5✔
1248
                it = ds_diag.variables['time'][:]
5✔
1249
                a = np.nonzero(time == it[0])[0][0]
5✔
1250
                b = np.nonzero(time == it[-1])[0][0] + 1
5✔
1251
                for vn, var in out_2d.items():
5✔
1252
                    var['data'][a:b, i] = ds_diag.variables[vn][:]
5✔
1253
                for vn, var in out_3d.items():
5✔
1254
                    var['data'][a:b, :, i] = ds_diag.variables[vn][:]
1✔
1255
                for vn, var in out_1d.items():
5✔
1256
                    var['data'][i] = ds_diag.getncattr(vn)
5✔
1257
        except FileNotFoundError:
×
1258
            pass
×
1259

1260
    # To xarray
1261
    for vn, var in out_2d.items():
5✔
1262
        # Backwards compatibility - to remove one day...
1263
        for r in ['_m3', '_m2', '_myr', '_m']:
5✔
1264
            # Order matters
1265
            vn = regexp.sub(r + '$', '', vn)
5✔
1266
        ds[vn] = (('time', 'rgi_id'), var['data'])
5✔
1267
        ds[vn].attrs = var['attrs']
5✔
1268
    for vn, var in out_3d.items():
5✔
1269
        ds[vn] = (('time', name_2d_dim, 'rgi_id'), var['data'])
1✔
1270
        ds[vn].attrs = var['attrs']
1✔
1271
    for vn, var in out_1d.items():
5✔
1272
        ds[vn] = (('rgi_id', ), var['data'])
5✔
1273
        ds[vn].attrs = var['attrs']
5✔
1274

1275
    # To file?
1276
    if path:
5✔
1277
        enc_var = {'dtype': 'float32'}
5✔
1278
        if use_compression:
5✔
1279
            enc_var['complevel'] = 5
5✔
1280
            enc_var['zlib'] = True
5✔
1281
        encoding = {v: enc_var for v in ds.data_vars}
5✔
1282
        ds.to_netcdf(path, encoding=encoding)
5✔
1283

1284
    return ds
5✔
1285

1286

1287
@global_task(log)
9✔
1288
@compile_to_netcdf(log)
9✔
1289
def compile_climate_input(gdirs, path=True, filename='climate_historical',
9✔
1290
                          input_filesuffix='', use_compression=True):
1291
    """Merge the climate input files in the glacier directories into one file.
1292

1293
    Parameters
1294
    ----------
1295
    gdirs : list of :py:class:`oggm.GlacierDirectory` objects
1296
        the glacier directories to process
1297
    path : str
1298
        where to store (default is on the working dir).
1299
        Set to `False` to disable disk storage.
1300
    filename : str
1301
        BASENAME of the climate input files
1302
    input_filesuffix : str
1303
        the filesuffix of the files to be compiled
1304
    use_compression : bool
1305
        use zlib compression on the output netCDF files
1306

1307
    Returns
1308
    -------
1309
    ds : :py:class:`xarray.Dataset`
1310
        compiled climate data
1311
    """
1312

1313
    # Get the dimensions of all this
1314
    rgi_ids = [gd.rgi_id for gd in gdirs]
2✔
1315

1316
    # The first gdir might have blown up, try some others
1317
    i = 0
2✔
1318
    while True:
2✔
1319
        if i >= len(gdirs):
2✔
1320
            raise RuntimeError('Found no valid glaciers!')
×
1321
        try:
2✔
1322
            pgdir = gdirs[i]
2✔
1323
            ppath = pgdir.get_filepath(filename=filename,
2✔
1324
                                       filesuffix=input_filesuffix)
1325
            with xr.open_dataset(ppath) as ds_clim:
2✔
1326
                ds_clim.time.values
2✔
1327
            # If this worked, we have a valid gdir
1328
            break
2✔
1329
        except BaseException:
×
1330
            i += 1
×
1331

1332
    with xr.open_dataset(ppath) as ds_clim:
2✔
1333
        cyrs = ds_clim['time.year']
2✔
1334
        cmonths = ds_clim['time.month']
2✔
1335
        has_grad = 'gradient' in ds_clim.variables
2✔
1336
        sm = cfg.PARAMS['hydro_month_' + pgdir.hemisphere]
2✔
1337
        yrs, months = calendardate_to_hydrodate(cyrs, cmonths, start_month=sm)
2✔
1338
        assert months[0] == 1, 'Expected the first hydro month to be 1'
2✔
1339
        time = date_to_floatyear(yrs, months)
2✔
1340

1341
    # Prepare output
1342
    ds = xr.Dataset()
2✔
1343

1344
    # Global attributes
1345
    ds.attrs['description'] = 'OGGM model output'
2✔
1346
    ds.attrs['oggm_version'] = __version__
2✔
1347
    ds.attrs['calendar'] = '365-day no leap'
2✔
1348
    ds.attrs['creation_date'] = strftime("%Y-%m-%d %H:%M:%S", gmtime())
2✔
1349

1350
    # Coordinates
1351
    ds.coords['time'] = ('time', time)
2✔
1352
    ds.coords['rgi_id'] = ('rgi_id', rgi_ids)
2✔
1353
    ds.coords['hydro_year'] = ('time', yrs)
2✔
1354
    ds.coords['hydro_month'] = ('time', months)
2✔
1355
    ds.coords['calendar_year'] = ('time', cyrs.data)
2✔
1356
    ds.coords['calendar_month'] = ('time', cmonths.data)
2✔
1357
    ds['time'].attrs['description'] = 'Floating hydrological year'
2✔
1358
    ds['rgi_id'].attrs['description'] = 'RGI glacier identifier'
2✔
1359
    ds['hydro_year'].attrs['description'] = 'Hydrological year'
2✔
1360
    ds['hydro_month'].attrs['description'] = 'Hydrological month'
2✔
1361
    ds['calendar_year'].attrs['description'] = 'Calendar year'
2✔
1362
    ds['calendar_month'].attrs['description'] = 'Calendar month'
2✔
1363

1364
    shape = (len(time), len(rgi_ids))
2✔
1365
    temp = np.zeros(shape) * np.NaN
2✔
1366
    prcp = np.zeros(shape) * np.NaN
2✔
1367
    if has_grad:
2✔
1368
        grad = np.zeros(shape) * np.NaN
×
1369
    ref_hgt = np.zeros(len(rgi_ids)) * np.NaN
2✔
1370
    ref_pix_lon = np.zeros(len(rgi_ids)) * np.NaN
2✔
1371
    ref_pix_lat = np.zeros(len(rgi_ids)) * np.NaN
2✔
1372

1373
    for i, gdir in enumerate(gdirs):
2✔
1374
        try:
2✔
1375
            ppath = gdir.get_filepath(filename=filename,
2✔
1376
                                      filesuffix=input_filesuffix)
1377
            with xr.open_dataset(ppath) as ds_clim:
2✔
1378
                prcp[:, i] = ds_clim.prcp.values
2✔
1379
                temp[:, i] = ds_clim.temp.values
2✔
1380
                if has_grad:
2✔
1381
                    grad[:, i] = ds_clim.gradient
×
1382
                ref_hgt[i] = ds_clim.ref_hgt
2✔
1383
                ref_pix_lon[i] = ds_clim.ref_pix_lon
2✔
1384
                ref_pix_lat[i] = ds_clim.ref_pix_lat
2✔
1385
        except BaseException:
×
1386
            pass
×
1387

1388
    ds['temp'] = (('time', 'rgi_id'), temp)
2✔
1389
    ds['temp'].attrs['units'] = 'DegC'
2✔
1390
    ds['temp'].attrs['description'] = '2m Temperature at height ref_hgt'
2✔
1391
    ds['prcp'] = (('time', 'rgi_id'), prcp)
2✔
1392
    ds['prcp'].attrs['units'] = 'kg m-2'
2✔
1393
    ds['prcp'].attrs['description'] = 'total monthly precipitation amount'
2✔
1394
    if has_grad:
2✔
1395
        ds['grad'] = (('time', 'rgi_id'), grad)
×
1396
        ds['grad'].attrs['units'] = 'degC m-1'
×
1397
        ds['grad'].attrs['description'] = 'temperature gradient'
×
1398
    ds['ref_hgt'] = ('rgi_id', ref_hgt)
2✔
1399
    ds['ref_hgt'].attrs['units'] = 'm'
2✔
1400
    ds['ref_hgt'].attrs['description'] = 'reference height'
2✔
1401
    ds['ref_pix_lon'] = ('rgi_id', ref_pix_lon)
2✔
1402
    ds['ref_pix_lon'].attrs['description'] = 'longitude'
2✔
1403
    ds['ref_pix_lat'] = ('rgi_id', ref_pix_lat)
2✔
1404
    ds['ref_pix_lat'].attrs['description'] = 'latitude'
2✔
1405

1406
    if path:
2✔
1407
        enc_var = {'dtype': 'float32'}
2✔
1408
        if use_compression:
2✔
1409
            enc_var['complevel'] = 5
2✔
1410
            enc_var['zlib'] = True
2✔
1411
        vars = ['temp', 'prcp']
2✔
1412
        if has_grad:
2✔
1413
            vars += ['grad']
×
1414
        encoding = {v: enc_var for v in vars}
2✔
1415
        ds.to_netcdf(path, encoding=encoding)
2✔
1416
    return ds
2✔
1417
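
# Illustrative sketch (editor's addition, not part of the OGGM source): how the
# compiled climate dataset written above could be inspected. The file name and
# RGI id are placeholders; `xr` is the module-level xarray import.
def _example_read_compiled_climate(path='climate_input.nc',
                                   rgi_id='RGI60-11.00897'):
    with xr.open_dataset(path) as ds:
        # monthly 2m temperature at the reference height of this glacier
        temp = ds.temp.sel(rgi_id=rgi_id).load()
        # annual precipitation totals per hydrological year
        prcp_yr = ds.prcp.sel(rgi_id=rgi_id).groupby('hydro_year').sum().load()
    return temp, prcp_yr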

1418

1419
@global_task(log)
9✔
1420
def compile_task_log(gdirs, task_names=[], filesuffix='', path=True,
9✔
1421
                     append=True):
1422
    """Gathers the log output for the selected task(s)
1423

1424
    Parameters
1425
    ----------
1426
    gdirs : list of :py:class:`oggm.GlacierDirectory` objects
1427
        the glacier directories to process
1428
    task_names : list of str
1429
        The tasks to check for
1430
    filesuffix : str
1431
        add suffix to output file
1432
    path : str, bool
1433
        Set to `True` in order to store the info in the working directory
1434
        Set to a path to store the file to your chosen location
1435
        Set to `False` to omit disk storage
1436
    append : bool
1437
        If a task log file already exists in the working directory, the new
1438
        logs will be added to the existing file
1439

1440
    Returns
1441
    -------
1442
    out : :py:class:`pandas.DataFrame`
1443
        log output
1444
    """
1445

1446
    out_df = []
3✔
1447
    for gdir in gdirs:
3✔
1448
        d = OrderedDict()
3✔
1449
        d['rgi_id'] = gdir.rgi_id
3✔
1450
        for task_name in task_names:
3✔
1451
            ts = gdir.get_task_status(task_name)
3✔
1452
            if ts is None:
3✔
1453
                ts = ''
1✔
1454
            d[task_name] = ts.replace(',', ' ')
3✔
1455
        out_df.append(d)
3✔
1456

1457
    out = pd.DataFrame(out_df).set_index('rgi_id')
3✔
1458
    if path:
3✔
1459
        if path is True:
3✔
1460
            path = os.path.join(cfg.PATHS['working_dir'],
3✔
1461
                                'task_log' + filesuffix + '.csv')
1462
        if os.path.exists(path) and append:
3✔
1463
            odf = pd.read_csv(path, index_col=0)
1✔
1464
            out = odf.join(out, rsuffix='_n')
1✔
1465
        out.to_csv(path)
3✔
1466
    return out
3✔
1467
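
# Illustrative sketch (editor's addition): a possible call to compile_task_log
# once a workflow has run. `gdirs` and the task names below are assumptions
# standing in for whatever entity tasks were actually executed.
def _example_task_log(gdirs):
    df = compile_task_log(gdirs,
                          task_names=['glacier_masks', 'compute_centerlines'],
                          path=False)
    # one row per glacier, one column per task; empty strings mean that the
    # task was never run for that glacier (see above)
    return df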

1468

1469
@global_task(log)
9✔
1470
def compile_task_time(gdirs, task_names=[], filesuffix='', path=True,
9✔
1471
                      append=True):
1472
    """Gathers the time needed for the selected task(s) to run
1473

1474
    Parameters
1475
    ----------
1476
    gdirs : list of :py:class:`oggm.GlacierDirectory` objects
1477
        the glacier directories to process
1478
    task_names : list of str
1479
        The tasks to check for
1480
    filesuffix : str
1481
        add suffix to output file
1482
    path : str, bool
1483
        Set to `True` in order to store the info in the working directory
1484
        Set to a path to store the file to your chosen location
1485
        Set to `False` to omit disk storage
1486
    append : bool
1487
        If a task time file already exists in the working directory, the new
1488
        timings will be added to the existing file
1489

1490
    Returns
1491
    -------
1492
    out : :py:class:`pandas.DataFrame`
1493
        per-task run times
1494
    """
1495

1496
    out_df = []
1✔
1497
    for gdir in gdirs:
1✔
1498
        d = OrderedDict()
1✔
1499
        d['rgi_id'] = gdir.rgi_id
1✔
1500
        for task_name in task_names:
1✔
1501
            d[task_name] = gdir.get_task_time(task_name)
1✔
1502
        out_df.append(d)
1✔
1503

1504
    out = pd.DataFrame(out_df).set_index('rgi_id')
1✔
1505
    if path:
1✔
1506
        if path is True:
1✔
1507
            path = os.path.join(cfg.PATHS['working_dir'],
1✔
1508
                                'task_time' + filesuffix + '.csv')
1509
        if os.path.exists(path) and append:
1✔
1510
            odf = pd.read_csv(path, index_col=0)
1✔
1511
            out = odf.join(out, rsuffix='_n')
1✔
1512
        out.to_csv(path)
1✔
1513
    return out
1✔
1514
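
# Illustrative sketch (editor's addition): summarizing the per-task run times
# gathered by compile_task_time. `gdirs` and the task name are assumptions.
def _example_task_times(gdirs):
    df = compile_task_time(gdirs, task_names=['glacier_masks'], path=False)
    # total and average time spent per task over all glaciers
    return df.sum(), df.mean()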

1515

1516
@entity_task(log)
9✔
1517
def glacier_statistics(gdir, inversion_only=False, apply_func=None):
9✔
1518
    """Gather as much statistics as possible about this glacier.
1519

1520
    It can be used for result diagnostics and other analyses. If the data
1521
    necessary for a statistic is not available (e.g. flowline length) it
1522
    will simply be ignored.
1523

1524
    Parameters
1525
    ----------
1526
    inversion_only : bool
1527
        if one wants to summarize the inversion output only (including calving)
1528
    apply_func : function
1529
        if one wants to summarize further information about a glacier, set
1530
        this kwarg to a function that accepts a glacier directory as first
1531
        positional argument, and the dictionary to fill in with data as
1532
        second argument. The dictionary should only store scalar values (strings,
1533
        float, int)
1534
    """
1535

1536
    d = OrderedDict()
5✔
1537

1538
    # Easy stats - this should always be possible
1539
    d['rgi_id'] = gdir.rgi_id
5✔
1540
    d['rgi_region'] = gdir.rgi_region
5✔
1541
    d['rgi_subregion'] = gdir.rgi_subregion
5✔
1542
    d['name'] = gdir.name
5✔
1543
    d['cenlon'] = gdir.cenlon
5✔
1544
    d['cenlat'] = gdir.cenlat
5✔
1545
    d['rgi_area_km2'] = gdir.rgi_area_km2
5✔
1546
    d['rgi_year'] = gdir.rgi_date
5✔
1547
    d['glacier_type'] = gdir.glacier_type
5✔
1548
    d['terminus_type'] = gdir.terminus_type
5✔
1549
    d['is_tidewater'] = gdir.is_tidewater
5✔
1550
    d['status'] = gdir.status
5✔
1551

1552
    # The rest is less certain. We put these in a try block and see
1553
    # We're good with any error - we store the dict anyway below
1554
    # TODO: should be done with more preselected errors
1555
    with warnings.catch_warnings():
5✔
1556
        warnings.filterwarnings("ignore", category=RuntimeWarning)
5✔
1557

1558
        try:
5✔
1559
            # Inversion
1560
            if gdir.has_file('inversion_output'):
5✔
1561
                vol = []
5✔
1562
                vol_bsl = []
5✔
1563
                vol_bwl = []
5✔
1564
                cl = gdir.read_pickle('inversion_output')
5✔
1565
                for c in cl:
5✔
1566
                    vol.extend(c['volume'])
5✔
1567
                    vol_bsl.extend(c.get('volume_bsl', [0]))
5✔
1568
                    vol_bwl.extend(c.get('volume_bwl', [0]))
5✔
1569
                d['inv_volume_km3'] = np.nansum(vol) * 1e-9
5✔
1570
                area = gdir.rgi_area_km2
5✔
1571
                d['vas_volume_km3'] = 0.034 * (area ** 1.375)
5✔
1572
                # BSL / BWL
1573
                d['inv_volume_bsl_km3'] = np.nansum(vol_bsl) * 1e-9
5✔
1574
                d['inv_volume_bwl_km3'] = np.nansum(vol_bwl) * 1e-9
5✔
1575
        except BaseException:
×
1576
            pass
×
1577

1578
        try:
5✔
1579
            # Diagnostics
1580
            diags = gdir.get_diagnostics()
5✔
1581
            for k, v in diags.items():
5✔
1582
                d[k] = v
5✔
1583
        except BaseException:
×
1584
            pass
×
1585

1586
        if inversion_only:
5✔
1587
            return d
1✔
1588

1589
        try:
5✔
1590
            # Error log
1591
            errlog = gdir.get_error_log()
5✔
1592
            if errlog is not None:
5✔
1593
                d['error_task'] = errlog.split(';')[-2]
2✔
1594
                d['error_msg'] = errlog.split(';')[-1]
2✔
1595
            else:
1596
                d['error_task'] = None
5✔
1597
                d['error_msg'] = None
5✔
1598
        except BaseException:
×
1599
            pass
×
1600

1601
        try:
5✔
1602
            # Masks related stuff
1603
            fpath = gdir.get_filepath('gridded_data')
5✔
1604
            with ncDataset(fpath) as nc:
5✔
1605
                mask = nc.variables['glacier_mask'][:] == 1
5✔
1606
                topo = nc.variables['topo'][:][mask]
5✔
1607
            d['dem_mean_elev'] = np.mean(topo)
5✔
1608
            d['dem_med_elev'] = np.median(topo)
5✔
1609
            d['dem_min_elev'] = np.min(topo)
5✔
1610
            d['dem_max_elev'] = np.max(topo)
5✔
1611
        except BaseException:
2✔
1612
            pass
2✔
1613

1614
        try:
5✔
1615
            # Ext related stuff
1616
            fpath = gdir.get_filepath('gridded_data')
5✔
1617
            with ncDataset(fpath) as nc:
5✔
1618
                ext = nc.variables['glacier_ext'][:] == 1
5✔
1619
                mask = nc.variables['glacier_mask'][:] == 1
5✔
1620
                topo = nc.variables['topo'][:]
5✔
1621
            d['dem_max_elev_on_ext'] = np.max(topo[ext])
5✔
1622
            d['dem_min_elev_on_ext'] = np.min(topo[ext])
5✔
1623
            a = np.sum(mask & (topo > d['dem_max_elev_on_ext']))
5✔
1624
            d['dem_perc_area_above_max_elev_on_ext'] = a / np.sum(mask)
5✔
1625
            # Terminus loc
1626
            j, i = np.nonzero((topo[ext].min() == topo) & ext)
5✔
1627
            lon, lat = gdir.grid.ij_to_crs(i[0], j[0], crs=salem.wgs84)
5✔
1628
            d['terminus_lon'] = lon
5✔
1629
            d['terminus_lat'] = lat
5✔
1630
        except BaseException:
2✔
1631
            pass
2✔
1632

1633
        try:
5✔
1634
            # Centerlines
1635
            cls = gdir.read_pickle('centerlines')
5✔
1636
            longest = 0.
5✔
1637
            for cl in cls:
5✔
1638
                longest = np.max([longest, cl.dis_on_line[-1]])
5✔
1639
            d['n_centerlines'] = len(cls)
5✔
1640
            d['longest_centerline_km'] = longest * gdir.grid.dx / 1000.
5✔
1641
        except BaseException:
2✔
1642
            pass
2✔
1643

1644
        try:
5✔
1645
            # Flowline related stuff
1646
            h = np.array([])
5✔
1647
            widths = np.array([])
5✔
1648
            slope = np.array([])
5✔
1649
            fls = gdir.read_pickle('inversion_flowlines')
5✔
1650
            dx = fls[0].dx * gdir.grid.dx
5✔
1651
            for fl in fls:
5✔
1652
                hgt = fl.surface_h
5✔
1653
                h = np.append(h, hgt)
5✔
1654
                widths = np.append(widths, fl.widths * gdir.grid.dx)
5✔
1655
                slope = np.append(slope, np.arctan(-np.gradient(hgt, dx)))
5✔
1656
                length = len(hgt) * dx
5✔
1657
            d['main_flowline_length'] = length
5✔
1658
            d['inv_flowline_glacier_area'] = np.sum(widths * dx)
5✔
1659
            d['flowline_mean_elev'] = np.average(h, weights=widths)
5✔
1660
            d['flowline_max_elev'] = np.max(h)
5✔
1661
            d['flowline_min_elev'] = np.min(h)
5✔
1662
            d['flowline_avg_slope'] = np.mean(slope)
5✔
1663
            d['flowline_avg_width'] = np.mean(widths)
5✔
1664
            d['flowline_last_width'] = fls[-1].widths[-1] * gdir.grid.dx
5✔
1665
            d['flowline_last_5_widths'] = np.mean(fls[-1].widths[-5:] *
5✔
1666
                                                  gdir.grid.dx)
1667
        except BaseException:
2✔
1668
            pass
2✔
1669

1670
        try:
5✔
1671
            # MB calib
1672
            df = gdir.read_json('local_mustar')
5✔
1673
            d['t_star'] = df['t_star']
5✔
1674
            d['mu_star_glacierwide'] = df['mu_star_glacierwide']
5✔
1675
            d['mu_star_flowline_avg'] = df['mu_star_flowline_avg']
5✔
1676
            d['mu_star_allsame'] = df['mu_star_allsame']
5✔
1677
            d['mb_bias'] = df['bias']
5✔
1678
            try:
5✔
1679
                # is only saved if we use prcp-fac that depend on glacier
1680
                d['glacier_prcp_scaling_factor'] = df['glacier_prcp_scaling_factor']
5✔
1681
            except KeyError:
5✔
1682
                pass
5✔
1683
        except BaseException:
2✔
1684
            pass
2✔
1685

1686
        if apply_func:
5✔
1687
            # User defined statistics
1688
            try:
2✔
1689
                apply_func(gdir, d)
2✔
1690
            except BaseException:
×
1691
                pass
×
1692

1693
    return d
5✔
1694
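
# Editor's note (illustrative sketch): the 'vas_volume_km3' entry above is the
# classic volume-area scaling V = c * A**gamma with c = 0.034 and gamma = 1.375
# (A in km2, V in km3). A quick standalone check:
def _example_vas_volume(area_km2=10.0):
    vas_volume_km3 = 0.034 * area_km2 ** 1.375
    # for a 10 km2 glacier this gives roughly 0.81 km3 of ice
    return vas_volume_km3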

1695

1696
@global_task(log)
9✔
1697
def compile_glacier_statistics(gdirs, filesuffix='', path=True,
9✔
1698
                               inversion_only=False, apply_func=None):
1699
    """Gather as much statistics as possible about a list of glaciers.
1700

1701
    It can be used for result diagnostics and other analyses. If the data
1702
    necessary for a statistic is not available (e.g. flowline length) it
1703
    will simply be ignored.
1704

1705
    Parameters
1706
    ----------
1707
    gdirs : list of :py:class:`oggm.GlacierDirectory` objects
1708
        the glacier directories to process
1709
    filesuffix : str
1710
        add suffix to output file
1711
    path : str, bool
1712
        Set to "True" in order  to store the info in the working directory
1713
        Set to a path to store the file to your chosen location
1714
    inversion_only : bool
1715
        if one wants to summarize the inversion output only (including calving)
1716
    apply_func : function
1717
        if one wants to summarize further information about a glacier, set
1718
        this kwarg to a function that accepts a glacier directory as first
1719
        positional argument, and the dictionary to fill in with data as
1720
        second argument. The dictionary should only store scalar values (strings,
1721
        float, int).
1722
        !Careful! For multiprocessing, the function cannot be defined at the
1723
        top level of your script, i.e. you may need to import it from a module
1724
        for this to work, or from a dummy class (https://stackoverflow.com/questions/8804830)
1725
    """
1726
    from oggm.workflow import execute_entity_task
6✔
1727

1728
    out_df = execute_entity_task(glacier_statistics, gdirs,
6✔
1729
                                 apply_func=apply_func,
1730
                                 inversion_only=inversion_only)
1731

1732
    out = pd.DataFrame(out_df).set_index('rgi_id')
6✔
1733

1734
    if path:
6✔
1735
        if path is True:
6✔
1736
            out.to_csv(os.path.join(cfg.PATHS['working_dir'],
5✔
1737
                                    ('glacier_statistics' +
1738
                                     filesuffix + '.csv')))
1739
        else:
1740
            out.to_csv(path)
3✔
1741
    return out
6✔
1742
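
# Illustrative sketch (editor's addition): a user-defined `apply_func` for
# compile_glacier_statistics. It receives the glacier directory and the
# dictionary to fill, and should only add scalar values. For multiprocessing
# it must be importable (see the docstring above). The attribute used below
# (rgi_area_km2) exists on GlacierDirectory; the key names are arbitrary.
def _example_apply_func(gdir, d):
    d['is_large'] = gdir.rgi_area_km2 > 10
    d['has_name'] = len(gdir.name or '') > 0

# which would then be used as:
# compile_glacier_statistics(gdirs, apply_func=_example_apply_func)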

1743

1744
@global_task(log)
9✔
1745
def compile_fixed_geometry_mass_balance(gdirs, filesuffix='',
9✔
1746
                                        path=True, csv=False,
1747
                                        use_inversion_flowlines=True,
1748
                                        ys=None, ye=None, years=None,
1749
                                        climate_filename='climate_historical',
1750
                                        climate_input_filesuffix='',
1751
                                        temperature_bias=None,
1752
                                        precipitation_factor=None):
1753

1754
    """Compiles a table of specific mass balance timeseries for all glaciers.
1755

1756
    The file is stored as an HDF file (not CSV) by default. Use pd.read_hdf
1757
    to open it.
1758

1759
    Parameters
1760
    ----------
1761
    gdirs : list of :py:class:`oggm.GlacierDirectory` objects
1762
        the glacier directories to process
1763
    filesuffix : str
1764
        add suffix to output file
1765
    path : str, bool
1766
        Set to "True" in order  to store the info in the working directory
1767
        Set to a path to store the file to your chosen location (file
1768
        extension matters)
1769
    csv : bool
1770
        Set to True to store the data in csv instead of hdf.
1771
    use_inversion_flowlines : bool
1772
        whether to use the inversion flowlines or the model flowlines
1773
    ys : int
1774
        start year of the model run (default: from the climate file)
1775
1776
    ye : int
1777
        end year of the model run (default: from the climate file)
1778
    years : array of ints
1779
        override ys and ye with the years of your choice
1780
    climate_filename : str
1781
        name of the climate file, e.g. 'climate_historical' (default) or
1782
        'gcm_data'
1783
    climate_input_filesuffix: str
1784
        filesuffix for the input climate file
1785
    temperature_bias : float
1786
        add a bias to the temperature timeseries
1787
    precipitation_factor : float
1788
        multiply the precipitation time series by this factor
1789
        default is None, which means that the precipitation factor from the
1790
        calibration (cfg.PARAMS['prcp_scaling_factor']) is applied
1791
    """
1792

1793
    from oggm.workflow import execute_entity_task
4✔
1794
    from oggm.core.massbalance import fixed_geometry_mass_balance
4✔
1795

1796
    out_df = execute_entity_task(fixed_geometry_mass_balance, gdirs,
4✔
1797
                                 use_inversion_flowlines=use_inversion_flowlines,
1798
                                 ys=ys, ye=ye, years=years, climate_filename=climate_filename,
1799
                                 climate_input_filesuffix=climate_input_filesuffix,
1800
                                 temperature_bias=temperature_bias,
1801
                                 precipitation_factor=precipitation_factor)
1802

1803
    for idx, s in enumerate(out_df):
4✔
1804
        if s is None:
4✔
1805
            out_df[idx] = pd.Series(np.NaN)
×
1806

1807
    out = pd.concat(out_df, axis=1, keys=[gd.rgi_id for gd in gdirs])
4✔
1808
    out = out.dropna(axis=0, how='all')
4✔
1809

1810
    if path:
4✔
1811
        if path is True:
4✔
1812
            fpath = os.path.join(cfg.PATHS['working_dir'],
2✔
1813
                                 'fixed_geometry_mass_balance' + filesuffix)
1814
            if csv:
2✔
1815
                out.to_csv(fpath + '.csv')
×
1816
            else:
1817
                out.to_hdf(fpath + '.hdf', key='df')
2✔
1818
        else:
1819
            ext = os.path.splitext(path)[-1]
3✔
1820
            if ext.lower() == '.csv':
3✔
1821
                out.to_csv(path)
3✔
1822
            elif ext.lower() == '.hdf':
×
1823
                out.to_hdf(path, key='df')
×
1824
    return out
4✔
1825
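
# Illustrative sketch (editor's addition): reading the compiled mass-balance
# table back from disk. The default file name follows the code above and the
# HDF key is 'df'; `os`, `cfg` and `pd` are module-level imports.
def _example_read_fixed_geometry_mb(filesuffix=''):
    fpath = os.path.join(cfg.PATHS['working_dir'],
                         'fixed_geometry_mass_balance' + filesuffix + '.hdf')
    # one column per glacier (RGI id), one row per year
    return pd.read_hdf(fpath, key='df')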

1826

1827
@global_task(log)
9✔
1828
def compile_ela(gdirs, filesuffix='', path=True, csv=False, ys=None, ye=None, years=None,
9✔
1829
                climate_filename='climate_historical', temperature_bias=None,
1830
                precipitation_factor=None, climate_input_filesuffix=''):
1831
    """Compiles a table of ELA timeseries for all glaciers for a given years,
1832
    using the PastMassBalance model.
1833

1834
    The file is stored as an HDF file (not CSV) by default. Use pd.read_hdf
1835
    to open it.
1836

1837
    Parameters
1838
    ----------
1839
    gdirs : list of :py:class:`oggm.GlacierDirectory` objects
1840
        the glacier directories to process
1841
    filesuffix : str
1842
        add suffix to output file
1843
    path : str, bool
1844
        Set to "True" in order  to store the info in the working directory
1845
        Set to a path to store the file to your chosen location (file
1846
        extension matters)
1847
    csv : bool
1848
        Set to True to store the data in csv instead of hdf.
1849
    ys : int
1850
        start year
1851
    ye : int
1852
        end year
1853
    years : array of ints
1854
        override ys and ye with the years of your choice
1855
    climate_filename : str
1856
        name of the climate file, e.g. 'climate_historical' (default) or
1857
        'gcm_data'
1858
    climate_input_filesuffix : str
1859
        filesuffix for the input climate file
1860
    temperature_bias : float
1861
        add a bias to the temperature timeseries
1862
    precipitation_factor : float
1863
        multiply the precipitation time series by this factor
1864
        default is None, which means that the precipitation factor from the
1865
        calibration (cfg.PARAMS['prcp_scaling_factor']) is applied
1866
    """
1867
    from oggm.workflow import execute_entity_task
1✔
1868
    from oggm.core.massbalance import compute_ela
1✔
1869

1870
    out_df = execute_entity_task(compute_ela, gdirs, ys=ys, ye=ye, years=years,
1✔
1871
                                 climate_filename=climate_filename,
1872
                                 climate_input_filesuffix=climate_input_filesuffix,
1873
                                 temperature_bias=temperature_bias,
1874
                                 precipitation_factor=precipitation_factor)
1875

1876
    for idx, s in enumerate(out_df):
1✔
1877
        if s is None:
1✔
1878
            out_df[idx] = pd.Series(np.NaN)
×
1879

1880
    out = pd.concat(out_df, axis=1, keys=[gd.rgi_id for gd in gdirs])
1✔
1881
    out = out.dropna(axis=0, how='all')
1✔
1882

1883
    if path:
1✔
1884
        if path is True:
1✔
1885
            fpath = os.path.join(cfg.PATHS['working_dir'],
1✔
1886
                                 'ELA' + filesuffix)
1887
            if csv:
1✔
1888
                out.to_csv(fpath + '.csv')
1✔
1889
            else:
1890
                out.to_hdf(fpath + '.hdf', key='df')
1✔
1891
        else:
1892
            ext = os.path.splitext(path)[-1]
×
1893
            if ext.lower() == '.csv':
×
1894
                out.to_csv(path)
×
1895
            elif ext.lower() == '.hdf':
×
1896
                out.to_hdf(path, key='df')
×
1897
    return out
1✔
1898
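
# Illustrative sketch (editor's addition): compiling ELAs for a fixed period
# and averaging them per glacier. `gdirs` and the years are assumptions.
def _example_mean_ela(gdirs):
    ela = compile_ela(gdirs, ys=1980, ye=2010, path=False)
    # rows are years, columns are RGI ids: mean ELA (m) per glacier
    return ela.mean()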

1899

1900
@entity_task(log)
9✔
1901
def climate_statistics(gdir, add_climate_period=1995, halfsize=15,
9✔
1902
                       input_filesuffix=''):
1903
    """Gather as much statistics as possible about this glacier.
1904

1905
    It can be used for result diagnostics and other analyses. If the data
1906
    necessary for a statistic is not available (e.g. flowline length) it
1907
    will simply be ignored.
1908

1909
    Important note: the climate is extracted from the mass-balance model and
1910
    is therefore "corrected" according to the mass-balance calibration scheme
1911
    (e.g. the precipitation factor and the temp bias correction). For more
1912
    flexible information about the raw climate data, use `compile_climate_input`
1913
    or `raw_climate_statistics`.
1914

1915
    Parameters
1916
    ----------
1917
    add_climate_period : int or list of ints
1918
        compile climate statistics for the halfsize*2 + 1 yrs period
1919
        around the selected date.
1920
    halfsize : int
1921
        the half size of the window
1922
    """
1923
    from oggm.core.massbalance import (ConstantMassBalance,
2✔
1924
                                       MultipleFlowlineMassBalance)
1925

1926
    d = OrderedDict()
2✔
1927

1928
    # Easy stats - this should always be possible
1929
    d['rgi_id'] = gdir.rgi_id
2✔
1930
    d['rgi_region'] = gdir.rgi_region
2✔
1931
    d['rgi_subregion'] = gdir.rgi_subregion
2✔
1932
    d['name'] = gdir.name
2✔
1933
    d['cenlon'] = gdir.cenlon
2✔
1934
    d['cenlat'] = gdir.cenlat
2✔
1935
    d['rgi_area_km2'] = gdir.rgi_area_km2
2✔
1936
    d['glacier_type'] = gdir.glacier_type
2✔
1937
    d['terminus_type'] = gdir.terminus_type
2✔
1938
    d['status'] = gdir.status
2✔
1939

1940
    # The rest is less certain
1941
    with warnings.catch_warnings():
2✔
1942
        warnings.filterwarnings("ignore", category=RuntimeWarning)
2✔
1943
        try:
2✔
1944
            # Flowline related stuff
1945
            h = np.array([])
2✔
1946
            widths = np.array([])
2✔
1947
            fls = gdir.read_pickle('inversion_flowlines')
2✔
1948
            dx = fls[0].dx * gdir.grid.dx
2✔
1949
            for fl in fls:
2✔
1950
                hgt = fl.surface_h
2✔
1951
                h = np.append(h, hgt)
2✔
1952
                widths = np.append(widths, fl.widths * dx)
2✔
1953
            d['flowline_mean_elev'] = np.average(h, weights=widths)
2✔
1954
            d['flowline_max_elev'] = np.max(h)
2✔
1955
            d['flowline_min_elev'] = np.min(h)
2✔
1956
        except BaseException:
1✔
1957
            pass
1✔
1958

1959
        try:
2✔
1960
            # Climate and MB at t*
1961
            mbcl = ConstantMassBalance
2✔
1962
            mbmod = MultipleFlowlineMassBalance(gdir, mb_model_class=mbcl,
2✔
1963
                                                bias=0, halfsize=halfsize,
1964
                                                use_inversion_flowlines=True)
1965
            h, w, mbh = mbmod.get_annual_mb_on_flowlines()
2✔
1966
            mbh = mbh * cfg.SEC_IN_YEAR * cfg.PARAMS['ice_density']
2✔
1967
            pacc = np.where(mbh >= 0)
2✔
1968
            pab = np.where(mbh < 0)
2✔
1969
            d['tstar_aar'] = np.sum(w[pacc]) / np.sum(w)
2✔
1970
            try:
2✔
1971
                # Try to get the slope
1972
                mb_slope, _, _, _, _ = stats.linregress(h[pab], mbh[pab])
2✔
1973
                d['tstar_mb_grad'] = mb_slope
2✔
1974
            except BaseException:
×
1975
                # we don't mind if something goes wrong
1976
                d['tstar_mb_grad'] = np.NaN
×
1977
            d['tstar_ela_h'] = mbmod.get_ela()
2✔
1978
            # Climate
1979
            t, tm, p, ps = mbmod.flowline_mb_models[0].get_annual_climate(
2✔
1980
                [d['tstar_ela_h'],
1981
                 d['flowline_mean_elev'],
1982
                 d['flowline_max_elev'],
1983
                 d['flowline_min_elev']])
1984
            for n, v in zip(['temp', 'tempmelt', 'prcpsol'], [t, tm, ps]):
2✔
1985
                d['tstar_avg_' + n + '_ela_h'] = v[0]
2✔
1986
                d['tstar_avg_' + n + '_mean_elev'] = v[1]
2✔
1987
                d['tstar_avg_' + n + '_max_elev'] = v[2]
2✔
1988
                d['tstar_avg_' + n + '_min_elev'] = v[3]
2✔
1989
            d['tstar_avg_prcp'] = p[0]
2✔
1990
        except BaseException:
1✔
1991
            pass
1✔
1992

1993
        # Climate and MB at specified dates
1994
        add_climate_period = tolist(add_climate_period)
2✔
1995
        for y0 in add_climate_period:
2✔
1996
            try:
2✔
1997
                fs = '{}-{}'.format(y0 - halfsize, y0 + halfsize)
2✔
1998
                mbcl = ConstantMassBalance
2✔
1999
                mbmod = MultipleFlowlineMassBalance(gdir, mb_model_class=mbcl,
2✔
2000
                                                    y0=y0, halfsize=halfsize,
2001
                                                    use_inversion_flowlines=True,
2002
                                                    input_filesuffix=input_filesuffix)
2003
                h, w, mbh = mbmod.get_annual_mb_on_flowlines()
2✔
2004
                mbh = mbh * cfg.SEC_IN_YEAR * cfg.PARAMS['ice_density']
2✔
2005
                pacc = np.where(mbh >= 0)
2✔
2006
                pab = np.where(mbh < 0)
2✔
2007
                d[fs + '_aar'] = np.sum(w[pacc]) / np.sum(w)
2✔
2008
                try:
2✔
2009
                    # Try to get the slope
2010
                    mb_slope, _, _, _, _ = stats.linregress(h[pab], mbh[pab])
2✔
2011
                    d[fs + '_mb_grad'] = mb_slope
2✔
2012
                except BaseException:
×
2013
                    # we don't mind if something goes wrong
2014
                    d[fs + '_mb_grad'] = np.NaN
×
2015
                d[fs + '_ela_h'] = mbmod.get_ela()
2✔
2016
                # Climate
2017
                t, tm, p, ps = mbmod.flowline_mb_models[0].get_annual_climate(
2✔
2018
                    [d[fs + '_ela_h'],
2019
                     d['flowline_mean_elev'],
2020
                     d['flowline_max_elev'],
2021
                     d['flowline_min_elev']])
2022
                for n, v in zip(['temp', 'tempmelt', 'prcpsol'], [t, tm, ps]):
2✔
2023
                    d[fs + '_avg_' + n + '_ela_h'] = v[0]
2✔
2024
                    d[fs + '_avg_' + n + '_mean_elev'] = v[1]
2✔
2025
                    d[fs + '_avg_' + n + '_max_elev'] = v[2]
2✔
2026
                    d[fs + '_avg_' + n + '_min_elev'] = v[3]
2✔
2027
                d[fs + '_avg_prcp'] = p[0]
2✔
2028
            except BaseException:
1✔
2029
                pass
1✔
2030

2031
    return d
2✔
2032
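
# Editor's note (illustrative sketch): the '_aar' entries above are simply the
# width-weighted share of flowline points with non-negative annual mass
# balance. A standalone numpy version with made-up numbers:
def _example_aar():
    mbh = np.array([800., 300., -100., -900.])   # annual MB along the flowline
    w = np.array([500., 400., 300., 200.])       # widths at the same points
    pacc = mbh >= 0
    return np.sum(w[pacc]) / np.sum(w)           # here: 0.64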

2033

2034
@entity_task(log)
9✔
2035
def raw_climate_statistics(gdir, add_climate_period=1995, halfsize=15,
9✔
2036
                           input_filesuffix=''):
2037
    """Gather as much statistics as possible about this glacier.
2038

2039
    This is like "climate_statistics" but without relying on the
2040
    mass-balance model, i.e. closer to the actual data (uncorrected).
2041

2042
    Parameters
2043
    ----------
2044
    add_climate_period : int or list of ints
2045
        compile climate statistics for the halfsize*2 + 1 yrs period around
2046
        the selected date.
    halfsize : int
        the half size of the window
2047
    """
2048
    d = OrderedDict()
1✔
2049
    # Easy stats - this should always be possible
2050
    d['rgi_id'] = gdir.rgi_id
1✔
2051
    d['rgi_region'] = gdir.rgi_region
1✔
2052
    d['rgi_subregion'] = gdir.rgi_subregion
1✔
2053
    d['name'] = gdir.name
1✔
2054
    d['cenlon'] = gdir.cenlon
1✔
2055
    d['cenlat'] = gdir.cenlat
1✔
2056
    d['rgi_area_km2'] = gdir.rgi_area_km2
1✔
2057
    d['glacier_type'] = gdir.glacier_type
1✔
2058
    d['terminus_type'] = gdir.terminus_type
1✔
2059
    d['status'] = gdir.status
1✔
2060

2061
    # The rest is less certain
2062
    with warnings.catch_warnings():
1✔
2063
        warnings.filterwarnings("ignore", category=RuntimeWarning)
1✔
2064

2065
        # Climate and MB at specified dates
2066
        add_climate_period = tolist(add_climate_period)
1✔
2067

2068
        # get non-corrected winter daily mean prcp (kg m-2 day-1) for
2069
        # the chosen time period
2070
        for y0 in add_climate_period:
1✔
2071
            fs = '{}-{}'.format(y0 - halfsize, y0 + halfsize)
1✔
2072
            try:
1✔
2073
                # get non-corrected winter daily mean prcp (kg m-2 day-1)
2074
                # it is easier to get this directly from the raw climate files
2075
                fp = gdir.get_filepath('climate_historical',
1✔
2076
                                       filesuffix=input_filesuffix)
2077
                with xr.open_dataset(fp).prcp as ds_pr:
1✔
2078
                    # just select winter months
2079
                    if gdir.hemisphere == 'nh':
1✔
2080
                        m_winter = [10, 11, 12, 1, 2, 3, 4]
1✔
2081
                    else:
2082
                        m_winter = [4, 5, 6, 7, 8, 9, 10]
×
2083
                    ds_pr_winter = ds_pr.where(ds_pr['time.month'].isin(m_winter), drop=True)
1✔
2084
                    # select the correct year time period
2085
                    ds_pr_winter = ds_pr_winter.sel(time=slice(f'{fs[:4]}-01-01',
1✔
2086
                                                               f'{fs[-4:]}-12-01'))
2087
                    # check if we have the full time period
2088
                    n_years = int(fs[-4:]) - int(fs[:4]) + 1
1✔
2089
                    assert len(ds_pr_winter.time) == n_years * 7, 'chosen time-span invalid'
1✔
2090
                    ds_d_pr_winter_mean = (ds_pr_winter / ds_pr_winter.time.dt.daysinmonth).mean()
1✔
2091
                    d[f'{fs}_uncorrected_winter_daily_mean_prcp'] = ds_d_pr_winter_mean.values
1✔
2092
            except BaseException:
1✔
2093
                pass
1✔
2094
    return d
1✔
2095
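
# Editor's note (illustrative sketch): the winter daily-mean precipitation
# above divides each monthly total by the number of days in the month before
# averaging. A self-contained xarray version with synthetic monthly data:
def _example_winter_daily_mean_prcp():
    time = pd.date_range('2000-01-01', '2001-12-01', freq='MS')
    prcp = xr.DataArray(np.full(len(time), 60.0), coords={'time': time},
                        dims='time', name='prcp')  # kg m-2 per month (made up)
    m_winter = [10, 11, 12, 1, 2, 3, 4]  # northern hemisphere, as above
    pw = prcp.where(prcp['time.month'].isin(m_winter), drop=True)
    # kg m-2 day-1, averaged over all winter months of the period
    return float((pw / pw.time.dt.daysinmonth).mean())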

2096

2097
@global_task(log)
9✔
2098
def compile_climate_statistics(gdirs, filesuffix='', path=True,
9✔
2099
                               add_climate_period=1995,
2100
                               halfsize=15,
2101
                               add_raw_climate_statistics=False,
2102
                               input_filesuffix=''):
2103
    """Gather as much statistics as possible about a list of glaciers.
2104

2105
    It can be used for result diagnostics and other analyses. If the data
2106
    necessary for a statistic is not available (e.g. flowline length) it
2107
    will simply be ignored.
2108

2109
    Parameters
2110
    ----------
2111
    gdirs : list of :py:class:`oggm.GlacierDirectory` objects
        the glacier directories to process
2112
    filesuffix : str
2113
        add suffix to output file
2114
    path : str, bool
2115
        Set to "True" in order  to store the info in the working directory
2116
        Set to a path to store the file to your chosen location
2117
    add_climate_period : int or list of ints
2118
        compile climate statistics for the halfsize*2 + 1 yrs period around
2119
        the selected date.
    halfsize : int
        the half size of the window
    add_raw_climate_statistics : bool
        set to True to also compute the raw (uncorrected) climate statistics
2120
    input_filesuffix : str
2121
        filesuffix of the used climate_historical file, default is no filesuffix
2122
    """
2123
    from oggm.workflow import execute_entity_task
3✔
2124

2125
    out_df = execute_entity_task(climate_statistics, gdirs,
3✔
2126
                                 add_climate_period=add_climate_period,
2127
                                 halfsize=halfsize,
2128
                                 input_filesuffix=input_filesuffix)
2129
    out = pd.DataFrame(out_df).set_index('rgi_id')
3✔
2130

2131
    if add_raw_climate_statistics:
3✔
2132
        out_df = execute_entity_task(raw_climate_statistics, gdirs,
1✔
2133
                                     add_climate_period=add_climate_period,
2134
                                     halfsize=halfsize,
2135
                                     input_filesuffix=input_filesuffix)
2136
        out = out.merge(pd.DataFrame(out_df).set_index('rgi_id'))
1✔
2137

2138
    if path:
3✔
2139
        if path is True:
3✔
2140
            out.to_csv(os.path.join(cfg.PATHS['working_dir'],
3✔
2141
                                    ('climate_statistics' +
2142
                                     filesuffix + '.csv')))
2143
        else:
2144
            out.to_csv(path)
1✔
2145
    return out
3✔
2146

2147

2148
def extend_past_climate_run(past_run_file=None,
9✔
2149
                            fixed_geometry_mb_file=None,
2150
                            glacier_statistics_file=None,
2151
                            path=False,
2152
                            use_compression=True):
2153
    """Utility function to extend past MB runs prior to the RGI date.
2154

2155
    We use a fixed geometry (and a fixed calving rate) for all dates prior
2156
    to the RGI date.
2157

2158
    This is not parallelized, i.e. it can be a bit slow.
2159

2160
    Parameters
2161
    ----------
2162
    past_run_file : str
2163
        path to the historical run (nc)
2164
    fixed_geometry_mb_file : str
2165
        path to the MB file (csv)
2166
    glacier_statistics_file : str
2167
        path to the glacier stats file (csv)
2168
    path : str
2169
        where to store the file
2170
    use_compression : bool
        whether to compress ('zlib') the output netCDF file
2171

2172
    Returns
2173
    -------
2174
    the extended dataset
2175
    """
2176

2177
    log.workflow('Applying extend_past_climate_run on '
3✔
2178
                 '{}'.format(past_run_file))
2179

2180
    fixed_geometry_mb_df = pd.read_csv(fixed_geometry_mb_file, index_col=0,
3✔
2181
                                       low_memory=False)
2182
    stats_df = pd.read_csv(glacier_statistics_file, index_col=0,
3✔
2183
                           low_memory=False)
2184

2185
    with xr.open_dataset(past_run_file) as past_ds:
3✔
2186

2187
        # We need at least area and vol to do something
2188
        if 'volume' not in past_ds.data_vars or 'area' not in past_ds.data_vars:
3✔
2189
            raise InvalidWorkflowError('Need both volume and area to proceed')
×
2190

2191
        y0_run = int(past_ds.time[0])
3✔
2192
        y1_run = int(past_ds.time[-1])
3✔
2193
        if (y1_run - y0_run + 1) != len(past_ds.time):
3✔
2194
            raise NotImplementedError('Currently only supports annual outputs')
2195
        y0_clim = int(fixed_geometry_mb_df.index[0])
3✔
2196
        y1_clim = int(fixed_geometry_mb_df.index[-1])
3✔
2197
        if y0_clim > y0_run or y1_clim < y0_run:
3✔
2198
            raise InvalidWorkflowError('Dates do not match.')
×
2199
        if y1_clim != y1_run - 1:
3✔
2200
            raise InvalidWorkflowError('Dates do not match.')
×
2201
        if len(past_ds.rgi_id) != len(fixed_geometry_mb_df.columns):
3✔
2202
            raise InvalidWorkflowError('Nb of glaciers do not match.')
×
2203
        if len(past_ds.rgi_id) != len(stats_df.index):
3✔
2204
            raise InvalidWorkflowError('Nb of glaciers do not match.')
×
2205

2206
        # Make sure we agree on order
2207
        df = fixed_geometry_mb_df[past_ds.rgi_id]
3✔
2208

2209
        # Output data
2210
        years = np.arange(y0_clim, y1_run+1)
3✔
2211
        ods = past_ds.reindex({'time': years})
3✔
2212

2213
        # Time
2214
        ods['hydro_year'].data[:] = years
3✔
2215
        ods['hydro_month'].data[:] = ods['hydro_month'][-1]
3✔
2216
        if ods['hydro_month'][-1] == 1:
3✔
2217
            ods['calendar_year'].data[:] = years
3✔
2218
        else:
2219
            ods['calendar_year'].data[:] = years - 1
×
2220
        ods['calendar_month'].data[:] = ods['calendar_month'][-1]
3✔
2221
        for vn in ['hydro_year', 'hydro_month',
3✔
2222
                   'calendar_year', 'calendar_month']:
2223
            ods[vn] = ods[vn].astype(int)
3✔
2224

2225
        # New vars
2226
        for vn in ['volume', 'volume_m3_min_h', 'volume_bsl', 'volume_bwl',
3✔
2227
                   'area', 'area_m2_min_h', 'length', 'calving',
2228
                   'calving_rate']:
2229
            if vn in ods.data_vars:
3✔
2230
                ods[vn + '_ext'] = ods[vn].copy(deep=True)
3✔
2231
                ods[vn + '_ext'].attrs['description'] += ' (extended with MB data)'
3✔
2232

2233
        vn = 'volume_fixed_geom_ext'
3✔
2234
        ods[vn] = ods['volume'].copy(deep=True)
3✔
2235
        ods[vn].attrs['description'] += ' (replaced with fixed geom data)'
3✔
2236

2237
        rho = cfg.PARAMS['ice_density']
3✔
2238
        # Loop over the ids
2239
        for i, rid in enumerate(ods.rgi_id.data):
3✔
2240
            # Both do not need to be same length but they need to start same
2241
            mb_ts = df.values[:, i]
3✔
2242
            orig_vol_ts = ods.volume_ext.data[:, i]
3✔
2243
            if not (np.isfinite(mb_ts[-1]) and np.isfinite(orig_vol_ts[-1])):
3✔
2244
                # Not a valid glacier
2245
                continue
×
2246
            if np.isfinite(orig_vol_ts[0]):
3✔
2247
                # Nothing to extend, really
2248
                continue
×
2249

2250
            # First valid id
2251
            fid = np.argmax(np.isfinite(orig_vol_ts))
3✔
2252

2253
            # Add calving to the mix
2254
            try:
3✔
2255
                calv_flux = stats_df.loc[rid, 'calving_flux'] * 1e9
3✔
2256
                calv_rate = stats_df.loc[rid, 'calving_rate_myr']
1✔
2257
            except KeyError:
2✔
2258
                calv_flux = 0
2✔
2259
                calv_rate = 0
2✔
2260
            if not np.isfinite(calv_flux):
3✔
2261
                calv_flux = 0
×
2262
            if not np.isfinite(calv_rate):
3✔
2263
                calv_rate = 0
×
2264

2265
            # Fill area and length which stays constant before date
2266
            orig_area_ts = ods.area_ext.data[:, i]
3✔
2267
            orig_area_ts[:fid] = orig_area_ts[fid]
3✔
2268

2269
            # We convert SMB to volume
2270
            mb_vol_ts = (mb_ts / rho * orig_area_ts[fid] - calv_flux).cumsum()
3✔
2271
            calv_ts = (mb_ts * 0 + calv_flux).cumsum()
3✔
2272

2273
            # The -1 is because the volume change is known at end of year
2274
            mb_vol_ts = mb_vol_ts + orig_vol_ts[fid] - mb_vol_ts[fid-1]
3✔
2275

2276
            # Now back to netcdf
2277
            ods.volume_fixed_geom_ext.data[1:, i] = mb_vol_ts
3✔
2278
            ods.volume_ext.data[1:fid, i] = mb_vol_ts[0:fid-1]
3✔
2279
            ods.area_ext.data[:, i] = orig_area_ts
3✔
2280

2281
            # Optional variables
2282
            if 'length' in ods.data_vars:
3✔
2283
                orig_length_ts = ods.length_ext.data[:, i]
3✔
2284
                orig_length_ts[:fid] = orig_length_ts[fid]
3✔
2285
                ods.length_ext.data[:, i] = orig_length_ts
3✔
2286

2287
            if 'calving' in ods.data_vars:
3✔
2288
                orig_calv_ts = ods.calving_ext.data[:, i]
2✔
2289
                # The -1 is because the volume change is known at end of year
2290
                calv_ts = calv_ts + orig_calv_ts[fid] - calv_ts[fid-1]
2✔
2291
                ods.calving_ext.data[1:fid, i] = calv_ts[0:fid-1]
2✔
2292

2293
            if 'calving_rate' in ods.data_vars:
3✔
2294
                orig_calv_rate_ts = ods.calving_rate_ext.data[:, i]
2✔
2295
                # +1 because calving rate at year 0 is unknown from the dyns model
2296
                orig_calv_rate_ts[:fid+1] = calv_rate
2✔
2297
                ods.calving_rate_ext.data[:, i] = orig_calv_rate_ts
2✔
2298

2299
            # Extend vol bsl by assuming that % stays constant
2300
            if 'volume_bsl' in ods.data_vars:
3✔
2301
                bsl = ods.volume_bsl.data[fid, i] / ods.volume.data[fid, i]
2✔
2302
                ods.volume_bsl_ext.data[:fid, i] = bsl * ods.volume_ext.data[:fid, i]
2✔
2303
            if 'volume_bwl' in ods.data_vars:
3✔
2304
                bwl = ods.volume_bwl.data[fid, i] / ods.volume.data[fid, i]
2✔
2305
                ods.volume_bwl_ext.data[:fid, i] = bwl * ods.volume_ext.data[:fid, i]
2✔
2306

2307
        # Remove old vars
2308
        for vn in list(ods.data_vars):
3✔
2309
            if '_ext' not in vn and 'time' in ods[vn].dims:
3✔
2310
                del ods[vn]
3✔
2311

2312
        # Rename vars to their old names
2313
        ods = ods.rename(dict((o, o.replace('_ext', ''))
3✔
2314
                              for o in ods.data_vars))
2315

2316
        # Remove t0 (which is NaN)
2317
        ods = ods.isel(time=slice(1, None))
3✔
2318

2319
        # To file?
2320
        if path:
3✔
2321
            enc_var = {'dtype': 'float32'}
3✔
2322
            if use_compression:
3✔
2323
                enc_var['complevel'] = 5
3✔
2324
                enc_var['zlib'] = True
3✔
2325
            encoding = {v: enc_var for v in ods.data_vars}
3✔
2326
            ods.to_netcdf(path, encoding=encoding)
3✔
2327

2328
    return ods
3✔
2329
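
# Editor's note (illustrative sketch): the core of the extension above converts
# a specific mass-balance series (kg m-2 yr-1) into a cumulative ice volume
# change (m3) over a fixed area, minus a constant calving flux. Standalone,
# with made-up numbers and a typical ice density of 900 kg m-3:
def _example_smb_to_volume(area_m2=8.0e6, rho=900.0):
    mb_ts = np.array([-500., -300., -700.])   # kg m-2 yr-1
    calv_flux = 0.                            # m3 yr-1 (no calving here)
    return (mb_ts / rho * area_m2 - calv_flux).cumsum()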

2330

2331
def idealized_gdir(surface_h, widths_m, map_dx, flowline_dx=1,
9✔
2332
                   base_dir=None, reset=False):
2333
    """Creates a glacier directory with flowline input data only.
2334

2335
    This is useful for testing, or for idealized experiments.
2336

2337
    Parameters
2338
    ----------
2339
    surface_h : ndarray
2340
        the surface elevation of the flowline's grid points (in m).
2341
    widths_m : ndarray
2342
        the widths of the flowline's grid points (in m).
2343
    map_dx : float
2344
        the grid spacing (in m)
2345
    flowline_dx : int
2346
        the flowline grid spacing (in units of map_dx, often it should be 1)
2347
    base_dir : str
2348
        path to the directory where to create/open the glacier directory.
2349
        Defaults to `cfg.PATHS['working_dir'] + /per_glacier/`
2350
    reset : bool, default=False
2351
        empties the directory at construction
2352

2353
    Returns
2354
    -------
2355
    a GlacierDirectory instance
2356
    """
2357

2358
    from oggm.core.centerlines import Centerline
1✔
2359

2360
    # Area from geometry
2361
    area_km2 = np.sum(widths_m * map_dx * flowline_dx) * 1e-6
1✔
2362

2363
    # Dummy entity - should probably also change the geometry
2364
    entity = gpd.read_file(get_demo_file('Hintereisferner_RGI5.shp')).iloc[0]
1✔
2365
    entity.Area = area_km2
1✔
2366
    entity.CenLat = 0
1✔
2367
    entity.CenLon = 0
1✔
2368
    entity.Name = ''
1✔
2369
    entity.RGIId = 'RGI50-00.00000'
1✔
2370
    entity.O1Region = '00'
1✔
2371
    entity.O2Region = '0'
1✔
2372
    gdir = GlacierDirectory(entity, base_dir=base_dir, reset=reset)
1✔
2373
    gdir.write_shapefile(gpd.GeoDataFrame([entity]), 'outlines')
1✔
2374

2375
    # Idealized flowline
2376
    coords = np.arange(0, len(surface_h) - 0.5, 1)
1✔
2377
    line = shpg.LineString(np.vstack([coords, coords * 0.]).T)
1✔
2378
    fl = Centerline(line, dx=flowline_dx, surface_h=surface_h, map_dx=map_dx)
1✔
2379
    fl.widths = widths_m / map_dx
1✔
2380
    fl.is_rectangular = np.ones(fl.nx).astype(bool)
1✔
2381
    gdir.write_pickle([fl], 'inversion_flowlines')
1✔
2382

2383
    # Idealized map
2384
    grid = salem.Grid(nxny=(1, 1), dxdy=(map_dx, map_dx), x0y0=(0, 0))
1✔
2385
    grid.to_json(gdir.get_filepath('glacier_grid'))
1✔
2386

2387
    return gdir
1✔
2388
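
# Illustrative sketch (editor's addition): building a small synthetic glacier
# with idealized_gdir. It needs a valid cfg.PATHS['working_dir'] (or an
# explicit base_dir); the numbers below are arbitrary.
def _example_idealized_gdir(base_dir=None):
    nx = 100
    surface_h = np.linspace(3200., 2400., nx)  # linear surface, 3200 m to 2400 m
    widths_m = np.zeros(nx) + 400.             # constant 400 m width
    return idealized_gdir(surface_h, widths_m, map_dx=100.,
                          base_dir=base_dir, reset=True)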

2389

2390
def _back_up_retry(func, exceptions, max_count=5):
9✔
2391
    """Re-Try an action up to max_count times.
2392
    """
2393

2394
    count = 0
2✔
2395
    while count < max_count:
2✔
2396
        try:
2✔
2397
            if count > 0:
2✔
2398
                time.sleep(random.uniform(0.05, 0.1))
×
2399
            return func()
2✔
2400
        except exceptions:
1✔
2401
            count += 1
×
2402
            if count >= max_count:
×
2403
                raise
×
2404

2405

2406
def _robust_extract(to_dir, *args, **kwargs):
9✔
2407
    """For some obscure reason this operation randomly fails.
2408

2409
    Try to make it more robust.
2410
    """
2411

2412
    def func():
2✔
2413
        with tarfile.open(*args, **kwargs) as tf:
2✔
2414
            if not len(tf.getnames()):
2✔
2415
                raise RuntimeError("Empty tarfile")
×
2416
            tf.extractall(os.path.dirname(to_dir))
2✔
2417

2418
    _back_up_retry(func, FileExistsError)
2✔
2419

2420

2421
def robust_tar_extract(from_tar, to_dir, delete_tar=False):
9✔
2422
    """Extract a tar file - also checks for a "tar in tar" situation"""
2423

2424
    if os.path.isfile(from_tar):
2✔
2425
        _robust_extract(to_dir, from_tar, 'r')
1✔
2426
    else:
2427
        # maybe a tar in tar
2428
        base_tar = os.path.dirname(from_tar) + '.tar'
2✔
2429
        if not os.path.isfile(base_tar):
2✔
2430
            raise FileNotFoundError('Could not find a tarfile with path: '
×
2431
                                    '{}'.format(from_tar))
2432
        if delete_tar:
2✔
2433
            raise InvalidParamsError('Cannot delete tar in tar.')
×
2434
        # Open the tar
2435
        bname = os.path.basename(from_tar)
2✔
2436
        dirbname = os.path.basename(os.path.dirname(from_tar))
2✔
2437

2438
        def func():
2✔
2439
            with tarfile.open(base_tar, 'r') as tf:
2✔
2440
                i_from_tar = tf.getmember(os.path.join(dirbname, bname))
2✔
2441
                with tf.extractfile(i_from_tar) as fileobj:
2✔
2442
                    _robust_extract(to_dir, fileobj=fileobj)
2✔
2443

2444
        _back_up_retry(func, RuntimeError)
2✔
2445

2446
    if delete_tar:
2✔
2447
        os.remove(from_tar)
1✔
2448
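
# Illustrative sketch (editor's addition): extracting a pre-packaged glacier
# directory from its tar file. The paths are placeholders following the
# per_glacier naming convention; the same mechanism is used by
# GlacierDirectory(..., from_tar=...) below.
def _example_extract_gdir(base_dir='per_glacier'):
    rid = 'RGI60-11.00897'
    to_dir = os.path.join(base_dir, rid[:-6], rid[:-3], rid)
    robust_tar_extract(to_dir + '.tar.gz', to_dir, delete_tar=False)
    return to_dir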

2449

2450
class GlacierDirectory(object):
9✔
2451
    """Organizes read and write access to the glacier's files.
2452

2453
    It handles a glacier directory created in a base directory (default
2454
    is the "per_glacier" folder in the working directory). The role of a
2455
    GlacierDirectory is to give access to file paths and to I/O operations.
2456
    The user should not care about *where* the files are
2457
    located, but should know their name (see :ref:`basenames`).
2458

2459
    If the directory does not exist, it will be created.
2460

2461
    See :ref:`glacierdir` for more information.
2462

2463
    Attributes
2464
    ----------
2465
    dir : str
2466
        path to the directory
2467
    base_dir : str
2468
        path to the base directory
2469
    rgi_id : str
2470
        The glacier's RGI identifier
2471
    glims_id : str
2472
        The glacier's GLIMS identifier (when available)
2473
    rgi_area_km2 : float
2474
        The glacier's RGI area (km2)
2475
    cenlon, cenlat : float
2476
        The glacier centerpoint's lon/lat
2477
    rgi_date : int
2478
        The RGI's BGNDATE year attribute if available. Otherwise, defaults to
2479
        the median year for the RGI region
2480
    rgi_region : str
2481
        The RGI region ID
2482
    rgi_subregion : str
2483
        The RGI subregion ID
2484
    rgi_version : str
2485
        The RGI version name
2486
    rgi_region_name : str
2487
        The RGI region name
2488
    rgi_subregion_name : str
2489
        The RGI subregion name
2490
    name : str
2491
        The RGI glacier name (if available)
2492
    hemisphere : str
2493
        `nh` or `sh`
2494
    glacier_type : str
2495
        The RGI glacier type ('Glacier', 'Ice cap', 'Perennial snowfield',
2496
        'Seasonal snowfield')
2497
    terminus_type : str
2498
        The RGI terminus type ('Land-terminating', 'Marine-terminating',
2499
        'Lake-terminating', 'Dry calving', 'Regenerated', 'Shelf-terminating')
2500
    is_tidewater : bool
2501
        Is the glacier a calving glacier?
2502
    is_lake_terminating : bool
2503
        Is the glacier a lake terminating glacier?
2504
    is_nominal : bool
2505
        Is the glacier an RGI nominal glacier?
2506
    is_icecap : bool
2507
        Is the glacier an ice cap?
2508
    extent_ll : list
2509
        Extent of the glacier in lon/lat
2510
    logfile : str
2511
        Path to the log file (txt)
2512
    inversion_calving_rate : float
2513
        Calving rate used for the inversion
2514
    grid
2515
    dem_info
2516
    dem_daterange
2517
    intersects_ids
2518
    rgi_area_m2
2519
    rgi_area_km2
2520
    """
2521

2522
    def __init__(self, rgi_entity, base_dir=None, reset=False,
9✔
2523
                 from_tar=False, delete_tar=False):
2524
        """Creates a new directory or opens an existing one.
2525

2526
        Parameters
2527
        ----------
2528
        rgi_entity : a ``geopandas.GeoSeries`` or str
2529
            glacier entity read from the shapefile (or a valid RGI ID if the
2530
            directory exists)
2531
        base_dir : str
2532
            path to the directory where to create/open the glacier directory.
2533
            Defaults to `cfg.PATHS['working_dir'] + /per_glacier/`
2534
        reset : bool, default=False
2535
            empties the directory at construction (careful!)
2536
        from_tar : str or bool, default=False
2537
            path to a tar file to extract the gdir data from. If set to `True`,
2538
            will check for a tar file at the expected location in `base_dir`.
2539
        delete_tar : bool, default=False
2540
            delete the original tar file after extraction.
2541
        """
2542

2543
        if base_dir is None:
7✔
2544
            if not cfg.PATHS.get('working_dir', None):
7✔
2545
                raise ValueError("Need a valid PATHS['working_dir']!")
×
2546
            base_dir = os.path.join(cfg.PATHS['working_dir'], 'per_glacier')
7✔
2547

2548
        # RGI IDs are also valid entries
2549
        if isinstance(rgi_entity, str):
7✔
2550
            # Get the meta from the shape file directly
2551
            if from_tar:
3✔
2552
                _dir = os.path.join(base_dir, rgi_entity[:-6], rgi_entity[:-3],
2✔
2553
                                    rgi_entity)
2554
                # Avoid bad surprises
2555
                if os.path.exists(_dir):
2✔
2556
                    shutil.rmtree(_dir)
2✔
2557
                if from_tar is True:
2✔
2558
                    from_tar = _dir + '.tar.gz'
×
2559
                robust_tar_extract(from_tar, _dir, delete_tar=delete_tar)
2✔
2560
                from_tar = False  # to not re-unpack later below
2✔
2561
                _shp = os.path.join(_dir, 'outlines.shp')
2✔
2562
            else:
2563
                _shp = os.path.join(base_dir, rgi_entity[:-6], rgi_entity[:-3],
3✔
2564
                                    rgi_entity, 'outlines.shp')
2565
            rgi_entity = self._read_shapefile_from_path(_shp)
3✔
2566
            crs = salem.check_crs(rgi_entity.crs)
3✔
2567
            rgi_entity = rgi_entity.iloc[0]
3✔
2568
            g = rgi_entity['geometry']
3✔
2569
            xx, yy = salem.transform_proj(crs, salem.wgs84,
3✔
2570
                                          [g.bounds[0], g.bounds[2]],
2571
                                          [g.bounds[1], g.bounds[3]])
2572
            write_shp = False
3✔
2573
        else:
2574
            g = rgi_entity['geometry']
7✔
2575
            xx, yy = ([g.bounds[0], g.bounds[2]],
7✔
2576
                      [g.bounds[1], g.bounds[3]])
2577
            write_shp = True
7✔
2578

2579
        # Extent of the glacier in lon/lat
2580
        self.extent_ll = [xx, yy]
7✔
2581

2582
        try:
7✔
2583
            # RGI V4?
2584
            rgi_entity.RGIID
7✔
2585
            raise ValueError('RGI Version 4 is not supported anymore')
×
2586
        except AttributeError:
7✔
2587
            pass
7✔
2588

2589
        try:
7✔
2590
            self.rgi_id = rgi_entity.rgi_id
7✔
2591
            self.glims_id = rgi_entity.glims_id
×
2592
        except AttributeError:
7✔
2593
            # RGI V6
2594
            self.rgi_id = rgi_entity.RGIId
7✔
2595
            self.glims_id = rgi_entity.GLIMSId
7✔
2596

2597
        # Do we want to use the RGI center point or ours?
2598
        if cfg.PARAMS['use_rgi_area']:
7✔
2599
            try:
7✔
2600
                self.cenlon = float(rgi_entity.cenlon)
7✔
2601
                self.cenlat = float(rgi_entity.cenlat)
×
2602
            except AttributeError:
7✔
2603
                # RGI V6
2604
                self.cenlon = float(rgi_entity.CenLon)
7✔
2605
                self.cenlat = float(rgi_entity.CenLat)
7✔
2606
        else:
2607
            cenlon, cenlat = rgi_entity.geometry.representative_point().xy
1✔
2608
            self.cenlon = float(cenlon[0])
1✔
2609
            self.cenlat = float(cenlat[0])
1✔
2610

2611
        try:
7✔
2612
            self.rgi_region = rgi_entity.o1region
7✔
2613
            self.rgi_subregion = rgi_entity.o2region
×
2614
        except AttributeError:
7✔
2615
            # RGI V6
2616
            self.rgi_region = '{:02d}'.format(int(rgi_entity.O1Region))
7✔
2617
            self.rgi_subregion = (self.rgi_region + '-' +
7✔
2618
                                  '{:02d}'.format(int(rgi_entity.O2Region)))
2619

2620

2621
        try:
7✔
2622
            name = str(rgi_entity.name)
7✔
2623
            rgi_datestr = rgi_entity.src_date
7✔
2624
        except AttributeError:
7✔
2625
            # RGI V6
2626
            name = rgi_entity.Name
7✔
2627
            rgi_datestr = rgi_entity.BgnDate
7✔
2628

2629

2630
        try:
7✔
2631
            gtype = rgi_entity.GlacType
7✔
2632
        except AttributeError:
5✔
2633
            try:
5✔
2634
                # RGI V6
2635
                gtype = [str(rgi_entity.Form), str(rgi_entity.TermType)]
5✔
2636
            except AttributeError:
×
2637
                # temporary default for RGI V7:
2638
                gtype = ['0', '0']
×
2639

2640
        try:
7✔
2641
            gstatus = rgi_entity.RGIFlag[0]
7✔
2642
        except AttributeError:
5✔
2643
            try:
5✔
2644
                # RGI V6
2645
                gstatus = rgi_entity.Status
5✔
2646
            except AttributeError:
×
2647
                # temporary default for RGI V7:
2648
                gstatus = '0'
×
2649

2650
        # rgi version can be useful
2651
        # RGI2000-v7.0-G-06-00029
2652
        # RGI60-07.00245
2653
        if self.rgi_id.count('-') == 4:
7✔
2654
            self.rgi_version = '70'
×
2655
        else:
2656
            rgi_version = self.rgi_id.split('-')[0][-2:]
7✔
2657
            if rgi_version not in ['50', '60', '61']:
7✔
2658
                raise RuntimeError('RGI Version not supported: '
×
2659
                                   '{}'.format(self.rgi_version))
2660
            else:
2661
                self.rgi_version = rgi_version
7✔
2662
        # remove spurious characters and trailing blanks
2663
        self.name = filter_rgi_name(name)
7✔
2664

2665
        # region
2666
        reg_names, subreg_names = parse_rgi_meta(version=self.rgi_version[0])
7✔
2667
        reg_name = reg_names.loc[int(self.rgi_region)]
7✔
2668
        # RGI V6
2669
        if not isinstance(reg_name, str):
7✔
2670
            reg_name = reg_name.values[0]
7✔
2671
        self.rgi_region_name = self.rgi_region + ': ' + reg_name
7✔
2672
        try:
7✔
2673
            subreg_name = subreg_names.loc[self.rgi_subregion]
7✔
2674
            # RGI V6
2675
            if not isinstance(subreg_name, str):
7✔
2676
                subreg_name = subreg_name.values[0]
7✔
2677
            self.rgi_subregion_name = self.rgi_subregion + ': ' + subreg_name
7✔
2678
        except KeyError:
×
2679
            self.rgi_subregion_name = self.rgi_subregion + ': NoName'
×
2680

2681
        # Read glacier attrs
2682
        gtkeys = {'0': 'Glacier',
7✔
2683
                  '1': 'Ice cap',
2684
                  '2': 'Perennial snowfield',
2685
                  '3': 'Seasonal snowfield',
2686
                  '9': 'Not assigned',
2687
                  }
2688
        ttkeys = {'0': 'Land-terminating',
7✔
2689
                  '1': 'Marine-terminating',
2690
                  '2': 'Lake-terminating',
2691
                  '3': 'Dry calving',
2692
                  '4': 'Regenerated',
2693
                  '5': 'Shelf-terminating',
2694
                  '9': 'Not assigned',
2695
                  }
2696
        stkeys = {'0': 'Glacier or ice cap',
7✔
2697
                  '1': 'Glacier complex',
2698
                  '2': 'Nominal glacier',
2699
                  '9': 'Not assigned',
2700
                  }
2701
        self.glacier_type = gtkeys[gtype[0]]
7✔
2702
        self.terminus_type = ttkeys[gtype[1]]
7✔
2703
        self.status = stkeys['{}'.format(gstatus)]
7✔
2704

2705
        # Decide what is a tidewater glacier
2706
        user = cfg.PARAMS['tidewater_type']
7✔
2707
        if user == 1:
7✔
2708
            sel = ['Marine-terminating']
×
2709
        elif user == 2:
7✔
2710
            sel = ['Marine-terminating', 'Shelf-terminating']
5✔
2711
        elif user == 3:
2✔
2712
            sel = ['Marine-terminating', 'Lake-terminating']
×
2713
        elif user == 4:
2✔
2714
            sel = ['Marine-terminating', 'Lake-terminating', 'Shelf-terminating']
2✔
2715
        else:
2716
            raise InvalidParamsError("PARAMS['tidewater_type'] not understood")
×
2717
        self.is_tidewater = self.terminus_type in sel
7✔
2718
        self.is_lake_terminating = self.terminus_type == 'Lake-terminating'
7✔
2719
        self.is_marine_terminating = self.terminus_type == 'Marine-terminating'
7✔
2720
        self.is_shelf_terminating = self.terminus_type == 'Shelf-terminating'
7✔
2721
        self.is_nominal = self.status == 'Nominal glacier'
7✔
2722
        self.inversion_calving_rate = 0.
7✔
2723
        self.is_icecap = self.glacier_type == 'Ice cap'
7✔
2724

2725
        # Hemisphere
2726
        if self.cenlat < 0 or self.rgi_region == '16':
7✔
2727
            self.hemisphere = 'sh'
×
2728
        else:
2729
            self.hemisphere = 'nh'
7✔
2730

2731
        # convert the date
2732
        rgi_date = int(rgi_datestr[0:4])
7✔
2733
        if rgi_date < 0:
7✔
2734
            rgi_date = RGI_DATE[self.rgi_region]
1✔
2735
        self.rgi_date = rgi_date
7✔
2736
        # Root directory
2737
        self.base_dir = os.path.normpath(base_dir)
7✔
2738
        self.dir = os.path.join(self.base_dir, self.rgi_id[:-6],
7✔
2739
                                self.rgi_id[:-3], self.rgi_id)
2740

2741
        # Do we have to extract the files first?
2742
        if (reset or from_tar) and os.path.exists(self.dir):
7✔
2743
            shutil.rmtree(self.dir)
4✔
2744

2745
        if from_tar:
7✔
2746
            if from_tar is True:
1✔
2747
                from_tar = self.dir + '.tar.gz'
1✔
2748
            robust_tar_extract(from_tar, self.dir, delete_tar=delete_tar)
1✔
2749
            write_shp = False
1✔
2750
        else:
2751
            mkdir(self.dir)
7✔
2752

2753
        if not os.path.isdir(self.dir):
7✔
2754
            raise RuntimeError('GlacierDirectory %s does not exist!' % self.dir)
×
2755

2756
        # logging file
2757
        self.logfile = os.path.join(self.dir, 'log.txt')
7✔
2758

2759
        if write_shp:
7✔
2760
            # Write shapefile
2761
            self._reproject_and_write_shapefile(rgi_entity)
7✔
2762

2763
        # Optimization
2764
        self._mbdf = None
7✔
2765
        self._mbprofdf = None
7✔
2766
        self._mbprofdf_cte_dh = None
7✔
2767

2768
    def __repr__(self):
2769

2770
        summary = ['<oggm.GlacierDirectory>']
2771
        summary += ['  RGI id: ' + self.rgi_id]
2772
        summary += ['  Region: ' + self.rgi_region_name]
2773
        summary += ['  Subregion: ' + self.rgi_subregion_name]
2774
        if self.name:
2775
            summary += ['  Name: ' + self.name]
2776
        summary += ['  Glacier type: ' + str(self.glacier_type)]
2777
        summary += ['  Terminus type: ' + str(self.terminus_type)]
2778
        summary += ['  Status: ' + str(self.status)]
2779
        summary += ['  Area: ' + str(self.rgi_area_km2) + ' km2']
2780
        summary += ['  Lon, Lat: (' + str(self.cenlon) + ', ' +
2781
                    str(self.cenlat) + ')']
2782
        if os.path.isfile(self.get_filepath('glacier_grid')):
2783
            summary += ['  Grid (nx, ny): (' + str(self.grid.nx) + ', ' +
2784
                        str(self.grid.ny) + ')']
2785
            summary += ['  Grid (dx, dy): (' + str(self.grid.dx) + ', ' +
2786
                        str(self.grid.dy) + ')']
2787
        return '\n'.join(summary) + '\n'
2788

2789
    def _reproject_and_write_shapefile(self, entity):
9✔
2790

2791
        # Make a local glacier map
2792
        params = dict(name='tmerc', lat_0=0., lon_0=self.cenlon,
7✔
2793
                      k=0.9996, x_0=0, y_0=0, datum='WGS84')
2794
        proj4_str = "+proj={name} +lat_0={lat_0} +lon_0={lon_0} +k={k} " \
7✔
2795
                    "+x_0={x_0} +y_0={y_0} +datum={datum}".format(**params)
2796

2797
        # Reproject
2798
        proj_in = pyproj.Proj("epsg:4326", preserve_units=True)
7✔
2799
        proj_out = pyproj.Proj(proj4_str, preserve_units=True)
7✔
2800

2801
        # transform geometry to map
2802
        project = partial(transform_proj, proj_in, proj_out)
7✔
2803
        geometry = shp_trafo(project, entity['geometry'])
7✔
2804
        geometry = multipolygon_to_polygon(geometry, gdir=self)
7✔
2805

2806
        # Save transformed geometry to disk
2807
        entity = entity.copy()
7✔
2808
        entity['geometry'] = geometry
7✔
2809

2810
        # Do we want to use the RGI area or ours?
2811
        if not cfg.PARAMS['use_rgi_area']:
7✔
2812
            # Update Area
2813
            area = geometry.area * 1e-6
1✔
2814
            entity['Area'] = area
1✔
2815

2816
        # Avoid fiona bug: https://github.com/Toblerity/Fiona/issues/365
2817
        for k, s in entity.iteritems():
7✔
2818
            if type(s) in [np.int32, np.int64]:
7✔
2819
                entity[k] = int(s)
5✔
2820
        towrite = gpd.GeoDataFrame(entity).T
7✔
2821
        towrite.crs = proj4_str
7✔
2822

2823
        # Write shapefile
2824
        self.write_shapefile(towrite, 'outlines')
7✔
2825

2826
        # Also transform the intersects if necessary
2827
        gdf = cfg.PARAMS['intersects_gdf']
7✔
2828
        if len(gdf) > 0:
7✔
2829
            gdf = gdf.loc[((gdf.RGIId_1 == self.rgi_id) |
6✔
2830
                           (gdf.RGIId_2 == self.rgi_id))]
2831
            if len(gdf) > 0:
6✔
2832
                gdf = salem.transform_geopandas(gdf, to_crs=proj_out)
4✔
2833
                if hasattr(gdf.crs, 'srs'):
4✔
2834
                    # salem uses pyproj
2835
                    gdf.crs = gdf.crs.srs
4✔
2836
                self.write_shapefile(gdf, 'intersects')
4✔
2837
        else:
2838
            # Sanity check
2839
            if cfg.PARAMS['use_intersects']:
4✔
2840
                raise InvalidParamsError(
×
2841
                    'You seem to have forgotten to set the '
2842
                    'intersects file for this run. OGGM '
2843
                    'works better with such a file. If you '
2844
                    'know what you are doing, set '
2845
                    "cfg.PARAMS['use_intersects'] = False to "
2846
                    "suppress this error.")
2847
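# --- Illustrative sketch (not part of _workflow.py) -----------------------
# How the local glacier map above is defined: a transverse Mercator
# projection centred on the glacier longitude. The coordinates below are
# made up, only to show the proj4 string format and a single transform.
import pyproj
cenlon, cenlat = 10.7, 46.8  # hypothetical glacier centre (lon, lat)
proj4_str = ("+proj=tmerc +lat_0=0. +lon_0={} +k=0.9996 "
             "+x_0=0 +y_0=0 +datum=WGS84".format(cenlon))
proj_in = pyproj.Proj("epsg:4326", preserve_units=True)
proj_out = pyproj.Proj(proj4_str, preserve_units=True)
# lon/lat -> local metric coordinates (x, y)
x, y = pyproj.Transformer.from_proj(proj_in, proj_out,
                                    always_xy=True).transform(cenlon, cenlat)
# --- end sketch ------------------------------------------------------------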

2848
    def grid_from_params(self):
9✔
2849
        """If the glacier_grid.json file is lost, reconstruct it."""
2850
        from oggm.core.gis import glacier_grid_params
1✔
2851
        utm_proj, nx, ny, ulx, uly, dx = glacier_grid_params(self)
1✔
2852
        x0y0 = (ulx+dx/2, uly-dx/2)  # To pixel center coordinates
1✔
2853
        return salem.Grid(proj=utm_proj, nxny=(nx, ny), dxdy=(dx, -dx),
1✔
2854
                          x0y0=x0y0)
2855

2856
    @lazy_property
9✔
2857
    def grid(self):
9✔
2858
        """A ``salem.Grid`` handling the georeferencing of the local grid"""
2859
        try:
7✔
2860
            return salem.Grid.from_json(self.get_filepath('glacier_grid'))
7✔
2861
        except FileNotFoundError:
1✔
2862
            raise InvalidWorkflowError('This glacier directory seems to '
1✔
2863
                                       'have lost its glacier_grid.json file. '
2864
                                       'Use .grid_from_params(), but make sure '
2865
                                       'that the PARAMS are the ones you '
2866
                                       'want.')
2867
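# --- Illustrative sketch (not part of _workflow.py) -----------------------
# Typical access pattern for the lazily-loaded grid, assuming `gdir` is an
# initialized GlacierDirectory whose glacier_grid.json exists. If that file
# was lost, grid_from_params() can rebuild it (with the same PARAMS!).
grid = gdir.grid                     # salem.Grid read from glacier_grid.json
nx, ny, dx = grid.nx, grid.ny, grid.dx
# fallback if glacier_grid.json is missing:
# grid = gdir.grid_from_params()
# --- end sketch ------------------------------------------------------------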

2868
    @lazy_property
9✔
2869
    def rgi_area_km2(self):
9✔
2870
        """The glacier's RGI area (km2)."""
2871
        try:
7✔
2872
            _area = self.read_shapefile('outlines')['Area']
7✔
UNCOV
2873
        except OSError:
×
2874
            raise RuntimeError('No outlines available')
×
NEW
2875
        except KeyError:
×
2876
            # RGI V7
NEW
2877
            _area = self.read_shapefile('outlines')['area_km2']
×
2878
        return np.round(float(_area), decimals=3)
7✔
2879

2880
    @lazy_property
9✔
2881
    def intersects_ids(self):
9✔
2882
        """The glacier's intersects RGI ids."""
2883
        try:
1✔
2884
            gdf = self.read_shapefile('intersects')
1✔
2885
            ids = np.append(gdf['RGIId_1'], gdf['RGIId_2'])
1✔
2886
            ids = list(np.unique(np.sort(ids)))
1✔
2887
            ids.remove(self.rgi_id)
1✔
2888
            return ids
1✔
2889
        except OSError:
×
2890
            return []
×
2891

2892
    @lazy_property
9✔
2893
    def dem_daterange(self):
9✔
2894
        """Years in which most of the DEM data was acquired"""
2895
        source_txt = self.get_filepath('dem_source')
1✔
2896
        if os.path.isfile(source_txt):
1✔
2897
            with open(source_txt, 'r') as f:
1✔
2898
                for line in f.readlines():
1✔
2899
                    if 'Date range:' in line:
1✔
2900
                        return tuple(map(int, line.split(':')[1].split('-')))
1✔
2901
        # we did not find the information in the dem_source file
2902
        log.warning('No DEM date range specified in `dem_source.txt`')
1✔
2903
        return None
1✔
2904

2905
    @lazy_property
9✔
2906
    def dem_info(self):
9✔
2907
        """More detailed information on the acquisition of the DEM data"""
2908
        source_file = self.get_filepath('dem_source')
1✔
2909
        source_text = ''
1✔
2910
        if os.path.isfile(source_file):
1✔
2911
            with open(source_file, 'r') as f:
1✔
2912
                for line in f.readlines():
1✔
2913
                    source_text += line
1✔
2914
        else:
2915
            log.warning('No DEM source file found.')
×
2916
        return source_text
1✔
2917

2918
    @property
9✔
2919
    def rgi_area_m2(self):
9✔
2920
        """The glacier's RGI area (m2)."""
2921
        return self.rgi_area_km2 * 10**6
7✔
2922

2923
    def get_filepath(self, filename, delete=False, filesuffix='',
9✔
2924
                     _deprecation_check=True):
2925
        """Absolute path to a specific file.
2926

2927
        Parameters
2928
        ----------
2929
        filename : str
2930
            file name (must be listed in cfg.BASENAME)
2931
        delete : bool
2932
            delete the file if exists
2933
        filesuffix : str
2934
            append a suffix to the filename (useful for model runs). Note
2935
            that the BASENAME remains the same.
2936

2937
        Returns
2938
        -------
2939
        The absolute path to the desired file
2940
        """
2941

2942
        if filename not in cfg.BASENAMES:
7✔
2943
            raise ValueError(filename + ' not in cfg.BASENAMES.')
×
2944

2945
        deprecated = {'climate_monthly': 'climate_historical',
7✔
2946
                      'model_run': 'model_geometry',
2947
                      }
2948
        if _deprecation_check:
7✔
2949
            for old, new in deprecated.items():
7✔
2950
                if filename == old:
7✔
2951
                    warnings.warn('Basename `{}` is deprecated and replaced by'
2✔
2952
                                  ' `{}`. Please update your code soon.'
2953
                                  ''.format(old, new), FutureWarning)
2954
                    return self.get_filepath(new, delete=delete,
2✔
2955
                                             filesuffix=filesuffix)
2956

2957
        fname = cfg.BASENAMES[filename]
7✔
2958
        if filesuffix:
7✔
2959
            fname = fname.split('.')
5✔
2960
            assert len(fname) == 2
5✔
2961
            fname = fname[0] + filesuffix + '.' + fname[1]
5✔
2962

2963
        out = os.path.join(self.dir, fname)
7✔
2964

2965
        # Deprecation cycle:
2966
        for old, new in deprecated.items():
7✔
2967
            if filename == new and not os.path.exists(out):
7✔
2968
                # For backwards compatibility, in these cases try old
2969
                if self.has_file(old, filesuffix=filesuffix,
6✔
2970
                                 _deprecation_check=False):
2971
                    return self.get_filepath(old, delete=delete,
2✔
2972
                                             filesuffix=filesuffix,
2973
                                             _deprecation_check=False)
2974

2975
        if delete and os.path.isfile(out):
7✔
2976
            os.remove(out)
1✔
2977
        return out
7✔
2978
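# --- Illustrative sketch (not part of _workflow.py) -----------------------
# get_filepath only builds the path, it does not create the file. The
# filesuffix is inserted before the file extension. `gdir` is assumed to be
# an initialized GlacierDirectory; the '_gcm' suffix is hypothetical.
fp = gdir.get_filepath('model_diagnostics', filesuffix='_gcm')
# -> <gdir.dir>/model_diagnostics_gcm.nc
exists = gdir.has_file('model_diagnostics', filesuffix='_gcm')
# --- end sketch ------------------------------------------------------------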

2979
    def has_file(self, filename, filesuffix='', _deprecation_check=True):
9✔
2980
        """Checks if a file exists.
2981

2982
        Parameters
2983
        ----------
2984
        filename : str
2985
            file name (must be listed in cfg.BASENAME)
2986
        filesuffix : str
2987
            append a suffix to the filename (useful for model runs). Note
2988
            that the BASENAME remains the same.
2989
        """
2990
        fp = self.get_filepath(filename, filesuffix=filesuffix,
6✔
2991
                               _deprecation_check=_deprecation_check)
2992
        if '.shp' in fp and cfg.PARAMS['use_tar_shapefiles']:
6✔
2993
            fp = fp.replace('.shp', '.tar')
6✔
2994
            if cfg.PARAMS['use_compression']:
6✔
2995
                fp += '.gz'
6✔
2996

2997
        out = os.path.exists(fp)
6✔
2998

2999
        # Deprecation cycle
3000
        if not out and (filename == 'climate_info'):
6✔
3001
            # Try pickle
3002
            out = os.path.exists(fp.replace('.json', '.pkl'))
1✔
3003
        return out
6✔
3004

3005
    def _read_deprecated_climate_info(self):
9✔
3006
        """Temporary fix for climate_info file type change."""
3007
        fp = self.get_filepath('climate_info')
6✔
3008
        if not os.path.exists(fp):
6✔
3009
            fp = fp.replace('.json', '.pkl')
6✔
3010
            if not os.path.exists(fp):
6✔
3011
                raise FileNotFoundError('No climate info file available!')
6✔
3012
            _open = gzip.open if cfg.PARAMS['use_compression'] else open
1✔
3013
            with _open(fp, 'rb') as f:
1✔
3014
                out = pickle.load(f)
1✔
3015
            return out
1✔
3016
        with open(fp, 'r') as f:
6✔
3017
            out = json.load(f)
6✔
3018
        return out
6✔
3019

3020
    def add_to_diagnostics(self, key, value):
9✔
3021
        """Write a key, value pair to the gdir's runtime diagnostics.
3022

3023
        Parameters
3024
        ----------
3025
        key : str
3026
            dict entry key
3027
        value : str or number
3028
            dict entry value
3029
        """
3030

3031
        d = self.get_diagnostics()
6✔
3032
        d[key] = value
6✔
3033
        with open(self.get_filepath('diagnostics'), 'w') as f:
6✔
3034
            json.dump(d, f)
6✔
3035

3036
    def get_diagnostics(self):
9✔
3037
        """Read the gdir's runtime diagnostics.
3038

3039
        Returns
3040
        -------
3041
        the diagnostics dict
3042
        """
3043
        # If not there, create an empty one
3044
        if not self.has_file('diagnostics'):
6✔
3045
            with open(self.get_filepath('diagnostics'), 'w') as f:
6✔
3046
                json.dump(dict(), f)
6✔
3047

3048
        # Read and return
3049
        with open(self.get_filepath('diagnostics'), 'r') as f:
6✔
3050
            out = json.load(f)
6✔
3051
        return out
6✔
3052
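# --- Illustrative sketch (not part of _workflow.py) -----------------------
# Diagnostics are a plain JSON dict stored in the directory. Assuming an
# initialized GlacierDirectory `gdir`; the key and value are hypothetical.
gdir.add_to_diagnostics('dem_note', 'filled voids manually')
diags = gdir.get_diagnostics()
print(diags.get('dem_note'))
# --- end sketch ------------------------------------------------------------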

3053
    def read_pickle(self, filename, use_compression=None, filesuffix=''):
9✔
3054
        """Reads a pickle located in the directory.
3055

3056
        Parameters
3057
        ----------
3058
        filename : str
3059
            file name (must be listed in cfg.BASENAME)
3060
        use_compression : bool
3061
            whether or not the file was compressed. Default is to use
3062
            cfg.PARAMS['use_compression'] for this (recommended)
3063
        filesuffix : str
3064
            append a suffix to the filename (useful for experiments).
3065

3066
        Returns
3067
        -------
3068
        An object read from the pickle
3069
        """
3070

3071
        # Some deprecations
3072
        if filename == 'climate_info':
7✔
3073
            return self._read_deprecated_climate_info()
1✔
3074

3075
        use_comp = (use_compression if use_compression is not None
7✔
3076
                    else cfg.PARAMS['use_compression'])
3077
        _open = gzip.open if use_comp else open
7✔
3078
        fp = self.get_filepath(filename, filesuffix=filesuffix)
7✔
3079
        with _open(fp, 'rb') as f:
7✔
3080
            out = pickle.load(f)
7✔
3081

3082
        # Some new attrs to add to old pre-processed directories
3083
        if filename == 'model_flowlines':
7✔
3084
            if getattr(out[0], 'map_trafo', None) is None:
7✔
3085
                try:
3✔
3086
                    # This may fail for very old gdirs
3087
                    grid = self.grid
3✔
3088
                except InvalidWorkflowError:
1✔
3089
                    return out
1✔
3090

3091
                # Add the trafo
3092
                trafo = partial(grid.ij_to_crs, crs=salem.wgs84)
2✔
3093
                for fl in out:
2✔
3094
                    fl.map_trafo = trafo
2✔
3095

3096
        return out
7✔
3097

3098
    def write_pickle(self, var, filename, use_compression=None, filesuffix=''):
9✔
3099
        """ Writes a variable to a pickle on disk.
3100

3101
        Parameters
3102
        ----------
3103
        var : object
3104
            the variable to write to disk
3105
        filename : str
3106
            file name (must be listed in cfg.BASENAME)
3107
        use_compression : bool
3108
            whether or not the file was compressed. Default is to use
3109
            cfg.PARAMS['use_compression'] for this (recommended)
3110
        filesuffix : str
3111
            append a suffix to the filename (useful for experiments).
3112
        """
3113
        use_comp = (use_compression if use_compression is not None
7✔
3114
                    else cfg.PARAMS['use_compression'])
3115
        _open = gzip.open if use_comp else open
7✔
3116
        fp = self.get_filepath(filename, filesuffix=filesuffix)
7✔
3117
        with _open(fp, 'wb') as f:
7✔
3118
            pickle.dump(var, f, protocol=4)
7✔
3119
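# --- Illustrative sketch (not part of _workflow.py) -----------------------
# read_pickle/write_pickle honour cfg.PARAMS['use_compression'] (gzip) by
# default, so reader and writer must agree. Assuming an initialized `gdir`
# on which the inversion flowlines have already been computed:
fls = gdir.read_pickle('inversion_flowlines')
gdir.write_pickle(fls, 'inversion_flowlines', filesuffix='_backup')
fls_again = gdir.read_pickle('inversion_flowlines', filesuffix='_backup')
# --- end sketch ------------------------------------------------------------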

3120
    def read_json(self, filename, filesuffix='', allow_empty=False):
9✔
3121
        """Reads a JSON file located in the directory.
3122

3123
        Parameters
3124
        ----------
3125
        filename : str
3126
            file name (must be listed in cfg.BASENAME)
3127
        filesuffix : str
3128
            append a suffix to the filename (useful for experiments).
3129
        allow_empty : bool
3130
            if True, does not raise an error if the file is not there.
3131

3132
        Returns
3133
        -------
3134
        A dictionary read from the JSON file
3135
        """
3136

3137
        # Some deprecations
3138
        if filename == 'climate_info':
6✔
3139
            return self._read_deprecated_climate_info()
6✔
3140

3141
        fp = self.get_filepath(filename, filesuffix=filesuffix)
6✔
3142
        if allow_empty:
6✔
3143
            try:
6✔
3144
                with open(fp, 'r') as f:
6✔
3145
                    out = json.load(f)
5✔
3146
            except FileNotFoundError:
6✔
3147
                out = {}
6✔
3148
        else:
3149
            with open(fp, 'r') as f:
6✔
3150
                out = json.load(f)
6✔
3151
        return out
6✔
3152

3153
    def write_json(self, var, filename, filesuffix=''):
9✔
3154
        """ Writes a variable to a pickle on disk.
3155

3156
        Parameters
3157
        ----------
3158
        var : object
3159
            the variable to write to JSON (must be a dictionary)
3160
        filename : str
3161
            file name (must be listed in cfg.BASENAME)
3162
        filesuffix : str
3163
            append a suffix to the filename (useful for experiments).
3164
        """
3165

3166
        def np_convert(o):
6✔
3167
            if isinstance(o, np.int64):
6✔
3168
                return int(o)
6✔
3169
            raise TypeError
×
3170

3171
        fp = self.get_filepath(filename, filesuffix=filesuffix)
6✔
3172
        with open(fp, 'w') as f:
6✔
3173
            json.dump(var, f, default=np_convert)
6✔
3174

3175
    def get_climate_info(self, input_filesuffix=''):
9✔
3176
        """Convenience function handling some backwards compat aspects
3177

3178
        Parameters
3179
        ----------
3180
        input_filesuffix : str
3181
            input_filesuffix of the climate_historical that should be used.
3182
            Default is to take the climate_historical without input_filesuffix
3183
        """
3184

3185
        try:
6✔
3186
            out = self.read_json('climate_info')
6✔
3187
        except FileNotFoundError:
6✔
3188
            out = {}
6✔
3189

3190
        try:
6✔
3191
            f = self.get_filepath('climate_historical',
6✔
3192
                                  filesuffix=input_filesuffix)
3193
            with ncDataset(f) as nc:
6✔
3194
                out['baseline_climate_source'] = nc.climate_source
6✔
3195
                out['baseline_hydro_yr_0'] = nc.hydro_yr_0
6✔
3196
                out['baseline_hydro_yr_1'] = nc.hydro_yr_1
6✔
3197
        except (AttributeError, FileNotFoundError):
3✔
3198
            pass
3✔
3199

3200
        return out
6✔
3201

3202
    def read_text(self, filename, filesuffix=''):
9✔
3203
        """Reads a text file located in the directory.
3204

3205
        Parameters
3206
        ----------
3207
        filename : str
3208
            file name (must be listed in cfg.BASENAME)
3209
        filesuffix : str
3210
            append a suffix to the filename (useful for experiments).
3211

3212
        Returns
3213
        -------
3214
        the text
3215
        """
3216

3217
        fp = self.get_filepath(filename, filesuffix=filesuffix)
×
3218
        with open(fp, 'r') as f:
×
3219
            out = f.read()
×
3220
        return out
×
3221

3222
    @classmethod
9✔
3223
    def _read_shapefile_from_path(cls, fp):
9✔
3224
        if '.shp' not in fp:
7✔
3225
            raise ValueError('File ending not that of a shapefile')
×
3226

3227
        if cfg.PARAMS['use_tar_shapefiles']:
7✔
3228
            fp = 'tar://' + fp.replace('.shp', '.tar')
7✔
3229
            if cfg.PARAMS['use_compression']:
7✔
3230
                fp += '.gz'
7✔
3231

3232
        shp = gpd.read_file(fp)
7✔
3233

3234
        # .properties file is created for compressed shapefiles. github: #904
3235
        _properties = fp.replace('tar://', '') + '.properties'
7✔
3236
        if os.path.isfile(_properties):
7✔
3237
            # remove it, to keep GDir slim
3238
            os.remove(_properties)
7✔
3239

3240
        return shp
7✔
3241

3242
    def read_shapefile(self, filename, filesuffix=''):
9✔
3243
        """Reads a shapefile located in the directory.
3244

3245
        Parameters
3246
        ----------
3247
        filename : str
3248
            file name (must be listed in cfg.BASENAME)
3249
        filesuffix : str
3250
            append a suffix to the filename (useful for experiments).
3251

3252
        Returns
3253
        -------
3254
        A geopandas.DataFrame
3255
        """
3256
        fp = self.get_filepath(filename, filesuffix=filesuffix)
7✔
3257
        return self._read_shapefile_from_path(fp)
7✔
3258

3259
    def write_shapefile(self, var, filename, filesuffix=''):
9✔
3260
        """ Writes a variable to a shapefile on disk.
3261

3262
        Parameters
3263
        ----------
3264
        var : object
3265
            the variable to write to shapefile (must be a geopandas.DataFrame)
3266
        filename : str
3267
            file name (must be listed in cfg.BASENAME)
3268
        filesuffix : str
3269
            append a suffix to the filename (useful for experiments).
3270
        """
3271
        fp = self.get_filepath(filename, filesuffix=filesuffix)
7✔
3272
        _write_shape_to_disk(var, fp, to_tar=cfg.PARAMS['use_tar_shapefiles'])
7✔
3273

3274
    def write_monthly_climate_file(self, time, prcp, temp,
9✔
3275
                                   ref_pix_hgt, ref_pix_lon, ref_pix_lat, *,
3276
                                   gradient=None,
3277
                                   temp_std=None,
3278
                                   time_unit=None,
3279
                                   calendar=None,
3280
                                   source=None,
3281
                                   file_name='climate_historical',
3282
                                   filesuffix=''):
3283
        """Creates a netCDF4 file with climate data timeseries.
3284

3285
        Parameters
3286
        ----------
3287
        time : ndarray
3288
            the time array, in a format understood by netCDF4
3289
        prcp : ndarray
3290
            the precipitation array (unit: 'kg m-2 month-1')
3291
        temp : ndarray
3292
            the temperature array (unit: 'degC')
3293
        ref_pix_hgt : float
3294
            the elevation of the dataset's reference altitude
3295
            (for correction). In practice it is the same altitude as the
3296
            baseline climate.
3297
        ref_pix_lon : float
3298
            the location of the gridded data's grid point
3299
        ref_pix_lat : float
3300
            the location of the gridded data's grid point
3301
        gradient : ndarray, optional
3302
            whether to use a time varying gradient
3303
        temp_std : ndarray, optional
3304
            the daily standard deviation of temperature (useful for PyGEM)
3305
        time_unit : str
3306
            the reference time unit for your time array. This should be chosen
3307
            depending on the length of your data. The default is to choose
3308
            it ourselves based on the starting year.
3309
        calendar : str
3310
            If you use an exotic calendar (e.g. 'noleap')
3311
        source : str
3312
            the climate data source (required)
3313
        file_name : str
3314
            How to name the file
3315
        filesuffix : str
3316
            Apply a suffix to the file
3317
        """
3318

3319
        # overwrite as default
3320
        fpath = self.get_filepath(file_name, filesuffix=filesuffix)
6✔
3321
        if os.path.exists(fpath):
6✔
3322
            os.remove(fpath)
4✔
3323

3324
        if source is None:
6✔
3325
            raise InvalidParamsError('`source` kwarg is required')
×
3326

3327
        zlib = cfg.PARAMS['compress_climate_netcdf']
6✔
3328

3329
        try:
6✔
3330
            y0 = time[0].year
6✔
3331
            y1 = time[-1].year
6✔
3332
        except AttributeError:
3✔
3333
            time = pd.DatetimeIndex(time)
3✔
3334
            y0 = time[0].year
3✔
3335
            y1 = time[-1].year
3✔
3336

3337
        if time_unit is None:
6✔
3338
            # http://pandas.pydata.org/pandas-docs/stable/timeseries.html
3339
            # #timestamp-limitations
3340
            if y0 > 1800:
6✔
3341
                time_unit = 'days since 1801-01-01 00:00:00'
6✔
3342
            elif y0 >= 0:
1✔
3343
                time_unit = ('days since {:04d}-01-01 '
1✔
3344
                             '00:00:00'.format(time[0].year))
3345
            else:
3346
                raise InvalidParamsError('Time format not supported')
×
3347

3348
        with ncDataset(fpath, 'w', format='NETCDF4') as nc:
6✔
3349
            nc.ref_hgt = ref_pix_hgt
6✔
3350
            nc.ref_pix_lon = ref_pix_lon
6✔
3351
            nc.ref_pix_lat = ref_pix_lat
6✔
3352
            nc.ref_pix_dis = haversine(self.cenlon, self.cenlat,
6✔
3353
                                       ref_pix_lon, ref_pix_lat)
3354
            nc.climate_source = source
6✔
3355

3356
            # hydro_year corresponds to the last month of the data
3357
            if time[0].month == 1:
6✔
3358
                # if first_month = 1, last_month = 12, so y0 is hydro_yr_0
3359
                nc.hydro_yr_0 = y0
3✔
3360
            else:
3361
                # if first_month>1, then the last_month is in the next year,
3362
                nc.hydro_yr_0 = y0 + 1
6✔
3363
            nc.hydro_yr_1 = y1
6✔
3364

3365
            nc.createDimension('time', None)
6✔
3366

3367
            nc.author = 'OGGM'
6✔
3368
            nc.author_info = 'Open Global Glacier Model'
6✔
3369

3370
            timev = nc.createVariable('time', 'i4', ('time',))
6✔
3371

3372
            tatts = {'units': time_unit}
6✔
3373
            if calendar is None:
6✔
3374
                calendar = 'standard'
6✔
3375

3376
            tatts['calendar'] = calendar
6✔
3377
            try:
6✔
3378
                numdate = netCDF4.date2num([t for t in time], time_unit,
6✔
3379
                                           calendar=calendar)
3380
            except TypeError:
×
3381
                # numpy's broken datetime only works for us precision
3382
                time = time.astype('M8[us]').astype(datetime.datetime)
×
3383
                numdate = netCDF4.date2num(time, time_unit, calendar=calendar)
×
3384

3385
            timev.setncatts(tatts)
6✔
3386
            timev[:] = numdate
6✔
3387

3388
            v = nc.createVariable('prcp', 'f4', ('time',), zlib=zlib)
6✔
3389
            v.units = 'kg m-2'
6✔
3390
            v.long_name = 'total monthly precipitation amount'
6✔
3391

3392
            v[:] = prcp
6✔
3393

3394
            v = nc.createVariable('temp', 'f4', ('time',), zlib=zlib)
6✔
3395
            v.units = 'degC'
6✔
3396
            v.long_name = '2m temperature at height ref_hgt'
6✔
3397
            v[:] = temp
6✔
3398

3399
            if gradient is not None:
6✔
3400
                v = nc.createVariable('gradient', 'f4', ('time',), zlib=zlib)
4✔
3401
                v.units = 'degC m-1'
4✔
3402
                v.long_name = ('temperature gradient from local regression or '
4✔
3403
                               'lapse rates')
3404
                v[:] = gradient
4✔
3405

3406
            if temp_std is not None:
6✔
3407
                v = nc.createVariable('temp_std', 'f4', ('time',), zlib=zlib)
3✔
3408
                v.units = 'degC'
3✔
3409
                v.long_name = 'standard deviation of daily temperatures'
3✔
3410
                v[:] = temp_std
3✔
3411
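# --- Illustrative sketch (not part of _workflow.py) -----------------------
# Minimal call with synthetic monthly data (all values are made up). Note
# the hydrological year attributes written above: with a January start,
# hydro_yr_0 equals the first calendar year, otherwise first year + 1.
# Assumes an initialized GlacierDirectory `gdir`.
import numpy as np
import pandas as pd
time = pd.date_range('1990-01-01', '1999-12-01', freq='MS')
prcp = np.full(len(time), 100.0)   # kg m-2 per month (fake)
temp = np.zeros(len(time))         # degC (fake)
gdir.write_monthly_climate_file(time, prcp, temp,
                                ref_pix_hgt=2500., ref_pix_lon=10.7,
                                ref_pix_lat=46.8, source='illustrative')
# --- end sketch ------------------------------------------------------------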

3412
    def get_inversion_flowline_hw(self):
9✔
3413
        """ Shortcut function to read the heights and widths of the glacier.
3414

3415
        Parameters
3416
        ----------
3417

3418
        Returns
3419
        -------
3420
        (height, widths) in units of m
3421
        """
3422

3423
        h = np.array([])
5✔
3424
        w = np.array([])
5✔
3425
        fls = self.read_pickle('inversion_flowlines')
5✔
3426
        for fl in fls:
5✔
3427
            w = np.append(w, fl.widths)
5✔
3428
            h = np.append(h, fl.surface_h)
5✔
3429
        return h, w * self.grid.dx
5✔
3430

3431
    def set_ref_mb_data(self, mb_df=None):
9✔
3432
        """Adds reference mass balance data to this glacier.
3433

3434
        The format should be a dataframe with the years as index and
3435
        'ANNUAL_BALANCE' as values in mm yr-1.
3436
        """
3437

3438
        if self.is_tidewater:
4✔
3439
            log.warning('You are trying to set MB data on a tidewater glacier!'
×
3440
                        ' These data will be ignored by the MB model '
3441
                        'calibration routine.')
3442

3443
        if mb_df is None:
4✔
3444

3445
            flink, mbdatadir = get_wgms_files()
4✔
3446
            c = 'RGI{}0_ID'.format(self.rgi_version[0])
4✔
3447
            wid = flink.loc[flink[c] == self.rgi_id]
4✔
3448
            if len(wid) == 0:
4✔
3449
                raise RuntimeError('Not a reference glacier!')
×
3450
            wid = wid.WGMS_ID.values[0]
4✔
3451

3452
            # file
3453
            reff = os.path.join(mbdatadir,
4✔
3454
                                'mbdata_WGMS-{:05d}.csv'.format(wid))
3455
            # list of years
3456
            mb_df = pd.read_csv(reff).set_index('YEAR')
4✔
3457

3458
        # Quality checks
3459
        if 'ANNUAL_BALANCE' not in mb_df:
4✔
3460
            raise InvalidParamsError('Need an "ANNUAL_BALANCE" column in the '
×
3461
                                     'dataframe.')
3462
        if not mb_df.index.is_integer():
4✔
3463
            raise InvalidParamsError('The index needs to be integer years')
×
3464

3465
        mb_df.index.name = 'YEAR'
4✔
3466
        self._mbdf = mb_df
4✔
3467

3468
    def get_ref_mb_data(self, y0=None, y1=None, input_filesuffix=''):
9✔
3469
        """Get the reference mb data from WGMS (for some glaciers only!).
3470

3471
        Raises an Error if it isn't a reference glacier at all.
3472

3473
        Parameters
3474
        ----------
3475
        y0 : int
3476
            override the default behavior which is to check the available
3477
            climate data (or PARAMS['ref_mb_valid_window']) and decide
3478
        y1 : int
3479
            override the default behavior which is to check the available
3480
            climate data (or PARAMS['ref_mb_valid_window']) and decide
3481
        input_filesuffix : str
3482
            input_filesuffix of the climate_historical that should be used
3483
            if y0 and y1 are not given. The default is to take the
3484
            climate_historical without input_filesuffix
3485
        """
3486

3487
        if self._mbdf is None:
4✔
3488
            self.set_ref_mb_data()
4✔
3489

3490
        # logic for period
3491
        t0, t1 = cfg.PARAMS['ref_mb_valid_window']
4✔
3492
        if t0 > 0 and y0 is None:
4✔
3493
            y0 = t0
×
3494
        if t1 > 0 and y1 is None:
4✔
3495
            y1 = t1
×
3496

3497
        if y0 is None or y1 is None:
4✔
3498
            ci = self.get_climate_info(input_filesuffix=input_filesuffix)
4✔
3499
            if 'baseline_hydro_yr_0' not in ci:
4✔
3500
                raise InvalidWorkflowError('Please process some climate data '
1✔
3501
                                           'before calling this method')
3502
            y0 = ci['baseline_hydro_yr_0'] if y0 is None else y0
4✔
3503
            y1 = ci['baseline_hydro_yr_1'] if y1 is None else y1
4✔
3504

3505
        if len(self._mbdf) > 1:
4✔
3506
            out = self._mbdf.loc[y0:y1]
4✔
3507
        else:
3508
            # Some files are just empty
3509
            out = self._mbdf
×
3510
        return out.dropna(subset=['ANNUAL_BALANCE'])
4✔
3511
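# --- Illustrative sketch (not part of _workflow.py) -----------------------
# For WGMS reference glaciers only: the returned dataframe is indexed by
# year and contains (at least) 'ANNUAL_BALANCE' in mm yr-1. Assumes a
# reference-glacier `gdir` with processed climate data.
mb = gdir.get_ref_mb_data()
mean_mb = mb['ANNUAL_BALANCE'].mean()
# or restrict the period explicitly:
mb_sel = gdir.get_ref_mb_data(y0=1990, y1=2000)
# --- end sketch ------------------------------------------------------------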

3512
    def get_ref_mb_profile(self, input_filesuffix='', constant_dh=False, obs_ratio_needed=0):
9✔
3513
        """Get the reference mb profile data from WGMS (if available!).
3514

3515
        Returns None if this glacier has no profile and an Error if it isn't
3516
        a reference glacier at all.
3517

3518
        Parameters
3519
        ----------
3520
        input_filesuffix : str
3521
            input_filesuffix of the climate_historical that should be used. The
3522
            default is to take the climate_historical without input_filesuffix
3523
        constant_dh : boolean
3524
            If set to True, it outputs the MB profiles with a constant step size
3525
            of dh=50m by using interpolation. This can be useful for comparisons
3526
            between years. Default is False which gives the raw
3527
            elevation-dependent point MB
3528
        obs_ratio_needed : float
3529
            necessary relative amount of observations per elevation band in order
3530
            to be included in the MB profile (0<=obs_ratio_needed<=1).
3531
            If obs_ratio_needed set to 0, the output shows all elevation-band
3532
            observations (default is 0).
3533
            When estimating mean MB profiles, it is advisable to set obs_ratio_needed
3534
            to 0.6. E.g. if there are in total 5 years of measurements only those elevation
3535
            bands with at least 3 years of measurements are used. If obs_ratio_needed is not
3536
            0, constant_dh has to be set to True.
3537
        """
3538

3539
        if obs_ratio_needed != 0 and constant_dh is False:
2✔
3540
            raise InvalidParamsError('If a filter is applied, you have to set'
×
3541
                                     ' constant_dh to True')
3542
        if obs_ratio_needed < 0 or obs_ratio_needed > 1:
2✔
3543
            raise InvalidParamsError('obs_ratio_needed is the necessary relative amount '
×
3544
                                     'of observations per elevation band. It has to be between '
3545
                                     '0 and 1!')
3546

3547
        if self._mbprofdf is None and not constant_dh:
2✔
3548
            flink, mbdatadir = get_wgms_files()
2✔
3549
            c = 'RGI{}0_ID'.format(self.rgi_version[0])
2✔
3550
            wid = flink.loc[flink[c] == self.rgi_id]
2✔
3551
            if len(wid) == 0:
2✔
3552
                raise RuntimeError('Not a reference glacier!')
×
3553
            wid = wid.WGMS_ID.values[0]
2✔
3554

3555
            # file
3556
            mbdatadir = os.path.join(os.path.dirname(mbdatadir), 'mb_profiles')
2✔
3557
            reff = os.path.join(mbdatadir,
2✔
3558
                                'profile_WGMS-{:05d}.csv'.format(wid))
3559
            if not os.path.exists(reff):
2✔
3560
                return None
×
3561
            # list of years
3562
            self._mbprofdf = pd.read_csv(reff, index_col=0)
2✔
3563

3564
        if self._mbprofdf_cte_dh is None and constant_dh:
2✔
3565
            flink, mbdatadir = get_wgms_files()
1✔
3566
            c = 'RGI{}0_ID'.format(self.rgi_version[0])
1✔
3567
            wid = flink.loc[flink[c] == self.rgi_id]
1✔
3568
            if len(wid) == 0:
1✔
3569
                raise RuntimeError('Not a reference glacier!')
×
3570
            wid = wid.WGMS_ID.values[0]
1✔
3571

3572
            # file
3573
            mbdatadir = os.path.join(os.path.dirname(mbdatadir), 'mb_profiles_constant_dh')
1✔
3574
            reff = os.path.join(mbdatadir,
1✔
3575
                                'profile_constant_dh_WGMS-{:05d}.csv'.format(wid))
3576
            if not os.path.exists(reff):
1✔
3577
                return None
×
3578
            # list of years
3579
            self._mbprofdf_cte_dh = pd.read_csv(reff, index_col=0)
1✔
3580

3581
        ci = self.get_climate_info(input_filesuffix=input_filesuffix)
2✔
3582
        if 'baseline_hydro_yr_0' not in ci:
2✔
3583
            raise RuntimeError('Please process some climate data before calling this method')
×
3584
        y0 = ci['baseline_hydro_yr_0']
2✔
3585
        y1 = ci['baseline_hydro_yr_1']
2✔
3586
        if not constant_dh:
2✔
3587
            if len(self._mbprofdf) > 1:
2✔
3588
                out = self._mbprofdf.loc[y0:y1]
2✔
3589
            else:
3590
                # Some files are just empty
3591
                out = self._mbprofdf
×
3592
        else:
3593
            if len(self._mbprofdf_cte_dh) > 1:
1✔
3594
                out = self._mbprofdf_cte_dh.loc[y0:y1]
1✔
3595
                if obs_ratio_needed != 0:
1✔
3596
                    # amount of years with any observation
3597
                    n_obs = len(out.index)
1✔
3598
                    # amount of years with observations for each elevation band
3599
                    n_obs_h = out.describe().loc['count']
1✔
3600
                    # relative amount of observations per elevation band
3601
                    rel_obs_h = n_obs_h / n_obs
1✔
3602
                    # select only those elevation bands with a specific ratio
3603
                    # of years with available measurements
3604
                    out = out[rel_obs_h[rel_obs_h >= obs_ratio_needed].index]
1✔
3605

3606
            else:
3607
                # Some files are just empty
3608
                out = self._mbprofdf_cte_dh
×
3609
        out.columns = [float(c) for c in out.columns]
2✔
3610
        return out.dropna(axis=1, how='all').dropna(axis=0, how='all')
2✔
3611
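# --- Illustrative sketch (not part of _workflow.py) -----------------------
# The obs_ratio_needed filter keeps only elevation bands measured in a large
# enough share of the years: with e.g. 5 profile years and a ratio of 0.6,
# bands with fewer than 3 yearly values are dropped. Assumes a WGMS
# reference-glacier `gdir`.
prof = gdir.get_ref_mb_profile(constant_dh=True, obs_ratio_needed=0.6)
if prof is not None:
    mean_profile = prof.mean()  # mean point MB per 50 m elevation band
# --- end sketch ------------------------------------------------------------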

3612

3613
    def get_ref_length_data(self):
9✔
3614
        """Get the glacier length data from P. Leclercq's data base.
3615

3616
         https://folk.uio.no/paulwl/data.php
3617

3618
         For some glaciers only!
3619
         """
3620

3621
        df = pd.read_csv(get_demo_file('rgi_leclercq_links_2012_RGIV5.csv'))
1✔
3622
        df = df.loc[df.RGI_ID == self.rgi_id]
1✔
3623
        if len(df) == 0:
1✔
3624
            raise RuntimeError('No length data found for this glacier!')
×
3625
        ide = df.LID.values[0]
1✔
3626

3627
        f = get_demo_file('Glacier_Lengths_Leclercq.nc')
1✔
3628
        with xr.open_dataset(f) as dsg:
1✔
3629
            # The database is not sorted by ID. Don't ask me...
3630
            grp_id = np.argwhere(dsg['index'].values == ide)[0][0] + 1
1✔
3631
        with xr.open_dataset(f, group=str(grp_id)) as ds:
1✔
3632
            df = ds.to_dataframe()
1✔
3633
            df.name = ds.glacier_name
1✔
3634
        return df
1✔
3635

3636
    def log(self, task_name, *, err=None, task_time=None):
9✔
3637
        """Logs a message to the glacier directory.
3638

3639
        It is usually called by the :py:class:`entity_task` decorator, so normally
3640
        you shouldn't have to take care of it yourself.
3641

3642
        Parameters
3643
        ----------
3644
        task_name : str
3645
            the name of the task to log
3646
        err : Exception
3647
            the exception which has been raised by the task (if no exception was
3648
            raised, a success is logged)
3649
        task_time : float
3650
            the time (in seconds) that the task needed to run
3651
        """
3652

3653
        # a line per function call
3654
        nowsrt = datetime.datetime.now().strftime('%Y-%m-%dT%H:%M:%S')
7✔
3655
        line = nowsrt + ';' + task_name + ';'
7✔
3656

3657
        if task_time is not None:
7✔
3658
            line += 'time:{};'.format(task_time)
7✔
3659

3660
        if err is None:
7✔
3661
            line += 'SUCCESS'
7✔
3662
        else:
3663
            line += err.__class__.__name__ + ': {}'.format(err)
5✔
3664

3665
        line = line.replace('\n', ' ')
7✔
3666

3667
        count = 0
7✔
3668
        while count < 5:
7✔
3669
            try:
7✔
3670
                with open(self.logfile, 'a') as logfile:
7✔
3671
                    logfile.write(line + '\n')
7✔
3672
                break
7✔
3673
            except FileNotFoundError:
×
3674
                # I really don't know when this error happens
3675
                # In this case sleep and try again
3676
                time.sleep(0.05)
×
3677
                count += 1
×
3678

3679
        if count == 5:
7✔
3680
            log.warning('Could not write to logfile: ' + line)
×
3681
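# --- Illustrative sketch (not part of _workflow.py) -----------------------
# Each call appends one semicolon-separated line to <gdir.dir>/log.txt, e.g.
#   2023-01-05T12:30:00;define_glacier_region;time:1.2;SUCCESS
#   2023-01-05T12:31:10;some_task;RuntimeError: <message>
# (timestamps and the second task name are made up). get_task_status()
# below returns the last field of the most recent line for a given task.
status = gdir.get_task_status('define_glacier_region')  # e.g. 'SUCCESS'
# --- end sketch ------------------------------------------------------------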

3682
    def get_task_status(self, task_name):
9✔
3683
        """Opens this directory's log file to check for a task's outcome.
3684

3685
        Parameters
3686
        ----------
3687
        task_name : str
3688
            the name of the task which has to be tested for
3689

3690
        Returns
3691
        -------
3692
        The last message for this task (SUCCESS if it was successful),
3693
        None if the task was not run yet
3694
        """
3695

3696
        if not os.path.isfile(self.logfile):
7✔
3697
            return None
7✔
3698

3699
        with open(self.logfile) as logfile:
7✔
3700
            lines = logfile.readlines()
7✔
3701

3702
        lines = [l.replace('\n', '') for l in lines
7✔
3703
                 if ';' in l and (task_name == l.split(';')[1])]
3704
        if lines:
7✔
3705
            # keep only the last log
3706
            return lines[-1].split(';')[-1]
7✔
3707
        else:
3708
            return None
6✔
3709

3710
    def get_task_time(self, task_name):
9✔
3711
        """Opens this directory's log file to check for a task's run time.
3712

3713
        Parameters
3714
        ----------
3715
        task_name : str
3716
            the name of the task which has to be tested for
3717

3718
        Returns
3719
        -------
3720
        The timing that the last call of this task needed.
3721
        None if the task was not run yet, or if it errored
3722
        """
3723

3724
        if not os.path.isfile(self.logfile):
1✔
3725
            return None
×
3726

3727
        with open(self.logfile) as logfile:
1✔
3728
            lines = logfile.readlines()
1✔
3729

3730
        lines = [l.replace('\n', '') for l in lines
1✔
3731
                 if task_name == l.split(';')[1]]
3732
        if lines:
1✔
3733
            line = lines[-1]
1✔
3734
            # Last log is message
3735
            if 'ERROR' in line.split(';')[-1] or 'time:' not in line:
1✔
3736
                return None
1✔
3737
            # Get the time
3738
            return float(line.split('time:')[-1].split(';')[0])
1✔
3739
        else:
3740
            return None
1✔
3741

3742
    def get_error_log(self):
9✔
3743
        """Reads the directory's log file to find the invalid task (if any).
3744

3745
        Returns
3746
        -------
3747
        The first error message in this log, None if all good
3748
        """
3749

3750
        if not os.path.isfile(self.logfile):
5✔
3751
            return None
1✔
3752

3753
        with open(self.logfile) as logfile:
5✔
3754
            lines = logfile.readlines()
5✔
3755

3756
        for l in lines:
5✔
3757
            if 'SUCCESS' in l:
5✔
3758
                continue
5✔
3759
            return l.replace('\n', '')
2✔
3760

3761
        # OK all good
3762
        return None
5✔
3763

3764

3765
@entity_task(log)
9✔
3766
def copy_to_basedir(gdir, base_dir=None, setup='run'):
9✔
3767
    """Copies the glacier directories and their content to a new location.
3768

3769
    This utility function allows selecting certain files only, thus
3770
    saving time when copying.
3771

3772
    Parameters
3773
    ----------
3774
    gdir : :py:class:`oggm.GlacierDirectory`
3775
        the glacier directory to copy
3776
    base_dir : str
3777
        path to the new base directory (should end with "per_glacier" most
3778
        of the time)
3779
    setup : str
3780
        setup you want the copied directory to be useful for. Currently
3781
        supported are 'all' (copy the entire directory), 'inversion'
3782
        (copy the necessary files for the inversion AND the run),
3783
        'run' (copy the necessary files for a dynamical run), or 'run/spinup'
3784
        (copy the necessary files and all already conducted model runs, e.g.
3785
        from a dynamic spinup).
3786

3787
    Returns
3788
    -------
3789
    New glacier directories from the copied folders
3790
    """
3791
    base_dir = os.path.abspath(base_dir)
2✔
3792
    new_dir = os.path.join(base_dir, gdir.rgi_id[:8], gdir.rgi_id[:11],
2✔
3793
                           gdir.rgi_id)
3794
    if setup == 'run':
2✔
3795
        paths = ['model_flowlines', 'inversion_params', 'outlines',
1✔
3796
                 'local_mustar', 'climate_historical', 'glacier_grid',
3797
                 'gcm_data', 'climate_info', 'diagnostics', 'log']
3798
        paths = ('*' + p + '*' for p in paths)
1✔
3799
        shutil.copytree(gdir.dir, new_dir,
1✔
3800
                        ignore=include_patterns(*paths))
3801
    elif setup == 'inversion':
2✔
3802
        paths = ['inversion_params', 'downstream_line', 'outlines',
1✔
3803
                 'inversion_flowlines', 'glacier_grid', 'diagnostics',
3804
                 'local_mustar', 'climate_historical', 'gridded_data',
3805
                 'gcm_data', 'climate_info', 'log']
3806
        paths = ('*' + p + '*' for p in paths)
1✔
3807
        shutil.copytree(gdir.dir, new_dir,
1✔
3808
                        ignore=include_patterns(*paths))
3809
    elif setup == 'run/spinup':
2✔
3810
        paths = ['model_flowlines', 'inversion_params', 'outlines',
1✔
3811
                 'local_mustar', 'climate_historical', 'glacier_grid',
3812
                 'gcm_data', 'climate_info', 'diagnostics', 'log', 'model_run',
3813
                 'model_diagnostics', 'model_geometry']
3814
        paths = ('*' + p + '*' for p in paths)
1✔
3815
        shutil.copytree(gdir.dir, new_dir,
1✔
3816
                        ignore=include_patterns(*paths))
3817
    elif setup == 'all':
2✔
3818
        shutil.copytree(gdir.dir, new_dir)
2✔
3819
    else:
3820
        raise ValueError('setup not understood: {}'.format(setup))
×
3821
    return GlacierDirectory(gdir.rgi_id, base_dir=base_dir)
2✔
3822
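# --- Illustrative sketch (not part of _workflow.py) -----------------------
# Copy only what a forward run needs into a new directory tree. The target
# path below is hypothetical; it should end with 'per_glacier'. Assumes an
# initialized GlacierDirectory `gdir`.
new_base = '/tmp/oggm_copy/per_glacier'
new_gdir = copy_to_basedir(gdir, base_dir=new_base, setup='run')
# --- end sketch ------------------------------------------------------------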

3823

3824
def initialize_merged_gdir(main, tribs=[], glcdf=None,
9✔
3825
                           filename='climate_historical',
3826
                           input_filesuffix='',
3827
                           dem_source=None):
3828
    """Creates a new GlacierDirectory if tributaries are merged to a glacier
3829

3830
    This function should be called after centerlines.intersect_downstream_lines
3831
    and before flowline.merge_tributary_flowlines.
3832
    It will create a new GlacierDirectory, with a suitable DEM and reproject
3833
    the flowlines of the main glacier.
3834

3835
    Parameters
3836
    ----------
3837
    main : oggm.GlacierDirectory
3838
        the main glacier
3839
    tribs : list or dictionary containing oggm.GlacierDirectories
3840
        true tributary glaciers to the main glacier
3841
    glcdf: geopandas.GeoDataFrame
3842
        which contains the main glacier, will be downloaded if None
3843
    filename: str
3844
        Baseline climate file
3845
    input_filesuffix: str
3846
        Filesuffix to the climate file
3847
    dem_source: str
3848
        the DEM source to use
3849
    Returns
3850
    -------
3851
    merged : oggm.GlacierDirectory
3852
        the new GDir
3853
    """
3854
    from oggm.core.gis import define_glacier_region, merged_glacier_masks
1✔
3855

3856
    # If it's a dict, select the relevant ones
3857
    if isinstance(tribs, dict):
1✔
3858
        tribs = tribs[main.rgi_id]
×
3859
    # make sure tributaries are iterable
3860
    tribs = tolist(tribs)
1✔
3861

3862
    # read flowlines of the Main glacier
3863
    mfls = main.read_pickle('model_flowlines')
1✔
3864

3865
    # ------------------------------
3866
    # 0. create the new GlacierDirectory from main glaciers GeoDataFrame
3867
    # Should be passed along, if not download it
3868
    if glcdf is None:
1✔
3869
        glcdf = get_rgi_glacier_entities([main.rgi_id])
×
3870
    # Get index location of the specific glacier
3871
    idx = glcdf.loc[glcdf.RGIId == main.rgi_id].index
1✔
3872
    maindf = glcdf.loc[idx].copy()
1✔
3873

3874
    # add tributary geometries to maindf
3875
    merged_geometry = maindf.loc[idx, 'geometry'].iloc[0].buffer(0)
1✔
3876
    for trib in tribs:
1✔
3877
        geom = trib.read_pickle('geometries')['polygon_hr']
1✔
3878
        geom = salem.transform_geometry(geom, crs=trib.grid)
1✔
3879
        merged_geometry = merged_geometry.union(geom).buffer(0)
1✔
3880

3881
    # to get the center point, maximal extensions for DEM and single Polygon:
3882
    new_geometry = merged_geometry.convex_hull
1✔
3883
    maindf.loc[idx, 'geometry'] = new_geometry
1✔
3884

3885
    # make some adjustments to the rgi dataframe
3886
    # 1. calculate central point of new glacier
3887
    #    reproject twice to avoid Warning, first to flat projection
3888
    flat_centroid = salem.transform_geometry(new_geometry,
1✔
3889
                                             to_crs=main.grid).centroid
3890
    #    second reprojection of centroid to wgs84
3891
    new_centroid = salem.transform_geometry(flat_centroid, crs=main.grid)
1✔
3892
    maindf.loc[idx, 'CenLon'] = new_centroid.x
1✔
3893
    maindf.loc[idx, 'CenLat'] = new_centroid.y
1✔
3894
    # 2. update names
3895
    maindf.loc[idx, 'RGIId'] += '_merged'
1✔
3896
    if maindf.loc[idx, 'Name'].iloc[0] is None:
1✔
3897
        maindf.loc[idx, 'Name'] = main.name + ' (merged)'
×
3898
    else:
3899
        maindf.loc[idx, 'Name'] += ' (merged)'
1✔
3900

3901
    # finally create new Glacier Directory
3902
    # 1. set dx spacing to the one used for the flowlines
3903
    dx_method = cfg.PARAMS['grid_dx_method']
1✔
3904
    dx_spacing = cfg.PARAMS['fixed_dx']
1✔
3905
    cfg.PARAMS['grid_dx_method'] = 'fixed'
1✔
3906
    cfg.PARAMS['fixed_dx'] = mfls[-1].map_dx
1✔
3907
    merged = GlacierDirectory(maindf.loc[idx].iloc[0])
1✔
3908

3909
    # run define_glacier_region to get a fitting DEM and proper grid
3910
    define_glacier_region(merged, entity=maindf.loc[idx].iloc[0],
1✔
3911
                          source=dem_source)
3912

3913
    # write gridded data and geometries for visualization
3914
    merged_glacier_masks(merged, merged_geometry)
1✔
3915

3916
    # reset dx method
3917
    cfg.PARAMS['grid_dx_method'] = dx_method
1✔
3918
    cfg.PARAMS['fixed_dx'] = dx_spacing
1✔
3919

3920
    # copy main climate file, climate info and local_mustar to new gdir
3921
    climfilename = filename + '_' + main.rgi_id + input_filesuffix + '.nc'
1✔
3922
    climfile = os.path.join(merged.dir, climfilename)
1✔
3923
    shutil.copyfile(main.get_filepath(filename, filesuffix=input_filesuffix),
1✔
3924
                    climfile)
3925
    _mufile = os.path.basename(merged.get_filepath('local_mustar')).split('.')
1✔
3926
    mufile = _mufile[0] + '_' + main.rgi_id + '.' + _mufile[1]
1✔
3927
    shutil.copyfile(main.get_filepath('local_mustar'),
1✔
3928
                    os.path.join(merged.dir, mufile))
3929
    # I think I need the climate_info only for the main glacier
3930
    climateinfo = main.read_json('climate_info')
1✔
3931
    merged.write_json(climateinfo, 'climate_info')
1✔
3932

3933
    # reproject the flowlines to the new grid
3934
    for nr, fl in reversed(list(enumerate(mfls))):
1✔
3935

3936
        # 1. Step: Change projection to the main glaciers grid
3937
        _line = salem.transform_geometry(fl.line,
1✔
3938
                                         crs=main.grid, to_crs=merged.grid)
3939
        # 2. set new line
3940
        fl.set_line(_line)
1✔
3941

3942
        # 3. set flow to attributes
3943
        if fl.flows_to is not None:
1✔
3944
            fl.set_flows_to(fl.flows_to)
1✔
3945
        # remove inflow points, will be set by other flowlines if need be
3946
        fl.inflow_points = []
1✔
3947

3948
        # 5. set grid size attributes
3949
        dx = [shpg.Point(fl.line.coords[i]).distance(
1✔
3950
            shpg.Point(fl.line.coords[i+1]))
3951
            for i, pt in enumerate(fl.line.coords[:-1])]  # get distance
3952
        # and check if equally spaced
3953
        if not np.allclose(dx, np.mean(dx), atol=1e-2):
1✔
3954
            raise RuntimeError('Flowline is not evenly spaced.')
×
3955
        # dx might very slightly change, but should not
3956
        fl.dx = np.mean(dx).round(2)
1✔
3957
        # map_dx should stay exactly the same
3958
        # fl.map_dx = mfls[-1].map_dx
3959
        fl.dx_meter = fl.map_dx * fl.dx
1✔
3960

3961
        # replace flowline within the list
3962
        mfls[nr] = fl
1✔
3963

3964
    # Write the reprojected flowlines
3965
    merged.write_pickle(mfls, 'model_flowlines')
1✔
3966

3967
    return merged
1✔
3968

3969

3970
@entity_task(log)
9✔
3971
def gdir_to_tar(gdir, base_dir=None, delete=True):
9✔
3972
    """Writes the content of a glacier directory to a tar file.
3973

3974
    The tar file is located at the same location of the original directory.
3975
    The glacier directory objects become unusable once the directory is deleted!
3976

3977
    Parameters
3978
    ----------
3979
    base_dir : str
3980
        path to the basedir where to write the directory (defaults to the
3981
        same location of the original directory)
3982
    delete : bool
3983
        delete the original directory afterwards (default)
3984

3985
    Returns
3986
    -------
3987
    the path to the tar file
3988
    """
3989

3990
    source_dir = os.path.normpath(gdir.dir)
1✔
3991
    opath = source_dir + '.tar.gz'
1✔
3992
    if base_dir is not None:
1✔
3993
        opath = os.path.join(base_dir, os.path.relpath(opath, gdir.base_dir))
1✔
3994
        mkdir(os.path.dirname(opath))
1✔
3995

3996
    with tarfile.open(opath, "w:gz") as tar:
1✔
3997
        tar.add(source_dir, arcname=os.path.basename(source_dir))
1✔
3998

3999
    if delete:
1✔
4000
        shutil.rmtree(source_dir)
1✔
4001

4002
    return opath
1✔
4003
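# --- Illustrative sketch (not part of _workflow.py) -----------------------
# Archive a directory and re-open it later directly from the tarball.
# Assumes `gdir` exists and the default base_dir is used; with delete=True
# only the .tar.gz next to the original folder remains afterwards.
path_to_tar = gdir_to_tar(gdir, delete=True)
reopened = GlacierDirectory(gdir.rgi_id, from_tar=path_to_tar)
# --- end sketch ------------------------------------------------------------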

4004

4005
def base_dir_to_tar(base_dir=None, delete=True):
9✔
4006
    """Merge the directories into 1000 bundles as tar files.
4007

4008
    The tar file is located at the same location of the original directory.
4009

4010
    Parameters
4011
    ----------
4012
    base_dir : str
4013
        path to the basedir to parse (defaults to the working directory)
4014
    to_base_dir : str
4015
        path to the basedir where to write the directory (defaults to the
4016
        same location of the original directory)
4017
    delete : bool
4018
        delete the original directory tars afterwards (default)
4019
    """
4020

4021
    if base_dir is None:
1✔
4022
        if not cfg.PATHS.get('working_dir', None):
1✔
4023
            raise ValueError("Need a valid PATHS['working_dir']!")
×
4024
        base_dir = os.path.join(cfg.PATHS['working_dir'], 'per_glacier')
1✔
4025

4026
    to_delete = []
1✔
4027
    for dirname, subdirlist, filelist in os.walk(base_dir):
1✔
4028
        # RGI60-01.00
4029
        bname = os.path.basename(dirname)
1✔
4030
        if not (len(bname) == 11 and bname[-3] == '.'):
1✔
4031
            continue
1✔
4032
        opath = dirname + '.tar'
1✔
4033
        with tarfile.open(opath, 'w') as tar:
1✔
4034
            tar.add(dirname, arcname=os.path.basename(dirname))
1✔
4035
        if delete:
1✔
4036
            to_delete.append(dirname)
1✔
4037

4038
    for dirname in to_delete:
1✔
4039
        shutil.rmtree(dirname)
1✔
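# --- Illustrative sketch (not part of _workflow.py) -----------------------
# Typical end-of-run archiving, assuming a list of GlacierDirectories
# `gdirs` and a valid cfg.PATHS['working_dir']: first tar each glacier
# directory, then bundle the per-region '*.00' folders (up to 1000 glaciers
# each) into single tar files.
from oggm import workflow
workflow.execute_entity_task(gdir_to_tar, gdirs, delete=True)
base_dir_to_tar(delete=True)
# --- end sketch ------------------------------------------------------------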