pypest / pyemu / 5887625428

17 Aug 2023 06:23AM UTC coverage: 79.857% (+1.5%) from 78.319%
push · github · briochh · Merge branch 'develop'

11386 of 14258 relevant lines covered (79.86%)
6.77 hits per line

Source File: /pyemu/utils/helpers.py (74.01% of lines covered)
"""High-level functions to help perform complex tasks

"""

from __future__ import print_function, division
import os
import multiprocessing as mp
import warnings
from datetime import datetime
import platform
import struct
import shutil
from ast import literal_eval
import traceback
import re
import numpy as np
import pandas as pd

pd.options.display.max_colwidth = 100
from ..pyemu_warnings import PyemuWarning


try:
    import flopy
except:
    pass

import pyemu
from pyemu.utils.os_utils import run, start_workers


class Trie:
    """Regex::Trie in Python. Creates a Trie out of a list of words. The trie can be exported to a Regex pattern.
    The corresponding Regex should match much faster than a simple Regex union."""
    # after https://gist.github.com/EricDuminil/8faabc2f3de82b24e5a371b6dc0fd1e0
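    # Example usage (a minimal sketch):
    #   trie = Trie()
    #   for word in ["add", "adder", "addition"]:
    #       trie.add(word)
    #   pattern = re.compile(trie.pattern())  # matches any of the added words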
    def __init__(self):
        self.data = {}

    def add(self, word):
        ref = self.data
        for char in word:
            ref[char] = char in ref and ref[char] or {}
            ref = ref[char]
        ref[''] = 1

    def dump(self):
        return self.data

    def quote(self, char):
        return re.escape(char)

    def _pattern(self, pData):
        data = pData
        if "" in data and len(data.keys()) == 1:
            return None

        alt = []
        cc = []
        q = 0
        for char in sorted(data.keys()):
            if isinstance(data[char], dict):
                try:
                    recurse = self._pattern(data[char])
                    alt.append(self.quote(char) + recurse)
                except:
                    cc.append(self.quote(char))
            else:
                q = 1
        cconly = not len(alt) > 0

        if len(cc) > 0:
            if len(cc) == 1:
                alt.append(cc[0])
            else:
                alt.append('[' + ''.join(cc) + ']')

        if len(alt) == 1:
            result = alt[0]
        else:
            result = "(?:" + "|".join(alt) + ")"

        if q:
            if cconly:
                result += "?"
            else:
                result = "(?:%s)?" % result
        return result

    def pattern(self):
        return self._pattern(self.dump())

def autocorrelated_draw(pst, struct_dict, time_distance_col="distance", num_reals=100, verbose=True,
                        enforce_bounds=False, draw_ineq=False):
    """construct an autocorrelated observation noise ensemble from covariance matrices
    implied by geostatistical structure(s).

    Args:
        pst (`pyemu.Pst`): a control file (or the name of a control file).  The
            information in the `* observation data` dataframe is used extensively,
            including weight, standard_deviation (if present), upper_bound/lower_bound (if present).
        struct_dict (`dict`): a dict of GeoStruct (or structure file) and list of
            observation names.
        time_distance_col (`str`): the column in `* observation data` that represents the distance in time
            for each observation listed in `struct_dict`.
        num_reals (`int`, optional): number of realizations to draw.  Default is 100.
        verbose (`bool`, optional): flag to control output to stdout.  Default is True.
        enforce_bounds (`bool`, optional): flag to enforce `lower_bound` and `upper_bound` if
            these are present in `* observation data`.  Default is False.
        draw_ineq (`bool`, optional): flag to generate noise realizations for inequality observations.
            If False, noise will not be added to inequality observations in the ensemble.  Default is False.

    Returns:
        **pyemu.ObservationEnsemble**: the realized noise ensemble added to the observation values in the
        control file.

    Note:
        The variance of each observation (as defined by the weight or optional standard
        deviation) is used to scale the resulting geostatistical covariance matrix.
        Therefore, the sill of the geostatistical structures in `struct_dict` should be 1.0.

    Example::

        pst = pyemu.Pst("my.pst")
        # assuming there is only one timeseries of observations
        # and they are spaced one time unit apart
        pst.observation_data.loc[:, "distance"] = np.arange(pst.nobs)
        v = pyemu.geostats.ExpVario(contribution=1.0, a=10)  # units of `a` are time units
        gs = pyemu.geostats.GeoStruct(variograms=v)
        sd = {gs: ["obs1", "obs2", "obs3"]}
        oe = pyemu.helpers.autocorrelated_draw(pst, struct_dict=sd)
        oe.to_csv("my_oe.csv")

    """

    # check that the required time metadata is appropriate
    passed_names = []
    nz_names = pst.nnz_obs_names
    [passed_names.extend(obs) for gs, obs in struct_dict.items()]
    missing = list(set(passed_names) - set(nz_names))
    if len(missing) > 0:
        raise Exception("the following obs in struct_dict were not found in the nz obs names: " + str(missing))
    obs = pst.observation_data
    if time_distance_col not in obs.columns:
        raise Exception("time_distance_col '{0}' not found in observation data columns".format(time_distance_col))
    dvals = obs.loc[passed_names, time_distance_col]
    pobs = obs.loc[passed_names, :]
    isna = pobs.loc[pd.isna(dvals), "obsnme"]
    if isna.shape[0] > 0:
        raise Exception("the following struct dict observations have NaN for time_distance_col: {0}".format(str(isna)))
    if verbose:
        print("--> getting full diagonal cov matrix")
    fcov = pyemu.Cov.from_observation_data(pst)
    fcov_dict = {o: np.sqrt(fcov.x[i]) for i, o in enumerate(fcov.names)}
    if verbose:
        print("--> draw full obs en from diagonal cov")
    full_oe = pyemu.ObservationEnsemble.from_gaussian_draw(pst, fcov, num_reals=num_reals, fill=True)
    keys = list(struct_dict.keys())
    keys.sort()
    # for gs,onames in struct_dict.items():
    for gs in keys:
        onames = struct_dict[gs]
        if verbose:
            print("--> processing cov matrix for {0} items with gs {1}".format(len(onames), gs))
        dvals = obs.loc[onames, time_distance_col].values
        gcov = gs.covariance_matrix(dvals, np.ones(len(onames)), names=onames)
        if verbose:
            print("...scaling rows and cols")
        for i, name in enumerate(gcov.names):
            gcov.x[:, i] *= fcov_dict[name]
            gcov.x[i, :] *= fcov_dict[name]
        if verbose:
            print("...draw")
        oe = pyemu.ObservationEnsemble.from_gaussian_draw(pst, gcov, num_reals=num_reals, fill=False, by_groups=False)
        oe = oe.loc[:, gcov.names]
        full_oe.loc[:, gcov.names] = oe._df.values

    if enforce_bounds:
        if verbose:
            print("--> enforcing bounds")
        ub_dict = {o: 1e300 for o in full_oe.columns}
        if "upper_bound" in pst.observation_data.columns:
            ub_dict.update(pst.observation_data.upper_bound.fillna(1.0e300).to_dict())

        lb_dict = {o: -1e300 for o in full_oe.columns}
        if "lower_bound" in pst.observation_data.columns:
            lb_dict.update(pst.observation_data.lower_bound.fillna(-1.0e300).to_dict())
        allvals = full_oe.values
        for i, name in enumerate(full_oe.columns):
            # print("before:",name,ub_dict[name],full_oe.loc[:,name].max(),lb_dict[name],full_oe.loc[:,name].min())
            vals = allvals[:, i]
            vals[vals > ub_dict[name]] = ub_dict[name]
            vals[vals < lb_dict[name]] = lb_dict[name]
            allvals[:, i] = vals
            # print("...after:", name, ub_dict[name],full_oe.loc[:, name].max(), lb_dict[name], full_oe.loc[:, name].min())

    if not draw_ineq:
        obs = pst.observation_data
        lt_onames = pst.less_than_obs_constraints.to_list()
        if verbose:
            print("--> less than ineq obs:", lt_onames)
        lt_dict = obs.loc[lt_onames, "obsval"].to_dict()
        for n, v in lt_dict.items():
            full_oe.loc[:, n] = v
        obs = pst.observation_data
        gt_onames = pst.greater_than_obs_constraints.to_list()
        if verbose:
            print("--> greater than ineq obs:", gt_onames)
        gt_dict = obs.loc[gt_onames, "obsval"].to_dict()
        for n, v in gt_dict.items():
            full_oe.loc[:, n] = v
    return full_oe


def geostatistical_draws(
    pst, struct_dict, num_reals=100, sigma_range=4, verbose=True,
    scale_offset=True, subset=None
):
    """construct a parameter ensemble from a prior covariance matrix
    implied by geostatistical structure(s) and parameter bounds.

    Args:
        pst (`pyemu.Pst`): a control file (or the name of a control file).  The
            parameter bounds in `pst` are used to define the variance of each
            parameter group.
        struct_dict (`dict`): a dict of GeoStruct (or structure file), and list of
            pilot point template files pairs. If the values in the dict are
            `pd.DataFrames`, then they must have an 'x','y', and 'parnme' column.
            If the filename ends in '.csv', then a pd.DataFrame is loaded,
            otherwise a pilot points file is loaded.
        num_reals (`int`, optional): number of realizations to draw.  Default is 100.
        sigma_range (`float`): a float representing the number of standard deviations
            implied by parameter bounds. Default is 4.0, which implies 95% confidence parameter bounds.
        verbose (`bool`, optional): flag to control output to stdout.  Default is True.
        scale_offset (`bool`, optional): flag to apply scale and offset to parameter bounds
            when calculating variances - this is passed through to `pyemu.Cov.from_parameter_data()`.
            Default is True.
        subset (`array-like`, optional): list, array, set or pandas index defining subset of parameters
            for draw.

    Returns:
        **pyemu.ParameterEnsemble**: the realized parameter ensemble.

    Note:
        Parameters are realized by parameter group.

        The variance of each parameter is used to scale the resulting geostatistical
        covariance matrix.  Therefore, the sill of the geostatistical structures
        in `struct_dict` should be 1.0.

    Example::

        pst = pyemu.Pst("my.pst")
        sd = {"struct.dat":["hkpp.dat.tpl","vka.dat.tpl"]}
        pe = pyemu.helpers.geostatistical_draws(pst,struct_dict=sd)
        pe.to_csv("my_pe.csv")

    """

    if isinstance(pst, str):
        pst = pyemu.Pst(pst)
    assert isinstance(pst, pyemu.Pst), "pst arg must be a Pst instance, not {0}".format(
        type(pst)
    )
    if verbose:
        print("building diagonal cov")
    if subset is not None:
        if subset.empty or subset.intersection(pst.par_names).empty:
            warnings.warn(
                "Empty subset passed to draw method, or no intersecting pars "
                "with pst...\nwill draw full cov", PyemuWarning
            )
            subset = None
    full_cov = pyemu.Cov.from_parameter_data(
        pst, sigma_range=sigma_range, scale_offset=scale_offset,
        subset=subset
    )
    full_cov_dict = {n: float(v) for n, v in
                     zip(full_cov.col_names, full_cov.x.ravel())}
    par = pst.parameter_data
    par_ens = []
    pars_in_cov = set()
    keys = list(struct_dict.keys())
    keys.sort()
    for gs in keys:
        items = struct_dict[gs]
        if verbose:
            print("processing ", gs)
        if isinstance(gs, str):
            gss = pyemu.geostats.read_struct_file(gs)
            if isinstance(gss, list):
                warnings.warn(
                    "using first geostat structure in file {0}".format(gs), PyemuWarning
                )
                gs = gss[0]
            else:
                gs = gss
        if gs.sill != 1.0:
            warnings.warn("GeoStruct {0} sill != 1.0 - this is bad!".format(gs.name))
        if not isinstance(items, list):
            items = [items]
        # items.sort()
        for iitem, item in enumerate(items):
            if isinstance(item, str):
                assert os.path.exists(item), "file {0} not found".format(item)
                if item.lower().endswith(".tpl"):
                    df = pyemu.pp_utils.pp_tpl_to_dataframe(item)
                elif item.lower().endswith(".csv"):
                    df = pd.read_csv(item)
            else:
                df = item
            if df.shape[0] < 2:
                continue
            if "pargp" in df.columns:
                if verbose:
                    print("working on pargroups {0}".format(df.pargp.unique().tolist()))
            for req in ["x", "y", "parnme"]:
                if req not in df.columns:
                    raise Exception("{0} is not in the columns".format(req))
            missing = df.loc[~df.parnme.isin(par.parnme), "parnme"]
            if len(missing) > 0:
                warnings.warn(
                    "the following parameters are not "
                    + "in the control file: {0}".format(",".join(missing)),
                    PyemuWarning,
                )
                df = df.loc[~df.parnme.isin(missing)]
            if df.shape[0] == 0:
                warnings.warn(
                    "geostatistical_draws(): empty parameter df at position {0} items for geostruct {1}, skipping...".format(
                        iitem, gs
                    )
                )
                continue
            if "zone" not in df.columns:
                df.loc[:, "zone"] = 1
            zones = df.zone.unique()
            aset = set(pst.adj_par_names)
            for zone in zones:
                df_zone = df.loc[df.zone == zone, :].copy()
                df_zone = df_zone.loc[df_zone.parnme.isin(aset), :]
                if df_zone.shape[0] == 0:
                    warnings.warn(
                        "all parameters in zone {0} tied and/or fixed, skipping...".format(
                            zone
                        ),
                        PyemuWarning,
                    )
                    continue

                # df_zone.sort_values(by="parnme",inplace=True)
                df_zone.sort_index(inplace=True)
                if verbose:
                    print("build cov matrix")
                cov = gs.covariance_matrix(df_zone.x, df_zone.y, df_zone.parnme)
                if verbose:
                    print("done")

                if verbose:
                    print("getting diag var cov", df_zone.shape[0])

                if verbose:
                    print("scaling full cov by diag var cov")

                for i, name in enumerate(cov.row_names):
                    cov.x[:, i] *= np.sqrt(full_cov_dict[name])
                    cov.x[i, :] *= np.sqrt(full_cov_dict[name])
                    cov.x[i, i] = full_cov_dict[name]
                # no fixed values here
                pe = pyemu.ParameterEnsemble.from_gaussian_draw(
                    pst=pst, cov=cov, num_reals=num_reals, by_groups=False, fill=False
                )
                par_ens.append(pe._df)
                pars_in_cov.update(set(pe.columns))

    if verbose:
        print("adding remaining parameters to diagonal")
    fset = set(full_cov.row_names)
    diff = list(fset.difference(pars_in_cov))
    diff.sort()
    if len(diff) > 0:
        name_dict = {name: i for i, name in enumerate(full_cov.row_names)}
        vec = np.atleast_2d(np.array([full_cov.x[name_dict[d]] for d in diff]))
        cov = pyemu.Cov(x=vec, names=diff, isdiagonal=True)
        # here we fill in the fixed values
        pe = pyemu.ParameterEnsemble.from_gaussian_draw(
            pst, cov, num_reals=num_reals, fill=False
        )
        par_ens.append(pe._df)
    par_ens = pd.concat(par_ens, axis=1)
    par_ens = pyemu.ParameterEnsemble(pst=pst, df=par_ens)
    return par_ens


def geostatistical_prior_builder(
    pst, struct_dict, sigma_range=4, verbose=False, scale_offset=False
):
    """construct a full prior covariance matrix using geostatistical structures
    and parameter bounds information.

    Args:
        pst (`pyemu.Pst`): a control file instance (or the name of a control file)
        struct_dict (`dict`): a dict of GeoStruct (or structure file), and list of
            pilot point template files pairs. If the values in the dict are
            `pd.DataFrame` instances, then they must have an 'x','y', and 'parnme' column.
            If the filename ends in '.csv', then a pd.DataFrame is loaded,
            otherwise a pilot points file is loaded.
        sigma_range (`float`): a float representing the number of standard deviations
            implied by parameter bounds. Default is 4.0, which implies 95% confidence parameter bounds.
        verbose (`bool`, optional): flag to control output to stdout.  Default is False.
        scale_offset (`bool`): a flag to apply scale and offset to parameter upper and lower bounds
            before applying log transform.  Passed to pyemu.Cov.from_parameter_data().  Default
            is False.

    Returns:
        **pyemu.Cov**: a covariance matrix that includes all adjustable parameters in the control
        file.

    Note:
        The covariance of parameters associated with geostatistical structures is defined
        as a mixture of GeoStruct and bounds.  That is, the GeoStruct is used to construct a
        pyemu.Cov, then the rows and columns of the pyemu.Cov block are scaled by the uncertainty implied by the bounds and
        sigma_range. Most users will want the sill of the geostruct to sum to 1.0 so that the resulting
        covariance matrices have variance proportional to the parameter bounds. Sounds complicated...

        If the number of parameters exceeds about 20,000 this function may use all available memory
        then crash your computer.  In these high-dimensional cases, you probably don't need the prior
        covariance matrix itself, but rather an ensemble of parameter realizations.  In this case,
        please use the `geostatistical_draws()` function.

    Example::

        pst = pyemu.Pst("my.pst")
        sd = {"struct.dat":["hkpp.dat.tpl","vka.dat.tpl"]}
        cov = pyemu.helpers.geostatistical_prior_builder(pst,struct_dict=sd)
        cov.to_binary("prior.jcb")

    """

    if isinstance(pst, str):
        pst = pyemu.Pst(pst)
    assert isinstance(pst, pyemu.Pst), "pst arg must be a Pst instance, not {0}".format(
        type(pst)
    )
    if verbose:
        print("building diagonal cov")
    full_cov = pyemu.Cov.from_parameter_data(
        pst, sigma_range=sigma_range, scale_offset=scale_offset
    )

    full_cov_dict = {n: float(v)
                     for n, v in zip(full_cov.col_names, full_cov.x.ravel())}
    # full_cov = None
    par = pst.parameter_data
    for gs, items in struct_dict.items():
        if verbose:
            print("processing ", gs)
        if isinstance(gs, str):
            gss = pyemu.geostats.read_struct_file(gs)
            if isinstance(gss, list):
                warnings.warn(
                    "using first geostat structure in file {0}".format(gs), PyemuWarning
                )
                gs = gss[0]
            else:
                gs = gss
        if gs.sill != 1.0:
            warnings.warn(
                "geostatistical_prior_builder() warning: geostruct sill != 1.0, user beware!"
            )
        if not isinstance(items, list):
            items = [items]
        for item in items:
            if isinstance(item, str):
                assert os.path.exists(item), "file {0} not found".format(item)
                if item.lower().endswith(".tpl"):
                    df = pyemu.pp_utils.pp_tpl_to_dataframe(item)
                elif item.lower().endswith(".csv"):
                    df = pd.read_csv(item)
            else:
                df = item
            for req in ["x", "y", "parnme"]:
                if req not in df.columns:
                    raise Exception("{0} is not in the columns".format(req))
            missing = df.loc[df.parnme.apply(lambda x: x not in par.parnme), "parnme"]
            if len(missing) > 0:
                warnings.warn(
                    "the following parameters are not "
                    + "in the control file: {0}".format(",".join(missing)),
                    PyemuWarning,
                )
                df = df.loc[df.parnme.apply(lambda x: x not in missing)]
            if "zone" not in df.columns:
                df.loc[:, "zone"] = 1
            zones = df.zone.unique()
            aset = set(pst.adj_par_names)
            for zone in zones:
                df_zone = df.loc[df.zone == zone, :].copy()
                df_zone = df_zone.loc[df_zone.parnme.apply(lambda x: x in aset), :]
                if df_zone.shape[0] == 0:
                    warnings.warn(
                        "all parameters in zone {0} tied and/or fixed, skipping...".format(
                            zone
                        ),
                        PyemuWarning,
                    )
                    continue
                # df_zone.sort_values(by="parnme",inplace=True)
                df_zone.sort_index(inplace=True)
                if verbose:
                    print("build cov matrix")
                cov = gs.covariance_matrix(df_zone.x, df_zone.y, df_zone.parnme)
                if verbose:
                    print("done")
                # find the variance in the diagonal cov
                if verbose:
                    print("getting diag var cov", df_zone.shape[0])

                if verbose:
                    print("scaling full cov by diag var cov")

                for i, name in enumerate(cov.row_names):
                    cov.x[:, i] *= np.sqrt(full_cov_dict[name])
                    cov.x[i, :] *= np.sqrt(full_cov_dict[name])
                    cov.x[i, i] = full_cov_dict[name]
                if verbose:
                    print("test for inversion")
                try:
                    ci = cov.inv
                except:
                    df_zone.to_csv("prior_builder_crash.csv")
                    raise Exception("error inverting cov {0}".format(cov.row_names[:3]))

                if verbose:
                    print("replace in full cov")
                full_cov.replace(cov)
    return full_cov


def _rmse(v1, v2):
    """return root mean squared error between v1 and v2

    Args:
        v1 (iterable): one vector
        v2 (iterable): another vector

    Returns:
        **float**: root mean squared error of v1,v2

    """

    return np.sqrt(np.mean(np.square(v1 - v2)))


def calc_observation_ensemble_quantiles(
    ens, pst, quantiles, subset_obsnames=None, subset_obsgroups=None
):
    """Given an observation ensemble and requested quantiles, this function calculates each
    requested quantile point-by-point in the ensemble. The resulting set of values does not,
    however, correspond to a single realization in the ensemble. So, this function also finds
    the realization with the minimum weighted squared distance to each quantile, labels it in
    the ensemble, and indicates which realizations correspond to the selected quantiles.

    Args:
        ens (`pandas.DataFrame`): DataFrame read from an observation ensemble
        pst (`pyemu.Pst`): needed to obtain observation weights
        quantiles (iterable): quantiles ranging from 0-1.0 for which results are requested
        subset_obsnames (iterable): list of observation names to include in calculations
        subset_obsgroups (iterable): list of observation groups to include in calculations

    Returns:
        tuple containing

        - **pandas.DataFrame**: same ens object that was input but with quantile realizations
          appended as new rows labelled with 'q_#' where '#' is the selected quantile
        - **dict**: dictionary with keys being quantiles and values being the realizations
          corresponding to each quantile
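
    Example (a minimal sketch, assuming a control file "my.pst" and an observation
    ensemble csv "my_oe.csv" exist)::

        pst = pyemu.Pst("my.pst")
        oe = pd.read_csv("my_oe.csv", index_col=0)
        qens, qidx = pyemu.helpers.calc_observation_ensemble_quantiles(
            oe, pst, quantiles=[0.05, 0.5, 0.95]
        )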
    """
    # TODO: handle zero weights due to PDC

    quantile_idx = {}
    # make sure quantiles and subset names and groups are lists
    if not isinstance(quantiles, list):
        quantiles = list(quantiles)
    if not isinstance(subset_obsnames, list) and subset_obsnames is not None:
        subset_obsnames = list(subset_obsnames)
    if not isinstance(subset_obsgroups, list) and subset_obsgroups is not None:
        subset_obsgroups = list(subset_obsgroups)

    if "real_name" in ens.columns:
        ens = ens.set_index("real_name")
    # if 'base' real was lost, then the index is of type int. needs to be string later so set here
    ens.index = [str(i) for i in ens.index]
    if not isinstance(pst, pyemu.Pst):
        raise Exception("pst object must be of type pyemu.Pst")

    # get the observation data
    obs = pst.observation_data.copy()

    # confirm that the indices and weights line up
    if not all(ens.columns == obs.index):
        raise Exception("ens and pst observation names do not align")

    # deal with any subsetting of observations that isn't handled through weights

    trimnames = obs.index.values
    if subset_obsgroups is not None and subset_obsnames is not None:
        raise Exception(
            "can only specify information in one of subset_obsnames or subset_obsgroups, not both"
        )

    if subset_obsnames is not None:
        trimnames = subset_obsnames
        if len(set(trimnames) - set(obs.index.values)) != 0:
            raise Exception(
                "the following names in subset_obsnames are not in the ensemble:\n"
                + "".join(["{}\n".format(i) for i in (set(trimnames) - set(obs.index.values))])
            )

    if subset_obsgroups is not None:
        if len((set(subset_obsgroups) - set(pst.obs_groups))) != 0:
            raise Exception(
                "the following groups in subset_obsgroups are not in pst:\n"
                + "".join(
                    [
                        "{}\n".format(i)
                        for i in (set(subset_obsgroups) - set(pst.obs_groups))
                    ]
                )
            )

        trimnames = obs.loc[obs.obgnme.isin(subset_obsgroups)].obsnme.tolist()
        if len((set(trimnames) - set(obs.index.values))) != 0:
            raise Exception(
                "the following names in subset_obsnames are not in the ensemble:\n"
                + "".join(["{}\n".format(i) for i in (set(trimnames) - set(obs.index.values))])
            )
    # trim the data to subsets (or complete)
    ens_eval = ens.loc[:, trimnames].copy()
    weights = obs.loc[trimnames, :].weight.values.astype(float)

    data = {}
    for cq in quantiles:
        # calculate the point-wise quantile values
        qfit = np.quantile(ens_eval, cq, axis=0)
        # calculate the weighted distance between all reals and the desired quantile
        qreal = np.argmin(
            np.linalg.norm([(i - qfit) * weights for i in ens_eval.values], axis=1)
        )
        quantile_idx["q{}".format(cq)] = qreal
        # ens = ens.append(ens.iloc[qreal])
        # idx = ens.index.values
        # idx[-1] = "q{}".format(cq)
        # ens.set_index(idx, inplace=True)
        data["q{}".format(cq)] = ens.iloc[qreal]

    ens = pd.DataFrame(data=data).transpose()

    return ens, quantile_idx


def calc_rmse_ensemble(ens, pst, bygroups=True, subset_realizations=None):
    """
    DEPRECATED --> please see pyemu.utils.metrics.calc_metric_ensemble()
    Calculates RMSE (without weights) to quantify fit to observations for ensemble members

    Args:
        ens (`pandas.DataFrame`): DataFrame read from an observation ensemble
        pst (`pyemu.Pst`): needed to obtain observation weights
        bygroups (`bool`): flag to summarize by groups or not. Defaults to True.
        subset_realizations (iterable, optional): subset of realizations for which
            to report RMSE. Defaults to None, which returns all realizations.

    Returns:
        **pandas.DataFrame**: rows are realizations. Columns are groups. Content is RMSE
    """

    raise Exception(
        "this is deprecated-->please see pyemu.utils.metrics.calc_metric_ensemble()"
    )


def _condition_on_par_knowledge(cov, var_knowledge_dict):
    """experimental function to condition a covariance matrix with the variances of new information.

    Args:
        cov (`pyemu.Cov`): prior covariance matrix
        var_knowledge_dict (`dict`): a dictionary of parameter names and variances of
            the new information

    Returns:
        **pyemu.Cov**: the conditional covariance matrix
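
    Example (a minimal sketch, assuming "prior.cov" holds a full (non-diagonal)
    covariance matrix and "par1" is one of its rows)::

        cov = pyemu.Cov.from_ascii("prior.cov")
        post_cov = _condition_on_par_knowledge(cov, {"par1": 0.01})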

    """

    missing = []
    for name in var_knowledge_dict.keys():
        if name not in cov.row_names:
            missing.append(name)
    if len(missing):
        raise Exception(
            "var knowledge dict entries not found: {0}".format(",".join(missing))
        )
    if cov.isdiagonal:
        raise Exception(
            "_condition_on_par_knowledge(): cov is diagonal, simply update the par variances"
        )
    know_names = list(var_knowledge_dict.keys())
    know_names.sort()
    know_cross_cov = cov.get(cov.row_names, know_names)

    know_cov = cov.get(know_names, know_names)
    # add the par knowledge to the diagonal of know_cov
    for i, name in enumerate(know_names):
        know_cov.x[i, i] += np.squeeze(var_knowledge_dict[name])

    # kalman gain
    k_gain = know_cross_cov * know_cov.inv
    # selection matrix
    h = k_gain.zero2d.T
    know_dict = {n: i for i, n in enumerate(know_names)}
    for i, name in enumerate(cov.row_names):
        if name in know_dict:
            h.x[know_dict[name], i] = 1.0

    prod = k_gain * h
    conditional_cov = (prod.identity - prod) * cov
    return conditional_cov


def kl_setup(
    num_eig,
    sr,
    struct,
    prefixes,
    factors_file="kl_factors.dat",
    islog=True,
    basis_file=None,
    tpl_dir=".",
):
    """setup a Karhunen-Loève based parameterization for a given
    geostatistical structure.

    Args:
        num_eig (`int`): the number of basis vectors to retain in the
            reduced basis
        sr (`flopy.reference.SpatialReference`): a spatial reference instance
        struct (`str`): a PEST-style structure file.  Can also be a
            `pyemu.geostats.GeoStruct` instance.
        prefixes ([`str`]): a list of parameter prefixes to generate KL
            parameterization for.
        factors_file (`str`, optional): name of the PEST-style interpolation
            factors file to write (can be processed with FAC2REAL).
            Default is "kl_factors.dat".
        islog (`bool`, optional): flag to indicate if the parameters are log transformed.
            Default is True
        basis_file (`str`, optional): the name of the PEST-style binary (e.g. jco)
            file to write the reduced basis vectors to.  Default is None (not saved).
        tpl_dir (`str`, optional): the directory to write the resulting
            template files to.  Default is "." (current directory).

    Returns:
        **pandas.DataFrame**: a dataframe of parameter information.

    Note:
        This is the companion function to `helpers.apply_kl()`

    Example::

        m = flopy.modflow.Modflow.load("mymodel.nam")
        prefixes = ["hk","vka","ss"]
        df = pyemu.helpers.kl_setup(10,m.sr,"struct.dat",prefixes)

    """

    try:
        import flopy
    except Exception as e:
        raise Exception("error importing flopy: {0}".format(str(e)))
    assert isinstance(sr, pyemu.helpers.SpatialReference)
    # for name,array in array_dict.items():
    #     assert isinstance(array,np.ndarray)
    #     assert array.shape[0] == sr.nrow
    #     assert array.shape[1] == sr.ncol
    #     assert len(name) + len(str(num_eig)) <= 12,"name too long:{0}".\
    #         format(name)

    if isinstance(struct, str):
        assert os.path.exists(struct)
        gs = pyemu.utils.read_struct_file(struct)
    else:
        gs = struct
    names = []
    for i in range(sr.nrow):
        names.extend(["i{0:04d}j{1:04d}".format(i, j) for j in range(sr.ncol)])

    cov = gs.covariance_matrix(
        sr.xcentergrid.flatten(), sr.ycentergrid.flatten(), names=names
    )

    eig_names = ["eig_{0:04d}".format(i) for i in range(cov.shape[0])]
    trunc_basis = cov.u
    trunc_basis.col_names = eig_names
    # trunc_basis.col_names = [""]
    if basis_file is not None:
        trunc_basis.to_binary(basis_file)
    trunc_basis = trunc_basis[:, :num_eig]
    eig_names = eig_names[:num_eig]

    pp_df = pd.DataFrame({"name": eig_names}, index=eig_names)
    pp_df.loc[:, "x"] = -1.0 * sr.ncol
    pp_df.loc[:, "y"] = -1.0 * sr.nrow
    pp_df.loc[:, "zone"] = -999
    pp_df.loc[:, "parval1"] = 1.0
    pyemu.pp_utils.write_pp_file(os.path.join("temp.dat"), pp_df)

    _eigen_basis_to_factor_file(
        sr.nrow, sr.ncol, trunc_basis, factors_file=factors_file, islog=islog
    )
    dfs = []
    for prefix in prefixes:
        tpl_file = os.path.join(tpl_dir, "{0}.dat_kl.tpl".format(prefix))
        df = pyemu.pp_utils.pilot_points_to_tpl("temp.dat", tpl_file, prefix)
        shutil.copy2("temp.dat", tpl_file.replace(".tpl", ""))
        df.loc[:, "tpl_file"] = tpl_file
        df.loc[:, "in_file"] = tpl_file.replace(".tpl", "")
        df.loc[:, "prefix"] = prefix
        df.loc[:, "pargp"] = "kl_{0}".format(prefix)
        dfs.append(df)
        # arr = pyemu.geostats.fac2real(df,factors_file=factors_file,out_file=None)
    df = pd.concat(dfs)
    df.loc[:, "parubnd"] = 10.0
    df.loc[:, "parlbnd"] = 0.1
    # return the dataframe with the bounds just set - returning a fresh
    # pd.concat(dfs) here would silently discard the parubnd/parlbnd columns
    return df


def _eigen_basis_to_factor_file(nrow, ncol, basis, factors_file, islog=True):
    assert nrow * ncol == basis.shape[0]
    with open(factors_file, "w") as f:
        f.write("junk.dat\n")
        f.write("junk.zone.dat\n")
        f.write("{0} {1}\n".format(ncol, nrow))
        f.write("{0}\n".format(basis.shape[1]))
        [f.write(name + "\n") for name in basis.col_names]
        t = 0
        if islog:
            t = 1
        for i in range(nrow * ncol):
            f.write("{0} {1} {2} {3:8.5e}".format(i + 1, t, basis.shape[1], 0.0))
            [
                f.write(" {0} {1:12.8g} ".format(i + 1, w))
                for i, w in enumerate(basis.x[i, :])
            ]
            f.write("\n")


def kl_apply(par_file, basis_file, par_to_file_dict, arr_shape):
    """Apply a KL parameterization transform from basis factors to model
    input arrays.

    Args:
        par_file (`str`): the csv file to get factor values from.  Must contain
            the following columns: "name", "new_val", "org_val"
        basis_file (`str`): the PEST-style binary file that contains the reduced
            basis
        par_to_file_dict (`dict`): a mapping from KL parameter prefixes to array
            file names.
        arr_shape (tuple): a length 2 tuple of the number of rows and columns
            the resulting arrays should have.

    Note:
        This is the companion function to kl_setup.
        This function should be called during the forward run.
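
    Example (a minimal sketch of a forward-run script; the file names are
    hypothetical and would come from a prior `kl_setup()` call)::

        pyemu.helpers.kl_apply(
            "hk.dat_kl", "basis.jco", {"hk": "hk.dat"}, arr_shape=(100, 50)
        )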

    """
    df = pd.read_csv(par_file)
    assert "name" in df.columns
    assert "org_val" in df.columns
    assert "new_val" in df.columns

    df.loc[:, "prefix"] = df.name.apply(lambda x: x[:-4])
    for prefix in df.prefix.unique():
        assert prefix in par_to_file_dict.keys(), "missing prefix:{0}".format(prefix)
    basis = pyemu.Matrix.from_binary(basis_file)
    assert basis.shape[1] == arr_shape[0] * arr_shape[1]
    arr_min = 1.0e-10  # a temp hack

    # means = df.loc[df.name.apply(lambda x: x.endswith("mean")),:]
    # print(means)
    df = df.loc[df.name.apply(lambda x: not x.endswith("mean")), :]
    for prefix, filename in par_to_file_dict.items():
        factors = pyemu.Matrix.from_dataframe(df.loc[df.prefix == prefix, ["new_val"]])
        factors.autoalign = False
        basis_prefix = basis[: factors.shape[0], :]
        arr = (factors.T * basis_prefix).x.reshape(arr_shape)
        # arr += means.loc[means.prefix==prefix,"new_val"].values
        arr[arr < arr_min] = arr_min
        np.savetxt(filename, arr, fmt="%20.8E")


def zero_order_tikhonov(pst, parbounds=True, par_groups=None, reset=True):
    """setup preferred-value regularization in a pest control file.

    Args:
        pst (`pyemu.Pst`): the control file instance
        parbounds (`bool`, optional): flag to weight the new prior information
            equations according to parameter bound width - approx the KL
            transform. Default is True
        par_groups (`list`): a list of parameter groups to build PI equations for.
            If None, all adjustable parameters are used. Default is None
        reset (`bool`): a flag to remove any existing prior information equations
            in the control file.  Default is True

    Note:
        Operates in place.


    Example::

        pst = pyemu.Pst("my.pst")
        pyemu.helpers.zero_order_tikhonov(pst)
        pst.write("my_reg.pst")

    """

    if par_groups is None:
        par_groups = pst.par_groups

    pilbl, obgnme, weight, equation = [], [], [], []
    for idx, row in pst.parameter_data.iterrows():
        pt = row["partrans"].lower()
        try:
            pt = pt.decode()
        except:
            pass
        if pt not in ["tied", "fixed"] and row["pargp"] in par_groups:
            pilbl.append(row["parnme"])
            weight.append(1.0)
            ogp_name = "regul" + row["pargp"]
            obgnme.append(ogp_name[:12])
            parnme = row["parnme"]
            parval1 = row["parval1"]
            if pt == "log":
                parnme = "log(" + parnme + ")"
                parval1 = np.log10(parval1)
            eq = "1.0 * " + parnme + " ={0:15.6E}".format(parval1)
            equation.append(eq)

    if reset:
        pst.prior_information = pd.DataFrame(
            {"pilbl": pilbl, "equation": equation, "obgnme": obgnme, "weight": weight}
        )
    else:
        pi = pd.DataFrame(
            {"pilbl": pilbl, "equation": equation, "obgnme": obgnme, "weight": weight}
        )

        pst.prior_information = pd.concat([pst.prior_information, pi])
    if parbounds:
        _regweight_from_parbound(pst)
    if pst.control_data.pestmode == "estimation":
        pst.control_data.pestmode = "regularization"


def _regweight_from_parbound(pst):
    """sets regularization weights from parameter bounds
    which approximates the KL expansion.  Called by
    zero_order_tikhonov().

    Args:
        pst (`pyemu.Pst`): control file

    """

    pst.parameter_data.index = pst.parameter_data.parnme
    pst.prior_information.index = pst.prior_information.pilbl
    for idx, parnme in enumerate(pst.prior_information.pilbl):
        if parnme in pst.parameter_data.index:
            row = pst.parameter_data.loc[parnme, :]
            lbnd, ubnd = row["parlbnd"], row["parubnd"]
            if row["partrans"].lower() == "log":
                weight = 1.0 / (np.log10(ubnd) - np.log10(lbnd))
            else:
                weight = 1.0 / (ubnd - lbnd)
            pst.prior_information.loc[parnme, "weight"] = weight
        else:
            print(
                "prior information name does not correspond"
                + " to a parameter: "
                + str(parnme)
            )


def first_order_pearson_tikhonov(pst, cov, reset=True, abs_drop_tol=1.0e-3):
    """setup preferred-difference regularization from a covariance matrix.


    Args:
        pst (`pyemu.Pst`): the PEST control file
        cov (`pyemu.Cov`): a covariance matrix instance with
            some or all of the parameters listed in `pst`.
        reset (`bool`): a flag to remove any existing prior information equations
            in the control file.  Default is True
        abs_drop_tol (`float`, optional): tolerance to control how many pi equations
            are written. If the absolute value of the Pearson CC is less than
            abs_drop_tol, the prior information equation will not be included in
            the control file.

    Note:
        The weights on the prior information equations are the Pearson
        correlation coefficients implied by the covariance matrix.

        Operates in place.

    Example::

        pst = pyemu.Pst("my.pst")
        cov = pyemu.Cov.from_ascii("my.cov")
        pyemu.helpers.first_order_pearson_tikhonov(pst,cov)
        pst.write("my_reg.pst")

    """
    assert isinstance(cov, pyemu.Cov)
    print("getting CC matrix")
    cc_mat = cov.to_pearson()
    # print(pst.parameter_data.dtypes)
    try:
        ptrans = pst.parameter_data.partrans.apply(lambda x: x.decode()).to_dict()
    except:
        ptrans = pst.parameter_data.partrans.to_dict()
    pi_num = pst.prior_information.shape[0] + 1
    pilbl, obgnme, weight, equation = [], [], [], []
    sadj_names = set(pst.adj_par_names)
    print("processing")
    for i, iname in enumerate(cc_mat.row_names):
        if iname not in sadj_names:
            continue
        for j, jname in enumerate(cc_mat.row_names[i + 1 :]):
            if jname not in sadj_names:
                continue
            # print(i,iname,i+j+1,jname)
            cc = cc_mat.x[i, j + i + 1]
            if cc < abs_drop_tol:
                continue
            pilbl.append("pcc_{0}".format(pi_num))
            iiname = str(iname)
            if str(ptrans[iname]) == "log":
                iiname = "log(" + iname + ")"
            jjname = str(jname)
            if str(ptrans[jname]) == "log":
                jjname = "log(" + jname + ")"
            equation.append("1.0 * {0} - 1.0 * {1} = 0.0".format(iiname, jjname))
            weight.append(cc)
            obgnme.append("regul_cc")
            pi_num += 1
    df = pd.DataFrame(
        {"pilbl": pilbl, "equation": equation, "obgnme": obgnme, "weight": weight}
    )
    df.index = df.pilbl
    if reset:
        pst.prior_information = df
    else:
        pst.prior_information = pd.concat([pst.prior_information, df])

    if pst.control_data.pestmode == "estimation":
        pst.control_data.pestmode = "regularization"


def simple_tpl_from_pars(parnames, tplfilename="model.input.tpl"):
    """Make a simple template file from a list of parameter names.

    Args:
        parnames ([`str`]): list of parameter names to put in the
            new template file
        tplfilename (`str`): Name of the template file to create.  Default
            is "model.input.tpl"

    Note:
        Writes a file `tplfilename` with each parameter name in `parnames` on a line
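
    Example (writes "model.input.tpl" to the current directory)::

        pyemu.helpers.simple_tpl_from_pars(["p1", "p2"], "model.input.tpl")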

    """
    with open(tplfilename, "w") as ofp:
        ofp.write("ptf ~\n")
        [ofp.write("~{0:^12}~\n".format(cname)) for cname in parnames]


def simple_ins_from_obs(obsnames, insfilename="model.output.ins"):
    """write a simple instruction file that reads the values named
    in obsnames in order, one per line, from a model output file

    Args:
        obsnames ([`str`]): list of observation names to put in the
            new instruction file
        insfilename (`str`): the name of the instruction file to
            create. Default is "model.output.ins"

    Note:
        writes a file `insfilename` with each observation read off
        of a single line


    """
    with open(insfilename, "w") as ofp:
        ofp.write("pif ~\n")
        [ofp.write("l1 !{0}!\n".format(cob)) for cob in obsnames]


def pst_from_parnames_obsnames(
    parnames, obsnames, tplfilename="model.input.tpl", insfilename="model.output.ins"
):
    """Creates a Pst object from a list of parameter names and a list of observation names.

    Args:
        parnames ([`str`]): list of parameter names
        obsnames ([`str`]): list of observation names
        tplfilename (`str`): template filename. Default is "model.input.tpl"
        insfilename (`str`): instruction filename. Default is "model.output.ins"

    Returns:
        `pyemu.Pst`: the generic control file

    Example::

        parnames = ["p1","p2"]
        obsnames = ["o1","o2"]
        pst = pyemu.helpers.pst_from_parnames_obsnames(parnames,obsnames)


    """
    simple_tpl_from_pars(parnames, tplfilename)
    simple_ins_from_obs(obsnames, insfilename)

    modelinputfilename = tplfilename.replace(".tpl", "")
    modeloutputfilename = insfilename.replace(".ins", "")

    return pyemu.Pst.from_io_files(
        tplfilename, modelinputfilename, insfilename, modeloutputfilename
    )


def read_pestpp_runstorage(filename, irun=0, with_metadata=False):
    """read pars and obs from a specific run in a pest++ serialized
    run storage file (e.g. .rns/.rnj) into dataframes.

    Args:
        filename (`str`): the name of the run storage file
        irun (`int`): the run id to process. If 'all', then all runs are
            read. Default is 0
        with_metadata (`bool`): flag to return run stats and info txt as well

    Returns:
        tuple containing

        - **pandas.DataFrame**: parameter information
        - **pandas.DataFrame**: observation information
        - **pandas.DataFrame**: optionally run status and info txt.

    Note:
        This function can save you heaps of misery if your pest++ run
        died before writing output files...
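
    Example (a minimal sketch, assuming a run storage file "pest.rns" exists)::

        # a single run
        par_df, obs_df = pyemu.helpers.read_pestpp_runstorage("pest.rns", irun=0)
        # all runs, with run status metadata
        par_df, obs_df, meta = pyemu.helpers.read_pestpp_runstorage(
            "pest.rns", irun="all", with_metadata=True
        )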

    """

    header_dtype = np.dtype(
        [
            ("n_runs", np.int64),
            ("run_size", np.int64),
            ("p_name_size", np.int64),
            ("o_name_size", np.int64),
        ]
    )

    try:
        irun = int(irun)
    except:
        if irun.lower() == "all":
            irun = irun.lower()
        else:
            raise Exception(
                "unrecognized 'irun': should be int or 'all', not '{0}'".format(irun)
            )

    def status_str(r_status):
        if r_status == 0:
            return "not completed"
        if r_status == 1:
            return "completed"
        if r_status == -100:
            return "canceled"
        else:
            return "failed"

    assert os.path.exists(filename)
    f = open(filename, "rb")
    header = np.fromfile(f, dtype=header_dtype, count=1)
    p_name_size, o_name_size = header["p_name_size"][0], header["o_name_size"][0]
    par_names = (
        struct.unpack("{0}s".format(p_name_size), f.read(p_name_size))[0]
        .strip()
        .lower()
        .decode()
        .split("\0")[:-1]
    )
    obs_names = (
        struct.unpack("{0}s".format(o_name_size), f.read(o_name_size))[0]
        .strip()
        .lower()
        .decode()
        .split("\0")[:-1]
    )
    n_runs, run_size = header["n_runs"][0], header["run_size"][0]
    run_start = f.tell()

    def _read_run(irun):
        f.seek(run_start + (irun * run_size))
        r_status = np.fromfile(f, dtype=np.int8, count=1)
        info_txt = struct.unpack("1001s", f.read(1001))[0].strip().lower().decode()
        par_vals = np.fromfile(f, dtype=np.float64, count=len(par_names) + 1)[1:]
        obs_vals = np.fromfile(f, dtype=np.float64, count=len(obs_names) + 1)[:-1]
        par_df = pd.DataFrame({"parnme": par_names, "parval1": par_vals})

        par_df.index = par_df.pop("parnme")
        obs_df = pd.DataFrame({"obsnme": obs_names, "obsval": obs_vals})
        obs_df.index = obs_df.pop("obsnme")
        return r_status, info_txt, par_df, obs_df

    if irun == "all":
        par_dfs, obs_dfs = [], []
        r_stats, txts = [], []
        for irun in range(n_runs):
            # print(irun)
            r_status, info_txt, par_df, obs_df = _read_run(irun)
            par_dfs.append(par_df)
            obs_dfs.append(obs_df)
            r_stats.append(r_status)
            txts.append(info_txt)
        par_df = pd.concat(par_dfs, axis=1).T
        par_df.index = np.arange(n_runs)
        obs_df = pd.concat(obs_dfs, axis=1).T
        obs_df.index = np.arange(n_runs)
        meta_data = pd.DataFrame({"r_status": r_stats, "info_txt": txts})
        meta_data.loc[:, "status"] = meta_data.r_status.apply(status_str)

    else:
        assert irun <= n_runs
        r_status, info_txt, par_df, obs_df = _read_run(irun)
        meta_data = pd.DataFrame({"r_status": [r_status], "info_txt": [info_txt]})
        meta_data.loc[:, "status"] = meta_data.r_status.apply(status_str)
    f.close()
    if with_metadata:
        return par_df, obs_df, meta_data
    else:
        return par_df, obs_df


1303
def jco_from_pestpp_runstorage(rnj_filename, pst_filename):
9✔
1304
    """read pars and obs from a pest++ serialized run storage
1305
    file (e.g., .rnj) and return jacobian matrix instance
1306

1307
    Args:
1308
        rnj_filename (`str`): the name of the run storage file
1309
        pst_filename (`str`): the name of the pst file
1310

1311
    Returns:
1312
        `pyemu.Jco`: a jacobian matrix constructed from the run results and
1313
        pest control file information.
1314

1315

1316
    Note:
1317
        This can then be passed to `Jco.to_binary` or `Jco.to_coo`, etc., to write the jco
1318
        file in a subsequent step to avoid memory resource issues associated
1319
        with very large problems.
1320

1321
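    Example (a minimal sketch; file names are hypothetical)::

        # rebuild a jacobian from a run storage file left behind by a
        # pest++ jacobian-filling run that died before writing the jco file
        jco = pyemu.helpers.jco_from_pestpp_runstorage("pest.rnj", "pest.pst")
        jco.to_binary("pest.jco")  # write in a separate step
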
    """
1322

1323
    header_dtype = np.dtype(
8✔
1324
        [
1325
            ("n_runs", np.int64),
1326
            ("run_size", np.int64),
1327
            ("p_name_size", np.int64),
1328
            ("o_name_size", np.int64),
1329
        ]
1330
    )
1331

1332
    pst = pyemu.Pst(pst_filename)
8✔
1333
    par = pst.parameter_data
8✔
1334
    log_pars = set(par.loc[par.partrans == "log", "parnme"].values)
8✔
1335
    with open(rnj_filename, "rb") as f:
8✔
1336
        header = np.fromfile(f, dtype=header_dtype, count=1)
8✔
1337

1338
    try:
8✔
1339
        base_par, base_obs = read_pestpp_runstorage(rnj_filename, irun=0)
8✔
1340
    except:
×
1341
        raise Exception("couldn't get base run...")
×
1342
    par = par.loc[base_par.index, :]
8✔
1343
    li = base_par.index.map(lambda x: par.loc[x, "partrans"] == "log")
8✔
1344
    base_par.loc[li] = base_par.loc[li].apply(np.log10)
8✔
1345
    jco_cols = {}
8✔
1346
    for irun in range(1, int(np.squeeze(header["n_runs"]))):
8✔
1347
        par_df, obs_df = read_pestpp_runstorage(rnj_filename, irun=irun)
8✔
1348
        par_df.loc[li] = par_df.loc[li].apply(np.log10)
8✔
1349
        obs_diff = base_obs - obs_df
8✔
1350
        par_diff = base_par - par_df
8✔
1351
        # check only one non-zero element per col(par)
1352
        if len(par_diff[par_diff.parval1 != 0]) > 1:
8✔
1353
            raise Exception(
×
1354
                "more than one par diff - looks like the file wasn't created during jco filling..."
1355
            )
1356
        parnme = par_diff[par_diff.parval1 != 0].index[0]
8✔
1357
        parval = par_diff.parval1.loc[parnme]
8✔
1358

1359
        # derivatives
1360
        jco_col = obs_diff / parval
8✔
1361
        # some tracking, checks
1362
        print("processing par {0}: {1}...".format(irun, parnme))
8✔
1363
        print(
8✔
1364
            "%nzsens: {0}%...".format(
1365
                (jco_col[abs(jco_col.obsval) > 1e-8].shape[0] / jco_col.shape[0])
1366
                * 100.0
1367
            )
1368
        )
1369

1370
        jco_cols[parnme] = jco_col.obsval
8✔
1371

1372
    jco_cols = pd.DataFrame.from_records(
8✔
1373
        data=jco_cols, index=list(obs_diff.index.values)
1374
    )
1375

1376
    jco_cols = pyemu.Jco.from_dataframe(jco_cols)
8✔
1377

1378
    # write # memory considerations important here for very large matrices - break into chunks...
1379
    # jco_fnam = "{0}".format(filename[:-4]+".jco")
1380
    # jco_cols.to_binary(filename=jco_fnam, droptol=None, chunk=None)
1381

1382
    return jco_cols
8✔
1383

1384

1385
def parse_dir_for_io_files(d, prepend_path=False):
9✔
1386
    """find template/input file pairs and instruction file/output file
1387
    pairs by extension.
1388

1389
    Args:
1390
        d (`str`): directory to search for interface files
1391
        prepend_path (`bool`, optional): flag to prepend `d` to each file name.
1392
            Default is False
1393

1394
    Returns:
1395
        tuple containing
1396

1397
        - **[`str`]**: list of template files in d
1398
        - **[`str`]**: list of input files in d
1399
        - **[`str`]**: list of instruction files in d
1400
        - **[`str`]**: list of output files in d
1401

1402
    Note:
1403
        the return values from this function can be passed straight to
1404
        `pyemu.Pst.from_io_files()` classmethod constructor.
1405

1406
        Assumes the template file names are <input_file>.tpl and instruction file names
1407
        are <output_file>.ins.
1408

1409
    Example::
1410

1411
        files = pyemu.helpers.parse_dir_for_io_files("template",prepend_path=True)
1412
        pst = pyemu.Pst.from_io_files(*files,pst_path=".")
1413

1414

1415
    """
1416

1417
    files = os.listdir(d)
×
1418
    tpl_files = [f for f in files if f.endswith(".tpl")]
×
1419
    in_files = [f.replace(".tpl", "") for f in tpl_files]
×
1420
    ins_files = [f for f in files if f.endswith(".ins")]
×
1421
    out_files = [f.replace(".ins", "") for f in ins_files]
×
1422
    if prepend_path:
×
1423
        tpl_files = [os.path.join(d, item) for item in tpl_files]
×
1424
        in_files = [os.path.join(d, item) for item in in_files]
×
1425
        ins_files = [os.path.join(d, item) for item in ins_files]
×
1426
        out_files = [os.path.join(d, item) for item in out_files]
×
1427

1428
    return tpl_files, in_files, ins_files, out_files
×
1429

1430

1431
def pst_from_io_files(
9✔
1432
    tpl_files, in_files, ins_files, out_files, pst_filename=None, pst_path=None
1433
):
1434
    """create a Pst instance from model interface files.
1435

1436
    Args:
1437
        tpl_files ([`str`]): list of template file names
1438
        in_files ([`str`]): list of model input file names (pairs with template files)
1439
        ins_files ([`str`]): list of instruction file names
1440
        out_files ([`str`]): list of model output file names (pairs with instruction files)
1441
        pst_filename (`str`): name of control file to write.  If None, no file is written.
1442
            Default is None
1443
        pst_path (`str`): the path to append to the template_file and in_file in the control file.  If
1444
            not None, then any existing path in front of the template or in file is split off
1445
            and pst_path is prepended.  If python is being run in a directory other than where the control
1446
            file will reside, it is useful to pass `pst_path` as `.`.  Default is None
1447

1448

1449
    Returns:
1450
        `Pst`: new control file instance with parameter and observation names
1451
        found in `tpl_files` and `ins_files`, respectively.
1452

1453
    Note:
1454
        this function is also exposed as the `pyemu.Pst.from_io_files()` classmethod
1455

1456
        Assigns generic values for parameter info.  Tries to use INSCHEK
1457
        to set somewhat meaningful observation values
1458

1459
        all file paths are relative to where python is running.
1460

1461
    Example::
1462

1463
        tpl_files = ["my.tpl"]
1464
        in_files = ["my.in"]
1465
        ins_files = ["my.ins"]
1466
        out_files = ["my.out"]
1467
        pst = pyemu.Pst.from_io_files(tpl_files,in_files,ins_files,out_files)
1468
        pst.control_data.noptmax = 0
1469
        pst.write("my.pst)
1470

1471

1472

1473
    """
1474
    par_names = set()
9✔
1475
    if not isinstance(tpl_files, list):
9✔
1476
        tpl_files = [tpl_files]
8✔
1477
    if not isinstance(in_files, list):
9✔
1478
        in_files = [in_files]
8✔
1479
    assert len(in_files) == len(tpl_files), "len(in_files) != len(tpl_files)"
9✔
1480

1481
    for tpl_file in tpl_files:
9✔
1482
        assert os.path.exists(tpl_file), "template file not found: " + str(tpl_file)
9✔
1483
        # new_names = [name for name in pyemu.pst_utils.parse_tpl_file(tpl_file) if name not in par_names]
1484
        # par_names.extend(new_names)
1485
        new_names = pyemu.pst_utils.parse_tpl_file(tpl_file)
9✔
1486
        par_names.update(new_names)
9✔
1487

1488
    if not isinstance(ins_files, list):
9✔
1489
        ins_files = [ins_files]
8✔
1490
    if not isinstance(out_files, list):
9✔
1491
        out_files = [out_files]
8✔
1492
    assert len(ins_files) == len(out_files), "len(ins_files) != len(out_files)"
9✔
1493

1494
    obs_names = []
9✔
1495
    for ins_file in ins_files:
9✔
1496
        assert os.path.exists(ins_file), "instruction file not found: " + str(ins_file)
9✔
1497
        obs_names.extend(pyemu.pst_utils.parse_ins_file(ins_file))
9✔
1498

1499
    new_pst = pyemu.pst_utils.generic_pst(list(par_names), list(obs_names))
9✔
1500

1501
    if "window" in platform.platform().lower() and pst_path == ".":
9✔
1502
        pst_path = ""
4✔
1503

1504
    # new_pst.instruction_files = ins_files
1505
    # new_pst.output_files = out_files
1506
    new_pst.model_output_data = pd.DataFrame(
9✔
1507
        {"pest_file": ins_files, "model_file": out_files}, index=ins_files
1508
    )
1509

1510
    # try to run inschek to find the observation values
1511
    # do this here with full paths to files
1512
    pyemu.pst_utils.try_process_output_pst(new_pst)
9✔
1513

1514
    if pst_path is not None:
9✔
1515
        tpl_files = [
8✔
1516
            os.path.join(pst_path, os.path.split(tpl_file)[-1])
1517
            for tpl_file in tpl_files
1518
        ]
1519
        in_files = [
8✔
1520
            os.path.join(pst_path, os.path.split(in_file)[-1]) for in_file in in_files
1521
        ]
1522
        # now set the true path location to instruction files and output files
1523
        ins_files = [
8✔
1524
            os.path.join(pst_path, os.path.split(ins_file)[-1])
1525
            for ins_file in ins_files
1526
        ]
1527
        out_files = [
8✔
1528
            os.path.join(pst_path, os.path.split(out_file)[-1])
1529
            for out_file in out_files
1530
        ]
1531

1532
    new_pst.model_input_data = pd.DataFrame(
9✔
1533
        {"pest_file": tpl_files, "model_file": in_files}, index=tpl_files
1534
    )
1535

1536
    new_pst.model_output_data = pd.DataFrame(
9✔
1537
        {"pest_file": ins_files, "model_file": out_files}, index=ins_files
1538
    )
1539

1540
    new_pst.try_parse_name_metadata()
9✔
1541
    if pst_filename:
9✔
1542
        new_pst.write(pst_filename)
8✔
1543

1544
    return new_pst
9✔
1545

1546

1547
class PstFromFlopyModel(object):
9✔
1548
    """
9✔
1549
    Deprecated Class. Try `pyemu.utils.PstFrom()` instead.
1550
    A legacy version can be accessed from `pyemu.legacy`, if desperate.
1551
    """
1552

1553
    def __init__(*args, **kwargs):
9✔
1554
        dep_warn = (
×
1555
            "\n`PstFromFlopyModel()` is deprecated."
1556
            "Checkout `pyemu.utils.PstFrom()` instead.\n"
1557
            "If you really want `PstFromFlopyModel()` a legacy version "
1558
            "sits in `pyemu.legacy`"
1559
        )
1560
        raise DeprecationWarning(dep_warn)
×
1561

1562

1563
def apply_list_and_array_pars(arr_par_file="mult2model_info.csv", chunk_len=50):
9✔
1564
    """Apply multiplier parameters to list and array style model files
1565

1566
    Args:
1567
        arr_par_file (`str`): path to the csv file relating parameter
1568
            multipliers to model input files.  Default is "mult2model_info.csv"
        chunk_len (`int`): the number of files to process per multiprocessing
1569
            chunk in `apply_array_pars()`.  Default is 50.
1570

1571
    Returns:
1572

1573
    Note:
1574
        Used to implement the parameterization constructed by
1575
        PstFrom during a forward run
1576

1577
        Should be added to the forward_run.py script; added programmatically
1578
        by `PstFrom.build_pst()`
1579
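    Example (a minimal sketch of forward_run.py usage, assuming
    "mult2model_info.csv" was written by `PstFrom.build_pst()`)::

        pyemu.helpers.apply_list_and_array_pars(
            arr_par_file="mult2model_info.csv", chunk_len=50
        )
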
    """
1580
    df = pd.read_csv(arr_par_file, index_col=0)
9✔
1581
    if "operator" not in df.columns:
9✔
1582
        df.loc[:, "operator"] = "m"
×
1583
    df.loc[pd.isna(df.operator), "operator"] = "m"
9✔
1584
    file_cols = df.columns.values[df.columns.str.contains("file")]
9✔
1585
    for file_col in file_cols:
9✔
1586
        df.loc[:, file_col] = df.loc[:, file_col].apply(
9✔
1587
            lambda x: os.path.join(*x.replace("\\","/").split("/"))
1588
            if isinstance(x,str) else x
1589
        )
1590
    arr_pars = df.loc[df.index_cols.isna()].copy()
9✔
1591
    list_pars = df.loc[df.index_cols.notna()].copy()
9✔
1592
    # extract lists from string in input df
1593
    list_pars["index_cols"] = list_pars.index_cols.apply(literal_eval)
9✔
1594
    list_pars["use_cols"] = list_pars.use_cols.apply(literal_eval)
9✔
1595
    list_pars["lower_bound"] = list_pars.lower_bound.apply(literal_eval)
9✔
1596
    list_pars["upper_bound"] = list_pars.upper_bound.apply(literal_eval)
9✔
1597

1598
    # TODO check use_cols is always present
1599
    apply_genericlist_pars(list_pars, chunk_len=chunk_len)
9✔
1600
    apply_array_pars(arr_pars, chunk_len=chunk_len)
9✔
1601

1602

1603
def _process_chunk_fac2real(chunk, i):
9✔
1604
    for args in chunk:
9✔
1605
        pyemu.geostats.fac2real(**args)
9✔
1606
    print("process", i, " processed ", len(chunk), "fac2real calls")
9✔
1607

1608

1609
def _process_chunk_array_files(chunk, i, df):
9✔
1610
    for model_file in chunk:
9✔
1611
        _process_array_file(model_file, df)
9✔
1612
    print("process", i, " processed ", len(chunk), "process_array_file calls")
9✔
1613

1614

1615
def _process_array_file(model_file, df):
9✔
1616
    if "operator" not in df.columns:
9✔
1617
        df.loc[:, "operator"] = "m"
1✔
1618
    # find all mults that need to be applied to this array
1619
    df_mf = df.loc[df.model_file == model_file, :]
9✔
1620
    results = []
9✔
1621
    org_file = df_mf.org_file.unique()
9✔
1622
    if org_file.shape[0] != 1:
9✔
1623
        raise Exception("wrong number of org_files for {0}".format(model_file))
×
1624
    org_arr = np.loadtxt(org_file[0])
9✔
1625

1626
    if "mlt_file" in df_mf.columns:
9✔
1627
        for mlt, operator in zip(df_mf.mlt_file, df_mf.operator):
9✔
1628
            if pd.isna(mlt):
9✔
1629
                continue
8✔
1630
            mlt_data = np.loadtxt(mlt)
9✔
1631
            if org_arr.shape != mlt_data.shape:
9✔
1632
                raise Exception(
×
1633
                    "shape of org file {}:{} differs from mlt file {}:{}".format(
1634
                        org_file, org_arr.shape, mlt, mlt_data.shape
1635
                    )
1636
                )
1637
            if operator == "*" or operator.lower()[0] == "m":
9✔
1638
                org_arr *= mlt_data
9✔
1639
            elif operator == "+" or operator.lower()[0] == "a":
×
1640
                org_arr += mlt_data
×
1641
            else:
1642
                raise Exception(
×
1643
                    "unrecognized operator '{0}' for mlt file '{1}'".format(
1644
                        operator, mlt
1645
                    )
1646
                )
1647
        if "upper_bound" in df.columns:
9✔
1648
            ub_vals = df_mf.upper_bound.value_counts().dropna().to_dict()
9✔
1649
            if len(ub_vals) == 0:
9✔
1650
                pass
×
1651
            elif len(ub_vals) > 1:
9✔
1652
                print(ub_vals)
×
1653
                raise Exception("different upper bound values for {0}".format(org_file))
×
1654
            else:
1655
                ub = float(list(ub_vals.keys())[0])
9✔
1656
                org_arr[org_arr > ub] = ub
9✔
1657
        if "lower_bound" in df.columns:
9✔
1658
            lb_vals = df_mf.lower_bound.value_counts().dropna().to_dict()
9✔
1659
            if len(lb_vals) == 0:
9✔
1660
                pass
×
1661
            elif len(lb_vals) > 1:
9✔
1662
                raise Exception("different lower bound values for {0}".format(org_file))
×
1663
            else:
1664
                lb = float(list(lb_vals.keys())[0])
9✔
1665
                org_arr[org_arr < lb] = lb
9✔
1666

1667
    np.savetxt(model_file, np.atleast_2d(org_arr), fmt="%15.6E", delimiter="")
9✔
1668

1669

1670
def apply_array_pars(arr_par="arr_pars.csv", arr_par_file=None, chunk_len=50):
9✔
1671
    """a function to apply array-based multipler parameters.
1672

1673
    Args:
1674
        arr_par (`str` or `pandas.DataFrame`): if type `str`, path to a csv
1675
            file detailing parameter array multipliers.  This file can be
1676
            written by `PstFrom`.  If type `pandas.DataFrame`, a dataframe
1677
            with columns of ['mlt_file', 'model_file', 'org_file'] and
1678
            optionally ['pp_file', 'fac_file'].
1679
        chunk_len (`int`): the number of files to process per chunk
1680
            with multiprocessing - applies to both the fac2real and
1681
            multiplier-application steps. Default is 50.
1683

1684
    Note:
1685
        Used to implement the parameterization constructed by
1686
        PstFromFlopyModel during a forward run
1687

1688
        This function should be added to the forward_run.py script but can
1689
        be called on any correctly formatted csv
1690

1691
        This function uses multiprocessing, spawning one process for each
1692
        model input array (and optionally pp files).  This reduces
1693
        execution time considerably, but means you need to make sure your
1694
        forward run script uses the proper multiprocessing idioms for
1695
        freeze support and main thread handling (`PstFrom` does this for you).
1696

1697
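    Example (a minimal sketch of the multiprocessing idioms mentioned
    above; the csv name is just the function default)::

        import multiprocessing as mp

        def main():
            pyemu.helpers.apply_array_pars("arr_pars.csv", chunk_len=50)

        if __name__ == "__main__":
            mp.freeze_support()  # needed for frozen/Windows deployments
            main()
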
    """
1698
    if arr_par_file is not None:
9✔
1699
        warnings.warn(
×
1700
            "`arr_par_file` argument is deprecated and replaced "
1701
            "by arr_par. Method now support passing DataFrame as "
1702
            "arr_par arg.",
1703
            PyemuWarning,
1704
        )
1705
        arr_par = arr_par_file
×
1706
    if isinstance(arr_par, str):
9✔
1707
        df = pd.read_csv(arr_par, index_col=0)
1✔
1708
    elif isinstance(arr_par, pd.DataFrame):
9✔
1709
        df = arr_par
9✔
1710
    else:
1711
        raise TypeError(
×
1712
            "`arr_par` argument must be filename string or "
1713
            "Pandas DataFrame, "
1714
            "type {0} passed".format(type(arr_par))
1715
        )
1716
    # for fname in df.model_file:
1717
    #     try:
1718
    #         os.remove(fname)
1719
    #     except:
1720
    #         print("error removing mult array:{0}".format(fname))
1721

1722
    if "pp_file" in df.columns:
9✔
1723
        print("starting fac2real", datetime.now())
9✔
1724
        pp_df = df.loc[
9✔
1725
            df.pp_file.notna(),
1726
            [
1727
                "pp_file",
1728
                "fac_file",
1729
                "mlt_file",
1730
                "pp_fill_value",
1731
                "pp_lower_limit",
1732
                "pp_upper_limit",
1733
            ],
1734
        ].rename(
1735
            columns={
1736
                "fac_file": "factors_file",
1737
                "mlt_file": "out_file",
1738
                "pp_fill_value": "fill_value",
1739
                "pp_lower_limit": "lower_lim",
1740
                "pp_upper_limit": "upper_lim",
1741
            }
1742
        )
1743
        # don't need to process all (e.g. if const. mults apply across kper...)
1744
        pp_args = pp_df.drop_duplicates().to_dict("records")
9✔
1745
        num_ppargs = len(pp_args)
9✔
1746
        num_chunk_floor = num_ppargs // chunk_len
9✔
1747
        main_chunks = (
9✔
1748
            np.array(pp_args)[: num_chunk_floor * chunk_len]
1749
            .reshape([-1, chunk_len])
1750
            .tolist()
1751
        )
1752
        remainder = np.array(pp_args)[num_chunk_floor * chunk_len :].tolist()
9✔
1753
        chunks = main_chunks + [remainder]
9✔
1754
        print("number of chunks to process:", len(chunks))
9✔
1755
        if len(chunks) == 1:
9✔
1756
            _process_chunk_fac2real(chunks[0], 0)
9✔
1757
        else:
1758
            pool = mp.Pool(processes=min(mp.cpu_count(), len(chunks), 60))
8✔
1759
            x = [
8✔
1760
                pool.apply_async(_process_chunk_fac2real, args=(chunk, i))
1761
                for i, chunk in enumerate(chunks)
1762
            ]
1763
            [xx.get() for xx in x]
8✔
1764
            pool.close()
8✔
1765
            pool.join()
8✔
1766
        # procs = []
1767
        # for chunk in chunks:
1768
        #     p = mp.Process(target=_process_chunk_fac2real, args=[chunk])
1769
        #     p.start()
1770
        #     procs.append(p)
1771
        # for p in procs:
1772
        #     p.join()
1773

1774
        print("finished fac2real", datetime.now())
9✔
1775

1776
    print("starting arr mlt", datetime.now())
9✔
1777
    uniq = df.model_file.unique()  # unique model input files to be produced
9✔
1778
    num_uniq = len(uniq)  # number of input files to be produced
9✔
1779
    # number of files to send to each processor
1780
    # lazily splitting the files to be processed into even chunks
1781
    num_chunk_floor = num_uniq // chunk_len  # number of whole chunks
9✔
1782
    main_chunks = (
9✔
1783
        uniq[: num_chunk_floor * chunk_len].reshape([-1, chunk_len]).tolist()
1784
    )  # the list of files broken down into chunks
1785
    remainder = uniq[num_chunk_floor * chunk_len :].tolist()  # remaining files
9✔
1786
    chunks = main_chunks + [remainder]
9✔
1787
    print("number of chunks to process:", len(chunks))
9✔
1788
    if len(chunks) == 1:
9✔
1789
        _process_chunk_array_files(chunks[0], 0, df)
9✔
1790
    # procs = []
1791
    # for chunk in chunks:  # now only spawn processor for each chunk
1792
    #     p = mp.Process(target=_process_chunk_model_files, args=[chunk, df])
1793
    #     p.start()
1794
    #     procs.append(p)
1795
    # for p in procs:
1796
    #     r = p.get(False)
1797
    #     p.join()
1798
    else:
1799
        pool = mp.Pool(processes=min(mp.cpu_count(), len(chunks), 60))
8✔
1800
        x = [
8✔
1801
            pool.apply_async(_process_chunk_array_files, args=(chunk, i, df))
1802
            for i, chunk in enumerate(chunks)
1803
        ]
1804
        [xx.get() for xx in x]
8✔
1805
        pool.close()
8✔
1806
        pool.join()
8✔
1807
    print("finished arr mlt", datetime.now())
9✔
1808

1809

1810
def setup_temporal_diff_obs(*args, **kwargs):
9✔
1811
    """a helper function to setup difference-in-time observations based on an existing
1812
    set of observations in an instruction file using the observation grouping in the
1813
    control file
1814

1815
    Args:
1816
        pst (`pyemu.Pst`): existing control file
1817
        ins_file (`str`): an existing instruction file
1818
        out_file (`str`, optional): an existing model output file that corresponds to
1819
            the instruction file.  If None, `ins_file.replace(".ins","")` is used
1820
        include_zero_weight (`bool`, optional): flag to include zero-weighted observations
1821
            in the difference observation process.  Default is False so that only non-zero
1822
            weighted observations are used.
1823
        include_path (`bool`, optional): flag to setup the binary file processing in directory where the hds_file
1824
            is located (if different from where python is running).  This is useful for setting up
1825
            the process in separate directory for where python is running.
1826
        sort_by_name (`bool`,optional): flag to sort observation names in each group prior to setting up
1827
            the differencing.  The order of the observations matters for the differencing.  If False, then
1828
            the control file order is used.  If observation names have a datetime suffix, make sure the format is
1829
            year-month-day to use this sorting.  Default is True
1830
        long_names (`bool`, optional): flag to use long, descriptive names by concating the two observation names
1831
            that are being differenced.  This will produce names that are too long for tradtional PEST(_HP).
1832
            Default is True.
1833
        prefix (`str`, optional): prefix to prepend to observation names and group names.  Default is "dif".
1834

1835
    Returns:
1836
        tuple containing
1837

1838
        - **str**: the forward run command to execute the binary file process during model runs.
1839

1840
        - **pandas.DataFrame**: a dataframe of observation information for use in the pest control file
1841

1842
    Note:
1843
        This is the companion function of `helpers.apply_temporal_diff_obs()`.
1844

1845

1846

1847
    """
1848
    warnings.warn("This method (and companion method `apply_temporal_diff_obs()`"
×
1849
                  "are associated with the deprecated `PstFromFlopyModel` class. "
1850
                  "They are now untested.", DeprecationWarning)
1851
    from pyemu.legacy import setup_temporal_diff_obs
×
1852
    return setup_temporal_diff_obs(*args, **kwargs)
×
1853

1854

1855
def calc_array_par_summary_stats(arr_par_file="mult2model_info.csv"):
9✔
1856
    """read and generate summary statistics for the resulting model input arrays from
1857
    applying array par multipliers
1858

1859
    Args:
1860
        arr_par_file (`str`): the array multiplier key file
1861

1862
    Returns:
1863
        pd.DataFrame: dataframe of summary stats for each model_file entry
1864

1865
    Note:
1866
        This function uses an optional "zone_file" column in the `arr_par_file`. If multiple zone
1867
        files are used, then zone arrays are aggregated to a single array
1868

1869
        "dif" values are original array values minus model input array values
1870

1871
        The outputs from the function can be used to monitor model input array
1872
        changes that occur during PE/UQ analyses, especially when the parameters
1873
        are multiplier types and the dimensionality is very high.
1874

1875
        Consider using `PstFrom.add_observations()` to setup obs for the csv file
1876
        that this function writes.
1877

1878
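    Example (a minimal sketch; call after the multipliers have been applied
    so the model input arrays on disk reflect current parameter values)::

        df = pyemu.helpers.calc_array_par_summary_stats("mult2model_info.csv")
        # the same summary is also written to "arr_par_summary.csv"
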
    """
1879
    df = pd.read_csv(arr_par_file, index_col=0)
8✔
1880
    df = df.loc[df.index_cols.isna(), :].copy()
8✔
1881
    if df.shape[0] == 0:
8✔
1882
        return None
×
1883
    file_cols = df.columns.values[df.columns.str.contains("file")]
8✔
1884
    for file_col in file_cols:
8✔
1885
        df.loc[:, file_col] = df.loc[:, file_col].apply(
8✔
1886
            lambda x: os.path.join(*x.replace("\\", "/").split("/")) if isinstance(x, str) else x)
1887
    model_input_files = df.model_file.unique()
8✔
1888
    model_input_files.sort()
8✔
1889
    records = dict()
8✔
1890
    stat_dict = {
8✔
1891
        "mean": np.nanmean,
1892
        "stdev": np.nanstd,
1893
        "median": np.nanmedian,
1894
        "min": np.nanmin,
1895
        "max": np.nanmax,
1896
    }
1897
    quantiles = [0.05, 0.25, 0.75, 0.95]
8✔
1898
    for stat in stat_dict.keys():
8✔
1899
        records[stat] = []
8✔
1900
        records[stat + "_org"] = []
8✔
1901
        records[stat + "_dif"] = []
8✔
1902

1903
    for q in quantiles:
8✔
1904
        records["quantile_{0}".format(q)] = []
8✔
1905
        records["quantile_{0}_org".format(q)] = []
8✔
1906
        records["quantile_{0}_dif".format(q)] = []
8✔
1907
    records["upper_bound"] = []
8✔
1908
    records["lower_bound"] = []
8✔
1909
    records["upper_bound_org"] = []
8✔
1910
    records["lower_bound_org"] = []
8✔
1911
    records["upper_bound_dif"] = []
8✔
1912
    records["lower_bound_dif"] = []
8✔
1913

1914
    for model_input_file in model_input_files:
8✔
1915

1916
        arr = np.loadtxt(model_input_file)
8✔
1917
        org_file = df.loc[df.model_file == model_input_file, "org_file"].values
8✔
1918
        org_file = org_file[0]
8✔
1919
        org_arr = np.loadtxt(org_file)
8✔
1920
        if "zone_file" in df.columns:
8✔
1921
            zone_file = (
×
1922
                df.loc[df.model_file == model_input_file, "zone_file"].dropna().unique()
1923
            )
1924
            zone_arr = None
×
1925
            if len(zone_file) > 1:
×
1926
                zone_arr = np.zeros_like(arr)
×
1927
                for zf in zone_file:
×
1928
                    za = np.loadtxt(zf)
×
1929
                    zone_arr[za != 0] = 1
×
1930
            elif len(zone_file) == 1:
×
1931
                zone_arr = np.loadtxt(zone_file[0])
×
1932
            if zone_arr is not None:
×
1933
                arr[zone_arr == 0] = np.NaN
×
1934
                org_arr[zone_arr == 0] = np.NaN
×
1935

1936
        for stat, func in stat_dict.items():
8✔
1937
            v = func(arr)
8✔
1938
            records[stat].append(v)
8✔
1939
            ov = func(org_arr)
8✔
1940
            records[stat + "_org"].append(ov)
8✔
1941
            records[stat + "_dif"].append(ov - v)
8✔
1942
        for q in quantiles:
8✔
1943
            v = np.nanquantile(arr, q)
8✔
1944
            ov = np.nanquantile(org_arr, q)
8✔
1945
            records["quantile_{0}".format(q)].append(v)
8✔
1946
            records["quantile_{0}_org".format(q)].append(ov)
8✔
1947
            records["quantile_{0}_dif".format(q)].append(ov - v)
8✔
1948
        ub = df.loc[df.model_file == model_input_file, "upper_bound"].max()
8✔
1949
        lb = df.loc[df.model_file == model_input_file, "lower_bound"].min()
8✔
1950
        if pd.isna(ub):
8✔
1951
            records["upper_bound"].append(0)
×
1952
            records["upper_bound_org"].append(0)
×
1953
            records["upper_bound_dif"].append(0)
×
1954

1955
        else:
1956
            iarr = np.zeros_like(arr)
8✔
1957
            iarr[arr == ub] = 1
8✔
1958
            v = iarr.sum()
8✔
1959
            iarr = np.zeros_like(arr)
8✔
1960
            iarr[org_arr == ub] = 1
8✔
1961
            ov = iarr.sum()
8✔
1962
            records["upper_bound"].append(v)
8✔
1963
            records["upper_bound_org"].append(ov)
8✔
1964
            records["upper_bound_dif"].append(ov - v)
8✔
1965

1966
        if pd.isna(lb):
8✔
1967
            records["lower_bound"].append(0)
×
1968
            records["lower_bound_org"].append(0)
×
1969
            records["lower_bound_dif"].append(0)
×
1970

1971
        else:
1972
            iarr = np.zeros_like(arr)
8✔
1973
            iarr[arr == lb] = 1
8✔
1974
            v = iarr.sum()
8✔
1975
            iarr = np.zeros_like(arr)
8✔
1976
            iarr[org_arr == lb] = 1
8✔
1977
            ov = iarr.sum()
8✔
1978
            records["lower_bound"].append(v)
8✔
1979
            records["lower_bound_org"].append(ov)
8✔
1980
            records["lower_bound_dif"].append(ov - v)
8✔
1981

1982
    # scrub model input files
1983
    model_input_files = [
8✔
1984
        f.replace(".", "_").replace("\\", "_").replace("/", "_")
1985
        for f in model_input_files
1986
    ]
1987
    df = pd.DataFrame(records, index=model_input_files)
8✔
1988
    df.index.name = "model_file"
8✔
1989
    df.to_csv("arr_par_summary.csv")
8✔
1990
    return df
8✔
1991

1992

1993
def apply_genericlist_pars(df, chunk_len=50):
9✔
1994
    """a function to apply list style mult parameters
1995

1996
    Args:
1997
        df (pandas.DataFrame): DataFrame that relates files containing
1998
            multipliers to model input file names. Required columns include:
1999
            {"model_file": file name of resulatant model input file,
2000
            "org_file": file name of original file that multipliers act on,
2001
            "fmt": format specifier for model input file (currently on 'free' supported),
2002
            "sep": separator for model input file if 'free' formatted,
2003
            "head_rows": Number of header rows to transfer from orig file to model file,
2004
            "index_cols": list of columns (either indexes or strings) to be used to align mults, orig and model files,
2005
            "use_cols": columns to mults act on,
2006
            "upper_bound": ultimate upper bound for model input file parameter,
2007
            "lower_bound": ultimate lower bound for model input file parameter}
2008
        chunk_len (`int`): number of files for each multiprocessing chunk to handle.
2009
            Default is 50.
2010

2011
    Note:
2012
        This function is called programmatically during the `PstFrom` forward run process
2013

2014

2015
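    Example (a minimal sketch; normally `PstFrom` writes the
    "mult2model_info.csv" file that supplies this dataframe)::

        from ast import literal_eval
        df = pd.read_csv("mult2model_info.csv", index_col=0)
        list_pars = df.loc[df.index_cols.notna()].copy()
        # list-like entries are stored as strings in the csv
        for col in ["index_cols", "use_cols", "lower_bound", "upper_bound"]:
            list_pars[col] = list_pars[col].apply(literal_eval)
        pyemu.helpers.apply_genericlist_pars(list_pars, chunk_len=50)
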
    """
2016
    print("starting list mlt", datetime.now())
9✔
2017
    uniq = df.model_file.unique()  # unique model input files to be produced
9✔
2018
    num_uniq = len(uniq)  # number of input files to be produced
9✔
2019
    # number of files to send to each processor
2020
    # lazily splitting the files to be processed into even chunks
2021
    num_chunk_floor = num_uniq // chunk_len  # number of whole chunks
9✔
2022
    main_chunks = (
9✔
2023
        uniq[: num_chunk_floor * chunk_len].reshape([-1, chunk_len]).tolist()
2024
    )  # the list of files broken down into chunks
2025
    remainder = uniq[num_chunk_floor * chunk_len :].tolist()  # remaining files
9✔
2026
    chunks = main_chunks + [remainder]
9✔
2027
    print("number of chunks to process:", len(chunks))
9✔
2028
    if len(chunks) == 1:
9✔
2029
        _process_chunk_list_files(chunks[0], 0, df)
9✔
2030
    else:
2031
        pool = mp.Pool(processes=min(mp.cpu_count(), len(chunks), 60))
8✔
2032
        x = [
8✔
2033
            pool.apply_async(_process_chunk_list_files, args=(chunk, i, df))
2034
            for i, chunk in enumerate(chunks)
2035
        ]
2036
        [xx.get() for xx in x]
8✔
2037
        pool.close()
8✔
2038
        pool.join()
8✔
2039
    print("finished list mlt", datetime.now())
9✔
2040

2041

2042
def _process_chunk_list_files(chunk, i, df):
9✔
2043
    for model_file in chunk:
9✔
2044
        try:
9✔
2045
            _process_list_file(model_file, df)
9✔
2046
        except Exception as e:
×
2047
            f"{e}: Issue processing model file {model_file}"
2048
            raise e
×
2049
    print("process", i, " processed ", len(chunk), "process_list_file calls")
9✔
2050

2051

2052
def _list_index_caster(x, add1):
9✔
2053
    vals = []
9✔
2054
    for xx in x:
9✔
2055
        if xx:
9✔
2056
            if (xx.strip().isdigit() or
9✔
2057
                    (xx.strip()[0] == '-' and xx.strip()[1:].isdigit())):
2058
                vals.append(add1 + int(xx))
9✔
2059
            else:
2060
                try:
8✔
2061
                    vals.append(float(xx))
8✔
2062
                except Exception:
8✔
2063
                    vals.append(xx.strip().strip("'\" "))
8✔
2064

2065
    return tuple(vals)
9✔
2066

2067

2068
def _list_index_splitter_and_caster(x, add1):
9✔
2069
    return _list_index_caster(x.strip("()").replace('\'', '').split(","), add1)
9✔
2070

2071

2072
def _process_list_file(model_file, df):
9✔
2073
    # print("processing model file:", model_file)
2074
    df_mf = df.loc[df.model_file == model_file, :].copy()
9✔
2075
    # read data stored in org (mults act on this)
2076
    org_file = df_mf.org_file.unique()
9✔
2077
    if org_file.shape[0] != 1:
9✔
2078
        raise Exception("wrong number of org_files for {0}".format(model_file))
×
2079
    org_file = org_file[0]
9✔
2080
    # print("org file:", org_file)
2081
    notfree = df_mf.fmt[df_mf.fmt != "free"]
9✔
2082
    if len(notfree) > 1:
9✔
2083
        raise Exception(
×
2084
            "too many different format specifiers for "
2085
            "model file: {0}".format(model_file)
2086
        )
2087
    elif len(notfree) == 1:
9✔
2088
        fmt = notfree.values[0]
8✔
2089
    else:
2090
        fmt = df_mf.fmt.values[-1]
9✔
2091
    if fmt == "free":
9✔
2092
        if df_mf.sep.dropna().nunique() > 1:
9✔
2093
            raise Exception(
×
2094
                "too many different sep specifiers for "
2095
                "model file: {0}".format(model_file)
2096
            )
2097
        else:
2098
            sep = df_mf.sep.dropna().values[-1]
9✔
2099
    else:
2100
        sep = None
8✔
2101
    datastrtrow = df_mf.head_rows.values[-1]
9✔
2102
    if fmt.lower() == "free" and sep == " ":
9✔
2103
        delim_whitespace = True
9✔
2104
    if datastrtrow > 0:
9✔
2105
        with open(org_file, "r") as fp:
8✔
2106
            storehead = [next(fp) for _ in range(datastrtrow)]
8✔
2107
    else:
2108
        storehead = []
9✔
2109
    # work out if headers are used for index_cols
2110
    # big assumption here that int type index cols will not be written as headers
2111
    index_col_eg = df_mf.index_cols.iloc[-1][0]
9✔
2112
    if isinstance(index_col_eg, str):
9✔
2113
        # TODO: add test for model file with headers
2114
        # index_cols can be from header str
2115
        header = 0
8✔
2116
        hheader = True
8✔
2117
    elif isinstance(index_col_eg, (int, np.integer)):
9✔
2118
        # index_cols are column numbers in input file
2119
        header = None
9✔
2120
        hheader = None
9✔
2121
        # actually do need index cols to be list of strings
2122
        # to be compatible when the saved original file is read in.
2123
        df_mf.loc[:, "index_cols"] = df_mf.index_cols.apply(
9✔
2124
            lambda x: [str(i) for i in x]
2125
        )
2126

2127
    # if written by PstFrom this should always be comma delim - tidy
2128
    org_data = pd.read_csv(org_file, skiprows=datastrtrow,
9✔
2129
                           header=header, dtype='object')
2130
    # mult columns will be string type, so to make sure they align
2131
    org_data.columns = org_data.columns.astype(str)
9✔
2132
    # print("org_data columns:", org_data.columns)
2133
    # print("org_data shape:", org_data.shape)
2134
    new_df = org_data.copy()
9✔
2135

2136
    for mlt in df_mf.itertuples():
9✔
2137
        new_df.loc[:, mlt.index_cols] = new_df.loc[:, mlt.index_cols].apply(
9✔
2138
            pd.to_numeric, errors='ignore', downcast='integer')
2139
        try:
9✔
2140
            new_df = new_df.reset_index().rename(
9✔
2141
                columns={"index": "oidx"}
2142
            ).set_index(
2143
                mlt.index_cols
2144
            )
2145
            new_df = new_df.sort_index()
9✔
2146
        except Exception as e:
×
2147
            print(
×
2148
                "error setting mlt index_cols: ",
2149
                str(mlt.index_cols),
2150
                " for new_df with cols: ",
2151
                list(new_df.columns),
2152
            )
2153
            raise Exception("error setting mlt index_cols: " + str(e))
×
2154

2155
        if not hasattr(mlt, "mlt_file") or pd.isna(mlt.mlt_file):
9✔
2156
            print("null mlt file for org_file '" + org_file + "', continuing...")
8✔
2157
        else:
2158
            mlts = pd.read_csv(mlt.mlt_file)
9✔
2159
            # get mult index to align with org_data,
2160
2161
            # mult idxs will always be written zero based if int
2162
            # if original model files is not zero based need to add 1
2163
            add1 = int(not mlt.zero_based)
9✔
2164

2165
            mlts.index = pd.MultiIndex.from_tuples(
9✔
2166
                mlts.sidx.apply(
2167
                    lambda x: _list_index_splitter_and_caster(x,add1)
2168
                ),
2169
                names=mlt.index_cols,
2170
            )
2171
            if mlts.index.nlevels < 2:  # just in case only one index col is used
9✔
2172
                mlts.index = mlts.index.get_level_values(0)
8✔
2173
            common_idx = (
9✔
2174
                new_df.index.intersection(mlts.index).sort_values().drop_duplicates()
2175
            )
2176
            mlt_cols = [str(col) for col in mlt.use_cols]
9✔
2177
            assert len(common_idx) == mlt.chkpar, (
9✔
2178
                "Probable miss-alignment in tpl indices and original file:\n"
2179
                f"mult idx[:10] : {mlts.index.sort_values().tolist()[:10]}\n"
2180
                f"org file idx[:10]: {new_df.index.sort_values().to_list()[:10]}\n"
2181
                f"n common: {len(common_idx)}, n cols: {len(mlt_cols)}, "
2182
                f"expected: {mlt.chkpar}."
2183
            )
2184
            operator = mlt.operator
9✔
2185
            if operator == "*" or operator.lower()[0] == "m":
9✔
2186
                new_df.loc[common_idx, mlt_cols] = \
9✔
2187
                    new_df.loc[common_idx, mlt_cols].apply(
2188
                        pd.to_numeric) * mlts.loc[common_idx, mlt_cols]
2189
            elif operator == "+" or operator.lower()[0] == "a":
8✔
2190
                new_df.loc[common_idx, mlt_cols] = \
8✔
2191
                    new_df.loc[common_idx, mlt_cols].apply(
2192
                        pd.to_numeric) + mlts.loc[common_idx, mlt_cols]
2193
            else:
2194
                raise Exception(
×
2195
                    "unsupported operator '{0}' for mlt file '{1}'".format(
2196
                        operator, mlt.mlt_file
2197
                    )
2198
                )
2199
        # bring mult index back to columns AND re-order
2200
        new_df = new_df.reset_index().set_index("oidx")[org_data.columns].sort_index()
9✔
2201
    if "upper_bound" in df.columns:
9✔
2202
        ub = df_mf.apply(
9✔
2203
            lambda x: pd.Series({str(c): b for c, b in zip(x.use_cols, x.upper_bound)}),
2204
            axis=1,
2205
        ).max()
2206
        if ub.notnull().any():
9✔
2207
            for col, val in ub.items():
9✔
2208
                numeric = new_df.loc[new_df[col].apply(np.isreal)]
9✔
2209
                sel = numeric.loc[numeric[col] > val].index
9✔
2210
                new_df.loc[sel, col] = val
9✔
2211
    if "lower_bound" in df.columns:
9✔
2212
        lb = df_mf.apply(
9✔
2213
            lambda x: pd.Series({str(c): b for c, b in zip(x.use_cols, x.lower_bound)}),
2214
            axis=1,
2215
        ).min()
2216
        if lb.notnull().any():
9✔
2217
            for col, val in lb.items():
9✔
2218
                numeric = new_df.loc[new_df[col].apply(np.isreal)]
9✔
2219
                sel = numeric.loc[numeric[col] < val].index
9✔
2220
                new_df.loc[sel, col] = val
9✔
2221
    with open(model_file, "w") as fo:
9✔
2222
        kwargs = {}
9✔
2223
        if "win" in platform.platform().lower():
9✔
2224
            kwargs = {"lineterminator": "\n"}
4✔
2225
        if len(storehead) != 0:
9✔
2226
            fo.write("\n".join(storehead))
8✔
2227
            fo.flush()
8✔
2228
        if fmt.lower() == "free":
9✔
2229
            new_df.to_csv(fo, index=False, mode="a", sep=sep, header=hheader, **kwargs)
9✔
2230
        else:
2231
            np.savetxt(
8✔
2232
                fo,
2233
                np.atleast_2d(new_df.apply(pd.to_numeric, errors="ignore").values),
2234
                fmt=fmt
2235
            )
2236

2237

2238
def build_jac_test_csv(pst, num_steps, par_names=None, forward=True):
9✔
2239
    """build a dataframe of jactest inputs for use with pestpp-swp
2240

2241
    Args:
2242
        pst (`pyemu.Pst`): existing control file
2243
        num_steps (`int`): number of perturbation steps for each parameter
2244
        par_names [`str`]: list of parameter names of pars to test.
2245
            If None, all adjustable pars are used. Default is None
2246
        forward (`bool`): flag to start with forward perturbations.
2247
            Default is True
2248

2249
    Returns:
2250
        `pandas.DataFrame`: the sequence of model runs to evaluate
2251
        for the jactesting.
2252

2253

2254
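    Example (a minimal sketch; "sweep_parameter_csv_file" is assumed here to
    be the pestpp-swp option that consumes the output)::

        pst = pyemu.Pst("my.pst")
        df = pyemu.helpers.build_jac_test_csv(pst, num_steps=5)
        df.to_csv("sweep_in.csv")
        pst.pestpp_options["sweep_parameter_csv_file"] = "sweep_in.csv"
        pst.write("my_jactest.pst")
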
    """
2255
    if isinstance(pst, str):
8✔
2256
        pst = pyemu.Pst(pst)
×
2257
    # pst.add_transform_columns()
2258
    pst.build_increments()
8✔
2259
    incr = pst.parameter_data.increment.to_dict()
8✔
2260
    irow = 0
8✔
2261
    par = pst.parameter_data
8✔
2262
    if par_names is None:
8✔
2263
        par_names = pst.adj_par_names
8✔
2264
    total_runs = num_steps * len(par_names) + 1
8✔
2265
    idx = ["base"]
8✔
2266
    for par_name in par_names:
8✔
2267
        idx.extend(["{0}_{1}".format(par_name, i) for i in range(num_steps)])
8✔
2268
    df = pd.DataFrame(index=idx, columns=pst.par_names)
8✔
2269
    li = par.partrans == "log"
8✔
2270
    lbnd = par.parlbnd.copy()
8✔
2271
    ubnd = par.parubnd.copy()
8✔
2272
    lbnd.loc[li] = lbnd.loc[li].apply(np.log10)
8✔
2273
    ubnd.loc[li] = ubnd.loc[li].apply(np.log10)
8✔
2274
    lbnd = lbnd.to_dict()
8✔
2275
    ubnd = ubnd.to_dict()
8✔
2276

2277
    org_vals = par.parval1.copy()
8✔
2278
    org_vals.loc[li] = org_vals.loc[li].apply(np.log10)
8✔
2279
    if forward:
8✔
2280
        sign = 1.0
8✔
2281
    else:
2282
        sign = -1.0
8✔
2283

2284
    # base case goes in as first row, no perturbations
2285
    df.loc["base", pst.par_names] = par.parval1.copy()
8✔
2286
    irow = 1
8✔
2287
    full_names = ["base"]
8✔
2288
    for jcol, par_name in enumerate(par_names):
8✔
2289
        org_val = org_vals.loc[par_name]
8✔
2290
        last_val = org_val
8✔
2291
        for step in range(num_steps):
8✔
2292
            vals = org_vals.copy()
8✔
2293
            i = incr[par_name]
8✔
2294

2295
            val = last_val + (sign * incr[par_name])
8✔
2296
            if val > ubnd[par_name]:
8✔
2297
                sign = -1.0
8✔
2298
                val = org_val + (sign * incr[par_name])
8✔
2299
                if val < lbnd[par_name]:
8✔
2300
                    raise Exception("parameter {0} went out of bounds".format(par_name))
×
2301
            elif val < lbnd[par_name]:
8✔
2302
                sign = 1.0
×
2303
                val = org_val + (sign * incr[par_name])
×
2304
                if val > ubnd[par_name]:
×
2305
                    raise Exception("parameter {0} went out of bounds".format(par_name))
×
2306

2307
            vals.loc[par_name] = val
8✔
2308
            vals.loc[li] = 10 ** vals.loc[li]
8✔
2309
            df.loc[idx[irow], pst.par_names] = vals
8✔
2310
            full_names.append(
8✔
2311
                "{0}_{1:<15.6E}".format(par_name, vals.loc[par_name]).strip()
2312
            )
2313

2314
            irow += 1
8✔
2315
            last_val = val
8✔
2316
    df.index = full_names
8✔
2317
    return df
8✔
2318

2319

2320
def _write_df_tpl(filename, df, sep=",", tpl_marker="~", headerlines=None, **kwargs):
9✔
2321
    """function write a pandas dataframe to a template file."""
2322
    if "lineterminator" not in kwargs:
9✔
2323
        if "win" in platform.platform().lower():
9✔
2324
            kwargs["lineterminator"] = "\n"
4✔
2325
    with open(filename, "w") as f:
9✔
2326
        f.write("ptf {0}\n".format(tpl_marker))
9✔
2327
        f.flush()
9✔
2328
        if headerlines is not None:
9✔
2329
            _add_headerlines(f, headerlines)
8✔
2330
        df.to_csv(f, sep=sep, mode="a", **kwargs)
9✔
2331

2332

2333
def _add_headerlines(f, headerlines):
9✔
2334
    lc = 0
8✔
2335
    for key in sorted(headerlines.keys()):
8✔
2336
        if key > lc:
8✔
2337
            lc += 1
×
2338
            continue
×
2339
            # TODO if we want to preserve mid-table comments,
2340
            #  these lines might help - will also need to
2341
            #  pass comment_char through so it can be
2342
            #  used by the apply methods
2343
            # to = key - lc
2344
            # df.iloc[fr:to].to_csv(
2345
            #     fp, sep=',', mode='a', header=hheader, # todo - presence of header may cause an issue with this
2346
            #     **kwargs)
2347
            # lc += to - fr
2348
            # fr = to
2349
        f.write(headerlines[key])
8✔
2350
        f.flush()
8✔
2351
        lc += 1
8✔
2352

2353

2354
def setup_fake_forward_run(
9✔
2355
    pst, new_pst_name, org_cwd=".", bak_suffix="._bak", new_cwd="."
2356
):
2357
    """setup a fake forward run for a pst.
2358

2359
    Args:
2360
        pst (`pyemu.Pst`): existing control file
2361
        new_pst_name (`str`): new control file to write
2362
        org_cwd (`str`): existing working dir.  Default is "."
2363
        bak_suffix (`str`, optional): suffix to add to existing
2364
            model output files when making backup copies.
2365
        new_cwd (`str`): new working dir.  Default is ".".
2366

2367
    Note:
2368
        The fake forward run simply copies existing backup versions of
2369
        model output files to the output files pest(pp) is looking
2370
        for.  This is really a development option for debugging
2371
        PEST++ issues.
2372

2373
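    Example (a minimal sketch; directory and file names are hypothetical)::

        pst = pyemu.Pst("my.pst")
        pyemu.helpers.setup_fake_forward_run(
            pst, "fake.pst", org_cwd="template", new_cwd="fake_template"
        )
        # pestpp runs in "fake_template" now just copy the backed-up
        # outputs into place instead of running the model
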
    """
2374

2375
    if new_cwd != org_cwd and not os.path.exists(new_cwd):
8✔
2376
        os.mkdir(new_cwd)
×
2377
    pairs = {}
8✔
2378

2379
    for output_file in pst.output_files:
8✔
2380
        org_pth = os.path.join(org_cwd, output_file)
8✔
2381
        new_pth = os.path.join(new_cwd, os.path.split(output_file)[-1])
8✔
2382
        assert os.path.exists(org_pth), org_pth
8✔
2383
        shutil.copy2(org_pth, new_pth + bak_suffix)
8✔
2384
        pairs[output_file] = os.path.split(output_file)[-1] + bak_suffix
8✔
2385

2386
    if new_cwd != org_cwd:
8✔
2387
        for files in [pst.template_files, pst.instruction_files]:
×
2388
            for f in files:
×
2389
                raw = os.path.split(f)
×
2390
                if len(raw[0]) == 0:
×
2391
                    raw = raw[1:]
×
2392
                if len(raw) > 1:
×
2393
                    pth = os.path.join(*raw[:-1])
×
2394
                    pth = os.path.join(new_cwd, pth)
×
2395
                    if not os.path.exists(pth):
×
2396
                        os.makedirs(pth)
×
2397

2398
                org_pth = os.path.join(org_cwd, f)
×
2399
                new_pth = os.path.join(new_cwd, f)
×
2400
                assert os.path.exists(org_pth), org_pth
×
2401
                shutil.copy2(org_pth, new_pth)
×
2402
        for f in pst.input_files:
×
2403
            raw = os.path.split(f)
×
2404
            if len(raw[0]) == 0:
×
2405
                raw = raw[1:]
×
2406
            if len(raw) > 1:
×
2407
                pth = os.path.join(*raw[:-1])
×
2408
                pth = os.path.join(new_cwd, pth)
×
2409
                if not os.path.exists(pth):
×
2410
                    os.makedirs(pth)
×
2411

2412
        for key, f in pst.pestpp_options.items():
×
2413
            if not isinstance(f, str):
×
2414
                continue
×
2415
            raw = os.path.split(f)
×
2416
            if len(raw[0]) == 0:
×
2417
                raw = raw[1:]
×
2418
            if len(raw) > 1:
×
2419
                pth = os.path.join(*raw[:-1])
×
2420
                pth = os.path.join(new_cwd, pth)
×
2421
                if not os.path.exists(pth):
×
2422
                    os.makedirs(pth)
×
2423
            org_pth = os.path.join(org_cwd, f)
×
2424
            new_pth = os.path.join(new_cwd, f)
×
2425

2426
            if os.path.exists(org_pth):
×
2427
                shutil.copy2(org_pth, new_pth)
×
2428

2429
    with open(os.path.join(new_cwd, "fake_forward_run.py"), "w") as f:
8✔
2430
        f.write("import os\nimport shutil\n")
8✔
2431
        for org, bak in pairs.items():
8✔
2432
            f.write("shutil.copy2(r'{0}',r'{1}')\n".format(bak, org))
8✔
2433
    pst.model_command = "python fake_forward_run.py"
8✔
2434
    pst.write(os.path.join(new_cwd, new_pst_name))
8✔
2435

2436
    return pst
8✔
2437

2438

2439
# web address of spatial reference dot org
2440
srefhttp = "https://spatialreference.org"
9✔
2441

2442

2443
class SpatialReference(object):
9✔
2444
    """
9✔
2445
    a class to locate a structured model grid in x-y space.
2446
    Lifted wholesale from Flopy, and preserved here...
2447
    ...maybe slightly over-engineered for here
2448

2449
    Args:
2450

2451
        delr (`numpy ndarray`): the model discretization delr vector (an array of spacings along a row)
2452
        delc (`numpy ndarray`): the model discretization delc vector (an array of spacings along a column)
2453
        lenuni (`int`): the length units flag from the discretization package. Default is 2.
2454
        xul (`float`): The x coordinate of the upper left corner of the grid. Enter either xul and yul or xll and yll.
2455
        yul (`float`): The y coordinate of the upper left corner of the grid. Enter either xul and yul or xll and yll.
2456
        xll (`float`): The x coordinate of the lower left corner of the grid. Enter either xul and yul or xll and yll.
2457
        yll (`float`): The y coordinate of the lower left corner of the grid. Enter either xul and yul or xll and yll.
2458
        rotation (`float`): The counter-clockwise rotation (in degrees) of the grid
2459
        proj4_str (`str`): a PROJ4 string that identifies the grid in space. warning: case sensitive!
2460
        units (`string`): Units for the grid.  Must be either "feet" or "meters"
2461
        epsg (`int`): EPSG code that identifies the grid in space. Can be used in lieu of
2462
            proj4. PROJ4 attribute will auto-populate if there is an internet
2463
            connection (via the get_proj4 method).
2464
            See https://www.epsg-registry.org/ or spatialreference.org
2465
        length_multiplier (`float`): multiplier to convert model units to spatial reference units.
2466
            delr and delc above will be multiplied by this value. (default=1.)
2467

2468

2469
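    Example (a minimal sketch; a 10-row by 5-column grid of 100-unit cells,
    located by its upper-left corner and rotated 15 degrees)::

        sr = pyemu.helpers.SpatialReference(
            delr=np.full(5, 100.0),
            delc=np.full(10, 100.0),
            xul=0.0, yul=1000.0, rotation=15.0,
        )
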
    """
2470

2471
    xul, yul = None, None
9✔
2472
    xll, yll = None, None
9✔
2473
    rotation = 0.0
9✔
2474
    length_multiplier = 1.0
9✔
2475
    origin_loc = "ul"  # or ll
9✔
2476

2477
    defaults = {
9✔
2478
        "xul": None,
2479
        "yul": None,
2480
        "rotation": 0.0,
2481
        "proj4_str": None,
2482
        "units": None,
2483
        "lenuni": 2,
2484
        "length_multiplier": None,
2485
        "source": "defaults",
2486
    }
2487

2488
    lenuni_values = {"undefined": 0, "feet": 1, "meters": 2, "centimeters": 3}
9✔
2489
    lenuni_text = {v: k for k, v in lenuni_values.items()}
9✔
2490

2491
    def __init__(
9✔
2492
        self,
2493
        delr=np.array([]),
2494
        delc=np.array([]),
2495
        lenuni=2,
2496
        xul=None,
2497
        yul=None,
2498
        xll=None,
2499
        yll=None,
2500
        rotation=0.0,
2501
        proj4_str=None,
2502
        epsg=None,
2503
        prj=None,
2504
        units=None,
2505
        length_multiplier=None,
2506
        source=None,
2507
    ):
2508

2509
        for delrc in [delr, delc]:
9✔
2510
            if isinstance(delrc, float) or isinstance(delrc, int):
9✔
2511
                msg = (
×
2512
                    "delr and delcs must be an array or sequences equal in "
2513
                    "length to the number of rows/columns."
2514
                )
2515
                raise TypeError(msg)
×
2516

2517
        self.delc = np.atleast_1d(np.array(delc)).astype(
9✔
2518
            np.float64
2519
        )  # * length_multiplier
2520
        self.delr = np.atleast_1d(np.array(delr)).astype(
9✔
2521
            np.float64
2522
        )  # * length_multiplier
2523

2524
        if self.delr.sum() == 0 or self.delc.sum() == 0:
9✔
2525
            if xll is None or yll is None:
8✔
2526
                msg = (
8✔
2527
                    "Warning: no grid spacing. "
2528
                    "Lower-left corner offset calculation methods requires "
2529
                    "arguments for delr and delc. Origin will be set to "
2530
                    "upper-left"
2531
                )
2532
                warnings.warn(msg, PyemuWarning)
8✔
2533
                xll, yll = None, None
8✔
2534
                # xul, yul = None, None
2535

2536
        self._lenuni = lenuni
9✔
2537
        self._proj4_str = proj4_str
9✔
2538
        #
2539
        self._epsg = epsg
9✔
2540
        # if epsg is not None:
2541
        #     self._proj4_str = getproj4(self._epsg)
2542
        # self.prj = prj
2543
        # self._wkt = None
2544
        # self.crs = CRS(prj=prj, epsg=epsg)
2545

2546
        self.supported_units = ["feet", "meters"]
9✔
2547
        self._units = units
9✔
2548
        self._length_multiplier = length_multiplier
9✔
2549
        self._reset()
9✔
2550
        self.set_spatialreference(xul, yul, xll, yll, rotation)
9✔
2551
        self.grid_type = "structured"
9✔
2552

2553
    @property
    def ncpl(self):
        raise Exception("unstructured grids not supported by SpatialReference")

    @property
    def xll(self):
        if self.origin_loc == "ll":
            xll = self._xll if self._xll is not None else 0.0
        elif self.origin_loc == "ul":
            # calculate coords for lower left corner
            xll = self._xul - (
                np.sin(self.theta) * self.yedge[0] * self.length_multiplier
            )
        return xll

    @property
    def yll(self):
        if self.origin_loc == "ll":
            yll = self._yll if self._yll is not None else 0.0
        elif self.origin_loc == "ul":
            # calculate coords for lower left corner
            yll = self._yul - (
                np.cos(self.theta) * self.yedge[0] * self.length_multiplier
            )
        return yll

    @property
    def xul(self):
        if self.origin_loc == "ll":
            # calculate coords for upper left corner
            xul = self._xll + (
                np.sin(self.theta) * self.yedge[0] * self.length_multiplier
            )
        if self.origin_loc == "ul":
            # use the stored upper left corner
            xul = self._xul if self._xul is not None else 0.0
        return xul

    @property
    def yul(self):
        if self.origin_loc == "ll":
            # calculate coords for upper left corner
            yul = self._yll + (
                np.cos(self.theta) * self.yedge[0] * self.length_multiplier
            )

        if self.origin_loc == "ul":
            # use the stored upper left corner
            yul = self._yul if self._yul is not None else 0.0
        return yul

    @property
    def proj4_str(self):
        proj4_str = None
        if self._proj4_str is not None:
            if "epsg" in self._proj4_str.lower():
                if "init" not in self._proj4_str.lower():
                    proj4_str = "+init=" + self._proj4_str
                else:
                    proj4_str = self._proj4_str
                # set the epsg if proj4 specifies it
                tmp = [i for i in self._proj4_str.split() if "epsg" in i.lower()]
                self._epsg = int(tmp[0].split(":")[1])
            else:
                proj4_str = self._proj4_str
        elif self.epsg is not None:
            proj4_str = "+init=epsg:{}".format(self.epsg)
        return proj4_str

    @property
    def epsg(self):
        # don't reset the proj4 string here
        # because proj4 attribute may already be populated
        # (with more details than getprj would return)
        # instead reset proj4 when epsg is set
        # (on init or setattr)
        return self._epsg

    # @property
    # def wkt(self):
    #     if self._wkt is None:
    #         if self.prj is not None:
    #             with open(self.prj) as src:
    #                 wkt = src.read()
    #         elif self.epsg is not None:
    #             wkt = getprj(self.epsg)
    #         else:
    #             return None
    #         return wkt
    #     else:
    #         return self._wkt

    @property
    def lenuni(self):
        return self._lenuni

    def _parse_units_from_proj4(self):
        units = None
        try:
            # need this because preserve_units doesn't seem to be
            # working for complex proj4 strings.  So if an
            # epsg code was passed, we have no choice, but if a
            # proj4 string was passed, we can just parse it
            proj_str = self.proj4_str
            # if "EPSG" in self.proj4_str.upper():
            #     import pyproj
            #
            #     crs = pyproj.Proj(self.proj4_str,
            #                       preserve_units=True,
            #                       errcheck=True)
            #     proj_str = crs.srs
            # else:
            #     proj_str = self.proj4_str
            # http://proj4.org/parameters.html#units
            # from proj4 source code
            # "us-ft", "0.304800609601219", "U.S. Surveyor's Foot",
            # "ft", "0.3048", "International Foot",
            if "units=m" in proj_str:
                units = "meters"
            elif (
                "units=ft" in proj_str
                or "units=us-ft" in proj_str
                or "to_meters:0.3048" in proj_str
            ):
                units = "feet"
            return units
        except:
            if self.proj4_str is not None:
                print("   could not parse units from {}".format(self.proj4_str))

    @property
    def units(self):
        if self._units is not None:
            units = self._units.lower()
        else:
            units = self._parse_units_from_proj4()
        if units is None:
            # print("warning: assuming SpatialReference units are meters")
            units = "meters"
        assert units in self.supported_units
        return units

    @property
    def length_multiplier(self):
        """
        Attempt to identify multiplier for converting from
        model units to sr units, defaulting to 1.
        """
        lm = None
        if self._length_multiplier is not None:
            lm = self._length_multiplier
        else:
            if self.model_length_units == "feet":
                if self.units == "meters":
                    lm = 0.3048
                elif self.units == "feet":
                    lm = 1.0
            elif self.model_length_units == "meters":
                if self.units == "feet":
                    lm = 1 / 0.3048
                elif self.units == "meters":
                    lm = 1.0
            elif self.model_length_units == "centimeters":
                if self.units == "meters":
                    lm = 1 / 100.0
                elif self.units == "feet":
                    lm = 1 / 30.48
            else:  # model units unspecified; default to 1
                lm = 1.0
        return lm

    @property
    def model_length_units(self):
        return self.lenuni_text[self.lenuni]

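    # Unit-conversion sketch (hypothetical values): with model cells in feet
    # (lenuni=1) and a meters-based proj4 string, length_multiplier resolves
    # to 0.3048, so a 500 ft spacing maps to 152.4 m in sr coordinates:
    #
    # >>> sr = SpatialReference(delr=np.full(5, 500.0), delc=np.full(5, 500.0),
    # ...                       lenuni=1, proj4_str="+proj=utm +zone=14 +units=m")
    # >>> sr.length_multiplier
    # 0.3048
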
    @property
    def bounds(self):
        """
        Return the bounding box in shapely order (xmin, ymin, xmax, ymax).
        """
        xmin, xmax, ymin, ymax = self.get_extent()
        return xmin, ymin, xmax, ymax

    @staticmethod
    def load(namefile=None, reffile="usgs.model.reference"):
        """
        Attempts to load spatial reference information from
        the following files (in order):
        1) usgs.model.reference
        2) NAM file (header comment)
        3) SpatialReference.defaults dictionary
        """
        reffile = os.path.join(os.path.split(namefile)[0], reffile)
        d = SpatialReference.read_usgs_model_reference_file(reffile)
        if d is not None:
            return d
        d = SpatialReference.attribs_from_namfile_header(namefile)
        if d is not None:
            return d
        else:
            return SpatialReference.defaults

    @staticmethod
    def attribs_from_namfile_header(namefile):
        # check for reference info in the nam file header
        d = SpatialReference.defaults.copy()
        d["source"] = "namfile"
        if namefile is None:
            return None
        header = []
        with open(namefile, "r") as f:
            for line in f:
                if not line.startswith("#"):
                    break
                header.extend(
                    line.strip().replace("#", "").replace(",", ";").split(";")
                )

        for item in header:
            if "xul" in item.lower():
                try:
                    d["xul"] = float(item.split(":")[1])
                except:
                    print("   could not parse xul in {}".format(namefile))
            elif "yul" in item.lower():
                try:
                    d["yul"] = float(item.split(":")[1])
                except:
                    print("   could not parse yul in {}".format(namefile))
            elif "rotation" in item.lower():
                try:
                    d["rotation"] = float(item.split(":")[1])
                except:
                    print("   could not parse rotation in {}".format(namefile))
            elif "proj4_str" in item.lower():
                try:
                    proj4_str = ":".join(item.split(":")[1:]).strip()
                    if proj4_str.lower() == "none":
                        proj4_str = None
                    d["proj4_str"] = proj4_str
                except:
                    print("   could not parse proj4_str in {}".format(namefile))
            elif "start" in item.lower():
                try:
                    d["start_datetime"] = item.split(":")[1].strip()
                except:
                    print("   could not parse start in {}".format(namefile))

            # spatial reference length units
            elif "units" in item.lower():
                d["units"] = item.split(":")[1].strip()
            # model length units
            elif "lenuni" in item.lower():
                d["lenuni"] = int(item.split(":")[1].strip())
            # multiplier for converting from model length units to sr length units
            elif "length_multiplier" in item.lower():
                d["length_multiplier"] = float(item.split(":")[1].strip())
        return d

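    # Header sketch (hypothetical values) of the kind of nam-file comment
    # lines this parser understands -- "key:value" tokens separated by ";"
    # or ",":
    #
    # # xul:550000.0; yul:3650000.0; rotation:15.0
    # # proj4_str:+proj=utm +zone=14 +units=m; start_datetime:1-1-2020
    # # units:meters; lenuni:2
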
    @staticmethod
    def read_usgs_model_reference_file(reffile="usgs.model.reference"):
        """
        read spatial reference info from the usgs.model.reference file
        https://water.usgs.gov/ogw/policy/gw-model/modelers-setup.html
        """

        ITMUNI = {
            0: "undefined",
            1: "seconds",
            2: "minutes",
            3: "hours",
            4: "days",
            5: "years",
        }
        itmuni_values = {v: k for k, v in ITMUNI.items()}

        d = SpatialReference.defaults.copy()
        d["source"] = "usgs.model.reference"
        # discard default to avoid confusion with epsg code if entered
        d.pop("proj4_str")
        if os.path.exists(reffile):
            with open(reffile) as fref:
                for line in fref:
                    if len(line) > 1:
                        if line.strip()[0] != "#":
                            info = line.strip().split("#")[0].split()
                            if len(info) > 1:
                                d[info[0].lower()] = " ".join(info[1:])
            d["xul"] = float(d["xul"])
            d["yul"] = float(d["yul"])
            d["rotation"] = float(d["rotation"])

            # convert the model.reference text to a lenuni value
            # (these are the model length units)
            if "length_units" in d.keys():
                d["lenuni"] = SpatialReference.lenuni_values[d["length_units"]]
            if "time_units" in d.keys():
                d["itmuni"] = itmuni_values[d["time_units"]]
            if "start_date" in d.keys():
                start_datetime = d.pop("start_date")
                if "start_time" in d.keys():
                    start_datetime += " {}".format(d.pop("start_time"))
                d["start_datetime"] = start_datetime
            if "epsg" in d.keys():
                try:
                    d["epsg"] = int(d["epsg"])
                except Exception as e:
                    raise Exception("error reading epsg code from file:\n" + str(e))
            # this prioritizes epsg over proj4 if both are given
            # (otherwise 'proj4' entry will be dropped below)
            elif "proj4" in d.keys():
                d["proj4_str"] = d["proj4"]

            # drop any other items that aren't used in sr class
            d = {
                k: v
                for k, v in d.items()
                if k.lower() in SpatialReference.defaults.keys()
                or k.lower() in {"epsg", "start_datetime", "itmuni", "source"}
            }
            return d
        else:
            return None

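    # Sketch of a usgs.model.reference file this reader accepts (values are
    # hypothetical; one "key value" pair per line, "#" starts a comment):
    #
    # xul 550000.0
    # yul 3650000.0
    # rotation 15.0
    # length_units meters
    # time_units days
    # start_date 1/1/2020
    # epsg 26914
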
    def __setattr__(self, key, value):
        reset = True
        if key == "delr":
            super(SpatialReference, self).__setattr__(
                "delr", np.atleast_1d(np.array(value))
            )
        elif key == "delc":
            super(SpatialReference, self).__setattr__(
                "delc", np.atleast_1d(np.array(value))
            )
        elif key == "xul":
            super(SpatialReference, self).__setattr__("_xul", float(value))
            self.origin_loc = "ul"
        elif key == "yul":
            super(SpatialReference, self).__setattr__("_yul", float(value))
            self.origin_loc = "ul"
        elif key == "xll":
            super(SpatialReference, self).__setattr__("_xll", float(value))
            self.origin_loc = "ll"
        elif key == "yll":
            super(SpatialReference, self).__setattr__("_yll", float(value))
            self.origin_loc = "ll"
        elif key == "length_multiplier":
            super(SpatialReference, self).__setattr__(
                "_length_multiplier", float(value)
            )
        elif key == "rotation":
            super(SpatialReference, self).__setattr__("rotation", float(value))
        elif key == "lenuni":
            super(SpatialReference, self).__setattr__("_lenuni", int(value))
        elif key == "units":
            value = value.lower()
            assert value in self.supported_units
            super(SpatialReference, self).__setattr__("_units", value)
        elif key == "proj4_str":
            super(SpatialReference, self).__setattr__("_proj4_str", value)
            # reset the units and epsg
            units = self._parse_units_from_proj4()
            if units is not None:
                self._units = units
            self._epsg = None
        elif key == "epsg":
            super(SpatialReference, self).__setattr__("_epsg", value)
            # reset the units and proj4
            # self._units = None
            # self._proj4_str = getproj4(self._epsg)
            # self.crs = crs(epsg=value)
        elif key == "prj":
            super(SpatialReference, self).__setattr__("prj", value)
            # translation to proj4 strings in crs class not robust yet
            # leave units and proj4 alone for now.
            # self.crs = CRS(prj=value, epsg=self.epsg)
        else:
            super(SpatialReference, self).__setattr__(key, value)
            reset = False
        if reset:
            self._reset()

    def reset(self, **kwargs):
        for key, value in kwargs.items():
            setattr(self, key, value)
        return

    def _reset(self):
        self._xgrid = None
        self._ygrid = None
        self._ycentergrid = None
        self._xcentergrid = None
        self._vertices = None
        return

    @property
    def nrow(self):
        return self.delc.shape[0]

    @property
    def ncol(self):
        return self.delr.shape[0]

    def __eq__(self, other):
        if not isinstance(other, SpatialReference):
            return False
        if other.xul != self.xul:
            return False
        if other.yul != self.yul:
            return False
        if other.rotation != self.rotation:
            return False
        if other.proj4_str != self.proj4_str:
            return False
        return True

    @classmethod
    def from_namfile(cls, namefile, delr=np.array([]), delc=np.array([])):
        if delr is None or delc is None:
            warnings.warn(
                "One or both grid spacing arrays are "
                "missing,\n    required for most pyemu methods "
                "that use sr,\n    can be passed later if desired "
                "(e.g. sr.delr = row spacing)",
                PyemuWarning,
            )
        attribs = SpatialReference.attribs_from_namfile_header(namefile)
        attribs["delr"] = delr
        attribs["delc"] = delc
        try:
            attribs.pop("start_datetime")
        except:
            print("   could not remove start_datetime")
        return SpatialReference(**attribs)

    @classmethod
    def from_gridspec(cls, gridspec_file, lenuni=0):
        with open(gridspec_file, "r") as f:
            raw = f.readline().strip().split()
            nrow = int(raw[0])
            ncol = int(raw[1])
            raw = f.readline().strip().split()
            xul, yul, rot = float(raw[0]), float(raw[1]), float(raw[2])
            delr = []
            j = 0
            while j < ncol:
                raw = f.readline().strip().split()
                for r in raw:
                    if "*" in r:
                        rraw = r.split("*")
                        for n in range(int(rraw[0])):
                            delr.append(float(rraw[1]))
                            j += 1
                    else:
                        delr.append(float(r))
                        j += 1
            delc = []
            i = 0
            while i < nrow:
                raw = f.readline().strip().split()
                for r in raw:
                    if "*" in r:
                        rraw = r.split("*")
                        for n in range(int(rraw[0])):
                            delc.append(float(rraw[1]))
                            i += 1
                    else:
                        delc.append(float(r))
                        i += 1
        return cls(
            np.array(delr), np.array(delc), lenuni, xul=xul, yul=yul, rotation=rot
        )

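    # Grid specification sketch (hypothetical 2-row by 3-column grid) of the
    # PEST-style format from_gridspec expects; "n*value" expands to n
    # repeated values:
    #
    #   2 3
    #   550000.0 3650000.0 15.0
    #   3*250.0
    #   125.0 125.0
    #
    # >>> sr = SpatialReference.from_gridspec("test.spc")  # hypothetical file
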
    @property
    def attribute_dict(self):
        return {
            "xul": self.xul,
            "yul": self.yul,
            "rotation": self.rotation,
            "proj4_str": self.proj4_str,
        }

    def set_spatialreference(
        self, xul=None, yul=None, xll=None, yll=None, rotation=0.0
    ):
        """
        set spatial reference - can be called from model instance
        """
        if xul is not None and xll is not None:
            msg = "Both xul and xll entered. Please enter either xul, yul or xll, yll."
            raise ValueError(msg)
        if yul is not None and yll is not None:
            msg = "Both yul and yll entered. Please enter either xul, yul or xll, yll."
            raise ValueError(msg)
        # set the origin priority based on the left corner specified
        # (the other left corner will be calculated).  If none are specified
        # then default to upper left
        if xul is None and yul is None and xll is None and yll is None:
            self.origin_loc = "ul"
            xul = 0.0
            yul = self.delc.sum()
        elif xll is not None:
            self.origin_loc = "ll"
        else:
            self.origin_loc = "ul"

        self.rotation = rotation
        self._xll = xll if xll is not None else 0.0
        self._yll = yll if yll is not None else 0.0
        self._xul = xul if xul is not None else 0.0
        self._yul = yul if yul is not None else 0.0
        return

    def __repr__(self):
        s = "xul:{0:<.10G}; yul:{1:<.10G}; rotation:{2:<G}; ".format(
            self.xul, self.yul, self.rotation
        )
        s += "proj4_str:{0}; ".format(self.proj4_str)
        s += "units:{0}; ".format(self.units)
        s += "lenuni:{0}; ".format(self.lenuni)
        s += "length_multiplier:{}".format(self.length_multiplier)
        return s

    @property
    def theta(self):
        return -self.rotation * np.pi / 180.0

    @property
    def xedge(self):
        return self.get_xedge_array()

    @property
    def yedge(self):
        return self.get_yedge_array()

    @property
    def xgrid(self):
        if self._xgrid is None:
            self._set_xygrid()
        return self._xgrid

    @property
    def ygrid(self):
        if self._ygrid is None:
            self._set_xygrid()
        return self._ygrid

    @property
    def xcenter(self):
        return self.get_xcenter_array()

    @property
    def ycenter(self):
        return self.get_ycenter_array()

    @property
    def ycentergrid(self):
        if self._ycentergrid is None:
            self._set_xycentergrid()
        return self._ycentergrid

    @property
    def xcentergrid(self):
        if self._xcentergrid is None:
            self._set_xycentergrid()
        return self._xcentergrid

    def _set_xycentergrid(self):
        self._xcentergrid, self._ycentergrid = np.meshgrid(self.xcenter, self.ycenter)
        self._xcentergrid, self._ycentergrid = self.transform(
            self._xcentergrid, self._ycentergrid
        )

    def _set_xygrid(self):
        self._xgrid, self._ygrid = np.meshgrid(self.xedge, self.yedge)
        self._xgrid, self._ygrid = self.transform(self._xgrid, self._ygrid)

    @staticmethod
    def rotate(x, y, theta, xorigin=0.0, yorigin=0.0):
        """
        Given x and y array-like values, calculate the rotation about an
        arbitrary origin and then return the rotated coordinates.  theta is in
        degrees.
        """
        # jwhite changed on Oct 11 2016 - rotation is now positive CCW
        # theta = -theta * np.pi / 180.
        theta = theta * np.pi / 180.0

        xrot = xorigin + np.cos(theta) * (x - xorigin) - np.sin(theta) * (y - yorigin)
        yrot = yorigin + np.sin(theta) * (x - xorigin) + np.cos(theta) * (y - yorigin)
        return xrot, yrot

    def transform(self, x, y, inverse=False):
        """
        Given x and y array-like values, apply rotation, scale and offset
        to convert them from model coordinates to real-world coordinates.
        """
        if isinstance(x, list):
            x = np.array(x)
            y = np.array(y)
        if not np.isscalar(x):
            x, y = x.copy(), y.copy()

        if not inverse:
            x *= self.length_multiplier
            y *= self.length_multiplier
            x += self.xll
            y += self.yll
            x, y = SpatialReference.rotate(
                x, y, theta=self.rotation, xorigin=self.xll, yorigin=self.yll
            )
        else:
            x, y = SpatialReference.rotate(x, y, -self.rotation, self.xll, self.yll)
            x -= self.xll
            y -= self.yll
            x /= self.length_multiplier
            y /= self.length_multiplier
        return x, y

    def get_extent(self):
        """
        Get the extent of the rotated and offset grid
        """
        x0 = self.xedge[0]
        x1 = self.xedge[-1]
        y0 = self.yedge[0]
        y1 = self.yedge[-1]

        # upper left point
        x0r, y0r = self.transform(x0, y0)

        # upper right point
        x1r, y1r = self.transform(x1, y0)

        # lower right point
        x2r, y2r = self.transform(x1, y1)

        # lower left point
        x3r, y3r = self.transform(x0, y1)

        xmin = min(x0r, x1r, x2r, x3r)
        xmax = max(x0r, x1r, x2r, x3r)
        ymin = min(y0r, y1r, y2r, y3r)
        ymax = max(y0r, y1r, y2r, y3r)

        return (xmin, xmax, ymin, ymax)

    def get_grid_lines(self):
        """
        Get the grid lines as a list
        """
        xmin = self.xedge[0]
        xmax = self.xedge[-1]
        ymin = self.yedge[-1]
        ymax = self.yedge[0]
        lines = []
        # vertical lines
        for j in range(self.ncol + 1):
            x0 = self.xedge[j]
            x1 = x0
            y0 = ymin
            y1 = ymax
            x0r, y0r = self.transform(x0, y0)
            x1r, y1r = self.transform(x1, y1)
            lines.append([(x0r, y0r), (x1r, y1r)])

        # horizontal lines
        for i in range(self.nrow + 1):
            x0 = xmin
            x1 = xmax
            y0 = self.yedge[i]
            y1 = y0
            x0r, y0r = self.transform(x0, y0)
            x1r, y1r = self.transform(x1, y1)
            lines.append([(x0r, y0r), (x1r, y1r)])
        return lines

    # def get_grid_line_collection(self, **kwargs):
    #     """
    #     Get a LineCollection of the grid
    #
    #     """
    #     from flopy.plot import ModelMap
    #
    #     map = ModelMap(sr=self)
    #     lc = map.plot_grid(**kwargs)
    #     return lc

    def get_xcenter_array(self):
        """
        Return a numpy one-dimensional float array that has the cell center x
        coordinate for every column in the grid in model space - not offset or rotated.
        """
        assert self.delr is not None and len(self.delr) > 0, (
            "delr not passed to spatial reference object"
        )
        x = np.add.accumulate(self.delr) - 0.5 * self.delr
        return x

    def get_ycenter_array(self):
        """
        Return a numpy one-dimensional float array that has the cell center y
        coordinate for every row in the grid in model space - not offset or rotated.
        """
        assert self.delc is not None and len(self.delc) > 0, (
            "delc not passed to spatial reference object"
        )
        Ly = np.add.reduce(self.delc)
        y = Ly - (np.add.accumulate(self.delc) - 0.5 * self.delc)
        return y

    def get_xedge_array(self):
        """
        Return a numpy one-dimensional float array that has the cell edge x
        coordinates for every column in the grid in model space - not offset
        or rotated.  Array is of size (ncol + 1)
        """
        assert self.delr is not None and len(self.delr) > 0, (
            "delr not passed to spatial reference object"
        )
        xedge = np.concatenate(([0.0], np.add.accumulate(self.delr)))
        return xedge

    def get_yedge_array(self):
        """
        Return a numpy one-dimensional float array that has the cell edge y
        coordinates for every row in the grid in model space - not offset or
        rotated. Array is of size (nrow + 1)
        """
        assert self.delc is not None and len(self.delc) > 0, (
            "delc not passed to spatial reference object"
        )
        length_y = np.add.reduce(self.delc)
        yedge = np.concatenate(([length_y], length_y - np.add.accumulate(self.delc)))
        return yedge

    def write_gridspec(self, filename):
        """write a PEST-style grid specification file"""
        # use a context manager so the file is always closed
        with open(filename, "w") as f:
            f.write(
                "{0:10d} {1:10d}\n".format(self.delc.shape[0], self.delr.shape[0])
            )
            f.write(
                "{0:15.6E} {1:15.6E} {2:15.6E}\n".format(
                    self.xul * self.length_multiplier,
                    self.yul * self.length_multiplier,
                    self.rotation,
                )
            )
            for r in self.delr:
                f.write("{0:15.6E} ".format(r))
            f.write("\n")
            for c in self.delc:
                f.write("{0:15.6E} ".format(c))
            f.write("\n")
        return

    def get_vertices(self, i, j):
        """Get vertices for a single cell or sequence of i, j locations."""
        pts = []
        xgrid, ygrid = self.xgrid, self.ygrid
        pts.append([xgrid[i, j], ygrid[i, j]])
        pts.append([xgrid[i + 1, j], ygrid[i + 1, j]])
        pts.append([xgrid[i + 1, j + 1], ygrid[i + 1, j + 1]])
        pts.append([xgrid[i, j + 1], ygrid[i, j + 1]])
        pts.append([xgrid[i, j], ygrid[i, j]])
        if np.isscalar(i):
            return pts
        else:
            vrts = np.array(pts).transpose([2, 0, 1])
            return [v.tolist() for v in vrts]

    def get_rc(self, x, y):
        return self.get_ij(x, y)

    def get_ij(self, x, y):
        """Return the row and column of a point or sequence of points
        in real-world coordinates.

        Args:
            x (`float`): scalar or sequence of x coordinates
            y (`float`): scalar or sequence of y coordinates

        Returns:
            tuple of

            - **int** : row or sequence of rows (zero-based)
            - **int** : column or sequence of columns (zero-based)
        """
        if np.isscalar(x):
            c = (np.abs(self.xcentergrid[0] - x)).argmin()
            r = (np.abs(self.ycentergrid[:, 0] - y)).argmin()
        else:
            xcp = np.array([self.xcentergrid[0]] * (len(x)))
            ycp = np.array([self.ycentergrid[:, 0]] * (len(x)))
            c = (np.abs(xcp.transpose() - x)).argmin(axis=0)
            r = (np.abs(ycp.transpose() - y)).argmin(axis=0)
        return r, c

    @property
    def vertices(self):
        """
        Returns a list of vertices for the whole grid.
        """
        if self._vertices is None:
            self._set_vertices()
        return self._vertices

    def _set_vertices(self):
        """
        Populate vertices for the whole grid
        """
        jj, ii = np.meshgrid(range(self.ncol), range(self.nrow))
        jj, ii = jj.ravel(), ii.ravel()
        self._vertices = self.get_vertices(ii, jj)

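# Grid-query sketch for the class above (hypothetical unrotated grid with
# 100-unit square cells and a lower-left origin at (0, 0)): the real-world
# point (250, 250) falls in the cell centered at row 0, column 2.
#
# >>> sr = SpatialReference(delr=np.full(4, 100.0), delc=np.full(3, 100.0),
# ...                       xll=0.0, yll=0.0)
# >>> sr.get_ij(250.0, 250.0)  # -> row 0, col 2
# (0, 2)

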
def maha_based_pdc(sim_en):
    """prototype for detecting prior-data conflict following Alfonso and Oliver 2019

    Args:
        sim_en (`pyemu.ObservationEnsemble`): a simulated outputs ensemble

    Returns:

        tuple containing

        - **pandas.DataFrame**: the z-score and p-value for each non-zero
            weighted observation group
        - **pandas.DataFrame**: the per-realization squared mahalanobis
            distances for each observation group

    """
    groups = sim_en.pst.nnz_obs_groups
    obs = sim_en.pst.observation_data
    z_scores = {}
    dm_xs = {}
    p_vals = {}
    for group in groups:
        nzobs = obs.loc[obs.obgnme == group, :]
        nzobs = nzobs.loc[nzobs.weight > 0, :].copy()
        nzsim_en = sim_en._df.loc[:, nzobs.obsnme].copy()
        ne, nx = nzsim_en.shape
        ns = ne - 1
        delta = 2.0 / (float(ns) + 2.0)
        v = nzsim_en.var(axis=0).mean()
        x = nzsim_en.values.copy()
        z = nzobs.obsval.values.copy()
        dm_x, dm_z = [], []
        # leave-one-out squared mahalanobis distances for each realization
        # and for the observed values
        for ireal in range(ne):
            x_s = x.copy()
            x_s = np.delete(x_s, (ireal), axis=0)
            first = delta * v * (((ns - 1) / (1 - delta)) * np.eye(ns))
            a_s = first + np.dot(x_s, x_s.T)
            lower = np.linalg.cholesky(a_s)
            lower = np.linalg.inv(lower)
            mu_hat = x_s.mean(axis=0)
            dm_x.append(_maha(delta, v, x_s, x[ireal, :] - mu_hat, lower))
            dm_z.append(_maha(delta, v, x_s, z - mu_hat, lower))
        dm_x = np.array(dm_x)
        dm_z = np.array(dm_z)
        mu_x = np.median(dm_x)
        mu_z = np.median(dm_z)
        # robust spread estimate: 1.4826 * MAD approximates a std deviation
        mad = np.median(np.abs(dm_x - mu_x))
        sigma_x = 1.4826 * mad
        z_score = (mu_z - mu_x) / sigma_x
        z_scores[group] = z_score
        dm_xs[group] = dm_x
        dm_x.sort()

        p = np.argmin(np.abs(dm_x - mu_z)) / dm_x.shape[0]
        p_vals[group] = 1 - p

    z_scores, p_vals, dm_xs = pd.Series(z_scores), pd.Series(p_vals), pd.DataFrame(dm_xs)
    # dm_xs.loc[:,"z_scores"] = z_scores.loc[dm_xs.index]
    # dm_xs.loc[:,"p_vals"] = p_vals.loc[dm_xs.index]
    df = pd.DataFrame({"z_scores": z_scores, "p_vals": p_vals})
    df.index.name = "obgnme"
    return df, pd.DataFrame(dm_xs)

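# Usage sketch (hypothetical control-file and ensemble-file names): a large
# positive z-score / small p-value for a group flags the observed values as
# improbably far from the simulated ensemble, i.e. prior-data conflict.
#
# >>> pst = pyemu.Pst("model.pst")
# >>> sim_en = pyemu.ObservationEnsemble.from_csv(pst, "sim_en.csv")
# >>> summary, distances = maha_based_pdc(sim_en)
# >>> conflicted = summary.loc[summary.p_vals < 0.05]

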
def _maha(delta, v, x, z, lower_inv):

    d_m = np.dot(z.transpose(), z)
    first = np.dot(np.dot(lower_inv, x), z)
    first = np.dot(first.transpose(), first)
    d_m = (1.0 / (delta * v)) * (d_m - first)
    return d_m


def get_maha_obs_summary(sim_en, l1_crit_val=6.34, l2_crit_val=9.2):
    """calculate the 1-D and 2-D mahalanobis distance between simulated
    ensemble and observed values.  Used for detecting prior-data conflict

    Args:
        sim_en (`pyemu.ObservationEnsemble`): a simulated outputs ensemble
        l1_crit_val (`float`): the chi squared critical value for the 1-D
            mahalanobis distance.  Default is 6.34 (p=0.01,df=1)
        l2_crit_val (`float`): the chi squared critical value for the 2-D
            mahalanobis distance.  Default is 9.2 (p=0.01,df=2)

    Returns:

        tuple containing

        - **pandas.DataFrame**: 1-D subspace squared mahalanobis distances
            that exceed the `l1_crit_val` threshold
        - **pandas.DataFrame**: 2-D subspace squared mahalanobis distances
            that exceed the `l2_crit_val` threshold

    Note:
        Noise realizations are added to `sim_en` to account for measurement
            noise.

    """

    if not isinstance(sim_en, pyemu.ObservationEnsemble):
        raise Exception("'sim_en' must be a pyemu.ObservationEnsemble instance")
    if sim_en.pst.nnz_obs < 1:
        raise Exception("at least one non-zero weighted obs is needed")

    # process the simulated ensemble to only have non-zero weighted obs
    obs = sim_en.pst.observation_data
    nz_names = sim_en.pst.nnz_obs_names
    # get the full cov matrix
    nz_cov_df = sim_en.covariance_matrix().to_dataframe()
    nnz_en = sim_en.loc[:, nz_names].copy()
    nz_cov_df = nz_cov_df.loc[nz_names, nz_names]
    # get some noise realizations
    nnz_en.reseed()
    obsmean = obs.loc[nnz_en.columns.values, "obsval"]
    noise_en = pyemu.ObservationEnsemble.from_gaussian_draw(
        sim_en.pst, num_reals=sim_en.shape[0]
    )
    noise_en -= obsmean  # subtract off the obs val bc we just want the noise
    noise_en.index = nnz_en.index
    nnz_en += noise_en

    # obsval_dict = obs.loc[nnz_en.columns.values,"obsval"].to_dict()

    # first calculate the 1-D subspace maha distances
    print("calculating L-1 maha distances")
    sim_mean = nnz_en.mean()
    obs_mean = obs.loc[nnz_en.columns.values, "obsval"]
    simvar_inv = 1.0 / (nnz_en.std() ** 2)
    res_mean = sim_mean - obs_mean
    l1_maha_sq_df = res_mean ** 2 * simvar_inv
    l1_maha_sq_df = l1_maha_sq_df.loc[l1_maha_sq_df > l1_crit_val]
    # now calculate the 2-D subspace maha distances
    print("preparing L-2 maha distance containers")
    manager = mp.Manager()
    ns = manager.Namespace()
    results = manager.dict()
    mean = manager.dict(res_mean.to_dict())
    var = manager.dict()
    cov = manager.dict()
    var_arr = np.diag(nz_cov_df.values)
    for i1, o1 in enumerate(nz_names):
        var[o1] = var_arr[i1]

        cov_vals = nz_cov_df.loc[o1, :].values[i1 + 1 :]
        ostr_vals = ["{0}_{1}".format(o1, o2) for o2 in nz_names[i1 + 1 :]]
        cd = {o: c for o, c in zip(ostr_vals, cov_vals)}
        cov.update(cd)
    print("starting L-2 maha distance parallel calcs")
    # pool = mp.Pool(processes=5)
    with mp.get_context("spawn").Pool(
            processes=min(mp.cpu_count(), 60)) as pool:
        # accumulate every async task so each result is waited on
        # (reassigning the list inside the loop would only track the last task)
        rresults = []
        for i1, o1 in enumerate(nz_names):
            o2names = [o2 for o2 in nz_names[i1 + 1 :]]
            rresults.append(
                pool.apply_async(
                    _l2_maha_worker,
                    args=(o1, o2names, mean, var, cov, results, l2_crit_val),
                )
            )
        [r.get() for r in rresults]

        print("closing pool")
        pool.close()

        print("joining pool")
        pool.join()

    # print(results)
    # print(len(results),len(ostr_vals))

    keys = list(results.keys())
    onames1 = [k.split("|")[0] for k in keys]
    onames2 = [k.split("|")[1] for k in keys]
    l2_maha_sq_vals = [results[k] for k in keys]
    l2_maha_sq_df = pd.DataFrame(
        {"obsnme_1": onames1, "obsnme_2": onames2, "sq_distance": l2_maha_sq_vals}
    )

    return l1_maha_sq_df, l2_maha_sq_df


def _l2_maha_worker(o1, o2names, mean, var, cov, results, l2_crit_val):

    rresults = {}
    v1 = var[o1]
    c = np.zeros((2, 2))
    c[0, 0] = v1
    r1 = mean[o1]
    for o2 in o2names:
        ostr = "{0}_{1}".format(o1, o2)
        cv = cov[ostr]
        v2 = var[o2]
        c[1, 1] = v2
        c[0, 1] = cv
        c[1, 0] = cv
        c_inv = np.linalg.inv(c)

        r2 = mean[o2]
        r_vec = np.array([r1, r2])
        l2_maha_sq_val = np.dot(np.dot(r_vec, c_inv), r_vec.transpose())
        if l2_maha_sq_val > l2_crit_val:
            # store the pair under a "|"-separated key so it can be split
            # unambiguously by get_maha_obs_summary (obs names may contain "_")
            rresults["{0}|{1}".format(o1, o2)] = l2_maha_sq_val
    results.update(rresults)
    print(o1, "done")


def parse_rmr_file(rmr_file):
    """parse a run management record file into a data frame of tokens

    Args:
        rmr_file (`str`):  an rmr file name

    Returns:
        pd.DataFrame: a dataframe of timestamped information

    Note:
        only supports rmr files generated by pest++ version >= 5.1.21

    """

    if not os.path.exists(rmr_file):
        raise FileNotFoundError("rmr file not found")
    data = {}
    dts = []
    with open(rmr_file, 'r') as f:
        for line in f:
            if "->" in line:
                raw = line.split("->")
                dt = datetime.strptime(raw[0], "%m/%d/%y %H:%M:%S")
                tokens = raw[1].split()
                colon_token = [t for t in tokens if ":" in t]
                if len(colon_token) > 0:
                    idx = tokens.index(colon_token[0])
                    action = "_".join(tokens[:idx + 1])
                else:
                    action = '_'.join(tokens)
                if "action" not in data:
                    data["action"] = []
                data["action"].append(action)

                tokens = colon_token
                tokens = {t.split(":")[0]: t.split(':')[1] for t in tokens}
                # backfill any newly seen token columns with nans
                for t in tokens:
                    if t not in data:
                        data[t] = [np.nan] * len(dts)
                for k in data:
                    if k == "action":
                        continue
                    if k not in tokens:
                        data[k].append(np.nan)
                    else:
                        data[k].append(tokens[k])
                dts.append(dt)

    df = pd.DataFrame(data, index=dts)
    return df


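# Line-shape sketch (hypothetical timestamp and tokens, inferred from the
# parser above, not from a real rmr file): each record is
# "<m/d/y H:M:S> -> <action tokens> key:value ...", e.g.
#
#   08/17/23 06:23:05 -> run complete run_id:12 agent:worker_1
#
# >>> df = parse_rmr_file("model.rmr")  # hypothetical file name
# >>> list(df.columns)  # e.g. ['action', 'run_id', 'agent']

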
def setup_threshold_pars(orgarr_file,cat_dict,testing_workspace=".",inact_arr=None):
9✔
3646
    """setup a thresholding 2-category binary array prcoess.
3647

3648
    Parameters:
3649
        orgarr_file (`str`): the input array that will ultimately be created at runtime
3650
        cat_dict (`str`): dict of info for the two categories.  Keys are (unused) category names.
3651
            values are a len 2 iterable of requested proportion and fill value.
3652
        testing_workspace (`str`): directory where the apply process can be tested.
3653
        inact_arr (`np.ndarray`): an array that indicates inactive nodes (inact_arr=0)
3654

3655
    Returns:
3656
        thresharr_file (`str`): thresholding array file (to be parameterized)
3657
        csv_file (`str`): the csv file that has the inputs needed for the apply process
3658

3659
    Note:
3660
        all required files are based on the `orgarr_file` with suffixes appended to them
3661

3662
    """
3663
    assert os.path.exists(orgarr_file)
8✔
3664
    #atleast 2d for xsections
3665
    org_arr = np.atleast_2d(np.loadtxt(orgarr_file))
8✔
3666

3667
    if len(cat_dict) != 2:
8✔
3668
        raise Exception("only two categories currently supported, {0} found in target_proportions_dict".\
×
3669
                        format(len(cat_dict)))
3670

3671
    tol = 1.0e-10
8✔
3672
    if org_arr.std() < tol * 2.0:
8✔
3673
        print("WARNING: org_arr has very low standard deviation, adding some noise for now...")
×
3674
        org_arr += np.random.normal(0,1e-6,org_arr.shape)
×
3675
    prop_tags,prop_vals,fill_vals = [],[],[]
8✔
3676
    for key,(proportion,fill_val) in cat_dict.items():
8✔
3677
        if int(key) not in cat_dict:
8✔
3678
            raise Exception("integer type of key '{0}' not found in target_proportions_dict".format(key))
×
3679
        prop_tags.append(int(key))
8✔
3680
        prop_vals.append(float(proportion))
8✔
3681
        # use the key as the fill value for testing purposes
3682
        fill_vals.append(float(fill_val))
8✔
3683

3684
    thresharr = org_arr.copy()
8✔
3685
    thresharr = thresharr / thresharr.max()
8✔
3686
    thresharr_file = orgarr_file+".thresharr.dat"
8✔
3687
    np.savetxt(thresharr_file,thresharr,fmt="%15.6E")
8✔
3688

3689
    if inact_arr is not None:
8✔
3690
        assert inact_arr.shape == org_arr.shape
8✔
3691
        inactarr_file = orgarr_file+".threshinact.dat"
8✔
3692
        np.savetxt(inactarr_file,inact_arr,fmt="%4.0f")
8✔
3693

3694
    df = pd.DataFrame({"threshcat":prop_tags,"threshval":prop_vals,"threshfill":fill_vals})
8✔
3695
    csv_file = orgarr_file+".threshprops.csv"
8✔
3696
    df.to_csv(csv_file,index=False)
8✔
3697

3698
    # test that it seems to be working
3699
    bd = os.getcwd()
8✔
3700
    os.chdir(testing_workspace)
8✔
3701
    apply_threshold_pars(os.path.split(csv_file)[1])
8✔
3702
    os.chdir(bd)
8✔
3703
    return thresharr_file,csv_file
8✔
3704

3705

3706
def apply_threshold_pars(csv_file):
    """apply the thresholding process set up by `setup_threshold_pars`.  All
    companion file names are derived from `csv_file`.

    """
    df = pd.read_csv(csv_file, index_col=0)
    thresarr_file = csv_file.replace("props.csv", "arr.dat")
    tarr = np.loadtxt(thresarr_file)
    if np.any(tarr < 0):
        print(tarr)
        raise Exception("negatives in thresholding array")
    # normalize tarr
    tarr = tarr / tarr.max()
    orgarr_file = csv_file.replace(".threshprops.csv", "")
    inactarr_file = csv_file.replace(".threshprops.csv", ".threshinact.dat")

    tvals = df.threshval.astype(float).values.copy()
    # ttags = df.threshcat.astype(int).values.copy()
    tfill = df.threshfill.astype(float).values.copy()
    # for now...
    assert len(tvals) == 2
    if np.any(tvals <= 0.0):
        print(tvals)
        raise Exception("threshold values must be strictly positive")

    # since we only have two categories, we can just focus on the first proportion
    target_prop = tvals[0]

    tol = 1.0e-10
    if tarr.std() < tol * 2.0:
        raise Exception("thresholding array has very low standard deviation")

    # a classic: golden-section search for the threshold that yields the
    # target proportion
    gr = (np.sqrt(5.) + 1.) / 2.
    a = tarr.min()
    b = tarr.max()
    c = b - ((b - a) / gr)
    d = a + ((b - a) / gr)

    iarr = None
    tot_shape = tarr.shape[0] * tarr.shape[1]
    if os.path.exists(inactarr_file):
        iarr = np.loadtxt(inactarr_file, dtype=int)
        # this keeps inact from interfering with calcs later...
        tarr[iarr == 0] = 1.0e+30
        tiarr = iarr.copy()
        tiarr[tiarr <= 0] = 0
        tiarr[tiarr > 0] = 1
        tot_shape = tiarr.sum()

    def get_current_prop(_cur_thresh):
        itarr = np.zeros_like(tarr)
        itarr[tarr <= _cur_thresh] = 1
        current_prop = itarr.sum() / tot_shape
        return current_prop

    numiter = 0
    while True:
        if (b - a) <= tol:
            break
        if numiter > 10000:
            raise Exception("golden section failed to converge")

        cprop = get_current_prop(c)
        dprop = get_current_prop(d)
        # print(a,b,c,d,cprop,dprop,target_prop)
        cphi = (cprop - target_prop) ** 2
        dphi = (dprop - target_prop) ** 2
        if cphi < dphi:
            b = d
        else:
            a = c
        c = b - ((b - a) / gr)
        d = a + ((b - a) / gr)
        numiter += 1

    # print(a,b,c,d)
    thresh = (a + b) / 2.0
    prop = get_current_prop(thresh)
    # print(thresh,prop)
    farr = np.zeros_like(tarr) - 1
    farr[tarr > thresh] = tfill[1]
    farr[tarr <= thresh] = tfill[0]
    if iarr is not None:
        farr[iarr == 0] = -1.0e+30
    np.savetxt(orgarr_file, farr, fmt="%15.6E")
    return thresh, prop
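

# End-to-end sketch (hypothetical file name and values): category 1 should
# fill ~70% of cells with 0.1, category 2 the remaining ~30% with 10.0.
#
# >>> cat_dict = {1: (0.7, 0.1), 2: (0.3, 10.0)}
# >>> tfile, cfile = setup_threshold_pars("hk.dat", cat_dict, ".")
# >>> apply_threshold_pars(cfile)  # rewrites hk.dat from the threshold array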
