# /pyemu/utils/pst_from.py
from __future__ import print_function, division
import os
from pathlib import Path
import warnings
import platform
import numpy as np
import pandas as pd
import pyemu
from ..pyemu_warnings import PyemuWarning
import copy
import string


# the tolerable percent difference (100 * (max - min)/mean)
# used when checking that constant and zone type parameters are in fact constant (within
# a given zone)
DIRECT_PAR_PERCENT_DIFF_TOL = 1.0


def _get_datetime_from_str(sdt):
    # could be expanded if someone is feeling clever.
    if isinstance(sdt, str):
        warnings.warn(
            PyemuWarning(
                "Assuming passed reference start date time is "
                "year first str {0}".format(sdt)
            )
        )
        sdt = pd.to_datetime(sdt, yearfirst=True)
    assert isinstance(sdt, pd.Timestamp), "Error interpreting start_datetime"
    return sdt


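# Usage sketch (illustrative only, not part of the module): with pandas'
# year-first parsing, a string such as "2020-01-31" resolves to a Timestamp
# while a PyemuWarning is emitted about the assumed ordering:
#
#     _get_datetime_from_str("2020-01-31")                # -> Timestamp('2020-01-31 00:00:00')
#     _get_datetime_from_str(pd.Timestamp("2020-01-31"))  # passes through unchanged
#
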
def _check_var_len(var, n, fill=None):
    if not isinstance(var, list):
        var = [var]
    if fill is not None:
        if fill == "first":
            fill = var[0]
        elif fill == "last":
            fill = var[-1]
    nv = len(var)
    if nv < n:
        var.extend([fill for _ in range(n - nv)])
    return var


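# Behavior sketch (illustrative only): scalars are promoted to lists and
# padded out to length n with `fill` (or with the first/last element when
# fill="first"/"last"):
#
#     _check_var_len("a", 3)                      # -> ["a", None, None]
#     _check_var_len(["a", "b"], 4, fill="last")  # -> ["a", "b", "b", "b"]
#
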
class PstFrom(object):
    """construct high-dimensional PEST(++) interfaces with all the bells and whistles

    Args:
        original_d (`str` or Path): the path to a complete set of model input and output files
        new_d (`str` or Path): the path to where the model files and PEST interface files will be copied/built
        longnames (`bool`): flag to use parameter and observation names longer than standard
            PEST limits.  Default is True
        remove_existing (`bool`): flag to destroy any existing files and folders in `new_d`.  Default is False
        spatial_reference (varies): an object that facilitates geo-locating model cells based on index.  Default is None
        zero_based (`bool`): flag indicating whether the model uses zero-based indices.  Default is True
        start_datetime (`str` or Timestamp): a string that can be cast to a datetime instance that
            represents the starting datetime of the model
        tpl_subfolder (`str`): option to write template files to a subfolder within ``new_d``.
            Default is None (write template files directly to ``new_d``).

        chunk_len (`int`): the size of each "chunk" of files to spawn a multiprocessing.Pool member to process.
            On Windows, beware setting this much smaller than 50 because of the overhead associated with
            spawning the pool.  This value is added to the call to `apply_list_and_array_pars`. Default is 50
        echo (`bool`): flag to echo logger messages to the screen.  Default is True

    Note:
        This is the way...

    Example::

        pf = PstFrom("path_to_model_files","new_dir_with_pest_stuff",start_datetime="1-1-2020")
        pf.add_parameters("hk.dat")
        pf.add_observations("heads.csv")
        pf.build_pst("pest.pst")
        pe = pf.draw(100)
        pe.to_csv("prior.csv")

    """

    def __init__(
        self,
        original_d,
        new_d,
        longnames=True,
        remove_existing=False,
        spatial_reference=None,
        zero_based=True,
        start_datetime=None,
        tpl_subfolder=None,
        chunk_len=50,
        echo=True,
    ):

        self.original_d = Path(original_d)
        self.new_d = Path(new_d)
        self.original_file_d = None
        self.mult_file_d = None
        self.tpl_d = self.new_d
        if tpl_subfolder is not None:
            self.tpl_d = Path(self.new_d, tpl_subfolder)
        self.remove_existing = bool(remove_existing)
        self.zero_based = bool(zero_based)
        self._spatial_reference = spatial_reference
        self._spatial_ref_xarray = None
        self._spatial_ref_yarray = None
        self.spatial_reference = None
        if start_datetime is not None:
            start_datetime = _get_datetime_from_str(start_datetime)
        self.start_datetime = start_datetime
        self.geostruct = None
        self.par_struct_dict = {}
        # self.par_struct_dict_l = {}

        self.mult_files = []
        self.org_files = []

        self.par_dfs = []
        self.unique_parnmes = set()  # set of unique parameters added so far through add_parameters method
        self.obs_dfs = []
        self.py_run_file = "forward_run.py"
        self.mod_command = "python {0}".format(self.py_run_file)
        self.pre_py_cmds = []
        self.pre_sys_cmds = []  # a list of preprocessing commands to add to
        # the forward_run.py script.  Commands are executed with os.system()
        # within forward_run.py.
        self.mod_py_cmds = []
        self.mod_sys_cmds = []
        self.post_py_cmds = []
        self.post_sys_cmds = []  # a list of post-processing commands to add to
        # the forward_run.py script.  Commands are executed with os.system()
        # within forward_run.py.
        self.extra_py_imports = []
        self.tmp_files = []

        self.tpl_filenames, self.input_filenames = [], []
        self.ins_filenames, self.output_filenames = [], []
        self.insfile_obsmap = {}

        self.longnames = bool(longnames)
        self.logger = pyemu.Logger("PstFrom.log", echo=echo)

        self.logger.statement("starting PstFrom process")

        self._prefix_count = {}

        self.get_xy = None
        self.add_pars_callcount = 0
        self.ijwarned = {}
        self.initialize_spatial_reference()

        self._setup_dirs()
        self._parfile_relations = []
        self._pp_facs = {}
        self.pst = None
        self._function_lines_list = []  # each function is itself a list of lines
        self.direct_org_files = []
        self.ult_ubound_fill = 1.0e30
        self.ult_lbound_fill = -1.0e30
        self.chunk_len = int(chunk_len)
        self.py_functions = set()


    @property
    def parfile_relations(self):
        """build up a container of parameter file information.  Called
        programmatically..."""
        if isinstance(self._parfile_relations, list):
            pr = pd.concat(self._parfile_relations, ignore_index=True)
        else:
            pr = self._parfile_relations
        # quick checker
        for name, g in pr.groupby("model_file"):
            if g.sep.nunique() > 1:
                self.logger.warn(
                    "separator mismatch for {0}, seps passed {1}"
                    "".format(name, [s for s in g.sep.unique()])
                )
            if g.fmt.nunique() > 1:
                self.logger.warn(
                    "format mismatch for {0}, fmt passed {1}"
                    "".format(name, [f for f in g.fmt.unique()])
                )
            # if ultimate parameter bounds have been set for only one instance
            # of the model file we need to pass this through to all
            ubound = g.apply(
                lambda x: pd.Series(
                    {
                        k: v
                        for n, c in enumerate(x.use_cols)
                        for k, v in [["ubound{0}".format(c), x.upper_bound[n]]]
                    }
                )
                if x.use_cols is not None
                else pd.Series({k: v for k, v in [["ubound", x.upper_bound]]}),
                axis=1,
            )
            if ubound.nunique(0, False).gt(1).any():
                ub_min = ubound.min().fillna(self.ult_ubound_fill).to_dict()
                pr.loc[g.index, "upper_bound"] = g.use_cols.apply(
                    lambda x: [ub_min["ubound{0}".format(c)] for c in x]
                    if x is not None
                    else ub_min["ubound"]
                )
            # repeat for lower bounds
            lbound = g.apply(
                lambda x: pd.Series(
                    {
                        k: v
                        for n, c in enumerate(x.use_cols)
                        for k, v in [["lbound{0}".format(c), x.lower_bound[n]]]
                    }
                )
                if x.use_cols is not None
                else pd.Series({k: v for k, v in [["lbound", x.lower_bound]]}),
                axis=1,
            )
            if lbound.nunique(0, False).gt(1).any():
                lb_max = lbound.max().fillna(self.ult_lbound_fill).to_dict()
                pr.loc[g.index, "lower_bound"] = g.use_cols.apply(
                    lambda x: [lb_max["lbound{0}".format(c)] for c in x]
                    if x is not None
                    else lb_max["lbound"]
                )
        pr["zero_based"] = self.zero_based   # todo -- chase this out if going to file specific zero based def
        return pr

    def _generic_get_xy(self, args, **kwargs):
        i, j = self.parse_kij_args(args, kwargs)
        return i, j

    def _dict_get_xy(self, arg, **kwargs):
        if isinstance(arg, list):
            arg = tuple(arg)
        xy = self._spatial_reference.get(arg, None)
        if xy is None:
            arg_len = None
            try:
                arg_len = len(arg)
            except Exception as e:
                self.logger.lraise(
                    "PstFrom._dict_get_xy() error getting xy from arg:'{0}' - no len support".format(
                        arg
                    )
                )
            if arg_len == 1:
                xy = self._spatial_reference.get(arg[0], None)
            elif arg_len == 2 and arg[0] == 0:
                xy = self._spatial_reference.get(arg[1], None)
            elif arg_len == 2 and arg[1] == 0:
                xy = self._spatial_reference.get(arg[0], None)
            else:
                self.logger.lraise(
                    "PstFrom._dict_get_xy() error getting xy from arg:'{0}'".format(arg)
                )
        if xy is None:
            self.logger.lraise(
                "PstFrom._dict_get_xy() error getting xy from arg:'{0}' - still None".format(
                    arg
                )
            )
        return xy[0], xy[1]

    def _flopy_sr_get_xy(self, args, **kwargs):
        i, j = self.parse_kij_args(args, kwargs)
        if all([ij is None for ij in [i, j]]):
            return i, j
        else:
            if (hasattr(self._spatial_reference, "grid_type") and
                    self._spatial_reference.grid_type == 'vertex'):
                return (
                    self._spatial_reference.xcentergrid[i, ],
                    self._spatial_reference.ycentergrid[i, ],
                )
            else:
                return (
                    self._spatial_reference.xcentergrid[i, j],
                    self._spatial_reference.ycentergrid[i, j],
                )

    def _flopy_mg_get_xy(self, args, **kwargs):
        i, j = self.parse_kij_args(args, kwargs)
        if all([ij is None for ij in [i, j]]):
            return i, j
        else:
            if self._spatial_ref_xarray is None:
                self._spatial_ref_xarray = self._spatial_reference.xcellcenters
                self._spatial_ref_yarray = self._spatial_reference.ycellcenters
            if (hasattr(self._spatial_reference, "grid_type") and
                    self._spatial_reference.grid_type == 'vertex'):
                return (self._spatial_ref_xarray[i, ],
                        self._spatial_ref_yarray[i, ])

            return (self._spatial_ref_xarray[i, j],
                    self._spatial_ref_yarray[i, j])

    def parse_kij_args(self, args, kwargs):
        """parse args into kij indices.  Called programmatically"""
        if len(args) >= 2:
            ij_id = None
            if "ij_id" in kwargs:
                ij_id = kwargs["ij_id"]
            if ij_id is not None:
                i, j = [args[ij] for ij in ij_id]
            else:
                if not self.ijwarned[self.add_pars_callcount]:
                    self.logger.warn(
                        (
                            "get_xy() warning: position of i and j in index_cols "
                            "not specified, assume (i,j) are final two entries in "
                            "index_cols."
                        )
                    )
                    self.ijwarned[self.add_pars_callcount] = True
                # assume i and j are the final two entries in index_cols
                i, j = args[-2], args[-1]

                # vertex/list based i == cell number
                if (hasattr(self._spatial_reference, "grid_type") and
                    self._spatial_reference.grid_type == 'vertex'):
                    i, l = args[-1], args[-2]

        else:
            if not self.ijwarned[self.add_pars_callcount]:
                self.logger.warn(
                    (
                        "get_xy() warning: need locational information "
                        "(e.g. i,j) to generate xy, "
                        "insufficient index cols passed to interpret: {}. "
                        "i,j will be set to (None,None)"
                    ).format(str(args))
                )
                self.ijwarned[self.add_pars_callcount] = True
            i, j = None, None
        return i, j

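    # Behavior sketch (illustrative only): given index values pulled from
    # index_cols, e.g. args=(1, 4, 7) for (kper, i, j), the last two entries
    # are taken as (i, j) unless ij_id says otherwise:
    #
    #     pf.parse_kij_args((1, 4, 7), {})                 # -> (4, 7), with a warning
    #     pf.parse_kij_args((4, 7, 1), {"ij_id": (0, 1)})  # -> (4, 7)
    #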
    def initialize_spatial_reference(self):
        """process the spatial reference argument.  Called programmatically"""
        if self._spatial_reference is None:
            self.get_xy = self._generic_get_xy
        elif hasattr(self._spatial_reference, "xcentergrid") and hasattr(
            self._spatial_reference, "ycentergrid"
        ):
            self.get_xy = self._flopy_sr_get_xy
        elif hasattr(self._spatial_reference, "xcellcenters") and hasattr(
            self._spatial_reference, "ycellcenters"
        ):
            # support modelgrid style cell locs
            self._spatial_reference.xcentergrid = self._spatial_reference.xcellcenters
            self._spatial_reference.ycentergrid = self._spatial_reference.ycellcenters
            self.get_xy = self._flopy_mg_get_xy
        elif isinstance(self._spatial_reference, dict):
            self.logger.statement("dictionary-based spatial reference detected...")
            self.get_xy = self._dict_get_xy
        else:
            self.logger.lraise(
                "initialize_spatial_reference() error: unsupported spatial_reference"
            )
        self.spatial_reference = self._spatial_reference

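    # A minimal sketch (illustrative only) of the dictionary-based option:
    # keys are the index values that add_parameters() will pass through
    # get_xy() (e.g. cell ids or (i, j) tuples), values are (x, y) coordinates:
    #
    #     sr = {(0, 0): (50.0, 950.0), (0, 1): (150.0, 950.0)}
    #     pf = PstFrom("org_ws", "new_ws", spatial_reference=sr)
    #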
    def write_forward_run(self):
        """write the forward run script.  Called by build_pst()"""
        # update python commands with system style commands
        for alist, ilist in zip(
            [self.pre_py_cmds, self.mod_py_cmds, self.post_py_cmds],
            [self.pre_sys_cmds, self.mod_sys_cmds, self.post_sys_cmds],
        ):
            if ilist is None:
                continue

            if not isinstance(ilist, list):
                ilist = [ilist]
            for cmd in ilist:
                new_sys_cmd = "pyemu.os_utils.run(r'{0}')\n".format(cmd)
                if new_sys_cmd in alist:
                    self.logger.warn(
                        "sys_cmd '{0}' already in sys cmds, skipping...".format(
                            new_sys_cmd
                        )
                    )
                else:
                    self.logger.statement("forward_run line:{0}".format(new_sys_cmd))
                    alist.append(new_sys_cmd)

        with open(self.new_d / self.py_run_file, "w") as f:
            f.write(
                "import os\nimport multiprocessing as mp\nimport numpy as np"
                + "\nimport pandas as pd\n"
            )
            f.write("import pyemu\n")
            for ex_imp in self.extra_py_imports:
                f.write("import {0}\n".format(ex_imp))

            for func_lines in self._function_lines_list:
                f.write("\n")
                f.write("# function added thru PstFrom.add_py_function()\n")
                for func_line in func_lines:
                    f.write(func_line)
                f.write("\n")
            f.write("def main():\n")
            f.write("\n")
            s = "    "
            for tmp_file in self.tmp_files:
                f.write(s + "try:\n")
                f.write(s + "   os.remove(r'{0}')\n".format(tmp_file))
                f.write(s + "except Exception as e:\n")
                f.write(
                    s + "   print(r'error removing tmp file:{0}')\n".format(tmp_file)
                )
            for line in self.pre_py_cmds:
                f.write(s + line + "\n")
            for line in self.mod_py_cmds:
                f.write(s + line + "\n")
            for line in self.post_py_cmds:
                f.write(s + line + "\n")
            f.write("\n")
            f.write("if __name__ == '__main__':\n")
            f.write("    mp.freeze_support()\n    main()\n\n")

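    # For orientation (illustrative only), the generated forward_run.py has
    # this rough shape:
    #
    #     import os
    #     import multiprocessing as mp
    #     import numpy as np
    #     import pandas as pd
    #     import pyemu
    #
    #     def main():
    #         # tmp-file cleanup try/excepts, then pre_py_cmds,
    #         # mod_py_cmds and post_py_cmds, one per line
    #         ...
    #
    #     if __name__ == '__main__':
    #         mp.freeze_support()
    #         main()
    #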
    def _pivot_par_struct_dict(self):
        struct_dict = {}
        for gs, gps in self.par_struct_dict.items():
            par_dfs = []
            for _, l in gps.items():
                df = pd.concat(l)
                if "timedelta" in df.columns:
                    df["y"] = 0
                    df["x"] = df.timedelta.dt.days
                par_dfs.append(df)
            struct_dict[gs] = par_dfs
        return struct_dict

    def _rename_par_struct_dict(self, mapdict):
        for gs, gps in self.par_struct_dict.items():
            df_dict = {}
            for k, li in gps.items():
                tdf = []
                for df in li:
                    df.parnme.update(mapdict)
                    df = df.rename(index=mapdict)
                    tdf.append(df)
                df_dict[k] = tdf
            self.par_struct_dict[gs] = df_dict

    def build_prior(
        self, fmt="ascii", filename=None, droptol=None, chunk=None, sigma_range=6
    ):
        """Build the prior parameter covariance matrix

        Args:
            fmt (`str`): the file format to save to.  Default is "ascii", can also be
                "binary", "uncfile", "coo", or "none"
            filename (`str`): the filename to save the cov to
            droptol (`float`): absolute value of prior cov entries that are smaller than `droptol` are treated as
                zero.
            chunk (`int`): number of entries to write to binary/coo at once.  Default is None (write all
                elements at once)
            sigma_range (`int`): number of standard deviations represented by parameter bounds.  Default is 6 (99%
                confidence).  4 would be approximately 95% confidence bounds

        Returns:
            `pyemu.Cov`: the prior parameter covariance matrix

        Note:
            This method processes parameters by group names

            For really large numbers of parameters (>30K), this method
            will cause memory errors.  Luckily, in most cases, users
            only want this matrix to generate a prior parameter ensemble
            and the `PstFrom.draw()` is a better choice...

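        Example (a minimal sketch; assumes `pf.build_pst()` has already been
        called so `pf.pst` exists)::

            cov = pf.build_prior(fmt="none")  # build but skip writing to disk
            cov.to_ascii("prior.cov")
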
        """
        struct_dict = self._pivot_par_struct_dict()
        self.logger.log("building prior covariance matrix")
        if len(struct_dict) > 0:
            cov = pyemu.helpers.geostatistical_prior_builder(
                self.pst, struct_dict=struct_dict, sigma_range=sigma_range
            )
        else:
            cov = pyemu.Cov.from_parameter_data(self.pst, sigma_range=sigma_range)

        if filename is None:
            filename = self.pst.filename.with_suffix(".prior.cov")
        if fmt != "none":
            self.logger.statement(
                "saving prior covariance matrix to file {0}".format(filename)
            )
        if fmt == "ascii":
            cov.to_ascii(filename)
        elif fmt == "binary":
            cov.to_binary(filename, droptol=droptol, chunk=chunk)
        elif fmt == "uncfile":
            cov.to_uncfile(filename)
        elif fmt == "coo":
            cov.to_coo(filename, droptol=droptol, chunk=chunk)
        self.logger.log("building prior covariance matrix")
        return cov

    def draw(self, num_reals=100, sigma_range=6, use_specsim=False, scale_offset=True):
        """Draw a parameter ensemble from the distribution implied by the initial parameter values in the
        control file and the prior parameter covariance matrix.

        Args:
            num_reals (`int`): the number of realizations to draw
            sigma_range (`int`): number of standard deviations represented by parameter bounds.  Default is 6 (99%
                confidence).  4 would be approximately 95% confidence bounds
            use_specsim (`bool`): flag to use spectral simulation for grid-scale pars (highly recommended).
                Default is False
            scale_offset (`bool`): flag to apply scale and offset to parameter bounds before calculating prior variance.
                Default is True.  If you are using non-default scale and/or offset and you get an exception during
                draw, try changing this value to False.

        Returns:
            `pyemu.ParameterEnsemble`: a prior parameter ensemble

        Note:
            This method draws by parameter group

            If you are using grid-style parameters, please use spectral simulation (`use_specsim=True`)

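        Example (a minimal sketch; assumes `pf.build_pst()` has already been
        called)::

            pe = pf.draw(num_reals=100, use_specsim=True)
            pe.to_csv("prior.csv")
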
        """
        self.logger.log("drawing realizations")
        if self.pst.npar_adj == 0:
            self.logger.warn("no adjustable parameters, nothing to draw...")
            return
        # precondition {geostruct:{group:df}} dict to {geostruct:[par_dfs]}
        struct_dict = self._pivot_par_struct_dict()
        # list for holding grid style groups
        gr_pe_l = []
        subset = self.pst.parameter_data.index
        gr_par_pe = None
        if use_specsim:
            if not pyemu.geostats.SpecSim2d.grid_is_regular(
                self.spatial_reference.delr, self.spatial_reference.delc
            ):
                self.logger.lraise(
                    "draw() error: can't use spectral simulation with irregular grid"
                )
            self.logger.log("spectral simulation for grid-scale pars")
            # loop over geostructures defined in PstFrom object
            # (setup through add_parameters)
            for geostruct, par_df_l in struct_dict.items():
                par_df = pd.concat(par_df_l)  # force to single df
                par_df = par_df.loc[par_df.partype == "grid", :]
                if "i" in par_df.columns:  # need 'i' and 'j' for specsim
                    grd_p = pd.notna(par_df.i)
                else:
                    grd_p = np.array([0])
                # if there are grid pars (also grid pars with i,j info)
                if grd_p.sum() > 0:
                    # select pars to use specsim for
                    gr_df = par_df.loc[grd_p]
                    gr_df = gr_df.astype({"i": int, "j": int})  # make sure int
                    # (won't be if there were nans in concatenated df)
                    if len(gr_df) > 0:
                        # get specsim object for this geostruct
                        ss = pyemu.geostats.SpecSim2d(
                            delx=self.spatial_reference.delr,
                            dely=self.spatial_reference.delc,
                            geostruct=geostruct,
                        )
                        # specsim draw (returns df)
                        gr_pe1 = ss.grid_par_ensemble_helper(
                            pst=self.pst,
                            gr_df=gr_df,
                            num_reals=num_reals,
                            sigma_range=sigma_range,
                            logger=self.logger,
                        )
                        # append to list of specsim drawn pars
                        gr_pe_l.append(gr_pe1)
                        # rebuild struct_dict entry for this geostruct
                        # to not include specsim pars
                        struct_dict[geostruct] = []
                        # loop over all in list associated with geostruct
                        for p_df in par_df_l:
                            # if pars are not in the specsim pars just created
                            # assign them to this struct_dict entry
                            # needed if non-specsim pars are linked to same geostruct
                            if not p_df.index.isin(gr_df.index).all():
                                struct_dict[geostruct].append(p_df)
                            else:
                                subset = subset.difference(p_df.index)
            if len(gr_pe_l) > 0:
                gr_par_pe = pd.concat(gr_pe_l, axis=1)
            self.logger.log("spectral simulation for grid-scale pars")
        # draw remaining pars based on their geostruct
        if not subset.empty:
            self.logger.log(f"Drawing {len(subset)} non-specsim pars")
            pe = pyemu.helpers.geostatistical_draws(
                self.pst,
                struct_dict=struct_dict,
                num_reals=num_reals,
                sigma_range=sigma_range,
                scale_offset=scale_offset,
                subset=subset
            )
            self.logger.log(f"Drawing {len(subset)} non-specsim pars")
            if gr_par_pe is not None:
                self.logger.log("Joining specsim and non-specsim pars")
                exist = gr_par_pe.columns.intersection(pe.columns)
                pe = pe._df.drop(exist, axis=1)  # specsim pars take precedence
                pe = pd.concat([pe, gr_par_pe], axis=1)
                pe = pyemu.ParameterEnsemble(pst=self.pst, df=pe)
                self.logger.log("Joining specsim and non-specsim pars")
        else:
            pe = pyemu.ParameterEnsemble(pst=self.pst, df=gr_par_pe)
        self.logger.log("drawing realizations")
        return pe

    def build_pst(self, filename=None, update=False, version=1):
        """Build control file from i/o files in PstFrom object.
        Warning: This builds a pest control file from scratch, overwriting
        anything already in the self.pst object and anything already written to `filename`

        Args:
            filename (`str`): the filename to save the control file to.
                If None, the name is formed from the `PstFrom.original_d`,
                the original directory name from which the forward model
                was extracted.  Default is None.
                The control file is saved in the `PstFrom.new_d` directory.
            update (`bool` or `str`): flag to add to existing Pst object and
                rewrite. If string {'pars', 'obs'}, just update the respective
                components of the Pst. Default is False - build from PstFrom
                components.
            version (`int`): control file version to write.  Default is 1.
                If None, the pst is not written to file during the build_pst() call --
                handy when the control file is huge and the pst object will be modified
                again before running.
        Note:
            This builds a pest control file from scratch, overwriting anything already
            in the self.pst object and anything already written to `filename`

            The new pest control file is assigned an NOPTMAX value of 0

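        Example (mirroring the class-level example)::

            pst = pf.build_pst("pest.pst")
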
        """
        import cProfile
        pd.set_option('display.max_rows', 500)
        pd.set_option('display.max_columns', 500)
        pd.set_option('display.width', 1000)

        par_data_cols = pyemu.pst_utils.pst_config["par_fieldnames"]
        obs_data_cols = pyemu.pst_utils.pst_config["obs_fieldnames"]
        if update:
            if self.pst is None:
                self.logger.warn(
                    "Can't update Pst object not initialised. "
                    "Setting update to False"
                )
                update = False
            else:
                if filename is None:
                    filename = get_filepath(self.new_d, self.pst.filename)
        else:
            if filename is None:
                filename = Path(self.new_d, self.original_d.name).with_suffix(".pst")
        filename = get_filepath(self.new_d, filename)

        # if os.path.dirname(filename) in ["", "."]:
        #    filename = os.path.join(self.new_d, filename)

        if update:
            pst = self.pst
            if update is True:
                update = {"pars": False, "obs": False}
            elif isinstance(update, str):
                update = {update: True}
            elif isinstance(update, (set, list)):
                update = {s: True for s in update}
            uupdate = True
        else:
            update = {"pars": False, "obs": False}
            uupdate = False
            pst = pyemu.Pst(filename, load=False)

        # TODO should this be under an if:? in case updating and prior info has been set
        pst.prior_information = pst.null_prior.merge(pd.DataFrame(
            data=[], columns=pst.prior_fieldnames))

        if "pars" in update.keys() or not uupdate:
            if len(self.par_dfs) > 0:
                # parameter data from object
                full_par_dat = pd.concat(self.par_dfs)
                par_data = full_par_dat.loc[:, par_data_cols]
                # info relating parameter multiplier files to model input files
                parfile_relations = self.parfile_relations
                parfile_relations.to_csv(self.new_d / "mult2model_info.csv")
                if not any(
                    ["apply_list_and_array_pars" in s for s in self.pre_py_cmds]
                ):
                    self.pre_py_cmds.insert(
                        0,
                        "pyemu.helpers.apply_list_and_array_pars("
                        "arr_par_file='mult2model_info.csv',chunk_len={0})".format(
                            self.chunk_len
                        ),
                    )
            else:
                par_data = pyemu.pst_utils._populate_dataframe(
                    [], pst.par_fieldnames, pst.par_defaults, pst.par_dtype
                )
            # set parameter data
            # some preconditioning to support re-entry here if more add_par
            # calls are made with update/rebuild pst
            shtmx = 0
            gshtmx = 0
            if pst.parameter_data is not None:
                # copy existing par data (in case it has been edited)
                par_data_orig = pst.parameter_data.copy()
                if "longname" in par_data_orig.columns:
                    # Support existing long names mapping
                    par_data_orig = par_data_orig.set_index(
                        "longname")
                    # starting point for updated mapping
                    shtmx = par_data_orig.parnme.str.strip('p').astype(int).max() + 1
                    gshtmx = par_data_orig.pargp.str.strip('pg').astype(int).max() + 1
                # index of new pars (might need this later)
                new_par_data = par_data.index.difference(par_data_orig.index)
            else:
                new_par_data = slice(None)
            # build or update par data
            pst.parameter_data = pd.concat(
                [pst.parameter_data,
                 par_data.loc[new_par_data]], axis=0)
            # pst.template_files = self.tpl_filenames
            # pst.input_files = self.input_filenames
            pst.model_input_data = pd.DataFrame(
                {"pest_file": self.tpl_filenames, "model_file": self.input_filenames},
                index=self.tpl_filenames,
            )
            # rename pars and obs in case short names are desired
            if not self.longnames:
                self.logger.log("Converting parameters to shortnames")
                # pull out again for shorthand access
                par_data = pst.parameter_data
                # new pars will not be mapped, start this mapping
                npd = par_data.loc[new_par_data]
                par_data.loc[npd.index, 'longname'] = npd.parnme
                # get short names (using existing names as index starting point)
                par_data.loc[npd.index, "shortname"] = [
                    'p' + f"{i}" for i in range(shtmx, shtmx+len(npd))
                ]
                # set to dict
                parmap = par_data.loc[npd.index, "shortname"].to_dict()
                # get shortlist of pars for each tpl
                tpl_shortlist = full_par_dat.loc[npd.index].groupby(
                    'tpl_filename').parnme.groups
                tpl_shortlist = {str(k): v.to_list()
                                 for k, v in tpl_shortlist.items()}
                # rename parnames and propagate to tpls etc.
                self.logger.log("Renaming parameters for shortnames")
                # pr = cProfile.Profile()
                # pr.enable()
                pst.rename_parameters(parmap,
                                      pst_path=self.new_d,
                                      tplmap=tpl_shortlist)
                # pr.disable()
                self.logger.log("Renaming parameters for shortnames")
                # rename in struct dicts
                self._rename_par_struct_dict(parmap)
                # save whole shortname-longname mapping (will overwrite previous)
                par_data.set_index("shortname")["longname"].to_csv(
                    filename.with_name('parlongname.map'))
                npd.index = npd.index.map(parmap)

                # build group mapping df
                pargpmap = pd.DataFrame(npd.pargp.unique(),
                                        columns=['longname'])
                # shortnames using previous as a starting point (if existing)
                pargpmap["shortname"] = "pg" + (pargpmap.index+gshtmx).astype(str)
                pargpmap_dict = pargpmap.set_index('longname').shortname.to_dict()
                par_data.loc[npd.index, "pglong"] = npd.pargp
                par_data.loc[npd.index, 'pargp'] = npd.pargp.map(pargpmap_dict)
                par_data.groupby('pargp').pglong.first().to_csv(
                    filename.with_name('pglongname.map'))
                self.logger.log("Converting parameters to shortnames")
        if "obs" in update.keys() or not uupdate:
            if len(self.obs_dfs) > 0:
                full_obs_data = pd.concat(self.obs_dfs)
                obs_data = full_obs_data.loc[:, obs_data_cols]
            else:
                obs_data = pyemu.pst_utils._populate_dataframe(
                    [], pst.obs_fieldnames, pst.obs_defaults, pst.obs_dtype
                )
                obs_data.loc[:, "obsnme"] = []
                obs_data.index = []
            # set observation data
            # some preconditioning to support re-entry here if more add_obs
            # calls are made with update/rebuild pst
            shtmx = 0
            gshtmx = 0
            if pst.observation_data is not None:
                # copy existing obs data (in case it has been edited)
                obs_data_orig = pst.observation_data.copy()
                if "longname" in obs_data_orig.columns:
                    # Support existing long names mapping
                    obs_data_orig = obs_data_orig.set_index(
                        "longname")
                    # starting point for updated mapping
                    shtmx = obs_data_orig.obsnme.str.lstrip('ob').astype(int).max() + 1
                    gshtmx = obs_data_orig.obgnme.str.lstrip('l_obg').astype(int).max() + 1
                # index of new obs (might need this later)
                new_obs_data = obs_data.index.difference(obs_data_orig.index)
            else:
                new_obs_data = slice(None)
            # build or update obs data
            pst.observation_data = pd.concat(
                [pst.observation_data,
                 obs_data.loc[new_obs_data]], axis=0)
            # pst.instruction_files = self.ins_filenames
            # pst.output_files = self.output_filenames
            pst.model_output_data = pd.DataFrame(
                {"pest_file": self.ins_filenames, "model_file": self.output_filenames},
                index=self.ins_filenames,
            )
            # rename pars and obs in case short names are desired
            if not self.longnames:
                self.logger.log("Converting observations to shortnames")
                # pull out again for shorthand access
                obs_data = pst.observation_data
                # new obs will not be mapped so start this mapping
                nod = obs_data.loc[new_obs_data]
                obs_data.loc[nod.index, "longname"] = nod.obsnme
                # get short names (using existing names as index starting point)
                obs_data.loc[nod.index, "shortname"] = [
                    'ob' + f"{i}" for i in range(shtmx, shtmx+len(nod))
                ]
                obsmap = obs_data.loc[nod.index, "shortname"].to_dict()
                insmap = {str(Path(self.new_d, k)): v
                          for k, v in self.insfile_obsmap.items()
                          if len(nod.index.intersection(v)) > 0}
                # rename obsnames and propagate to ins files
                # pr2 = cProfile.Profile()
                # pr2.enable()
                self.logger.log("Renaming observations for shortnames")
                pst.rename_observations(obsmap,
                                        pst_path=self.new_d,
                                        insmap=insmap)
                self.logger.log("Renaming observations for shortnames")
                # pr2.disable()
                obs_data.set_index("shortname")["longname"].to_csv(
                    filename.with_name('obslongname.map'))
                nod.index = nod.index.map(obsmap)

                # build group mapping df
                obgpmap = pd.DataFrame(nod.obgnme.unique(),
                                       columns=['longname'])
                # shortnames using previous as a starting point (if existing)
                obgpmap["shortname"] = "obg" + (obgpmap.index+gshtmx).astype(str)
                ltobs = obgpmap.longname.str.startswith(
                    pyemu.Pst.get_constraint_tags('lt')
                )
                obgpmap.loc[ltobs, "shortname"] = "l_" + obgpmap.loc[ltobs, "shortname"]
                gtobs = obgpmap.longname.str.startswith(
                    pyemu.Pst.get_constraint_tags('gt')
                )
                obgpmap.loc[gtobs, "shortname"] = "g_" + obgpmap.loc[gtobs, "shortname"]
                obgpmap_dict = obgpmap.set_index('longname').shortname.to_dict()
                obs_data.loc[nod.index, "oglong"] = nod.obgnme
                obs_data.loc[nod.index, 'obgnme'] = nod.obgnme.map(obgpmap_dict)
                obs_data.groupby('obgnme').oglong.first().to_csv(
                    filename.with_name('oglongname.map'))
                self.logger.log("Converting observations to shortnames")
                # pr.disable()
                # pr.print_stats(sort="cumtime")
            # obs_data.sort_index(inplace=True)  # TODO

        if not uupdate:
            pst.model_command = self.mod_command

        pst.control_data.noptmax = 0
        self.pst = pst
        if version is not None:
            self.pst.write(filename, version=version)
        self.write_forward_run()
        pst.try_parse_name_metadata()
        # pr.print_stats(sort="cumtime")
        # pr2.print_stats(sort="cumtime")
        return pst

    def _setup_dirs(self):
        self.logger.log("setting up dirs")
        if not os.path.exists(self.original_d):
            self.logger.lraise(f"original_d '{self.original_d}' not found")
        if not os.path.isdir(self.original_d):
            self.logger.lraise(f"original_d '{self.original_d}' is not a directory")
        if self.new_d.exists():
            if self.remove_existing:
                self.logger.log(f"removing existing new_d '{self.new_d}'")
                pyemu.os_utils._try_remove_existing(self.new_d)
                self.logger.log(f"removing existing new_d '{self.new_d}'")
            else:
                self.logger.lraise(
                    f"new_d '{self.new_d}' already exists - use remove_existing=True"
                )

        self.logger.log(
            f"copying original_d '{self.original_d}' to new_d '{self.new_d}'"
        )
        pyemu.os_utils._try_copy_dir(self.original_d, self.new_d)
        self.logger.log(
            f"copying original_d '{self.original_d}' to new_d '{self.new_d}'"
        )

        self.original_file_d = self.new_d / "org"
        if self.original_file_d.exists():
            self.logger.lraise(f"'org' subdir already exists in new_d '{self.new_d}'")
        self.original_file_d.mkdir(exist_ok=True)

        self.mult_file_d = self.new_d / "mult"
        if self.mult_file_d.exists():
            self.logger.lraise(f"'mult' subdir already exists in new_d '{self.new_d}'")
        self.mult_file_d.mkdir(exist_ok=True)

        self.tpl_d.mkdir(exist_ok=True)

        self.logger.log("setting up dirs")

    def _par_prep(
        self,
        filenames,
        index_cols,
        use_cols,
        fmts=None,
        seps=None,
        skip_rows=None,
        c_char=None,
    ):

        # todo: cast str column names, index_cols and use_cols to lower if str?
        # todo: check that all index_cols and use_cols are the same type
        file_dict = {}
        fmt_dict = {}
        sep_dict = {}
        skip_dict = {}
        (
            filenames,
            fmts,
            seps,
            skip_rows,
            index_cols,
            use_cols,
        ) = self._prep_arg_list_lengths(
            filenames, fmts, seps, skip_rows, index_cols, use_cols
        )
        storehead = None
        if index_cols is not None:
            for filename, sep, fmt, skip in zip(filenames, seps, fmts, skip_rows):
                # cast to pathlib.Path instance
                # input file path may or may not include original_d
                # input_filepath = get_filepath(self.original_d, filename)
                rel_filepath = get_relative_filepath(self.original_d, filename)
                dest_filepath = self.new_d / rel_filepath

                # data file in dest_ws/org/ folder
                org_file = self.original_file_d / rel_filepath.name

                self.logger.log(f"loading list-style {dest_filepath}")
                df, storehead, _ = self._load_listtype_file(
                    rel_filepath, index_cols, use_cols, fmt, sep, skip, c_char
                )
                # Currently just passing through comments in header (i.e. before the table data)
                stkeys = np.array(
                    sorted(storehead.keys())
                )  # comment line numbers as sorted array
                if (
                    stkeys.size > 0 and stkeys.min() == 0
                ):  # TODO pass comment_char through to par_file_rel so mid-table comments can be preserved
                    skip = 1 + np.sum(np.diff(stkeys) == 1)
                # looping over model input filenames
                if fmt.lower() == "free":
                    if sep is None:
                        sep = " "
                        if rel_filepath.suffix.lower() == ".csv":
                            sep = ","
                if pd.api.types.is_integer_dtype(df.columns):  # df.columns.is_integer(): # really!???
                    hheader = False
                else:
                    hheader = df.columns

                self.logger.statement(
                    f"loaded list-style '{dest_filepath}' of shape {df.shape}"
                )
                # TODO BH: do we need to be careful of the format of the model
                #  files? -- probs not necessary for the version in
                #  original_file_d - but for the eventual product model file,
                #  it might be format sensitive - yuck
                # Update, BH: I think the `original files` saved can always
                # be comma delim --they are never directly used
                # as model inputs-- as long as we pass the required model
                # input file format (and sep), right?
                # write orig version of input file to `org` (e.g.) dir

                # make any subfolders if they don't exist
                # org_path = Path(self.original_file_d, rel_file_path.parent)
                # org_path.mkdir(exist_ok=True)

                if len(storehead) != 0:
                    kwargs = {}
                    if "win" in platform.platform().lower():
                        kwargs = {"lineterminator": "\n"}
                    with open(org_file, "w") as fp:
                        lc = 0
                        fr = 0
                        for key in sorted(storehead.keys()):
                            if key > lc:
                                self.logger.warn(
                                    "Detected mid-table comment "
                                    f"on line {key + 1} of tabular model file, "
                                    "comment will be lost"
                                )
                                lc += 1
                                continue
                                # TODO if we want to preserve mid-table comments,
                                #  these lines might help - will also need to
                                #  pass comment_char through so it can be
                                #  used by the apply methods
                                # to = key - lc
                                # df.iloc[fr:to].to_csv(
                                #     fp, sep=',', mode='a', header=hheader, # todo - presence of header may cause an issue with this
                                #     **kwargs)
                                # lc += to - fr
                                # fr = to
                            fp.write(storehead[key])
                            fp.flush()
                            lc += 1
                        # if lc < len(df):  # finish off remaining table (todo: when supporting mid-table comments...)
                        df.iloc[fr:].to_csv(
                            fp,
                            sep=",",
                            mode="a",
                            header=hheader,
                            index=False,
                            **kwargs,
                        )
                else:
                    df.to_csv(
                        org_file,
                        index=False,
                        sep=",",
                        header=hheader,
                    )
                file_dict[rel_filepath] = df.apply(pd.to_numeric, errors='ignore')  # make sure numeric (if reasonable)
                fmt_dict[rel_filepath] = fmt
                sep_dict[rel_filepath] = sep
                skip_dict[rel_filepath] = skip
                self.logger.log(f"loading list-style {dest_filepath}")

            # check for compatibility
            fnames = list(file_dict.keys())
            for i in range(len(fnames)):
                for j in range(i + 1, len(fnames)):
                    if file_dict[fnames[i]].shape[1] != file_dict[fnames[j]].shape[1]:
                        self.logger.lraise(
                            f"shape mismatch for list style, '{fnames[i]}' "
                            f"shape {file_dict[fnames[i]].shape[1]} != "
                            f"'{fnames[j]}' "
                            f"shape {file_dict[fnames[j]].shape[1]}"
                        )
        else:  # load array type files
            # loop over model input files
            for input_filena, sep, fmt, skip in zip(filenames, seps, fmts, skip_rows):
                # cast to pathlib.Path instance
                # input file path may or may not include original_d
                input_filena = get_filepath(self.original_d, input_filena)
                if fmt.lower() == "free":
                    # cast to string to work with pathlib objects
                    if input_filena.suffix.lower() == ".csv":
                        if sep is None:
                            sep = ","
                else:
                    # TODO - or not?
                    raise NotImplementedError(
                        "Only free format array par files currently supported"
                    )
                # file path relative to model workspace
                rel_filepath = input_filena.relative_to(self.original_d)
                dest_filepath = self.new_d / rel_filepath
                self.logger.log(f"loading array {dest_filepath}")
                if not dest_filepath.exists():
                    self.logger.lraise(f"par filename '{dest_filepath}' not found")
                # read array type input file
                arr = np.loadtxt(dest_filepath, delimiter=sep, ndmin=2)
                self.logger.log(f"loading array {dest_filepath}")
                self.logger.statement(
                    f"loaded array '{input_filena}' of shape {arr.shape}"
                )
                # save copy of input file to `org` dir
                # make any subfolders if they don't exist
                np.savetxt(self.original_file_d / rel_filepath.name, arr)
                file_dict[rel_filepath] = arr
                fmt_dict[rel_filepath] = fmt
                sep_dict[rel_filepath] = sep
                skip_dict[rel_filepath] = skip
            # check for compatibility
            fnames = list(file_dict.keys())
            for i in range(len(fnames)):
                for j in range(i + 1, len(fnames)):
                    if file_dict[fnames[i]].shape != file_dict[fnames[j]].shape:
                        self.logger.lraise(
                            f"shape mismatch for array style, '{fnames[i]}' "
                            f"shape {file_dict[fnames[i]].shape} != "
                            f"'{fnames[j]}' "
                            f"shape {file_dict[fnames[j]].shape}"
                        )
        return (
            index_cols,
            use_cols,
            file_dict,
            fmt_dict,
            sep_dict,
            skip_dict,
            storehead,
        )

    def _next_count(self, prefix):
        if prefix not in self._prefix_count:
            self._prefix_count[prefix] = 0
        else:
            self._prefix_count[prefix] += 1

        return self._prefix_count[prefix]

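    # Behavior sketch (illustrative only): a per-prefix counter used to keep
    # generated names unique across repeated add_parameters() calls:
    #
    #     pf._next_count("hk")  # -> 0
    #     pf._next_count("hk")  # -> 1
    #     pf._next_count("ss")  # -> 0
    #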
1121
    def add_py_function(
9✔
1122
            self, file_name, call_str=None, is_pre_cmd=True,
1123
            function_name=None
1124
    ):
1125
        """add a python function to the forward run script
1126

1127
        Args:
1128
            file_name (`str`): a python source file
1129
            call_str (`str`): the call string for python function in
1130
                `file_name`.
1131
                `call_str` will be added to the forward run script, as is.
1132
            is_pre_cmd (`bool` or `None`): flag to include `call_str` in
1133
                PstFrom.pre_py_cmds.  If False, `call_str` is
1134
                added to PstFrom.post_py_cmds instead. If passed as `None`,
1135
                then the function `call_str` is added to the forward run
1136
                script but is not called.  Default is True.
1137
            function_name (`str`): DEPRECATED, use `call_str` instead
1138
        Returns:
1139
            None
1140

1141
        Note:
1142
            `call_str` is expected to reference a standalone function
1143
            that contains all the imports it needs or these imports
1144
            should have been added to the forward run script through the
1145
            `PstFrom.extra_py_imports` list.
1146

1147
            This function adds the `call_str` call to the forward
1148
            run script (either as a pre or post command, or as a function not
1149
            directly called by main). It is up to users
1150
            to make sure `call_str` is a valid python function call
1151
            that includes the parentheses and requisite arguments.
1152

1153
            This function expects "def " + `function_name` to be flushed left
1154
            at the outer most indentation level
1155

1156
        Example::
1157

1158
            pf = PstFrom()
1159
            # add the function "mult_well_function" from the script file "preprocess.py" as a
1160
            # command to run before the model is run
1161
            pf.add_py_function("preprocess.py",
1162
                               "mult_well_function(arg1='userarg')",
1163
                               is_pre_cmd = True)
1164
            # add the post processor function "made_it_good" from the script file "post_processors.py"
1165
            pf.add_py_function("post_processors.py","make_it_good()",is_pre_cmd=False)
1166
            # add the function "another_func" from the script file "utils.py" as a
1167
            # function not called by main
1168
            pf.add_py_function("utils.py","another_func()",is_pre_cmd=None)
1169

1170

1171
        """
1172
        if function_name is not None:
9✔
1173
            warnings.warn(
×
1174
                "add_py_function(): 'function_name' argument is deprecated, "
1175
                "use 'call_str' instead",
1176
                DeprecationWarning,
1177
            )
1178
            if call_str is None:
×
1179
                call_str = function_name
×
1180
        if call_str is None:
9✔
1181
            self.logger.lraise(
×
1182
                "add_py_function(): No function call string passed in arg " "'call_str'"
1183
            )
1184
        if not os.path.exists(file_name):
9✔
1185
            self.logger.lraise(
×
1186
                "add_py_function(): couldnt find python source file '{0}'".format(
1187
                    file_name
1188
                )
1189
            )
1190
        if "(" not in call_str or ")" not in call_str:
9✔
1191
            self.logger.lraise(
×
1192
                "add_py_function(): call_str '{0}' missing paretheses".format(call_str)
1193
            )
1194
        function_name = call_str[
9✔
1195
            : call_str.find("(")
1196
        ]  # strip to first occurrence of '('
1197
        if function_name in self.py_functions:
9✔
1198
            # todo: could add more duplication options here: override, increment
1199
            warnings.warn(
8✔
1200
                f"add_py_function(): {function_name} already "
1201
                f"in forward run python functions, not overriding here, "
1202
                f"original will be maintained",
1203
                PyemuWarning,
1204
            )
1205
        else:
1206
            func_lines = []
9✔
1207
            search_str = "def " + function_name + "("
9✔
1208
            abet_set = set(string.ascii_uppercase)
9✔
1209
            abet_set.update(set(string.ascii_lowercase))
9✔
1210
            with open(file_name, "r") as f:
9✔
1211
                while True:
4✔
1212
                    line = f.readline()
9✔
1213
                    if line == "":
9✔
1214
                        self.logger.lraise(
×
1215
                            "add_py_function(): EOF while searching for function '{0}'".format(
1216
                                search_str
1217
                            )
1218
                        )
1219
                    if line.startswith(
9✔
1220
                        search_str
1221
                    ):  # case sens and no strip since 'def' should be flushed left
1222
                        func_lines.append(line)
9✔
1223
                        while True:
4✔
1224
                            line = f.readline()
9✔
1225
                            if line == "":
9✔
1226
                                break
×
1227
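                            # a flush-left alphabetic character marks the
                            # start of the next top-level statement, i.e.
                            # the end of the captured function body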
                            if line[0] in abet_set:
9✔
1228
                                break
9✔
1229
                            func_lines.append(line)
9✔
1230
                        break
4✔
1231

1232
            self._function_lines_list.append(func_lines)
9✔
1233
        if is_pre_cmd is True:
9✔
1234
            self.pre_py_cmds.append(call_str)
×
1235
        elif is_pre_cmd is False:
9✔
1236
            self.post_py_cmds.append(call_str)
9✔
1237
        else:
1238
            self.logger.warn(
8✔
1239
                "add_py_function() command: {0} is not being called directly".format(
1240
                    call_str
1241
                )
1242
            )
1243
        self.py_functions.update({function_name})
9✔
1244

1245
    def _process_array_obs(
9✔
1246
        self,
1247
        out_filename,
1248
        ins_filename,
1249
        prefix,
1250
        ofile_sep,
1251
        ofile_skip,
1252
        zone_array,
1253
    ):
1254
        """private method to setup observations for an array-style file
1255

1256
        Args:
1257
            out_filename (`str`): the output array file
1258
            ins_filename (`str`): the instruction file to create
1259
            prefix (`str`): the prefix to add to the observation names and also to use as the
1260
                observation group name.
1261
            ofile_sep (`str`): the separator in the output file.  This is currently just a
1262
                placeholder arg, only whitespace-delimited files are supported
1263
            ofile_skip (`int`): number of header and/or comment lines to skip at the
1264
                top of the output file
1265
            zone_array (numpy.ndarray): an integer array used to identify positions to skip in the
1266
                output array
1267

1268
        Returns:
1269
            None
1270

1271
        Note:
1272
            This method is called programmatically by `PstFrom.add_observations()`
1273

1274
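        Example::

            # a minimal sketch (internal method, normally reached via
            # `add_observations()`); "conc.dat" and `zones` are hypothetical
            pf._process_array_obs("conc.dat", "conc.dat.ins", prefix="conc",
                                  ofile_sep=None, ofile_skip=None,
                                  zone_array=zones)
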
        """
1275
        if ofile_sep is not None:
8✔
1276
            self.logger.lraise(
×
1277
                "array obs are currently only supported for whitespace delim"
1278
            )
1279
        if not os.path.exists(self.new_d / out_filename):
8✔
1280
            self.logger.lraise(
×
1281
                "array obs output file '{0}' not found".format(out_filename)
1282
            )
1283
        if len(prefix) == 0:
8✔
1284
            prefix = Path(out_filename).stem
8✔
1285
        f_out = open(self.new_d / out_filename, "r")
8✔
1286
        f_ins = open(self.new_d / ins_filename, "w")
8✔
1287
        f_ins.write("pif ~\n")
8✔
1288
        iline = 0
8✔
1289
        if ofile_skip is not None:
8✔
1290
            ofile_skip = int(ofile_skip)
×
1291
            f_ins.write("l{0}\n".format(ofile_skip))
×
1292
            # read the output file forward
1293
            for _ in range(ofile_skip):
×
1294
                f_out.readline()
×
1295
                iline += 1
×
1296
        onames, ovals = [], []
8✔
1297
        iidx = 0
8✔
1298
        for line in f_out:
8✔
1299
            raw = line.split()
8✔
1300
            f_ins.write("l1 ")
8✔
1301
            for jr, r in enumerate(raw):
8✔
1302

1303
                try:
8✔
1304
                    fr = float(r)
8✔
1305
                except Exception as e:
×
1306
                    self.logger.lraise(
×
1307
                        "array obs error casting: '{0}' on line {1} to a float: {2}".format(
1308
                            r, iline, str(e)
1309
                        )
1310
                    )
1311

1312
                zval = None
8✔
1313
                if zone_array is not None:
8✔
1314
                    try:
8✔
1315
                        zval = zone_array[iidx, jr]
8✔
1316
                    except Exception as e:
×
1317
                        self.logger.lraise(
×
1318
                            "array obs error getting zone value for i,j {0},{1} in line {2}: {3}".format(
1319
                                iidx, jr, iline, str(e)
1320
                            )
1321
                        )
1322
                    if zval <= 0:
8✔
1323
                        f_ins.write(" !dum! ")
8✔
1324
                        if jr < len(raw) - 1:
8✔
1325
                            f_ins.write(" w ")
8✔
1326
                        continue
8✔
1327

1328
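                # e.g. "oname:conc_otype:arr_i:3_j:7" for prefix "conc"
                # (a "_zone:<z>" suffix is appended when zone_array is given)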
                oname = "oname:{0}_otype:arr_i:{1}_j:{2}".format(prefix, iidx, jr)
8✔
1329
                if zval is not None:
8✔
1330
                    oname += "_zone:{0}".format(zval)
8✔
1331
                f_ins.write(" !{0}! ".format(oname))
8✔
1332
                if jr < len(raw) - 1:
8✔
1333
                    f_ins.write(" w ")
8✔
1334
            f_ins.write("\n")
8✔
1335
            iline += 1
8✔
1336
            iidx += 1
8✔
1337

1338
    def add_observations(
9✔
1339
        self,
1340
        filename,
1341
        insfile=None,
1342
        index_cols=None,
1343
        use_cols=None,
1344
        use_rows=None,
1345
        prefix="",
1346
        ofile_skip=None,
1347
        ofile_sep=None,
1348
        rebuild_pst=False,
1349
        obsgp=None,
1350
        zone_array=None,
1351
        includes_header=True,
1352
    ):
1353
        """
1354
        Add values in output files as observations to PstFrom object
1355

1356
        Args:
1357
            filename (`str`): model output file name(s) to set up
1358
                as observations. By default filename should give relative
1359
                location from top level of pest template directory
1360
                (`new_d` as passed to `PstFrom()`).
1361
            insfile (`str`): desired instructions file filename
1362
            index_cols (`list`-like or `int`): columns to denote are indices for obs
1363
            use_cols (`list`-like or `int`): columns to set up as obs. If None,
1364
                and `index_cols` is not None (i.e list-style obs assumed),
1365
                observations will be set up for all columns in `filename` that
1366
                are not in `index_cols`.
1367
            use_rows (`list`-like or `int`): select only specific row of file for obs
1368
            prefix (`str`): prefix for observation names
1369
            ofile_skip (`int`): number of lines to skip in model output file
1370
            ofile_sep (`str`): delimiter in output file.
1371
                If `None`, the delimiter is eventually governed by the file
1372
                extension (`,` for .csv).
1373
            rebuild_pst (`bool`): (Re)Construct PstFrom.pst object after adding
1374
                new obs
1375
            obsgp (`str` or `list`-like): observation group name(s). If type
1376
                `str` (or list of len == 1) and `use_cols` is None (i.e. all
1377
                non-index cols are to be set up as obs), the same group name
1378
                will be mapped to all obs in the call. If None, the obs group name
1379
                will be derived from the base of the constructed observation
1380
                name. If passed as `list` (and len(`list`) = `n` > 1), the
1381
                entries in obsgp will be interpreted to explicitly define the
1382
                groups for the first `n` cols in `use_cols`; any remaining
1383
                columns will default to None and the base of the observation
1384
                name will be used. Default is None.
1385
            zone_array (`np.ndarray`): array defining spatial limits or zones
1386
                for array-style observations. Default is None
1387
            includes_header (`bool`): flag indicating that the list-style file
1388
                includes a header row.  Default is True.
1389

1390
        Returns:
1391
            `Pandas.DataFrame`: dataframe with info for new observations
1392

1393
        Note:
1394
            This is the main entry point for adding observations to the pest interface
1395

1396
            If `index_cols` and `use_cols` are both None, then it is assumed that
1397
            array-style observations are being requested.  In this case,
1398
            `filenames` must be only one filename.
1399

1400
            `zone_array` is only used for array-style observations.  Zone values
1401
            less than or equal to zero are skipped (using the "dum" option)
1402

1403

1404
        Example::
1405

1406
            # setup observations for the 2nd thru 5th columns of the csv file
1407
            # using the first column as the index
1408
            df = pf.add_observations("heads.csv",index_col=0,use_cols=[1,2,3,4],
1409
                                     ofile_sep=",")
1410
            # add array-style observations, skipping model cells with an ibound
1411
            # value less than or equal to zero
1412
            df = pf.add_observations("conce_array.dat,index_col=None,use_cols=None,
1413
                                     zone_array=ibound)
1414

1415

1416
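            # explicitly name the observation groups for the two use_cols
            # (a sketch; "flows.csv" and the group names are hypothetical)
            df = pf.add_observations("flows.csv",index_cols=0,use_cols=[1,2],
                                     obsgp=["qin","qout"])
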
        """
1417
        use_cols_psd = copy.copy(use_cols)  # store passed use_cols argument
9✔
1418
        if insfile is None:  # setup instruction file name
9✔
1419
            insfile = "{0}.ins".format(filename)
8✔
1420
        self.logger.log("adding observations from output file " "{0}".format(filename))
9✔
1421
        # precondition arguments
1422
        (
9✔
1423
            filenames,
1424
            fmts,
1425
            seps,
1426
            skip_rows,
1427
            index_cols,
1428
            use_cols,
1429
        ) = self._prep_arg_list_lengths(
1430
            filename,
1431
            index_cols=index_cols,
1432
            use_cols=use_cols,
1433
            fmts=None,
1434
            seps=ofile_sep,
1435
            skip_rows=ofile_skip,
1436
        )
1437
        # array style obs, if both index_cols and use_cols are None (default)
1438
        if index_cols is None and use_cols is None:
9✔
1439
            if not isinstance(filenames, str):
8✔
1440
                if len(filenames) > 1:
8✔
1441
                    self.logger.lraise(
×
1442
                        "only a single filename can be used for array-style observations"
1443
                    )
1444
                filenames = filenames[0]
8✔
1445
            self.logger.log(
8✔
1446
                "adding observations from array output file '{0}'".format(filenames)
1447
            )
1448
            # Setup obs for array style output, build and write instruction file
1449
            self._process_array_obs(
8✔
1450
                filenames,
1451
                insfile,
1452
                prefix,
1453
                ofile_sep,
1454
                ofile_skip,
1455
                zone_array,
1456
            )
1457

1458
            # Add obs from ins file written by _process_array_obs()
1459
            new_obs = self.add_observations_from_ins(
8✔
1460
                ins_file=self.new_d / insfile, out_file=self.new_d / filename
1461
            )
1462
            # Try to add an observation group name -- should default to `obgnme`
1463
            # TODO: note list style default to base of obs name, here array default to `obgnme`
1464
            if obsgp is not None:  # if a group name is passed
8✔
1465
                new_obs.loc[:, "obgnme"] = obsgp
8✔
1466
            elif prefix is not None and len(prefix) != 0:  # if prefix is passed
8✔
1467
                new_obs.loc[:, "obgnme"] = prefix
8✔
1468
            else:
1469
                new_obs.loc[:, "obgnme"] = "oname:{0}_otype:arr".format(filenames)
8✔
1470
            # else will default to `obgnme`
1471
            self.logger.log(
8✔
1472
                "adding observations from array output file '{0}'".format(filenames)
1473
            )
1474
        else:
1475
            # list style obs
1476
            self.logger.log(
9✔
1477
                "adding observations from tabular output file " "'{0}'".format(filenames)
1478
            )
1479
            # -- will end up here if either of index_cols or use_cols is not None
1480
            df, storehead, inssep = self._load_listtype_file(
9✔
1481
                filenames, index_cols, use_cols, fmts, seps, skip_rows
1482
            )
1483
            # parse to numeric (read as dtype object to preserve mixed types)
1484
            df = df.apply(pd.to_numeric, errors="ignore")
9✔
1485
            if inssep != ",":
9✔
1486
                inssep = seps
8✔
1487
            else:
1488
                inssep = [inssep]
9✔
1489
            # rectify df?
1490
            # if iloc[0] are strings and index_cols are ints,
1491
            #   can we assume that there were in fact column headers?
1492
            if all(isinstance(c, str) for c in df.iloc[0]) and all(
9✔
1493
                isinstance(a, int) for a in index_cols
1494
            ):
1495
                index_cols = df.iloc[0][index_cols].to_list()  # redefine index_cols
8✔
1496
                if use_cols is not None:
8✔
1497
                    use_cols = df.iloc[0][use_cols].to_list()  # redefine use_cols
8✔
1498
                df = (
8✔
1499
                    df.rename(columns=df.iloc[0].to_dict())
1500
                    .drop(0)
1501
                    .reset_index(drop=True)
1502
                    .apply(pd.to_numeric, errors="ignore")
1503
                )
1504
            # Select all non index cols if use_cols is None
1505
            if use_cols is None:
9✔
1506
                use_cols = df.columns.drop(index_cols).tolist()
8✔
1507
            # Currently just passing through comments in header (i.e. before the table data)
1508
            lenhead = 0
9✔
1509
            stkeys = np.array(
9✔
1510
                sorted(storehead.keys())
1511
            )  # comments line numbers as sorted array
1512
            if stkeys.size > 0 and stkeys.min() == 0:
9✔
1513
                lenhead += 1 + np.sum(np.diff(stkeys) == 1)
8✔
1514
            new_obs_l = []
9✔
1515
            for filename, sep in zip(
9✔
1516
                filenames, inssep
1517
            ):  # should only ever be one but hey...
1518
                self.logger.log(
9✔
1519
                    "building insfile for tabular output file {0}" "".format(filename)
1520
                )
1521
                # Build dataframe from output file df for use in insfile
1522
                df_temp = _get_tpl_or_ins_df(
9✔
1523
                    df,
1524
                    prefix,
1525
                    typ="obs",
1526
                    index_cols=index_cols,
1527
                    use_cols=use_cols,
1528
                )
1529
                df.loc[:, "idx_str"] = df_temp.idx_strs
9✔
1530
                # Select only certain rows if requested
1531
                if use_rows is not None:
9✔
1532
                    if isinstance(use_rows, str):
8✔
1533
                        if use_rows not in df.idx_str:
×
1534
                            self.logger.warn(
×
1535
                                "can't find {0} in generated observation idx_str. "
1536
                                "setting up obs for all rows instead"
1537
                                "".format(use_rows)
1538
                            )
1539
                            use_rows = None
×
1540
                    elif isinstance(use_rows, int):
8✔
1541
                        use_rows = [use_rows]
×
1542
                    use_rows = [r for r in use_rows if r <= len(df)]
8✔
1543
                    use_rows = df.iloc[use_rows].idx_str.unique()
8✔
1544

1545
                # Construct ins_file from df
1546
                # first rectify group name with number of columns
1547
                ncol = len(use_cols)
9✔
1548
                fill = True  # default fill=True means that the groupname will be
9✔
1549
                # derived from the base of the observation name
1550
                # if passed group name is a string or list with len < ncol
1551
                # and passed use_cols was None or of len > len(obsgp)
1552
                if obsgp is not None:
9✔
1553
                    if use_cols_psd is None:  # no use_cols defined (all are setup)
8✔
1554
                        if len([obsgp] if isinstance(obsgp, str) else obsgp) == 1:
8✔
1555
                            # only 1 group provided, assume passed obsgp applies
1556
                            # to all use_cols
1557
                            fill = "first"
8✔
1558
                        else:
1559
                            # many obs groups passed, assume last will fill if < ncol
1560
                            fill = "last"
×
1561
                    # else fill will be set to True (base of obs name will be used)
1562
                else:
1563
                    obsgp = True  # will use base of col
9✔
1564
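                # pad/crop obsgp to one group name per use_col, e.g. (sketch)
                # _check_var_len("flow", 3, fill="first") -> ["flow"]*3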
                obsgp = _check_var_len(obsgp, ncol, fill=fill)
9✔
1565
                nprefix = prefix
9✔
1566

1567
                if len(nprefix) == 0:
9✔
1568
                    nprefix = filenames[0]
9✔
1569
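                # e.g. "oname:hds_otype:lst" -- mirrors the array-style
                # observation naming used by _process_array_obs()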
                nprefix = "oname:{0}_otype:lst".format(nprefix)
9✔
1570
                df_ins = pyemu.pst_utils.csv_to_ins_file(
9✔
1571
                    df.set_index("idx_str"),
1572
                    ins_filename=self.new_d / insfile,
1573
                    only_cols=use_cols,
1574
                    only_rows=use_rows,
1575
                    marker="~",
1576
                    includes_header=includes_header,
1577
                    includes_index=False,
1578
                    prefix=nprefix,
1579
                    head_lines_len=lenhead,
1580
                    sep=sep,
1581
                    gpname=obsgp,
1582
                )
1583
                self.logger.log(
9✔
1584
                    "building insfile for tabular output file {0}" "".format(filename)
1585
                )
1586
                new_obs = self.add_observations_from_ins(
9✔
1587
                    ins_file=self.new_d / insfile, out_file=self.new_d / filename
1588
                )
1589
                if "obgnme" in df_ins.columns:
9✔
1590
                    new_obs.loc[:, "obgnme"] = df_ins.loc[new_obs.index, "obgnme"]
9✔
1591
                new_obs_l.append(new_obs)
9✔
1592
            new_obs = pd.concat(new_obs_l)
9✔
1593
            self.logger.log(
9✔
1594
                "adding observations from tabular output file " "'{0}'".format(filenames)
1595
            )
1596
        self.insfile_obsmap[insfile] = new_obs.obsnme.to_list()
9✔
1597
        self.logger.log("adding observations from output file " "{0}".format(filename))
9✔
1598
        if rebuild_pst:
9✔
1599
            if self.pst is not None:
8✔
1600
                self.logger.log("Adding obs to control file " "and rewriting pst")
8✔
1601
                self.build_pst(filename=self.pst.filename, update="obs")
8✔
1602
                self.logger.log("Adding obs to control file " "and rewriting pst")
8✔
1603
            else:
1604
                pstname = Path(self.new_d, self.original_d.name)
×
1605
                self.logger.warn(
×
1606
                    "pst object not available, "
1607
                    f"new control file will be written with filename {pstname}"
1608
                )
1609
                self.build_pst(filename=None, update=False)
×
1610

1611
        return new_obs
9✔
1612

1613
    def add_observations_from_ins(
9✔
1614
        self, ins_file, out_file=None, pst_path=None, inschek=True
1615
    ):
1616
        """add new observations to a control file from an existing instruction file
1617

1618
        Args:
1619
            ins_file (`str`): instruction file with exclusively new
1620
               observation names. N.B. if `ins_file` just contains base
1621
               filename string (i.e. no directory name), the path to PEST
1622
               directory will be automatically appended.
1623
            out_file (`str`): model output file.  If None, then
1624
               ins_file.replace(".ins","") is used. Default is None.
1625
               If `out_file` just contains base filename string
1626
               (i.e. no directory name), the path to PEST directory will be
1627
               automatically appended.
1628
            pst_path (`str`): the path to append to the instruction file and
1629
               out file in the control file.  If not None, then any existing
1630
               path in front of the template or ins file is split off and
1631
               pst_path is prepended.  If python is being run in a directory
1632
               other than where the control file will reside, it is useful
1633
               to pass `pst_path` as `.`. Default is None
1634
            inschek (`bool`): flag to try to process the existing output file
1635
               using the `pyemu.InstructionFile` class.  If successful,
1636
               processed outputs are used as obsvals
1637

1638
        Returns:
1639
            `pandas.DataFrame`: the data for the new observations that were
1640
               added
1641

1642
        Note:
1643
            populates the new observation information with default values
1644

1645
        Example::
1646

1647
            pf = pyemu.PstFrom("temp","template")
1648
            pf.add_observations_from_ins(os.path.join("template","new_obs.dat.ins"),
1649
                                 pst_path=".")
1650

1651
        """
1652
        # lifted almost completely from `Pst().add_observation()`
1653
        if os.path.dirname(ins_file) in ["", "."]:
9✔
1654
            # if insfile is passed as just a filename,
1655
            # append pest directory name
1656
            ins_file = self.new_d / ins_file
8✔
1657
            pst_path = "."  # reset and new assumed pst_path
8✔
1658
        # else:
1659
        # assuming that passed insfile is the full path to file from current location
1660
        if not os.path.exists(ins_file):
9✔
1661
            self.logger.lraise(
×
1662
                "ins file not found: {0}, {1}" "".format(os.getcwd(), ins_file)
1663
            )
1664
        if out_file is None:
9✔
1665
            out_file = str(ins_file).replace(".ins", "")
8✔
1666
        elif os.path.dirname(out_file) in ["", "."]:
9✔
1667
            out_file = self.new_d / out_file
×
1668
        if ins_file == out_file:
9✔
1669
            self.logger.lraise("ins_file == out_file, doh!")
×
1670

1671
        # get the obs names in the instructions file
1672
        self.logger.log(
9✔
1673
            "adding observation from instruction file '{0}'".format(ins_file)
1674
        )
1675
        obsnme = pyemu.pst_utils.parse_ins_file(ins_file)
9✔
1676

1677
        sobsnme = set(obsnme)
9✔
1678
        if len(self.obs_dfs) > 0:
9✔
1679
            sexist = pd.concat(self.obs_dfs).obsnme
9✔
1680
        else:
1681
            sexist = []
9✔
1682
        sexist = set(sexist)  # todo need to check this here?
9✔
1683
        sint = sobsnme.intersection(sexist)
9✔
1684
        if len(sint) > 0:
9✔
1685
            self.logger.lraise(
×
1686
                "the following obs instruction file {0} are already in the "
1687
                "control file:{1}".format(ins_file, ",".join(sint))
1688
            )
1689

1690
        # find "new" obs that are not already in the control file
1691
        new_obsnme = sobsnme - sexist
9✔
1692
        if len(new_obsnme) == 0:
9✔
1693
            self.logger.lraise(
×
1694
                "no new observations found in instruction file {0}".format(ins_file)
1695
            )
1696

1697
        # extend observation_data
1698
        new_obsnme = np.sort(list(new_obsnme))
9✔
1699
        new_obs_data = pyemu.pst_utils._populate_dataframe(
9✔
1700
            new_obsnme,
1701
            pyemu.pst_utils.pst_config["obs_fieldnames"],
1702
            pyemu.pst_utils.pst_config["obs_defaults"],
1703
            pyemu.pst_utils.pst_config["obs_dtype"],
1704
        )
1705
        new_obs_data.loc[new_obsnme, "obsnme"] = new_obsnme
9✔
1706
        new_obs_data.index = new_obsnme
9✔
1707

1708
        # need path relative to where control file
1709
        ins_file_pstrel = Path(ins_file).relative_to(self.new_d)
9✔
1710
        out_file_pstrel = Path(out_file).relative_to(self.new_d)
9✔
1711
        if pst_path is not None:
9✔
1712
            ins_file_pstrel = pst_path / ins_file_pstrel
9✔
1713
            out_file_pstrel = pst_path / out_file_pstrel
9✔
1714
        self.ins_filenames.append(ins_file_pstrel)
9✔
1715
        self.output_filenames.append(out_file_pstrel)
9✔
1716
        # add to temporary files to be removed at start of forward run
1717
        self.tmp_files.append(out_file_pstrel)
9✔
1718
        df = None
9✔
1719
        if inschek:
9✔
1720
            # df = pst_utils._try_run_inschek(ins_file,out_file,cwd=cwd)
1721
            # ins_file = os.path.join(cwd, ins_file)
1722
            # out_file = os.path.join(cwd, out_file)
1723
            df = pyemu.pst_utils.try_process_output_file(
9✔
1724
                ins_file=ins_file, output_file=out_file
1725
            )
1726
        if df is not None:
9✔
1727
            # print(self.observation_data.index,df.index)
1728
            new_obs_data.loc[df.index, "obsval"] = df.obsval
9✔
1729
        self.obs_dfs.append(new_obs_data)
9✔
1730
        self.logger.log(
9✔
1731
            "adding observation from instruction file '{0}'".format(ins_file)
1732
        )
1733
        return new_obs_data
9✔
1734

1735
    def add_parameters(
9✔
1736
        self,
1737
        filenames,
1738
        par_type,
1739
        zone_array=None,
1740
        dist_type="gaussian",
1741
        sigma_range=4.0,
1742
        upper_bound=None,
1743
        lower_bound=None,
1744
        transform=None,
1745
        par_name_base="p",
1746
        index_cols=None,
1747
        use_cols=None,
1748
        use_rows=None,
1749
        pargp=None,
1750
        pp_space=10,
1751
        use_pp_zones=False,
1752
        num_eig_kl=100,
1753
        spatial_reference=None,
1754
        geostruct=None,
1755
        datetime=None,
1756
        mfile_fmt="free",
1757
        mfile_skip=None,
1758
        mfile_sep=None,
1759
        ult_ubound=None,
1760
        ult_lbound=None,
1761
        rebuild_pst=False,
1762
        alt_inst_str="inst",
1763
        comment_char=None,
1764
        par_style="multiplier",
1765
        initial_value=None,
1766
    ):
1767
        """
1768
        Add list or array style model input files to PstFrom object.
1769
        This method is the main entry point for adding parameters to the
1770
        pest interface
1771

1772
        Args:
1773
            filenames (`str`): Model input filenames to parameterize
1774
            par_type (`str`): One of `grid` - for every element, `constant` - for single
1775
                parameter applied to every element, `zone` - for zone-based
1776
                parameterization or `pilotpoint` - for
1777
                pilot-point based parameterization of array style input files.
1778
                Note `kl` not yet implemented # TODO
1779
            zone_array (`np.ndarray`): array defining spatial limits or zones
1780
                for parameterization.
1781
            dist_type: not yet implemented # TODO
1782
            sigma_range: not yet implemented # TODO
1783
            upper_bound (`float`): PEST parameter upper bound.  If `None`, then 1.0e+10 is used.  Default is `None`
1784
            lower_bound (`float`): PEST parameter lower bound.  If `None` and `transform` is "log", then 1.0e-10 is used.
1785
                Otherwise, if `None`, -1.0e+10 is used.  Default is `None`
1786
            transform (`str`): PEST parameter transformation.  Must be either "log", "none" or "fixed".  The "tied" transform
1787
                must be used after calling `PstFrom.build_pst()`.
1788
            par_name_base (`str` or `list`-like): basename for parameters that
1789
                are set up. If parameter file is tabular list-style file
1790
                (`index_cols` is not None) then :
1791
                len(par_name_base) must equal len(use_cols)
1792
            index_cols (`list`-like): if not None, will attempt to parameterize
1793
                expecting a tabular-style model input file. `index_cols`
1794
                defines the unique columns used to set up pars. If passed as a
1795
                list of `str`, strings are expected to denote the columns
1796
                headers in tabular-style parameter files; if `i` and `j` in
1797
                list, these columns will be used to define spatial position for
1798
                spatial correlations (if required). WARNING: If passed as list
1799
                of `int`, `i` and `j` will be assumed to be in last two entries
1800
                in the list. Can be passed as a dictionary using the keys
1801
                `i` and `j` to explicitly specify the columns that relate to
1802
                model rows and columns to be identified and processed to x,y.
1803
            use_cols (`list`-like or `int`): for tabular-style model input file,
1804
                defines the columns to be parameterised
1805
            use_rows (`list` or `tuple`): Setup parameters for
1806
                only specific rows in list-style model input file.
1807
                Action depends on the dimensions of use_rows.
1808
                If ndim(use_rows) < 2: use_rows is assumed to represent the row number, index slicer (equiv df.iloc),
1809
                for all passed files (after headers are stripped). So use_rows=[0,3,5] will parameterise the
1810
                1st, 4th and 6th rows of each passed list-like file.
1811
                If ndim(use_rows) = 2: use_rows represents the index values to parameterise according to index_cols.
1812
                e.g. [(3,5,6)] or [[3,5,6]] would attempt to set parameters where the model file
1813
                values for 3 `index_cols` are 3,5,6. N.B. values in tuple are the actual
1814
                model file entry values.
1815
                If no rows in the model input file match `use_rows`, parameters
1816
                will be set up for all rows. Only valid/effective if index_cols is not None.
1817
                Default is None -- setup parameters for all rows.
1818
            pargp (`str`): Parameter group to assign pars to. This is PESTs
1819
                pargp but is also used to gather correlated parameters set up
1820
                using multiple `add_parameters()` calls (e.g. temporal pars)
1821
                with common geostructs.
1822
            pp_space (`float`, `int`,`str` or `pd.DataFrame`): Spatial pilot point information.
1823
                If `float` or `int`, AND `spatial_reference` is of type VertexGrid, it is the spacing in model length units between pilot points.
1824
                If `int` it is the spacing in rows and cols of where to place pilot points.
1825
                If `pd.DataFrame`, then this arg is treated as a predefined set of pilot points
1826
                and in this case, the dataframe must have "name", "x", "y", and optionally "zone" columns.
1827
                If `str`, then an attempt is made to load a dataframe from a csv file (if `pp_space` ends with ".csv"),
1828
                shapefile (if `pp_space` ends with ".shp") or from a pilot points file.  If `pp_space` is None,
1829
                an integer spacing of 10 is used.  Default is 10
1830
            use_pp_zones (`bool`): a flag to use the greater-than-zero values
1831
                in the zone_array as pilot point zones.
1832
                If False, zone_array values greater than zero are treated as a
1833
                single zone.  This argument is only used if `pp_space` is None
1834
                or `int`. Default is False.
1835
            num_eig_kl: TODO - implement with KL pars
1836
            spatial_reference (`pyemu.helpers.SpatialReference`): If different
1837
                spatial reference required for pilotpoint setup.
1838
                If None spatial reference passed to `PstFrom()` will be used
1839
                for pilot-points
1840
            geostruct (`pyemu.geostats.GeoStruct()`): For specifying correlation
1841
                geostruct for pilot-points and par covariance.
1842
            datetime (`str`): optional %Y%m%d string or datetime object for
1843
                setting up temporally correlated pars. Where datetime is passed
1844
                correlation axis for pars will be set to timedelta.
1845
            mfile_fmt (`str`): format of model input file - this will be preserved
1846
            mfile_skip (`int` or `str`): header in model input file to skip
1847
                when reading and reapply when writing. Can optionally be `str` in which case `mfile_skip` will be treated
1848
                as a `comment_char`.
1849
            mfile_sep (`str`): separator/delimiter in model input file.
1850
                If None, separator will be interpreted from file name extension.
1851
                `.csv` is assumed to be comma separator. Default is None
1852
            ult_ubound (`float`): Ultimate upper bound for model input
1853
                parameter once all mults are applied - ensure physical model par vals. If not passed,
1854
                it is set to 1.0e+30
1855
            ult_lbound (`float`): Ultimate lower bound for model input
1856
                parameter once all mults are applied.  If not passed, it is set to
1857
                1.0e-30 for log transform and -1.0e+30 for non-log transform
1858
            rebuild_pst (`bool`): (Re)Construct PstFrom.pst object after adding
1859
                new parameters
1860
            alt_inst_str (`str`): Alternative to default `inst` string in
1861
                parameter names. Specify ``None`` or ``""`` to exclude the instance
1862
                information from parameter names. For example, if parameters
1863
                that apply to more than one input/template file are desired.
1864
            comment_char (`str`): option to skip comment lines in model file.
1865
                This is not additive with `mfile_skip` option.
1866
                Warning: currently comment lines within list-style tabular data
1867
                will be lost.
1868
            par_style (`str`): either "m"/"mult"/"multiplier", "a"/"add"/"addend", or "d"/"direct" where the first two set
1869
                up multiplier or addend parameters that act on the existing model input
1870
                array and the latter sets up a template file to write the model
1871
                input file directly.  Default is "multiplier".
1872

1873
            initial_value (`float`): the value to set for the `parval1` value in the control file.
1874
                Default is 1.0
1875
        Returns:
1876
            `pandas.DataFrame`: dataframe with info for new parameters
1877

1878
        Example::
1879

1880
            # setup grid-scale direct parameters for an array of numbers
1881
            df = pf.add_parameters("hk.dat",par_type="grid",par_style="direct")
1882
            # setup pilot point multiplier parameters for an array of numbers
1883
            # with a pilot point being set in every 5th active model cell
1884
            df = pf.add_parameters("recharge.dat",par_type="pilotpoint",pp_space=5,
1885
                                   zone_array="ibound.dat")
1886
            # setup a single multiplier parameter for the 4th column
1887
            # of a column format (list/tabular type) file
1888
            df = pf.add_parameters("wel_list_1.dat",par_type="constant",
1889
                                   index_cols=[0,1,2],use_cols=[3])
1890

1891
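            # setup multipliers for only the list-file rows whose three
            # index-column values match (1,2,3) -- the ndim(use_rows)=2 form
            # (a sketch; the index values are hypothetical)
            df = pf.add_parameters("wel_list_1.dat",par_type="grid",
                                   index_cols=[0,1,2],use_cols=[3],
                                   use_rows=[(1,2,3)])
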
        """
1892
        # TODO need more support for temporal pars?
1893
        #  - As another partype using index_cols or an additional time_cols
1894

1895
        # TODO support passing par_file (i,j)/(x,y) directly where information
1896
        #  is not contained in model parameter file - e.g. no i,j columns
1897
        self.add_pars_callcount += 1
9✔
1898
        self.ijwarned[self.add_pars_callcount] = False
9✔
1899

1900
        if transform is None:
9✔
1901
            if par_style in ["a", "add", "addend"]:
9✔
1902
                transform = 'none'
×
1903
                self.logger.statement(
×
1904
                    "par_style is 'add' and transform was not passed, setting tranform to 'none'"
1905
                )
1906
            else:
1907
                transform = 'log'
9✔
1908
                self.logger.statement(
9✔
1909
                    "transform was not passed, setting default tranform to 'log'"
1910
                )
1911
        if transform.lower().strip() not in ["none", "log", "fixed"]:
9✔
1912
            self.logger.lraise(
×
1913
                "unrecognized transform ('{0}'), should be in ['none','log','fixed']".format(
1914
                    transform
1915
                )
1916
            )
1917
        if transform == "fixed" and geostruct is not None:
9✔
1918
            self.logger.lraise(
×
1919
                "geostruct is not 'None', cant draw values for fixed pars"
1920
            )
1921

1922
        # some checks for direct parameters
1923
        par_style = par_style.lower()
9✔
1924
        if len(par_style) > 1:
9✔
1925
            par_style = par_style[0]
9✔
1926
        if par_style not in ["m", "d", "a"]:
9✔
1927
            self.logger.lraise(
×
1928
                "add_parameters(): unrecognized 'style': {0}, "
1929
                "should be either 'm'/'mult'/'multiplier', "
1930
                "'a'/'add'/'addend' or 'd'/'direct'".format(
1931
                    par_style
1932
                )
1933
            )
1934

1935
        if initial_value is None:
9✔
1936
            if par_style == "m":
9✔
1937
                initial_value = 1.0
9✔
1938
            elif par_style == "a":
8✔
1939
                initial_value = 0.0
8✔
1940
            elif par_style == "d":
8✔
1941
                initial_value = 1.0  # ?
8✔
1942

1943
        if upper_bound is None:
9✔
1944
            upper_bound = 1.0e10
8✔
1945

1946
        if lower_bound is None:
9✔
1947
            if transform.lower() == "log":
8✔
1948
                lower_bound = 1.0e-10
8✔
1949
            else:
1950
                lower_bound = -1.0e10
8✔
1951

1952
        if isinstance(filenames, str) or isinstance(filenames, Path):
9✔
1953
            filenames = [filenames]
9✔
1954
        # data file paths relative to the pest parent directory
1955
        filenames = [
9✔
1956
            get_relative_filepath(self.original_d, filename) for filename in filenames
1957
        ]
1958
        if len(filenames) == 0:
9✔
1959
            self.logger.lraise("add_parameters(): filenames is empty")
×
1960
        if par_style == "d":
9✔
1961
            if len(filenames) != 1:
8✔
1962
                self.logger.lraise(
×
1963
                    "add_parameters(): 'filenames' arg for 'direct' style "
1964
                    "must contain one and only one filename, "
1965
                    f"not {len(filenames)} files"
1966
                )
1967
            if filenames[0] in self.direct_org_files:
8✔
1968
                self.logger.lraise(
×
1969
                    f"add_parameters(): original model input file "
1970
                    f"'{filenames[0]}' already used for 'direct' parameterization"
1971
                )
1972
            else:
1973
                self.direct_org_files.append(filenames[0])
8✔
1974
        # Default par data columns used for pst
1975
        par_data_cols = pyemu.pst_utils.pst_config["par_fieldnames"]
9✔
1976
        self.logger.log(
9✔
1977
            f"adding {par_type} type {par_style} style parameters for file(s) "
1978
            f"{[str(f) for f in filenames]}"
1979
        )
1980
        if geostruct is not None:
9✔
1981
            self.logger.log("using geostruct:{0}".format(str(geostruct)))
9✔
1982
            if geostruct.sill != 1.0:  #  and par_style != "multiplier": #TODO !=?
9✔
1983
                self.logger.warn(
×
1984
                    "geostruct sill != 1.0"  # for 'multiplier' style parameters"
1985
                )
1986
            if geostruct.transform != transform:
9✔
1987
                self.logger.warn(
9✔
1988
                    "0) Inconsistency between " "geostruct transform and partrans."
1989
                )
1990
                self.logger.warn(f"1) Setting geostruct transform to {transform}")
9✔
1991
                if geostruct not in self.par_struct_dict.keys():
9✔
1992
                    # safe to just reset transform
1993
                    geostruct.transform = transform
9✔
1994
                else:
1995
                    self.logger.warn("2) This will create a new copy of geostruct")
8✔
1996
                    # to avoid flip flopping transform need to make a new geostruct
1997
                    geostruct = copy.copy(geostruct)
8✔
1998
                    geostruct.transform = transform
8✔
1999
                self.logger.warn(
9✔
2000
                    "-) Better to pass an appropriately transformed geostruct"
2001
                )
2002
        # big sr and zone dependency checker here: todo - tidy?
2003
        checker = (
9✔
2004
                self._spatial_reference is not None
2005
                and not isinstance(self._spatial_reference, dict)
2006
                and self._spatial_reference.grid_type == 'vertex'
2007
                and zone_array is not None
2008
                and len(zone_array.shape) == 1
2009
        )
2010
        if checker:
9✔
2011
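            # vertex grids carry 1-D cell arrays; promote the zone array to
            # a column vector so the array handling below sees 2 dimensions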
            zone_array = np.reshape(zone_array, (zone_array.shape[0], 1))
8✔
2012

2013
        # Get useful variables from arguments passed
2014
        # if index_cols passed as a dictionary that maps i,j information
2015
        idx_cols = index_cols
9✔
2016
        ij_in_idx = None
9✔
2017
        xy_in_idx = None
9✔
2018
        if isinstance(index_cols, dict):
9✔
2019
            # only useful if i and j are in keys .... or xy?
2020
            # TODO: or datetime str?
2021
            keys = np.array([k.lower() for k in index_cols.keys()])
8✔
2022
            idx_cols = [index_cols[k] for k in keys]
8✔
2023
            if any(all(a in keys for a in aa) for aa in [["i", "j"], ["x", "y"]]):
8✔
2024
                if all(ij in keys for ij in ["i", "j"]):
8✔
2025
                    o_idx = np.argsort(keys)
8✔
2026
                    ij_in_idx = o_idx[np.searchsorted(keys[o_idx], ["i", "j"])]
8✔
2027
                if all(xy in keys for xy in ["x", "y"]):
8✔
2028
                    o_idx = np.argsort(keys)
8✔
2029
                    xy_in_idx = o_idx[np.searchsorted(keys[o_idx], ["x", "y"])]
8✔
2030
            else:
2031
                self.logger.lraise(
×
2032
                    "If passing `index_cols` as type == dict, "
2033
                    "keys need to contain [`i` and `j`] or "
2034
                    "[`x` and `y`]"
2035
                )
2036

2037
        (
9✔
2038
            index_cols,
2039
            use_cols,
2040
            file_dict,
2041
            fmt_dict,
2042
            sep_dict,
2043
            skip_dict,
2044
            headerlines,  # needed for direct pars (need to be in tpl)
2045
        ) = self._par_prep(
2046
            filenames,
2047
            idx_cols,
2048
            use_cols,
2049
            fmts=mfile_fmt,
2050
            skip_rows=mfile_skip,
2051
            c_char=comment_char,
2052
            seps=mfile_sep,
2053
        )
2054
        if datetime is not None:  # convert and check datetime
9✔
2055
            # TODO: something needed here to allow a different relative point.
2056
            datetime = _get_datetime_from_str(datetime)
9✔
2057
            if self.start_datetime is None:
9✔
2058
                self.logger.warn(
8✔
2059
                    "NO START_DATEIME PROVIDED, ASSUMING PAR "
2060
                    "DATETIME IS START {}".format(datetime)
2061
                )
2062
                self.start_datetime = datetime
8✔
2063
            assert (
9✔
2064
                datetime >= self.start_datetime
2065
            ), "passed datetime is earlier than start_datetime {0}, {1}".format(
2066
                datetime, self.start_datetime
2067
            )
2068
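            # offset of this call's datetime from the model start; used as
            # the correlation coordinate for temporally correlated pars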
            t_offest = datetime - self.start_datetime
9✔
2069

2070
        # Pull out and construct name-base for parameters
2071
        if isinstance(par_name_base, str):
9✔
2072
            par_name_base = [par_name_base]
9✔
2073
        # if `use_cols` is passed check number of base names is the same as cols
2074
        if use_cols is None and len(par_name_base) == 1:
9✔
2075
            pass
9✔
2076
        elif use_cols is not None and len(par_name_base) == len(use_cols):
9✔
2077
            pass
9✔
2078
        else:
2079
            self.logger.lraise(
×
2080
                "par_name_base should be a string, "
2081
                "single-element container, or container of "
2082
                f"len use_cols, not '{str(par_name_base)}'"
2083
            )
2084

2085
        # otherwise, things get tripped up in the ensemble/cov stuff
2086
        if pargp is not None:
9✔
2087
            if isinstance(pargp, list):
9✔
2088
                pargp = [pg.lower() for pg in pargp]
8✔
2089
            else:
2090
                pargp = pargp.lower()
9✔
2091
        par_name_base = [pnb.lower() for pnb in par_name_base]
9✔
2092

2093

2094
        fmt = "_{0}".format(alt_inst_str) + ":{0}"
9✔
2095
        chk_prefix = "_{0}".format(alt_inst_str)  # add `instance` identifier
9✔
2096

2097
        # increment name base if already passed
2098
        for i in range(len(par_name_base)):
9✔
2099
            # multiplier file name will be taken first par group, if passed
2100
            # (the same multipliers will apply to all pars passed in this call)
2101
            # Remove `:` for filenames
2102
            # multiplier file needs instance number 
2103
            # regardless of whether instance is to be included 
2104
            # in the parameter names
2105
            if i == 0:
9✔
2106
                inst = self._next_count(par_name_base[i] +\
9✔
2107
                                        chk_prefix)
2108
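                # e.g. par_name_base "hk" with inst 0 -> "hk_inst0"
                # (":" removed so the string is safe to use in filenames)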
                par_name_store = (par_name_base[0] + 
9✔
2109
                                  fmt.format(inst)).replace(":", "")
2110
                # if instance is to be included in the parameter names
2111
                # add the instance suffix to the parameter name base
2112
                if alt_inst_str is not None and len(alt_inst_str) > 0:
9✔
2113
                    par_name_base[0] += fmt.format(inst)
9✔
2114
            # if instance is to be included in the parameter names
2115
            # add the instance suffix to the parameter name base
2116
            elif alt_inst_str is not None and len(alt_inst_str) > 0:
8✔
2117
                par_name_base[i] += fmt.format(
8✔
2118
                    self._next_count(par_name_base[i] + chk_prefix)
2119
                )
2120

2121
        # multiplier file name will be taken first par group, if passed
2122
        # (the same multipliers will apply to all pars passed in this call)
2123
        # Remove `:` for filenames
2124
        #par_name_store = par_name_base[0].replace(":", "")  # for os filename
2125

2126
        # Define requisite filenames
2127
        if par_style in ["m", "a"]:
9✔
2128
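            # e.g. "hk_inst0_grid.csv" -- the multiplier/addend file the
            # forward run applies to the original model input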
            mlt_filename = "{0}_{1}.csv".format(par_name_store, par_type)
9✔
2129
            # pst input file (for tpl->in pair) is multfile (in mult dir)
2130
            in_fileabs = self.mult_file_d / mlt_filename
9✔
2131
            # pst input file (for tpl->in pair) is multfile (in mult dir)
2132
            in_filepst = in_fileabs.relative_to(self.new_d)
9✔
2133
            tpl_filename = self.tpl_d / (mlt_filename + ".tpl")
9✔
2134
        else:
2135
            mlt_filename = np.NaN
8✔
2136
            # absolute path to org/datafile
2137
            in_fileabs = self.original_file_d / filenames[0].name
8✔
2138
            # pst input file (for tpl->in pair) is orgfile (in org dir)
2139
            # relative path to org/datafile (relative to dest model workspace):
2140
            in_filepst = in_fileabs.relative_to(self.new_d)
8✔
2141
            tpl_filename = self.tpl_d / (filenames[0].name + ".tpl")
8✔
2142

2143
        # this keeps denormal values from creeping into the model input arrays
2144
        ubfill = None
9✔
2145
        lbfill = None
9✔
2146
        if ult_ubound is None:
9✔
2147
            # no ultimate upper bound passed; default to the class-set bound
2148
            ult_ubound = self.ult_ubound_fill
9✔
2149
            ubfill = "first"  # will fill for all use_cols
9✔
2150
        if ult_lbound is None:
9✔
2151
            ult_lbound = self.ult_lbound_fill
9✔
2152
            lbfill = "first"
9✔
2153

2154
        pp_filename = None  # setup placeholder variables
9✔
2155
        fac_filename = None
9✔
2156
        nxs = None
9✔
2157
        # Process model parameter files to produce appropriate pest pars
2158
        if index_cols is not None:  # Assume list/tabular type input files
9✔
2159
            # ensure inputs are provided for all required cols
2160
            ncol = len(use_cols)
9✔
2161
            ult_lbound = _check_var_len(ult_lbound, ncol, fill=lbfill)
9✔
2162
            ult_ubound = _check_var_len(ult_ubound, ncol, fill=ubfill)
9✔
2163
            pargp = _check_var_len(pargp, ncol)
9✔
2164
            lower_bound = _check_var_len(lower_bound, ncol, fill="first")
9✔
2165
            upper_bound = _check_var_len(upper_bound, ncol, fill="first")
9✔
2166
            if len(use_cols) != len(ult_lbound) != len(ult_ubound):
9✔
2167
                self.logger.lraise(
×
2168
                    "mismatch in number of columns to use {0} "
2169
                    "and number of ultimate lower {0} or upper "
2170
                    "{1} par bounds defined"
2171
                    "".format(len(use_cols), len(ult_lbound), len(ult_ubound))
2172
                )
2173

2174
            self.logger.log(
                "writing list-style template file '{0}'".format(tpl_filename)
            )
            # Generate tabular type template - also returns par data
            # relative file paths are in file_dict as Path instances (kludgey)
            dfs = [file_dict[Path(filename)] for filename in filenames]
            get_xy = None
            if (
                par_type.startswith("grid") or par_type.startswith("p")
            ) and geostruct is not None:
                get_xy = self.get_xy
            df, nxs = write_list_tpl(
                filenames,
                dfs,
                par_name_base,
                tpl_filename=tpl_filename,
                par_type=par_type,
                suffix="",
                index_cols=index_cols,
                use_cols=use_cols,
                use_rows=use_rows,
                zone_array=zone_array,
                gpname=pargp,
                ij_in_idx=ij_in_idx,
                xy_in_idx=xy_in_idx,
                get_xy=get_xy,
                zero_based=self.zero_based,
                input_filename=in_fileabs,
                par_style=par_style,
                headerlines=headerlines,
                fill_value=initial_value,
                logger=self.logger,
            )
            nxs = {fname: nx for fname, nx in zip(filenames, nxs)}
            assert (
                np.mod(len(df), len(use_cols)) == 0.0
            ), "Parameter dataframe wrong shape for number of cols {0}" "".format(
                use_cols
            )
            # variables need to be passed to each row in df
            lower_bound = np.tile(lower_bound, int(len(df) / ncol))
            upper_bound = np.tile(upper_bound, int(len(df) / ncol))
            self.logger.log(
                "writing list-style template file '{0}'".format(tpl_filename)
            )
        else:  # Assume array type parameter file
            self.logger.log(
                "writing array-style template file '{0}'".format(tpl_filename)
            )
            if pargp is None:
                pargp = par_name_base[0]
            shp = file_dict[list(file_dict.keys())[0]].shape
            # ARRAY constant, zones or grid (cell-by-cell)
            if par_type in {"constant", "zone", "grid"}:
                self.logger.log(
                    "writing template file "
                    "{0} for {1}".format(tpl_filename, par_name_base)
                )
                # Generate array type template - also returns par data
                df = write_array_tpl(
                    name=par_name_base[0],
                    tpl_filename=tpl_filename,
                    suffix="",
                    par_type=par_type,
                    zone_array=zone_array,
                    shape=shp,
                    get_xy=self.get_xy,
                    fill_value=initial_value if initial_value is not None else 1.0,
                    gpname=pargp,
                    input_filename=in_fileabs,
                    par_style=par_style,
                )
                self.logger.log(
                    "writing template file"
                    " {0} for {1}".format(tpl_filename, par_name_base)
                )
            # ARRAY PILOTPOINT setup
            elif par_type in {
                "pilotpoints",
                "pilot_points",
                "pilotpoint",
                "pilot_point",
                "pilot-point",
                "pilot-points",
                "pp",
            }:
                if par_style == "d":
                    self.logger.lraise(
                        "pilot points not supported for 'direct' par_style"
                    )
                # Setup pilotpoints for array type par files
                self.logger.log("setting up pilot point parameters")
                # finding spatial references for setting up pilot points
                if spatial_reference is None:
                    # if none passed with add_pars call
                    self.logger.statement(
                        "No spatial reference " "(containing cell spacing) passed."
                    )
                    if self.spatial_reference is not None:
                        # using global sr on PstFrom object
                        self.logger.statement(
                            "OK - using spatial reference " "in parent object."
                        )
                        spatial_reference = self.spatial_reference
                        spatial_reference_type = spatial_reference.grid_type
                    else:
                        # uhoh
                        self.logger.lraise(
                            "No spatial reference in parent "
                            "object either. "
                            "Can't set-up pilotpoints"
                        )
                else:
                    # a spatial reference was passed directly to this call;
                    # guard against a NameError in the dimension checks below
                    # (assumes a structured grid if no grid_type attribute)
                    spatial_reference_type = getattr(
                        spatial_reference, "grid_type", "structured"
                    )
                # check that spatial reference lines up with the original array dimensions
                structured = False
                if not isinstance(spatial_reference, dict):
                    structured = True
                    for mod_file, ar in file_dict.items():
                        orgdata = ar.shape
                        if spatial_reference_type == 'vertex':
                            assert orgdata[0] == spatial_reference.ncpl, (
                                "Spatial reference ncpl not equal to original data ncpl for\n"
                                + os.path.join(
                                    *os.path.split(self.original_file_d)[1:], mod_file
                                )
                            )
                        else:
                            assert orgdata[0] == spatial_reference.nrow, (
                                "Spatial reference nrow not equal to original data nrow for\n"
                                + os.path.join(
                                    *os.path.split(self.original_file_d)[1:], mod_file
                                )
                            )
                            assert orgdata[1] == spatial_reference.ncol, (
                                "Spatial reference ncol not equal to original data ncol for\n"
                                + os.path.join(
                                    *os.path.split(self.original_file_d)[1:], mod_file
                                )
                            )
                # (stolen from helpers.PstFromFlopyModel()._pp_prep())
                # but only setting up one set of pps at a time
                pnb = par_name_base[0]
                pnb = "pname:{1}_ptype:pp_pstyle:{0}".format(par_style, pnb)
                pp_dict = {0: pnb}
                pp_filename = "{0}pp.dat".format(par_name_store)
                # pst inputfile (for tpl->in pair) is
                # par_name_storepp.dat table (in pst ws)
                in_filepst = pp_filename
                pp_filename_dict = {pnb: in_filepst}
                tpl_filename = self.tpl_d / (pp_filename + ".tpl")
                # tpl_filename = get_relative_filepath(self.new_d, tpl_filename)
                pp_locs = None
                if pp_space is None:  # default spacing if not passed
                    self.logger.warn("pp_space is None, using 10...\n")
                    pp_space = 10
                else:
                    if not use_pp_zones and (isinstance(pp_space, int)):
                        # if not using pp zones will set up pp for just one
                        # zone (all non zero) -- for active domain...
                        if zone_array is None:
                            nr, nc = file_dict[list(file_dict.keys())[0]].shape
                            zone_array = np.ones((nr, nc))
                        zone_array[zone_array > 0] = 1  # set all gt-zero values to 1
                    if isinstance(pp_space, float):
                        pp_space = int(pp_space)
                    elif isinstance(pp_space, int):
                        pass
                    elif isinstance(pp_space, str):
                        if pp_space.lower().strip().endswith(".csv"):
                            self.logger.statement(
                                "trying to load pilot point location info from csv file '{0}'".format(
                                    self.new_d / Path(pp_space)
                                )
                            )
                            pp_locs = pd.read_csv(self.new_d / pp_space)

                        elif pp_space.lower().strip().endswith(".shp"):
                            self.logger.statement(
                                "trying to load pilot point location info from shapefile '{0}'".format(
                                    self.new_d / Path(pp_space)
                                )
                            )
                            pp_locs = pyemu.pp_utils.pilot_points_from_shapefile(
                                str(self.new_d / Path(pp_space))
                            )
                        else:
                            self.logger.statement(
                                "trying to load pilot point location info from pilot point file '{0}'".format(
                                    self.new_d / Path(pp_space)
                                )
                            )
                            pp_locs = pyemu.pp_utils.pp_file_to_dataframe(
                                self.new_d / pp_space
                            )
                        self.logger.statement(
                            "pilot points found in file '{0}' will be transferred to '{1}' for parameterization".format(
                                pp_space, pp_filename
                            )
                        )
                    elif isinstance(pp_space, pd.DataFrame):
                        pp_locs = pp_space
                    else:
                        self.logger.lraise(
                            "unrecognized 'pp_space' value, should be int, csv file, pp file or dataframe, not '{0}'".format(
                                type(pp_space)
                            )
                        )
                    if pp_locs is not None:
                        cols = pp_locs.columns.tolist()
                        if "name" not in cols:
                            self.logger.lraise("'name' col not found in pp dataframe")
                        if "x" not in cols:
                            self.logger.lraise("'x' col not found in pp dataframe")
                        if "y" not in cols:
                            self.logger.lraise("'y' col not found in pp dataframe")
                        if "zone" not in cols:
                            self.logger.warn(
                                "'zone' col not found in pp dataframe, adding generic zone"
                            )
                            pp_locs.loc[:, "zone"] = 1
                        elif zone_array is not None:
                            # check that all the zones in the pp df are in the zone array
                            missing = []
                            for uz in pp_locs.zone.unique():
                                if int(uz) not in zone_array:
                                    missing.append(str(uz))
                            if len(missing) > 0:
                                self.logger.lraise(
                                    "the following pp zone values were not found in the zone array: {0}".format(
                                        ",".join(missing)
                                    )
                                )

                            for uz in np.unique(zone_array):
                                if uz < 1:
                                    continue
                                if uz not in pp_locs.zone.values:
                                    missing.append(str(uz))
                            if len(missing) > 0:
                                self.logger.warn(
                                    "the following zones don't have any pilot points:{0}".format(
                                        ",".join(missing)
                                    )
                                )

                if not structured and pp_locs is None:
                    self.logger.lraise(
                        "pilot point type parameters with an unstructured grid "
                        "require that 'pp_space' contain explicit pilot point information"
                    )

                if not structured and zone_array is not None:
                    # self.logger.lraise("'zone_array' not supported for unstructured grids and pilot points")
                    if "zone" not in pp_locs.columns:
                        self.logger.lraise(
                            "'zone' not found in pp info dataframe and 'zone_array' passed"
                        )
                    uvals = np.unique(zone_array)
                    zvals = set([int(z) for z in pp_locs.zone.tolist()])
                    missing = []
                    for uval in uvals:
                        if int(uval) not in zvals and int(uval) != 0:
                            missing.append(str(int(uval)))
                    if len(missing) > 0:
                        self.logger.warn(
                            "the following values in the zone array were not found in the pp info: {0}".format(
                                ",".join(missing)
                            )
                        )

                if geostruct is None:  # need a geostruct for pilotpoints
                    # can use model default, if provided
                    if self.geostruct is None:  # but if no geostruct passed...
                        if not structured:
                            self.logger.lraise(
                                "pilot point type parameters with an unstructured grid require an"
                                " explicit `geostruct` arg to be passed to either PstFrom or add_parameters()"
                            )
                        self.logger.warn(
                            "pp_geostruct is None, "
                            "using ExpVario with contribution=1 "
                            "and a=(pp_space*max(delr,delc))"
                        )
                        # set up a default - could probably do something better if pp locs are passed
                        if not isinstance(pp_space, int):
                            space = 10
                        else:
                            space = pp_space
                        pp_dist = space * float(
                            max(
                                spatial_reference.delr.max(),
                                spatial_reference.delc.max(),
                            )
                        )
                        v = pyemu.geostats.ExpVario(contribution=1.0, a=pp_dist)
                        pp_geostruct = pyemu.geostats.GeoStruct(
                            variograms=v, name="pp_geostruct", transform=transform
                        )
                    else:
                        pp_geostruct = self.geostruct
                        if pp_geostruct.transform != transform:
                            self.logger.warn(
                                "0) Inconsistency between "
                                "pp_geostruct transform and "
                                "partrans."
                            )
                            self.logger.warn(
                                "1) Setting pp_geostruct transform "
                                "to {0}".format(transform)
                            )
                            self.logger.warn(
                                "2) This will create a new copy of " "pp_geostruct"
                            )
                            self.logger.warn(
                                "3) Better to pass an appropriately "
                                "transformed geostruct"
                            )
                            pp_geostruct = copy.copy(pp_geostruct)
                            pp_geostruct.transform = transform
                else:
                    pp_geostruct = geostruct

                if pp_locs is None:
                    # Set up pilot points
                    df = pyemu.pp_utils.setup_pilotpoints_grid(
                        sr=spatial_reference,
                        ibound=zone_array,
                        use_ibound_zones=use_pp_zones,
                        prefix_dict=pp_dict,
                        every_n_cell=pp_space,
                        pp_dir=self.new_d,
                        tpl_dir=self.tpl_d,
                        shapename=str(self.new_d / "{0}.shp".format(par_name_store)),
                        pp_filename_dict=pp_filename_dict,
                    )
                else:
                    df = pyemu.pp_utils.pilot_points_to_tpl(
                        pp_locs,
                        tpl_filename,
                        pnb,
                    )
                df.loc[:, "pargp"] = pargp
                df.set_index("parnme", drop=False, inplace=True)
                # df includes most of the par info for par_dfs and also for
                # relate_parfiles
                self.logger.statement(
                    "{0} pilot point parameters created".format(df.shape[0])
                )
                # should be only one group at a time
                pargp = df.pargp.unique()
                self.logger.statement("pilot point 'pargp':{0}".format(",".join(pargp)))
                self.logger.log("setting up pilot point parameters")

                # Calculating pp factors
                pg = pargp[0]
                # this is relatively quick
                ok_pp = pyemu.geostats.OrdinaryKrige(pp_geostruct, df)
                # build krige reference information on the fly - used to help
                # prevent unnecessary krig factor calculation
                pp_info_dict = {
                    "pp_data": ok_pp.point_data.loc[:, ["x", "y", "zone"]],
                    "cov": ok_pp.point_cov_df,
                    "zn_ar": zone_array,
                    "sr": spatial_reference,
                    "pstyle": par_style,
                    "transform": transform,
                }
                fac_processed = False
                for facfile, info in self._pp_facs.items():
                    # check against factors already calculated
                    if (
                        info["pp_data"].equals(pp_info_dict["pp_data"])
                        and info["cov"].equals(pp_info_dict["cov"])
                        and np.array_equal(info["zn_ar"], pp_info_dict["zn_ar"])
                        and pp_info_dict["pstyle"] == info["pstyle"]
                        and pp_info_dict["transform"] == info["transform"]
                    ):
                        if type(info["sr"]) == type(spatial_reference):
                            if isinstance(spatial_reference, dict):
                                if len(info["sr"]) != len(spatial_reference):
                                    continue
                        else:
                            continue
                        fac_processed = True  # don't need to re-calc same factors
                        fac_filename = facfile  # relate to existing fac file
                        self.logger.statement("reusing factors")
                        break
                if not fac_processed:
                    # TODO need better way of naming sequential fac_files?
                    self.logger.log("calculating factors for pargp={0}".format(pg))
                    fac_filename = self.new_d / "{0}pp.fac".format(par_name_store)
                    var_filename = fac_filename.with_suffix(".var.dat")
                    self.logger.statement(
                        "saving krige variance file:{0}".format(var_filename)
                    )
                    self.logger.statement(
                        "saving krige factors file:{0}".format(fac_filename)
                    )
                    # store info on pilotpoints
                    self._pp_facs[fac_filename] = pp_info_dict
                    # this is slow (esp on windows) so only want to do this
                    # when required
                    if structured:
                        ok_pp.calc_factors_grid(
                            spatial_reference,
                            var_filename=var_filename,
                            zone_array=zone_array,
                            num_threads=10,
                        )
                        ok_pp.to_grid_factors_file(fac_filename)
                    else:
                        # put the sr dict info into a df
                        # but we only want to use the nodes in each zone
                        if zone_array is not None:
                            for zone in np.unique(zone_array):
                                if int(zone) == 0:
                                    continue
                                data = []
                                for node, (x, y) in spatial_reference.items():
                                    if zone_array[0, node] == zone:
                                        data.append([node, x, y])
                                if len(data) == 0:
                                    continue
                                node_df = pd.DataFrame(data, columns=["node", "x", "y"])
                                ok_pp.calc_factors(
                                    node_df.x,
                                    node_df.y,
                                    num_threads=1,
                                    pt_zone=zone,
                                    idx_vals=node_df.node.astype(int),
                                )
                            ok_pp.to_grid_factors_file(
                                fac_filename, ncol=len(spatial_reference)
                            )
                        else:
                            data = []
                            for node, (x, y) in spatial_reference.items():
                                data.append([node, x, y])
                            node_df = pd.DataFrame(data, columns=["node", "x", "y"])
                            ok_pp.calc_factors(node_df.x, node_df.y, num_threads=10)
                            ok_pp.to_grid_factors_file(
                                fac_filename, ncol=node_df.shape[0]
                            )

                    self.logger.log("calculating factors for pargp={0}".format(pg))
            # TODO - other par types - JTW?
            elif par_type == "kl":
                self.logger.lraise("array type 'kl' not implemented")
            else:
                self.logger.lraise(
                    "unrecognized 'par_type': '{0}', "
                    "should be in "
                    "['constant','zone','grid','pilotpoints',"
                    "'kl']".format(par_type)
                )
            self.logger.log(
                "writing array-based template file " "'{0}'".format(tpl_filename)
            )

        if datetime is not None:
            # add time info to par_dfs
            df["datetime"] = datetime
            df["timedelta"] = t_offest
        # accumulate information that relates mult_files (set-up here and
        # eventually filled by PEST) to actual model files so that actual
        # model input file can be generated
        # (using helpers.apply_list_and_array_pars())
        zone_filename = None
        if zone_array is not None and zone_array.ndim < 3:
            # zone_filename = tpl_filename.replace(".tpl",".zone")
            zone_filename = Path(str(tpl_filename).replace(".tpl", ".zone"))
            self.logger.statement(
                "saving zone array {0} for tpl file {1}".format(
                    zone_filename, tpl_filename
                )
            )
            np.savetxt(zone_filename, zone_array, fmt="%4d")
            zone_filename = zone_filename.name

        relate_parfiles = []
        for mod_file, pdf in file_dict.items():
            mult_dict = {
                "org_file": Path(self.original_file_d.name, mod_file.name),
                "model_file": mod_file,
                "use_cols": use_cols,
                "index_cols": index_cols,
                "fmt": fmt_dict[mod_file],
                "sep": sep_dict[mod_file],
                "head_rows": skip_dict[mod_file],
                "upper_bound": ult_ubound,
                "lower_bound": ult_lbound,
                "operator": par_style,
            }
            if nxs:
                mult_dict["chkpar"] = nxs[mod_file]
            if par_style in ["m", "a"]:
                mult_dict["mlt_file"] = Path(self.mult_file_d.name, mlt_filename)

            if pp_filename is not None:
                # if pilotpoint need to store more info
                assert fac_filename is not None, "missing pilot-point input filename"
                mult_dict["fac_file"] = os.path.relpath(fac_filename, self.new_d)
                mult_dict["pp_file"] = pp_filename
                if transform == "log":
                    mult_dict["pp_fill_value"] = 1.0
                    mult_dict["pp_lower_limit"] = 1.0e-30
                    mult_dict["pp_upper_limit"] = 1.0e30
                else:
                    mult_dict["pp_fill_value"] = 0.0
                    mult_dict["pp_lower_limit"] = -1.0e30
                    mult_dict["pp_upper_limit"] = 1.0e30
            if zone_filename is not None:
                mult_dict["zone_file"] = zone_filename
            relate_parfiles.append(mult_dict)
        relate_pars_df = pd.DataFrame(relate_parfiles)
        # store on self for use in pest build etc
        self._parfile_relations.append(relate_pars_df)

        # add cols required for pst.parameter_data
        df.loc[:, "partype"] = par_type
        df.loc[:, "partrans"] = transform
        df.loc[:, "parubnd"] = upper_bound
        df.loc[:, "parlbnd"] = lower_bound
        if par_style != "d":
            df.loc[:, "parval1"] = initial_value
        # df.loc[:,"tpl_filename"] = tpl_filename

        # store tpl --> in filename pair
        self.tpl_filenames.append(get_relative_filepath(self.new_d, tpl_filename))
        self.input_filenames.append(in_filepst)
        for file_name in file_dict.keys():
            # store mult --> original file pairs
            self.org_files.append(file_name)
            self.mult_files.append(mlt_filename)

        # add pars to par_data list BH: is this what we want?
        # - BH: think we can get away with dropping duplicates?
        missing = set(par_data_cols) - set(df.columns)
        for field in missing:  # fill missing pst.parameter_data cols with defaults
            df[field] = pyemu.pst_utils.pst_config["par_defaults"][field]
        df = df.drop_duplicates(subset="parnme")  # drop pars that appear multiple times
        # df = df.loc[:, par_data_cols]  # just storing pst required cols
        # - need to store more for cov builder (e.g. x,y)
        # TODO - check when self.par_dfs gets used
        #  if constructing struct_dict here....
        #  - possibly not necessary to store
        # only add pardata for new parameters
        # some parameters might already be in a par_df if they occur
        # in more than one instance (layer, for example)
        new_parnmes = set(df['parnme']).difference(self.unique_parnmes)
        df = df.loc[df['parnme'].isin(new_parnmes)].copy()
        self.par_dfs.append(df)
        self.unique_parnmes.update(new_parnmes)
        # pivot df to list of df per par group in this call
        # (all groups will be related to same geostruct)
        # TODO maybe use different marker to denote a relationship between pars
        #  at the moment relating pars using common geostruct and pargp but may
        #  want to reserve pargp for just PEST
        if "covgp" not in df.columns:
            gp_dict = {g: [d] for g, d in df.groupby("pargp")}
        else:
            gp_dict = {g: [d] for g, d in df.groupby("covgp")}
        # df_list = [d for g, d in df.groupby('pargp')]
        if geostruct is not None and (
            par_type.lower() not in ["constant", "zone"] or datetime is not None
        ):
            # relating pars to geostruct....
            if geostruct not in self.par_struct_dict.keys():
                # add new geostruct
                self.par_struct_dict[geostruct] = gp_dict
                # self.par_struct_dict_l[geostruct] = list(gp_dict.values())
            else:
                # append group to appropriate key associated with this geostruct
                # this is how pars set up with different calls are collected
                # so their correlations can be tracked
                for gp, gppars in gp_dict.items():
                    # if group not already set up
                    if gp not in self.par_struct_dict[geostruct].keys():
                        # update dict entry with new {key:par} pair
                        self.par_struct_dict[geostruct].update({gp: gppars})
                    else:
                        # if gp already assigned to this geostruct, append par
                        # list to appropriate group key
                        self.par_struct_dict[geostruct][gp].extend(gppars)
                # self.par_struct_dict_l[geostruct].extend(list(gp_dict.values()))
        else:  # TODO some rules for if geostruct is not passed....
            if "x" in df.columns:
                pass
                #  TODO warn that it looks like spatial pars but no geostruct?
            # if self.geostruct is not None:
            #     geostruct = self.geostruct
            # elif pp_geostruct is not None:
            #     geostruct = pp_geostruct
            # else:
            #     TODO - do we need an error or warning and define a default?
            #     options:
            # if spatial_reference is None:
            #     spatial_reference = self.spatial_reference  # TODO placeholder for now. but this needs improving, sr and self.sr might be None
            # dist = 10 * float(
            #             max(spatial_reference.delr.max(),
            #                 spatial_reference.delc.max()))
            # v = pyemu.geostats.ExpVario(contribution=1.0, a=dist)
            # geostruct = pyemu.geostats.GeoStruct(
            #     variograms=v)
            # temporal default:
            # v = pyemu.geostats.ExpVario(contribution=1.0, a=180.0)  # 180 correlation length
            # geostruct = pyemu.geostats.GeoStruct(
            #     variograms=v)

        self.logger.log(
            "adding {0} type {1} style parameters for file(s) {2}".format(
                par_type, par_style, [str(f) for f in filenames]
            )
        )

        if rebuild_pst:  # may want to just update pst and rebuild
            # (with new relations)
            if self.pst is not None:
                self.logger.log("Adding pars to control file " "and rewriting pst")
                self.build_pst(filename=self.pst.filename, update="pars")
            else:
                # no pst object available yet, so a fresh control file is built
                self.build_pst(filename=None, update=False)
                self.logger.warn(
                    "pst object not available, " "new control file will be written"
                )
        return df

    def _load_listtype_file(
        self, filename, index_cols, use_cols, fmt=None, sep=None, skip=None, c_char=None
    ):
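        """Private method to read a list-style (tabular) model input or
        output file into a DataFrame, preserving any header or comment
        lines for later re-write.

        Returns:
            tuple of (`pandas.DataFrame` of the file contents, `dict` of
            stored header/comment lines keyed by line position, `str`
            separator actually used to read the file)
        """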
        if isinstance(filename, list):
            assert len(filename) == 1
            filename = filename[0]  # should only ever be one passed
        if isinstance(fmt, list):
            assert len(fmt) == 1
            fmt = fmt[0]
        if isinstance(sep, list):
            assert len(sep) == 1
            sep = sep[0]
        if isinstance(skip, list):
            assert len(skip) == 1
            skip = skip[0]

        # trying to use use_cols and index_cols to work out whether to
        # read header from csv.
        # either use_cols or index_cols could still be None
        # -- case of both being None should already have been caught
        # but index_cols could still be None...

        check_args = [a for a in [index_cols, use_cols] if a is not None]
        # `a` should be list if it is not None
        if all(isinstance(a[0], str) for a in check_args):
            # index_cols can be from header str
            header = 0  # will need to read a header
        elif all(isinstance(a[0], int) for a in check_args):
            # index_cols are column numbers in input file
            header = None
        else:
            if len(check_args) > 1:
                #  implies neither are None but they either both are not str,int
                #  or are different
                self.logger.lraise(
                    "unrecognized type for index_cols or use_cols "
                    "should be str or int and both should be of the "
                    "same type, not {0} or {1}".format(
                        *[str(type(a[0])) for a in check_args]
                    )
                )
            else:
                # implies not correct type
                self.logger.lraise(
                    "unrecognized type for either index_cols or use_cols "
                    "should be str or int, not {0}".format(type(check_args[0][0]))
                )

        # checking no overlap between index_cols and use_cols
        if len(check_args) > 1:
            si = set(index_cols)
            su = set(use_cols)

            i = si.intersection(su)
            if len(i) > 0:
                self.logger.lraise(f"use_cols also listed in index_cols: {str(i)}")

        file_path = self.new_d / filename
        if not os.path.exists(file_path):
            self.logger.lraise(f"par/obs filename '{file_path}' not found")
        self.logger.log(f"reading list-style file: {file_path}")
        if fmt.lower() == "free":
            if sep is None:
                sep = r"\s+"
                if Path(filename).suffix == ".csv":
                    sep = ","
        else:
            # TODO support reading fixed-format
            #  (based on value of fmt passed)
            #  ... or not?
            self.logger.warn(
                "0) Only reading free format list-style par "
                "files currently supported."
            )
            self.logger.warn("1) Assuming safe to read as whitespace delim.")
            self.logger.warn("2) Desired format string will still be passed through")
            sep = r"\s+"
        try:
            # read each input file
            if skip > 0 or c_char is not None:
                if c_char is None:
                    with open(file_path, "r") as fp:
                        storehead = {
                            lp: line for lp, line in enumerate(fp) if lp < skip
                        }
                else:
                    with open(file_path, "r") as fp:
                        storehead = {
                            lp: line
                            for lp, line in enumerate(fp)
                            if lp < skip or line.strip().startswith(c_char)
                        }
            else:
                storehead = {}
        except TypeError:
            # `skip` was passed as a comment character (str), not a row count
            c_char = skip
            skip = None
            with open(file_path, "r") as fp:
                storehead = {
                    lp: line
                    for lp, line in enumerate(fp)
                    if line.strip().startswith(c_char)
                }
        df = pd.read_csv(
            file_path,
            comment=c_char,
            sep=sep,
            skiprows=skip,
            header=header,
            low_memory=False,
            dtype='object'
        )
        self.logger.log(f"reading list-style file: {file_path}")
        # ensure that column ids from index_cols are in input file
        missing = []
        for index_col in index_cols:
            if index_col not in df.columns:
                missing.append(index_col)
            # df.loc[:, index_col] = df.loc[:, index_col].astype(np.int64) # TODO int? why?
        if len(missing) > 0:
            self.logger.lraise(
                "the following index_cols were not "
                "found in file '{0}':{1}"
                "".format(file_path, str(missing))
            )
        # ensure requested use_cols are in input file
        if use_cols is not None:
            for use_col in use_cols:
                if use_col not in df.columns:
                    missing.append(use_col)
        if len(missing) > 0:
            self.logger.lraise(
                "the following use_cols were not found "
                "in file '{0}':{1}"
                "".format(file_path, str(missing))
            )
        return df, storehead, sep

    def _prep_arg_list_lengths(
        self,
        filenames,
        fmts=None,
        seps=None,
        skip_rows=None,
        index_cols=None,
        use_cols=None,
    ):
        """
        Private wrapper function to align filenames, formats, delimiters,
        reading options and setup columns for passing sequentially to
        `_load_listtype_file()`
        Args:
            filenames (`str`) or (`list`): names of files to eventually be read
            fmts (`str`) or (`list`): column formatters for input files.
                If `None`, free formatting is assumed
            seps (`str`) or (`list`): column separator for free format files.
                If `None`, a list of `None`s is returned and the delimiter
                is eventually governed by the file extension (`,` for .csv)
            skip_rows (`int`) or (`list`): Number of rows in file header not
                to form part of the dataframe
            index_cols (`int`) or (`list`): Columns in tabular file to use as indices
            use_cols (`int`) or (`list`): Columns in tabular file to
                use as par or obs cols
        Returns:
            aligned lists of:
            filenames, fmts, seps, skip_rows, index_cols, use_cols
            for sequentially passing to `_load_listtype_file()`

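        Example (a minimal sketch of the alignment behaviour, where `pf` is
        a `PstFrom` instance -- a single `fmts` entry is repeated to match
        three filenames)::

            fnames, fmts, seps, skips, idx, use = pf._prep_arg_list_lengths(
                ["a.csv", "b.csv", "c.csv"], fmts="free"
            )
            # fmts == ["free", "free", "free"]
            # seps == [None, None, None]; skips == [0, 0, 0]
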
        """
        if not isinstance(filenames, list):
            filenames = [filenames]
        if fmts is None:
            fmts = ["free" for _ in filenames]
        if not isinstance(fmts, list):
            fmts = [fmts]
        if len(fmts) != len(filenames):
            self.logger.warn(
                "Discrepancy between number of filenames ({0}) "
                "and number of formatter strings ({1}). "
                "Will repeat first ({2})"
                "".format(len(filenames), len(fmts), fmts[0])
            )
            fmts = [fmts[0] for _ in filenames]
        fmts = ["free" if fmt is None else fmt for fmt in fmts]
        if seps is None:
            seps = [None for _ in filenames]
        if not isinstance(seps, list):
            seps = [seps]
        if len(seps) != len(filenames):
            self.logger.warn(
                "Discrepancy between number of filenames ({0}) "
                "and number of seps defined ({1}). "
                "Will repeat first ({2})"
                "".format(len(filenames), len(seps), seps[0])
            )
            seps = [seps[0] for _ in filenames]
        if skip_rows is None:
            skip_rows = [None for _ in filenames]
        if not isinstance(skip_rows, list):
            skip_rows = [skip_rows]
        if len(skip_rows) != len(filenames):
            self.logger.warn(
                "Discrepancy between number of filenames ({0}) "
                "and number of skip_rows defined ({1}). "
                "Will repeat first ({2})"
                "".format(len(filenames), len(skip_rows), skip_rows[0])
            )
            skip_rows = [skip_rows[0] for _ in filenames]
        skip_rows = [0 if s is None else s for s in skip_rows]

        if index_cols is None and use_cols is not None:
            self.logger.lraise(
                "index_cols is None, but use_cols is not ({0})" "".format(str(use_cols))
            )

        if index_cols is not None:
            if not isinstance(index_cols, list):
                index_cols = [index_cols]
        if use_cols is not None:
            if not isinstance(use_cols, list):
                use_cols = [use_cols]
        return filenames, fmts, seps, skip_rows, index_cols, use_cols


def write_list_tpl(
    filenames,
    dfs,
    name,
    tpl_filename,
    index_cols,
    par_type,
    use_cols=None,
    use_rows=None,
    suffix="",
    zone_array=None,
    gpname=None,
    get_xy=None,
    ij_in_idx=None,
    xy_in_idx=None,
    zero_based=True,
    input_filename=None,
    par_style="m",
    headerlines=None,
    fill_value=1.0,
    logger=None,
):
    """Write template files for a list-style input.

    Args:
        filenames (`str` or `container` of `str`): original input filenames
        dfs (`pandas.DataFrame` or `container` of pandas.DataFrames): pandas
            representations of input file.
        name (`str` or container of str): parameter name prefixes.
            If more than one column is to be parameterised, must be a container
            of strings providing the prefix for the parameters in the
            different columns.
        tpl_filename (`str`): Path (from current execution directory)
            for desired template file
        index_cols (`list`): column names to use as indices in tabular
            input dataframe
        par_type (`str`): 'constant','zone', or 'grid' used in parname
            generation. If `constant`, one par is set up for each `use_cols`.
            If `zone`, one par is set up for each zone for each `use_cols`.
            If `grid`, one par is set up for every unique index combination
            (from `index_cols`) for each `use_cols`.
        use_cols (`list`): Columns in tabular input file to parameterise.
            If None, pars are set up for all columns apart from index cols.
        use_rows (`list` of `int` or `tuple`): Setup parameters for only
            specific rows in list-style model input file.
            If list of `int` -- assumed to be a row index selection (zero-based).
            If list of `tuple` -- assumed to be selection based on `index_cols`
                values. e.g. [(3,5,6)] would attempt to set parameters where the
                model file values for 3 `index_cols` are 3,5,6. N.B. values in
                tuple are actual model file entry values.
            If no rows in the model input file match `use_rows` -- parameters
                will be set up for all rows.
            Only valid/effective if index_cols is not None.
            Default is None -- setup parameters for all rows.
        suffix (`str`): Optional par name suffix
        zone_array (`np.ndarray`): Array defining zone divisions.
            If not None and `par_type` is `grid` or `zone` it is expected that
            `index_cols` provide the indices for
            querying `zone_array`. Therefore, array dimension should equal
            `len(index_cols)`.
        get_xy (`pyemu.PstFrom` method): Can be specified to get real-world xy
            from `index_cols` passed (to assist correlation definition)
        ij_in_idx (`list` or `array`): defining which `index_cols` contain i,j
        xy_in_idx (`list` or `array`): defining which `index_cols` contain x,y
        zero_based (`boolean`): IMPORTANT - pass as False if `index_cols`
            are NOT zero-based indices (e.g. MODFLOW row/cols).
            If False, 1 will be subtracted from `index_cols`.
        input_filename (`str`): Path to input file (paired with tpl file)
        par_style (`str`): either 'd','a', or 'm'
        headerlines ([`str`]): optional header lines in the original model file, used for
            direct style parameters
    Returns:
        tuple of (`pandas.DataFrame`, `list`): dataframe with info for the new
        parameters and the number of unique index entries per input file

    Note:
        This function is called by `PstFrom` programmatically

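    Example (an illustrative sketch only -- in normal use
    `PstFrom.add_parameters()` assembles these arguments; the file and
    column names here are hypothetical)::

        df = pd.read_csv("wel_flux.csv")
        par_df, nxs = write_list_tpl(
            ["wel_flux.csv"], [df], ["welflux"], "wel_flux.csv.tpl",
            index_cols=["k", "i", "j"], par_type="grid",
            use_cols=["flux"], zero_based=False,
        )
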
    """
    # get dataframe with autogenerated parnames based on `name`, `index_cols`,
    # `use_cols`, `suffix` and `par_type`
    if par_style == "d":
        df_tpl, nxs = _write_direct_df_tpl(
            filenames[0],
            tpl_filename,
            dfs[0],
            name,
            index_cols,
            par_type,
            use_cols=use_cols,
            use_rows=use_rows,
            suffix=suffix,
            gpname=gpname,
            zone_array=zone_array,
            get_xy=get_xy,
            ij_in_idx=ij_in_idx,
            xy_in_idx=xy_in_idx,
            zero_based=zero_based,
            headerlines=headerlines,
            logger=logger,
        )
    else:
        df_tpl = _get_tpl_or_ins_df(
            dfs,
            name,
            index_cols,
            par_type,
            use_cols=use_cols,
            suffix=suffix,
            gpname=gpname,
            zone_array=zone_array,
            get_xy=get_xy,
            ij_in_idx=ij_in_idx,
            xy_in_idx=xy_in_idx,
            zero_based=zero_based,
            par_fill_value=fill_value,
            par_style=par_style,
        )
        idxs = [[tuple(s) for s in df.loc[:, index_cols].values] for df in dfs]
        use_rows, nxs = _get_use_rows(
            df_tpl, idxs, use_rows, zero_based, tpl_filename, logger=logger
        )
        df_tpl = df_tpl.loc[use_rows, :]  # direct pars done in direct function
        # can we just slice df_tpl here?
    for col in use_cols:  # correlations flagged using pargp
        df_tpl["covgp{0}".format(col)] = df_tpl.loc[:, "pargp{0}".format(col)].values
    # needs modifying if colocated pars in same group
    if par_type == "grid" and "x" in df_tpl.columns:
        if df_tpl.duplicated(["x", "y"]).any():
            # may need to use a different grouping for parameter correlations
            # where parameter x and y values are the same but pars are not
            # correlated (e.g. 2d correlation but different layers)
            # - this will only work if `index_cols` contains a third dimension.
            if len(index_cols) > 2:
                third_d = index_cols.copy()
                if xy_in_idx is not None:
                    for idx in xy_in_idx:
                        third_d.remove(index_cols[idx])
                elif ij_in_idx is not None:
                    for idx in ij_in_idx:
                        third_d.remove(index_cols[idx])
                else:  # if xy_in_idx and ij_in_idx are None
                    # then parse_kij assumes that i is at idx[-2] and j at idx[-1]
                    third_d.pop()  # pops -1
                    third_d.pop()  # pops -2
                msg = (
                    "Coincidentally located pars in list-style file, "
                    "attempting to separate pars based on `index_cols` "
                    f"passed - using index_col[{third_d[-1]}] "
                    f"for third dimension"
                )
                if logger is not None:
                    logger.warn(msg)
                else:
                    PyemuWarning(msg)
                for col in use_cols:
                    df_tpl["covgp{0}".format(col)] = df_tpl.loc[
                        :, "covgp{0}".format(col)
                    ].str.cat(
                        pd.DataFrame(df_tpl.sidx.to_list()).iloc[:, 0].astype(str),
                        "_cov",
                    )
            else:
                msg = (
                    "Coincidentally located pars in list-style file. "
                    "Likely to cause issues building par cov or "
                    "drawing par ensemble. Can be resolved by passing "
                    "an additional `index_col` as a basis for "
                    "splitting colocated correlations (e.g. Layer)"
                )
                if logger is not None:
                    logger.warn(msg)
                else:
                    PyemuWarning(msg)
    # pull out par details where multiple `use_cols` are requested
    parnme = list(df_tpl.loc[:, use_cols].values.flatten())
    pargp = list(
        df_tpl.loc[:, ["pargp{0}".format(col) for col in use_cols]].values.flatten()
    )

    covgp = list(
        df_tpl.loc[:, ["covgp{0}".format(col) for col in use_cols]].values.flatten()
    )
    df_tpl = df_tpl.drop(
        [col for col in df_tpl.columns if str(col).startswith("covgp")], axis=1
    )
    df_par = pd.DataFrame(
        {"parnme": parnme, "pargp": pargp, "covgp": covgp}, index=parnme
    )

    parval_cols = [c for c in df_tpl.columns if "parval1" in str(c)]
    parval = df_tpl.loc[:, parval_cols].values.flatten().tolist()

    if (
        par_type == "grid" and "x" in df_tpl.columns
    ):  # TODO work out if x,y needed for constant and zone pars too
        df_par["x"], df_par["y"] = np.concatenate(
            df_tpl.apply(lambda r: [[r.x, r.y] for _ in use_cols], axis=1).values
        ).T
    for use_col in use_cols:
        df_tpl.loc[:, use_col] = df_tpl.loc[:, use_col].apply(
            lambda x: "~  {0}  ~".format(x)
        )
    if par_style in ["m", "a"]:
        pyemu.helpers._write_df_tpl(
            filename=tpl_filename, df=df_tpl, sep=",", tpl_marker="~"
        )

        if input_filename is not None:
            df_in = df_tpl.copy()
            df_in.loc[:, use_cols] = fill_value
            df_in.to_csv(input_filename)
    df_par.loc[:, "tpl_filename"] = tpl_filename
    df_par.loc[:, "input_filename"] = input_filename
    df_par.loc[:, "parval1"] = parval
    return df_par, nxs


def _write_direct_df_tpl(
    in_filename,
    tpl_filename,
    df,
    name,
    index_cols,
    typ,
    use_cols=None,
    use_rows=None,
    suffix="",
    zone_array=None,
    get_xy=None,
    ij_in_idx=None,
    xy_in_idx=None,
    zero_based=True,
    gpname=None,
    headerlines=None,
    logger=None,
):
    """
    Private method to auto-generate parameter or obs names from tabular
    model files (input or output) read into pandas dataframes
    Args:
        in_filename (`str`): original model input filename
        tpl_filename (`str`): template filename
        df (`pandas.DataFrame`): DataFrame of list-style input file
        name (`str`): Parameter name prefix
        index_cols (`str` or `list`): columns of dataframes to use as indices
        typ (`str`): 'constant','zone', or 'grid' used in parname generation.
            If `constant`, one par is set up for each `use_cols`.
            If `zone`, one par is set up for each zone for each `use_cols`.
            If `grid`, one par is set up for every unique index combination
            (from `index_cols`) for each `use_cols`.
        use_cols (`list`): Columns to parameterise. If None, pars are set up
            for all columns apart from index cols
        suffix (`str`): Optional par name suffix.
        zone_array (`np.ndarray`): Array defining zone divisions.
            If not None and `par_type` is `grid` or `zone` it is expected that
            `index_cols` provide the indices for querying `zone_array`.
            Therefore, array dimension should equal `len(index_cols)`.
        get_xy (`pyemu.PstFrom` method): Can be specified to get real-world xy
            from `index_cols` passed (to include in obs/par name)
        ij_in_idx (`list` or `array`): defining which `index_cols` contain i,j
        xy_in_idx (`list` or `array`): defining which `index_cols` contain x,y
        zero_based (`boolean`): IMPORTANT - pass as False if `index_cols`
            are NOT zero-based indices (e.g. MODFLOW row/cols).
            If False, 1 will be subtracted from `index_cols`.

    Returns:
        tuple of (`pandas.DataFrame` with parnme and pargp defined for each
        `use_col`, `list` of the number of unique index entries)

    Note:
        This function is called by `PstFrom` programmatically

    """
3304
    # TODO much of this duplicates what is in _get_tpl_or_ins_df() -- could posssibly be consolidated
3305
    # work out the union of indices across all dfs
3306

3307
    sidx = []
8✔
3308

3309
    didx = df.loc[:, index_cols].apply(tuple, axis=1)
8✔
3310
    sidx.extend(didx)
8✔
3311

3312
    df_ti = pd.DataFrame({"sidx": sidx}, columns=["sidx"])
8✔
3313
    inames, fmt = _get_index_strfmt(index_cols)
8✔
3314
    df_ti = _get_index_strings(df_ti, fmt, zero_based)
8✔
3315
    df_ti = _getxy_from_idx(df_ti, get_xy, xy_in_idx, ij_in_idx)
8✔
3316

3317
    df_ti, direct_tpl_df = _build_parnames(
8✔
3318
        df_ti,
3319
        typ,
3320
        zone_array,
3321
        index_cols,
3322
        use_cols,
3323
        name,
3324
        gpname,
3325
        suffix,
3326
        par_style="d",
3327
        init_df=df,
3328
        init_fname=in_filename,
3329
    )
3330
    idxs = [tuple(s) for s in df.loc[:, index_cols].values]
8✔
3331
    use_rows, nxs = _get_use_rows(
8✔
3332
        df_ti, [idxs], use_rows, zero_based, tpl_filename, logger=logger
3333
    )
3334
    df_ti = df_ti.loc[use_rows]
8✔
3335
    not_rows = ~direct_tpl_df.index.isin(use_rows)
8✔
3336
    direct_tpl_df.loc[not_rows] = df.loc[not_rows, direct_tpl_df.columns]
8✔
3337
    if isinstance(direct_tpl_df.columns[0], str):
8✔
3338
        header = True
8✔
3339
    else:
3340
        header = False
8✔
3341
    pyemu.helpers._write_df_tpl(
8✔
3342
        tpl_filename, direct_tpl_df, index=False, header=header, headerlines=headerlines
3343
    )
3344
    return df_ti, nxs
8✔
3345

3346

3347
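# Hedged sketch (added; column names are hypothetical): for a grid-type direct
# parameter on column "flux" indexed by "wellid", rows selected via use_rows
# are wrapped with direct markers while the remaining rows keep their original
# values, e.g.:
#
#   wellid,flux
#   w1,~ pname:wel_ptype:gr_usecol:flux_pstyle:d_wellid:w1 ~
#   w2,150.0

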
def _get_use_rows(tpldf, idxcolvals, use_rows, zero_based, fnme, logger=None):
    """
    Private function to resolve the passed `use_rows` option, which can come
    in several forms, to an index selection within `tpldf`.

    Args:
        tpldf (`pandas.DataFrame`): template dataframe with `sidx` and
            `idx_strs` columns
        idxcolvals (`list`): collections of index tuples, one per file
        use_rows: rows to select; positional ints, index tuples, or
            formatted index strings
        zero_based (`bool`): whether the passed indices are zero-based
        fnme (`str`): filename, used in warning messages
        logger (`pyemu.Logger`): optional logger for warnings

    Returns:
        tuple of (selected index, list of unique index counts)
    """
    if use_rows is None:
        use_rows = tpldf.index
        nxs = [len(set(idx)) for idx in idxcolvals]
        return use_rows, nxs
    if np.ndim(use_rows) == 0:
        use_rows = [use_rows]
    if np.ndim(use_rows) == 1:  # assume we have collection of int that describe iloc
        use_rows = [idx[i] for i in use_rows for idx in idxcolvals]
    else:
        use_rows = [tuple(r) for r in use_rows]
    nxs = [len(set(use_rows).intersection(idx)) for idx in idxcolvals]
    orig_use_rows = use_rows.copy()
    if not zero_based:  # assume passed indices are 1-based
        use_rows = [
            tuple([i - 1 if isinstance(i, (int, np.integer)) else i for i in r])
            if not isinstance(r, str)
            else r
            for r in use_rows
        ]
    use_rows = set(use_rows)
    sel = tpldf.sidx.isin(use_rows) | tpldf.idx_strs.isin(use_rows)
    if not sel.any():  # use_rows must be ints
        inidx = list(use_rows.intersection(tpldf.index))
        missing = use_rows.difference(tpldf.index)
        use_rows = tpldf.iloc[inidx].index.unique()
    else:
        missing = set(use_rows).difference(tpldf.sidx, tpldf.idx_strs)
        use_rows = tpldf.loc[sel].index.unique()
    if len(missing) > 0:
        msg = (
            "write_list_tpl: Requested rows missing from parameter file, "
            f"rows: {missing}, file: {fnme}."
        )
        if logger is not None:
            logger.warn(msg)
        else:
            warnings.warn(msg, PyemuWarning)
    if len(use_rows) == 0:
        msg = (
            "write_list_tpl: None of the requested rows were found in parameter file, "
            f"rows: {orig_use_rows}, file: {fnme}. "
            "Will set up pars for all rows."
        )
        if logger is not None:
            logger.warn(msg)
        else:
            warnings.warn(msg, PyemuWarning)
        use_rows = tpldf.index
    return use_rows, nxs


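# Hedged examples of the `use_rows` forms resolved above (illustrative only):
#   use_rows=None          -> all rows are parameterized
#   use_rows=[0, 2]        -> positional ints, mapped through idxcolvals
#   use_rows=[(3, 5)]      -> index tuples, matched against the "sidx" column
#   use_rows=["k:0_i:3"]   -> formatted strings, matched against "idx_strs"

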
def _get_index_strfmt(index_cols):
    # get some index strings for naming
    j = "_"
    # fmt = "{0}|{1}"
    if isinstance(index_cols[0], str):
        inames = index_cols
    else:
        inames = ["idx{0}".format(i) for i in range(len(index_cols))]
    # full formatter string
    fmt = j.join([f"{iname}|{{{i}}}" for i, iname in enumerate(inames)])
    # else:
    #     # fmt = "{1:3}"
    #     j = ""
    #     if isinstance(index_cols[0], str):
    #         inames = index_cols
    #     else:
    #         inames = ["{0}".format(i) for i in range(len(index_cols))]
    #     # full formatter string
    #     fmt = j.join([f"{{{i}:3}}" for i, iname in enumerate(inames)])
    return inames, fmt


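# Illustrative sketch (added): for index_cols=["k", "i", "j"] the formatter
# built above is "k|{0}_i|{1}_j|{2}"; for positional (non-string) index_cols
# of length 2 it is "idx0|{0}_idx1|{1}".

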
def _get_index_strings(df, fmt, zero_based):
    if not zero_based:
        # only if indices are ints (trying to support strings as par ids)
        df.loc[:, "sidx"] = df.sidx.apply(
            lambda x: tuple(xx - 1 if isinstance(xx, int) else xx for xx in x)
        )

    df.loc[:, "idx_strs"] = df.sidx.apply(lambda x: fmt.format(*x)).str.replace(" ", "")
    df.loc[:, "idx_strs"] = df.idx_strs.str.replace(":", "", regex=False).str.lower()
    df.loc[:, "idx_strs"] = df.idx_strs.str.replace("|", ":", regex=False)
    return df


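# Illustrative walk-through (added): with fmt="k|{0}_i|{1}", zero_based=False
# and a one-based sidx of (1, 4), the tuple is first shifted to (0, 3);
# formatting gives "k|0_i|3", and the replace chain above (strip spaces,
# drop ":", lowercase, then "|" -> ":") yields the index string "k:0_i:3".

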
def _getxy_from_idx(df, get_xy, xy_in_idx, ij_in_idx):
    if get_xy is None:
        return df
    if xy_in_idx is not None:
        df[["x", "y"]] = pd.DataFrame(df.sidx.to_list()).iloc[:, xy_in_idx]
        return df

    df.loc[:, "xy"] = df.sidx.apply(get_xy, ij_id=ij_in_idx)
    df.loc[:, "x"] = df.xy.apply(lambda x: x[0])
    df.loc[:, "y"] = df.xy.apply(lambda x: x[1])
    return df


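# Illustrative note (added): if xy_in_idx is passed (e.g. [1, 2]), x and y are
# pulled straight from those positions of each "sidx" tuple; otherwise get_xy
# is called on each tuple (with ij_id=ij_in_idx) to compute real-world x,y.

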
def _build_parnames(
    df,
    typ,
    zone_array,
    index_cols,
    use_cols,
    basename,
    gpname,
    suffix,
    par_style,
    init_df=None,
    init_fname=None,
    fill_value=1.0,
):
    # private helper to construct parameter names (and, for direct pars, the
    # template dataframe) for constant, zone, and grid type list parameters
    if par_style == "d":
        # if direct, we have more to deal with...
        assert init_df is not None
        direct_tpl_df = init_df.copy()
        if typ == "constant":
            assert init_fname is not None
    if use_cols is None:
        use_cols = [c for c in df.columns if c not in index_cols]
    for iuc, use_col in enumerate(use_cols):
        if not isinstance(basename, str):
            nname = basename[iuc]
        else:
            nname = basename
        # if zone type, find the zones for each index position
        if zone_array is not None and typ in ["zone", "grid"]:
            if zone_array.ndim != len(index_cols):
                raise Exception(
                    "get_tpl_or_ins_df() error: "
                    "zone_array.ndim "
                    "({0}) != len(index_cols)({1})"
                    "".format(zone_array.ndim, len(index_cols))
                )
            df.loc[:, "zval"] = df.sidx.apply(lambda x: zone_array[x])

        if gpname is None or gpname[iuc] is None:
            ngpname = nname
        else:
            if not isinstance(gpname, str):
                ngpname = gpname[iuc]
            else:
                ngpname = gpname
        df.loc[:, "pargp{}".format(use_col)] = ngpname
        df.loc[:, "parval1_{0}".format(use_col)] = fill_value
        if typ == "constant":
            # one par for entire use_col column
            fmtr = "pname:{0}_ptype:cn_usecol:{1}"
            fmtr += "_pstyle:{0}".format(par_style)
            if suffix != "":
                fmtr += f"_{suffix}"
            # else:
            #     fmtr = "{0}{1}"
            #     if suffix != "":
            #         fmtr += suffix
            df.loc[:, use_col] = fmtr.format(nname, use_col)
            if par_style == "d":
                _check_diff(init_df.loc[:, use_col].values, init_fname)
                df.loc[:, "parval1_{0}".format(use_col)] = init_df.loc[:, use_col][0]
        elif typ == "zone":
            # one par for each zone
            fmtr = "pname:{0}_ptype:zn_usecol:{1}"
            if par_style == "d":
                # todo
                raise NotImplementedError(
                    "list-style direct zone-type parameters not implemented"
                )
                fmtr += "_pstyle:d"
            else:
                fmtr += "_pstyle:m"
            if zone_array is not None:
                fmtr += "_zone:{2}"
                # df.loc[:, use_col] += df.zval.apply(
                #     lambda x: "_zone:{0}".format(x)
                # )
            if suffix != "":
                fmtr += f"_{suffix}"
                # df.loc[:, use_col] += "_{0}".format(suffix)
            # else:
            #     fmtr = "{0}{1}"
            #     if zone_array is not None:
            #         fmtr += "z{2}"
            #     if suffix != "":
            #         fmtr += suffix
            if zone_array is not None:
                df.loc[:, use_col] = df.zval.apply(
                    lambda x: fmtr.format(nname, use_col, x)
                )
            else:
                df.loc[:, use_col] = fmtr.format(nname, use_col)
            # todo:  Direct pars:
            #  check that values are constant within zones and assign parval1

        elif typ == "grid":
            # one par for each index
            fmtr = "pname:{0}_ptype:gr_usecol:{1}"
            fmtr += "_pstyle:{0}".format(par_style)
            if zone_array is not None:
                fmtr += "_zone:{2}_{3}"
            else:
                fmtr += "_{2}"
            if suffix != "":
                fmtr += f"_{suffix}"
            if zone_array is not None:
                df.loc[:, use_col] = df.apply(
                    lambda x: fmtr.format(nname, use_col, x.zval, x.idx_strs), axis=1
                )
            else:
                df.loc[:, use_col] = df.idx_strs.apply(
                    lambda x: fmtr.format(nname, use_col, x)
                )
            if par_style == "d":
                df.loc[:, f"parval1_{use_col}"] = init_df.loc[:, use_col].values
        else:
            raise Exception(
                "get_tpl_or_ins_df() error: "
                "unrecognized 'typ', if not 'obs', "
                "should be 'constant','zone', "
                "or 'grid', not '{0}'".format(typ)
            )

        if par_style == "d":
            direct_tpl_df.loc[:, use_col] = (
                df.loc[:, use_col].apply(lambda x: "~ {0} ~".format(x)).values
            )
    if par_style == "d":
        return df, direct_tpl_df
    return df


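# Hedged summary of the name formats built above (illustrative values):
#   constant: "pname:hk_ptype:cn_usecol:2_pstyle:m"
#   zone:     "pname:hk_ptype:zn_usecol:2_pstyle:m_zone:1"
#   grid:     "pname:hk_ptype:gr_usecol:2_pstyle:m_idx0:0_idx1:3"
# with "_<suffix>" appended when a suffix is passed.

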
def _get_tpl_or_ins_df(
    dfs,
    name,
    index_cols,
    typ,
    use_cols=None,
    suffix="",
    zone_array=None,
    get_xy=None,
    ij_in_idx=None,
    xy_in_idx=None,
    zero_based=True,
    gpname=None,
    par_fill_value=1.0,
    par_style="m",
):
    """
    Private method to auto-generate parameter or obs names from tabular
    model files (input or output) read into pandas dataframes

    Args:
        dfs (`pandas.DataFrame` or `list`): DataFrames (can be list of DataFrames)
            to set up parameters or observations
        name (`str`): Parameter name or Observation name prefix
        index_cols (`str` or `list`): columns of dataframes to use as indices
        typ (`str`): 'obs' to set up observation names or,
            'constant','zone', or 'grid' used in parname generation.
            If `constant`, one par is set up for each `use_cols`.
            If `zone`, one par is set up for each zone for each `use_cols`.
            If `grid`, one par is set up for every unique index combination
            (from `index_cols`) for each `use_cols`.
        use_cols (`list`): Columns to parameterise. If None, pars are set up
            for all columns apart from index cols. Not used if `typ`==`obs`.
        suffix (`str`): Optional par name suffix. Not used if `typ`==`obs`.
        zone_array (`np.ndarray`): Only used for parameters (`typ` != `obs`).
            Array defining zone divisions.
            If not None and `par_type` is `grid` or `zone` it is expected that
            `index_cols` provide the indices for querying `zone_array`.
            Therefore, array dimension should equal `len(index_cols)`.
        get_xy (`pyemu.PstFrom` method): Can be specified to get real-world xy
            from `index_cols` passed (to include in obs/par name)
        ij_in_idx (`list` or `array`): defining which `index_cols` contain i,j
        xy_in_idx (`list` or `array`): defining which `index_cols` contain x,y
        zero_based (`boolean`): IMPORTANT - pass as False if `index_cols`
            are NOT zero-based indices (e.g. MODFLOW row/cols).
            If False, 1 will be subtracted from `index_cols`.
        gpname (`str` or `list`): Optional parameter group name(s)
        par_fill_value (`float`): value to use as `parval1`. Default is 1.0
        par_style (`str`): either 'm', 'a', or 'd'

    Returns:
        if `typ`==`obs`: pandas.DataFrame with index strings for setting up obs
        names when passing through to
        pyemu.pst_utils.csv_to_ins_file(df.set_index('idx_str'))

        else: pandas.DataFrame with parname and pargroup defined for each `use_col`

    """
    if isinstance(dfs, pd.DataFrame):
        dfs = [dfs]
    if not isinstance(dfs, list):
        dfs = list(dfs)

    # work out the union of indices across all dfs
    # order matters for obs
    sidx = []
    for df in dfs:
        # avoiding df.values to prevent conversion to same type
        didx = zip(*[df[col] for col in index_cols])
        aidx = [i for i in didx if i not in sidx]
        sidx.extend(aidx)

    df_ti = pd.DataFrame({"sidx": sidx}, columns=["sidx"])
    inames, fmt = _get_index_strfmt(index_cols)
    df_ti = _get_index_strings(df_ti, fmt, zero_based)
    df_ti = _getxy_from_idx(df_ti, get_xy, xy_in_idx, ij_in_idx)

    if typ == "obs":
        return df_ti  #################### RETURN if OBS
    df_ti = _build_parnames(
        df_ti,
        typ,
        zone_array,
        index_cols,
        use_cols,
        name,
        gpname,
        suffix,
        par_style,
        fill_value=par_fill_value,
    )
    return df_ti


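# Hedged usage sketch (added; argument values are hypothetical): for
# observations this returns early with just the index strings, e.g.
#   df = _get_tpl_or_ins_df(dfs, "hds", index_cols=["time"], typ="obs")
# yields an "idx_strs" column (e.g. "time:365.25") that feeds obs-name
# construction via pyemu.pst_utils.csv_to_ins_file, as noted in the docstring.

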
def write_array_tpl(
    name,
    tpl_filename,
    suffix,
    par_type,
    zone_array=None,
    gpname=None,
    shape=None,
    fill_value=1.0,
    get_xy=None,
    input_filename=None,
    par_style="m",
):
    """
    write a template file for a 2D array.

    Args:
        name (`str`): the base parameter name
        tpl_filename (`str`): the template file to write - include path
        suffix (`str`): suffix to append to par names
        par_type (`str`): type of parameter
        zone_array (`numpy.ndarray`): an array used to skip inactive cells. Values less than 1 are
            not parameterized and are assigned a value of fill_value. Default is None.
        gpname (`str`): pargp field in dataframe
        shape (`tuple`): dimensions of array to write
        fill_value (`float`): value written for cells skipped via `zone_array`.
            Default is 1.0
        get_xy: function returning real-world x,y from i,j, included in
            grid-type par names if passed
        input_filename (`str`): the model input file to write (ones for 'm',
            zeros for 'a'); for 'd' style this file must already exist
        par_style (`str`): either 'd' (direct), 'a' (addend), or 'm' (multiplier)

    Returns:
        df (`pandas.DataFrame`): a dataframe with parameter information

    Note:
        This function is called by `PstFrom` programmatically

    """

    if shape is None and zone_array is None:
        raise Exception(
            "write_array_tpl() error: must pass either zone_array or shape"
        )
    elif shape is not None and zone_array is not None:
        if shape != zone_array.shape:
            raise Exception(
                "write_array_tpl() error: passed "
                "shape {0} != zone_array.shape {1}".format(shape, zone_array.shape)
            )
    elif shape is None:
        shape = zone_array.shape
    if len(shape) != 2:
        raise Exception(
            "write_array_tpl() error: shape '{0}' not 2D".format(str(shape))
        )

    par_style = par_style.lower()
    if par_style == "d":
        if not os.path.exists(input_filename):
            raise Exception(
                "write_array_tpl() error: couldn't find input file "
                "{0}, which is required for 'direct' par_style".format(input_filename)
            )
        org_arr = np.loadtxt(input_filename, ndmin=2)
        if par_type == "grid":
            pass
        elif par_type == "constant":
            _check_diff(org_arr, input_filename)
        elif par_type == "zone":
            for zval in np.unique(zone_array):
                if zval < 1:
                    continue
                zone_org_arr = org_arr.copy()
                zone_org_arr[zone_array != zval] = np.nan
                _check_diff(zone_org_arr, input_filename, zval)
    elif par_style == "m":
        org_arr = np.ones(shape)
    elif par_style == "a":
        org_arr = np.zeros(shape)
    else:
        raise Exception(
            "write_array_tpl() error: unrecognized 'par_style' {0} ".format(par_style)
            + "should be 'd','a', or 'm'"
        )

    def constant_namer(i, j):
        pname = "pname:{1}_ptype:cn_pstyle:{0}".format(par_style, name)
        if suffix != "":
            pname += "_{0}".format(suffix)
        return pname

    def zone_namer(i, j):
        zval = 1
        if zone_array is not None:
            zval = zone_array[i, j]
        pname = "pname:{1}_ptype:zn_pstyle:{0}_zone:{2}".format(
            par_style, name, zval
        )
        if suffix != "":
            pname += "_{0}".format(suffix)
        return pname

    def grid_namer(i, j):
        pname = "pname:{1}_ptype:gr_pstyle:{0}_i:{2}_j:{3}".format(
            par_style, name, i, j
        )
        if get_xy is not None:
            pname += "_x:{0:0.2f}_y:{1:0.2f}".format(*get_xy([i, j], ij_id=[0, 1]))
        if zone_array is not None:
            pname += "_zone:{0}".format(zone_array[i, j])
        if suffix != "":
            pname += "_{0}".format(suffix)
        return pname

    if par_type == "constant":
        namer = constant_namer
    elif par_type == "zone":
        namer = zone_namer
    elif par_type == "grid":
        namer = grid_namer
    else:
        raise Exception(
            "write_array_tpl() error: unsupported par_type"
            ", options are 'constant', 'zone', or 'grid', not "
            "'{0}'".format(par_type)
        )

    parnme = []
    org_par_val_dict = {}
    xx, yy, ii, jj = [], [], [], []
    with open(tpl_filename, "w") as f:
        f.write("ptf ~\n")
        for i in range(shape[0]):
            for j in range(shape[1]):
                if zone_array is not None and zone_array[i, j] < 1:
                    pname = " {0} ".format(fill_value)
                else:
                    if get_xy is not None:
                        x, y = get_xy([i, j], ij_id=[0, 1])
                        xx.append(x)
                        yy.append(y)
                    ii.append(i)
                    jj.append(j)

                    pname = namer(i, j)
                    parnme.append(pname)
                    org_par_val_dict[pname] = org_arr[i, j]
                    pname = " ~   {0}    ~".format(pname)

                f.write(pname)
            f.write("\n")
    df = pd.DataFrame({"parnme": parnme}, index=parnme)
    df.loc[:, "parval1"] = df.parnme.apply(lambda x: org_par_val_dict[x])
    if par_type == "grid":
        df.loc[:, "i"] = ii
        df.loc[:, "j"] = jj
        if get_xy is not None:
            df.loc[:, "x"] = xx
            df.loc[:, "y"] = yy
    if gpname is None:
        gpname = name
    df.loc[:, "pargp"] = "{0}_{1}".format(gpname, suffix.replace("_", "")).rstrip("_")
    df.loc[:, "tpl_filename"] = tpl_filename
    df.loc[:, "input_filename"] = input_filename
    if input_filename is not None:
        if par_style in ["m", "d"]:
            arr = np.ones(shape)
        elif par_style == "a":
            arr = np.zeros(shape)
        np.savetxt(input_filename, arr, fmt="%2.1f")

    return df


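# Hedged usage sketch (added; filenames are hypothetical):
#   df = write_array_tpl(
#       name="hk", tpl_filename="hk.dat.tpl", suffix="", par_type="grid",
#       shape=(3, 4), input_filename="hk.dat", par_style="m",
#   )
# writes a 3x4 template of "~ pname:hk_ptype:gr_pstyle:m_i:0_j:0 ~"-style
# markers plus a companion "hk.dat" of 1.0 multipliers, and returns one
# parameter row per array cell.

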
def _check_diff(org_arr, input_filename, zval=None):
    percent_diff = 100.0 * np.abs(
        (np.nanmax(org_arr) - np.nanmin(org_arr)) / np.nanmean(org_arr)
    )

    if percent_diff > DIRECT_PAR_PERCENT_DIFF_TOL:
        message = "_check_diff() error: direct par for file '{0}' ".format(
            input_filename
        ) + "exceeds tolerance for percent difference: {0} > {1}".format(
            percent_diff, DIRECT_PAR_PERCENT_DIFF_TOL
        )
        if zval is not None:
            message += " in zone {0}".format(zval)
        raise Exception(message)


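# Illustrative arithmetic (added): for an array with max=10.05, min=10.0 and
# mean~10.025, the percent difference is 100*(0.05/10.025) ~ 0.5%, which
# passes the 1.0% tolerance; an array spanning 10.0-11.0 gives ~9.5% and
# raises.

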
def get_filepath(folder, filename):
    """Return a path to a file within a folder,
    without repeating the folder in the output path,
    if the input filename (path) already contains the folder."""
    filename = Path(filename)
    folder = Path(folder)
    if folder not in filename.parents:
        filename = folder / filename
    return filename


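# Hedged doctest-style sketch (added; paths are hypothetical):
#   get_filepath("model_ws", "hk.dat")          -> Path("model_ws/hk.dat")
#   get_filepath("model_ws", "model_ws/hk.dat") -> Path("model_ws/hk.dat")

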
def get_relative_filepath(folder, filename):
    """Like :func:`~pyemu.utils.pst_from.get_filepath`, except
    return path for filename relative to folder.
    """
    return get_filepath(folder, filename).relative_to(folder)
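

# Hedged sketch (added; paths are hypothetical):
#   get_relative_filepath("model_ws", "model_ws/hk.dat") -> Path("hk.dat")
#   get_relative_filepath("model_ws", "hk.dat")          -> Path("hk.dat")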