pypest / pyemu, build 5887625428 (github push by briochh: "Merge branch 'develop'")
17 Aug 2023 06:23AM UTC coverage: 79.857% (+1.5%) from 78.319%
11386 of 14258 relevant lines covered (79.86%), 6.77 hits per line

Source file: /pyemu/utils/gw_utils.py (87.42% covered)
"""MODFLOW support utilities"""
import os
from datetime import datetime
import shutil
import warnings
import numpy as np
import pandas as pd
import re

pd.options.display.max_colwidth = 100
from pyemu.pst.pst_utils import (
    SFMT,
    IFMT,
    FFMT,
    pst_config,
    parse_tpl_file,
    try_process_output_file,
)
from pyemu.utils.os_utils import run
from pyemu.utils.helpers import _write_df_tpl
from ..pyemu_warnings import PyemuWarning

PP_FMT = {
    "name": SFMT,
    "x": FFMT,
    "y": FFMT,
    "zone": IFMT,
    "tpl": SFMT,
    "parval1": FFMT,
}
PP_NAMES = ["name", "x", "y", "zone", "parval1"]


def modflow_pval_to_template_file(pval_file, tpl_file=None):
    """write a template file for a modflow parameter value file.

    Args:
        pval_file (`str`): the path and name of the existing modflow pval file
        tpl_file (`str`, optional): template file to write. If None, use
            `pval_file` +".tpl". Default is None
    Note:
        Uses names in the first column in the pval file as par names.

    Returns:
        **pandas.DataFrame**: a dataframe with control file parameter information
    """

    if tpl_file is None:
        tpl_file = pval_file + ".tpl"
    pval_df = pd.read_csv(
        pval_file,
        delim_whitespace=True,
        header=None,
        skiprows=2,
        names=["parnme", "parval1"],
    )
    pval_df.index = pval_df.parnme
    pval_df.loc[:, "tpl"] = pval_df.parnme.apply(lambda x: " ~   {0:15s}   ~".format(x))
    with open(tpl_file, "w") as f:
        f.write("ptf ~\n#pval template file from pyemu\n")
        f.write("{0:10d} #NP\n".format(pval_df.shape[0]))
        f.write(
            pval_df.loc[:, ["parnme", "tpl"]].to_string(
                col_space=0,
                formatters=[SFMT, SFMT],
                index=False,
                header=False,
                justify="left",
            )
        )
    return pval_df
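
# Usage sketch for modflow_pval_to_template_file() (assumes an existing pval
# file named "model.pval" in the current directory):
#
#   import pyemu
#   par_df = pyemu.gw_utils.modflow_pval_to_template_file("model.pval")
#   # "model.pval.tpl" is written; par_df carries parnme, parval1 and tpl columns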


def modflow_hob_to_instruction_file(hob_file, ins_file=None):
    """write an instruction file for a modflow head observation file

    Args:
        hob_file (`str`): the path and name of the existing modflow hob file
        ins_file (`str`, optional): the name of the instruction file to write.
            If `None`, `hob_file` +".ins" is used.  Default is `None`.

    Returns:
        **pandas.DataFrame**: a dataframe with control file observation information

    """

    hob_df = pd.read_csv(
        hob_file,
        delim_whitespace=True,
        skiprows=1,
        header=None,
        names=["simval", "obsval", "obsnme"],
    )

    hob_df.loc[:, "obsnme"] = hob_df.obsnme.apply(str.lower)
    hob_df.loc[:, "ins_line"] = hob_df.obsnme.apply(lambda x: "l1 !{0:s}!".format(x))
    hob_df.loc[0, "ins_line"] = hob_df.loc[0, "ins_line"].replace("l1", "l2")

    if ins_file is None:
        ins_file = hob_file + ".ins"
    f_ins = open(ins_file, "w")
    f_ins.write("pif ~\n")
    f_ins.write(
        hob_df.loc[:, ["ins_line"]].to_string(
            col_space=0,
            columns=["ins_line"],
            header=False,
            index=False,
            formatters=[SFMT],
        )
        + "\n"
    )
    hob_df.loc[:, "weight"] = 1.0
    hob_df.loc[:, "obgnme"] = "obgnme"
    f_ins.close()
    return hob_df
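
# Usage sketch for modflow_hob_to_instruction_file() (assumes a hob output file
# named "model.hob.out"):
#
#   import pyemu
#   obs_df = pyemu.gw_utils.modflow_hob_to_instruction_file("model.hob.out")
#   # "model.hob.out.ins" is written; obs_df carries obsnme/obsval/weight/obgnme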


def modflow_hydmod_to_instruction_file(hydmod_file, ins_file=None):
    """write an instruction file for a modflow hydmod file

    Args:
        hydmod_file (`str`): the path and name of the existing modflow hydmod file
        ins_file (`str`, optional): the name of the instruction file to write.
            If `None`, `hydmod_file` +".ins" is used.  Default is `None`.

    Returns:
        **pandas.DataFrame**: a dataframe with control file observation information

    Note:
        calls `pyemu.gw_utils.modflow_read_hydmod_file()`
    """

    hydmod_df, hydmod_outfile = modflow_read_hydmod_file(hydmod_file)
    hydmod_df.loc[:, "ins_line"] = hydmod_df.obsnme.apply(
        lambda x: "l1 w !{0:s}!".format(x)
    )

    if ins_file is None:
        ins_file = hydmod_outfile + ".ins"

    with open(ins_file, "w") as f_ins:
        f_ins.write("pif ~\nl1\n")
        f_ins.write(
            hydmod_df.loc[:, ["ins_line"]].to_string(
                col_space=0,
                columns=["ins_line"],
                header=False,
                index=False,
                formatters=[SFMT],
            )
            + "\n"
        )
    hydmod_df.loc[:, "weight"] = 1.0
    hydmod_df.loc[:, "obgnme"] = "obgnme"

    df = try_process_output_file(hydmod_outfile + ".ins")
    if df is not None:
        df.loc[:, "obsnme"] = df.index.values
        df.loc[:, "obgnme"] = df.obsnme.apply(lambda x: x[:-9])
        df.to_csv("_setup_" + os.path.split(hydmod_outfile)[-1] + ".csv", index=False)
        return df

    return hydmod_df
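
# Usage sketch for modflow_hydmod_to_instruction_file() (assumes a binary
# hydmod output file named "model.hyd.bin" and that flopy is installed):
#
#   import pyemu
#   obs_df = pyemu.gw_utils.modflow_hydmod_to_instruction_file("model.hyd.bin")
#   # writes "model.hyd.bin.dat.ins" for the processed hydmod observations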


def modflow_read_hydmod_file(hydmod_file, hydmod_outfile=None):
    """read a binary hydmod file and return a dataframe of the results

    Args:
        hydmod_file (`str`): The path and name of the existing modflow hydmod binary file
        hydmod_outfile (`str`, optional): output file to write.  If `None`, use `hydmod_file` +".dat".
            Default is `None`.

    Returns:
        **pandas.DataFrame**: a dataframe with hydmod_file values
    """
    try:
        import flopy.utils as fu
    except Exception as e:
        print("flopy is not installed - cannot read {0}\n{1}".format(hydmod_file, e))
        return

    obs = fu.HydmodObs(hydmod_file)
    hyd_df = obs.get_dataframe()

    hyd_df.columns = [i[2:] if i.lower() != "totim" else i for i in hyd_df.columns]
    # hyd_df.loc[:,"datetime"] = hyd_df.index
    hyd_df["totim"] = hyd_df.index.map(lambda x: x.strftime("%Y%m%d"))

    hyd_df.rename(columns={"totim": "datestamp"}, inplace=True)

    # reshape into a single column
    hyd_df = pd.melt(hyd_df, id_vars="datestamp")

    hyd_df.rename(columns={"value": "obsval"}, inplace=True)

    hyd_df["obsnme"] = [
        i.lower() + "_" + j.lower() for i, j in zip(hyd_df.variable, hyd_df.datestamp)
    ]

    vc = hyd_df.obsnme.value_counts().sort_values()
    vc = list(vc.loc[vc > 1].index.values)
    if len(vc) > 0:
        hyd_df.to_csv("hyd_df.duplicates.csv")
        obs.get_dataframe().to_csv("hyd_org.duplicates.csv")
        raise Exception("duplicates in obsnme:{0}".format(vc))
    # assert hyd_df.obsnme.value_counts().max() == 1,"duplicates in obsnme"

    if not hydmod_outfile:
        hydmod_outfile = hydmod_file + ".dat"
    hyd_df.to_csv(hydmod_outfile, columns=["obsnme", "obsval"], sep=" ", index=False)
    # hyd_df = hyd_df[['obsnme','obsval']]
    return hyd_df[["obsnme", "obsval"]], hydmod_outfile
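
# Usage sketch for modflow_read_hydmod_file() (assumes a binary hydmod output
# file named "model.hyd.bin" and that flopy is installed):
#
#   import pyemu
#   hyd_df, outfile = pyemu.gw_utils.modflow_read_hydmod_file("model.hyd.bin")
#   # outfile is "model.hyd.bin.dat", an ASCII file of obsnme/obsval pairs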


def setup_mtlist_budget_obs(
    list_filename,
    gw_filename="mtlist_gw.dat",
    sw_filename="mtlist_sw.dat",
    start_datetime="1-1-1970",
    gw_prefix="gw",
    sw_prefix="sw",
    save_setup_file=False,
):
    """setup observations of gw (and optionally sw) mass budgets from mt3dusgs list file.

    Args:
        list_filename (`str`): path and name of the existing MT3D-USGS list file
        gw_filename (`str`, optional): output filename that will contain the gw budget
            observations. Default is "mtlist_gw.dat"
        sw_filename (`str`, optional): output filename that will contain the sw budget
            observations. Default is "mtlist_sw.dat"
        start_datetime (`str`, optional): a str that can be parsed into a `pandas.Timestamp`.
            Used to give budget observations meaningful names.  Default is "1-1-1970".
        gw_prefix (`str`, optional): a prefix to add to the GW budget observations.
            Useful if processing more than one list file as part of the forward run process.
            Default is 'gw'.
        sw_prefix (`str`, optional): a prefix to add to the SW budget observations.  Useful
            if processing more than one list file as part of the forward run process.
            Default is 'sw'.
        save_setup_file (`bool`, optional): a flag to save "_setup_"+ `list_filename` +".csv" file
            that contains useful control file information.  Default is `False`.

    Returns:
        tuple containing

        - **str**: the command to add to the forward run script
        - **[`str`]**: the names of the instruction files that were created
        - **pandas.DataFrame**: a dataframe with information for constructing a control file


    Note:
        writes an instruction file and also a _setup_.csv to use when constructing a pest
        control file

        The instruction files are named `out_filename` +".ins"

        It is recommended to use the default value for `gw_filename` or `sw_filename`.

        This is the companion function of `gw_utils.apply_mtlist_budget_obs()`.

    """
    gw, sw = apply_mtlist_budget_obs(
        list_filename, gw_filename, sw_filename, start_datetime
    )
    gw_ins = gw_filename + ".ins"
    _write_mtlist_ins(gw_ins, gw, gw_prefix)
    ins_files = [gw_ins]

    df_gw = try_process_output_file(gw_ins, gw_filename)
    if df_gw is None:
        raise Exception("error processing groundwater instruction file")
    if sw is not None:
        sw_ins = sw_filename + ".ins"
        _write_mtlist_ins(sw_ins, sw, sw_prefix)
        ins_files.append(sw_ins)

        df_sw = try_process_output_file(sw_ins, sw_filename)
        if df_sw is None:
            raise Exception("error processing surface water instruction file")
        df_gw = pd.concat([df_gw, df_sw])
        df_gw.loc[:, "obsnme"] = df_gw.index.values
    if save_setup_file:
        df_gw.to_csv("_setup_" + os.path.split(list_filename)[-1] + ".csv", index=False)

    frun_line = "pyemu.gw_utils.apply_mtlist_budget_obs('{0}')".format(list_filename)
    return frun_line, ins_files, df_gw
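
# Usage sketch for setup_mtlist_budget_obs() (assumes an MT3D-USGS list file
# named "mt3d.list" in the current directory):
#
#   import pyemu
#   frun, ins_files, obs_df = pyemu.gw_utils.setup_mtlist_budget_obs(
#       "mt3d.list", start_datetime="1-1-2000"
#   )
#   # frun is the call to add to forward_run.py; obs_df feeds the control file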


def _write_mtlist_ins(ins_filename, df, prefix):
    """write an instruction file for a MT3D-USGS list file"""
    try:
        dt_str = df.index.map(lambda x: x.strftime("%Y%m%d"))
    except:
        dt_str = df.index.map(lambda x: "{0:08.1f}".format(x).strip())
    with open(ins_filename, "w") as f:
        f.write("pif ~\nl1\n")
        for dt in dt_str:
            f.write("l1 ")
            for col in df.columns.str.translate(
                {ord(s): None for s in ["(", ")", "/", "="]}
            ):
                if prefix == "":
                    obsnme = "{0}_{1}".format(col, dt)
                else:
                    obsnme = "{0}_{1}_{2}".format(prefix, col, dt)
                f.write(" w !{0}!".format(obsnme))
            f.write("\n")


def apply_mtlist_budget_obs(
    list_filename,
    gw_filename="mtlist_gw.dat",
    sw_filename="mtlist_sw.dat",
    start_datetime="1-1-1970",
):
    """process an MT3D-USGS list file to extract mass budget entries.

    Args:
        list_filename (`str`): the path and name of an existing MT3D-USGS list file
        gw_filename (`str`, optional): the name of the output file with gw mass
            budget information. Default is "mtlist_gw.dat"
        sw_filename (`str`): the name of the output file with sw mass budget information.
            Default is "mtlist_sw.dat"
        start_datetime (`str`): a str that can be cast to a pandas.Timestamp.  Used to give
            observations a meaningful name

    Returns:
        2-element tuple containing

        - **pandas.DataFrame**: the gw mass budget dataframe
        - **pandas.DataFrame**: (optional) the sw mass budget dataframe.
          If the SFT process is not active, this returned value is `None`.

    Note:
        This is the companion function of `gw_utils.setup_mtlist_budget_obs()`.
    """
    try:
        import flopy
    except Exception as e:
        raise Exception("error importing flopy: {0}".format(str(e)))
    mt = flopy.utils.MtListBudget(list_filename)
    gw, sw = mt.parse(start_datetime=start_datetime, diff=True)
    gw = gw.drop(
        [
            col
            for col in gw.columns
            for drop_col in ["kper", "kstp", "tkstp"]
            if (col.lower().startswith(drop_col))
        ],
        axis=1,
    )
    gw.to_csv(gw_filename, sep=" ", index_label="datetime", date_format="%Y%m%d")
    if sw is not None:
        sw = sw.drop(
            [
                col
                for col in sw.columns
                for drop_col in ["kper", "kstp", "tkstp"]
                if (col.lower().startswith(drop_col))
            ],
            axis=1,
        )
        sw.to_csv(sw_filename, sep=" ", index_label="datetime", date_format="%Y%m%d")
    return gw, sw
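
# Usage sketch for apply_mtlist_budget_obs(), the call that runs at model run
# time (assumes "mt3d.list" exists and flopy is installed):
#
#   import pyemu
#   gw, sw = pyemu.gw_utils.apply_mtlist_budget_obs("mt3d.list")
#   # writes "mtlist_gw.dat" (and "mtlist_sw.dat" if the SFT process is active)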


def setup_mflist_budget_obs(
    list_filename,
    flx_filename="flux.dat",
    vol_filename="vol.dat",
    start_datetime="1-1-1970",
    prefix="",
    save_setup_file=False,
    specify_times=None,
):
    """setup observations of budget volume and flux from modflow list file.

    Args:
        list_filename (`str`): path and name of the existing modflow list file
        flx_filename (`str`, optional): output filename that will contain the budget flux
            observations. Default is "flux.dat"
        vol_filename (`str`, optional): output filename that will contain the budget volume
            observations.  Default is "vol.dat"
        start_datetime (`str`, optional): a string that can be parsed into a pandas.Timestamp.
            This is used to give budget observations meaningful names.  Default is "1-1-1970".
        prefix (`str`, optional): a prefix to add to the water budget observations.  Useful if
            processing more than one list file as part of the forward run process. Default is ''.
        save_setup_file (`bool`): a flag to save "_setup_"+ `list_filename` +".csv" file that contains useful
            control file information
        specify_times (`np.ndarray`-like, optional): An array of times to
            extract from the budget dataframes returned by the flopy
            MfListBudget(list_filename).get_dataframe() method. This can be
            useful to ensure consistent observation times for PEST.
            The array needs to be alignable with the index of the dataframe
            returned by the flopy method, so care should be taken to ensure that
            this is the case. If passed, it is written to the
            "budget_times.config" file as strings to be read by the companion
            `apply_mflist_budget_obs()` method at run time.

    Returns:
        **pandas.DataFrame**: a dataframe with information for constructing a control file.

    Note:
        This method writes instruction files and also a _setup_.csv to use when constructing a pest
        control file.  The instruction files are named <flux_file>.ins and <vol_file>.ins, respectively

        It is recommended to use the default values for flux_file and vol_file.

        This is the companion function of `gw_utils.apply_mflist_budget_obs()`.


    """
    flx, vol = apply_mflist_budget_obs(
        list_filename, flx_filename, vol_filename, start_datetime, times=specify_times
    )
    _write_mflist_ins(flx_filename + ".ins", flx, prefix + "flx")
    _write_mflist_ins(vol_filename + ".ins", vol, prefix + "vol")

    df = try_process_output_file(flx_filename + ".ins")
    if df is None:
        raise Exception("error processing flux instruction file")

    df2 = try_process_output_file(vol_filename + ".ins")
    if df2 is None:
        raise Exception("error processing volume instruction file")

    df = pd.concat([df, df2])
    df.loc[:, "obsnme"] = df.index.values
    if save_setup_file:
        df.to_csv("_setup_" + os.path.split(list_filename)[-1] + ".csv", index=False)
    if specify_times is not None:
        np.savetxt(
            os.path.join(os.path.dirname(flx_filename), "budget_times.config"),
            specify_times,
            fmt="%s",
        )
    return df
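
# Usage sketch for setup_mflist_budget_obs() (assumes a MODFLOW list file named
# "model.list"):
#
#   import pyemu
#   obs_df = pyemu.gw_utils.setup_mflist_budget_obs(
#       "model.list", start_datetime="1-1-2000", save_setup_file=True
#   )
#   # writes flux.dat/vol.dat plus flux.dat.ins/vol.dat.ins for PEST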


def apply_mflist_budget_obs(
    list_filename,
    flx_filename="flux.dat",
    vol_filename="vol.dat",
    start_datetime="1-1-1970",
    times=None,
):
    """process a MODFLOW list file to extract flux and volume water budget
       entries.

    Args:
        list_filename (`str`): path and name of the existing modflow list file
        flx_filename (`str`, optional): output filename that will contain the
            budget flux observations. Default is "flux.dat"
        vol_filename (`str`, optional): output filename that will contain the
            budget volume observations.  Default is "vol.dat"
        start_datetime (`str`, optional): a string that can be parsed into a
            pandas.Timestamp. This is used to give budget observations
            meaningful names.  Default is "1-1-1970".
        times (`np.ndarray`-like or `str`, optional): An array of times to
            extract from the budget dataframes returned by the flopy
            MfListBudget(list_filename).get_dataframe() method. This can be
            useful to ensure consistent observation times for PEST.
            If type `str`, will assume `times=filename` and attempt to read a
            single vector (no header or index) from the file, parsing datetimes
            using pandas. The array needs to be alignable with the index of the
            dataframe returned by the flopy method, so care should be taken to
            ensure that this is the case. If setup with `setup_mflist_budget_obs()`
            specifying the `specify_times` argument, `times` should be set to
            "budget_times.config".

    Note:
        This is the companion function of `gw_utils.setup_mflist_budget_obs()`.

    Returns:
        tuple containing

        - **pandas.DataFrame**: a dataframe with flux budget information
        - **pandas.DataFrame**: a dataframe with cumulative budget information

    """
    try:
        import flopy
    except Exception as e:
        raise Exception("error importing flopy: {0}".format(str(e)))
    mlf = flopy.utils.MfListBudget(list_filename)
    flx, vol = mlf.get_dataframes(start_datetime=start_datetime, diff=True)
    if times is not None:
        if isinstance(times, str):
            if vol.index.tzinfo:
                parse_date = {"t": [0]}
                names = [None]
            else:
                parse_date = False
                names = ["t"]
            times = pd.read_csv(
                times, header=None, names=names, parse_dates=parse_date
            )["t"].values
        flx = flx.loc[times]
        vol = vol.loc[times]
    flx.to_csv(flx_filename, sep=" ", index_label="datetime", date_format="%Y%m%d")
    vol.to_csv(vol_filename, sep=" ", index_label="datetime", date_format="%Y%m%d")
    return flx, vol
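
# Usage sketch for apply_mflist_budget_obs(), typically called from
# forward_run.py (assumes "model.list" exists, flopy is installed, and a
# "budget_times.config" file was written by setup_mflist_budget_obs()):
#
#   import pyemu
#   flx, vol = pyemu.gw_utils.apply_mflist_budget_obs(
#       "model.list", times="budget_times.config"
#   )
#   # rewrites flux.dat and vol.dat for the times listed in budget_times.config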


def _write_mflist_ins(ins_filename, df, prefix):
    """write an instruction file for a MODFLOW list file"""

    dt_str = df.index.map(lambda x: x.strftime("%Y%m%d"))
    with open(ins_filename, "w") as f:
        f.write("pif ~\nl1\n")
        for dt in dt_str:
            f.write("l1 ")
            for col in df.columns:
                obsnme = "{0}_{1}_{2}".format(prefix, col, dt)
                f.write(" w !{0}!".format(obsnme))
            f.write("\n")


def setup_hds_timeseries(
    bin_file,
    kij_dict,
    prefix=None,
    include_path=False,
    model=None,
    postprocess_inact=None,
    text=None,
    fill=None,
    precision="single",
):
    """a function to setup a forward process to extract time-series style values
    from a modflow binary file (or equivalent format - hds, ucn, sub, cbb, etc).

    Args:
        bin_file (`str`): path and name of existing modflow binary file - headsave, cell budget and MT3D UCN supported.
        kij_dict (`dict`): dictionary of site_name: [k,i,j] pairs. For example: `{"wel1":[0,1,1]}`.
        prefix (`str`, optional): string to prepend to site_name when forming observation names.  Default is None
        include_path (`bool`, optional): flag to setup the binary file processing in the directory where the hds_file
            is located (if different from where python is running).  This is useful for setting up
            the process in a separate directory from where python is running.
        model (`flopy.mbase`, optional): a `flopy.basemodel` instance.  If passed, the observation names will
            have the datetime of the observation appended to them (using the flopy `start_datetime` attribute).
            If None, the observation names will have the zero-based stress period appended to them. Default is None.
        postprocess_inact (`float`, optional): Inactive value in heads/ucn file e.g. mt.btn.cinit.  If `None`, no
            inactive value processing happens.  Default is `None`.
        text (`str`): the text record entry in the binary file (e.g. "constant_head").
            Used to indicate that the binary file is a MODFLOW cell-by-cell budget file.
            If None, a headsave or MT3D unformatted concentration file
            is assumed.  Default is None
        fill (`float`): fill value for NaNs in the extracted timeseries dataframe.  If
            `None`, no filling is done, which may yield model run failures as the resulting
            processed timeseries CSV file (produced at runtime) may have missing values and
            can't be processed with the corresponding instruction file.  Default is `None`.
        precision (`str`): the precision of the binary file.  Can be "single" or "double".
            Default is "single".

    Returns:
        tuple containing

        - **str**: the forward run command to execute the binary file process during model runs.

        - **pandas.DataFrame**: a dataframe of observation information for use in the pest control file

    Note:
        This function writes hds_timeseries.config that must be in the same
        dir where `apply_hds_timeseries()` is called during the forward run

        Assumes model time units are days

        This is the companion function of `gw_utils.apply_hds_timeseries()`.

    """

    try:
        import flopy
    except Exception as e:
        print("error importing flopy, returning {0}".format(str(e)))
        return

    assert os.path.exists(bin_file), "binary file not found"
    iscbc = False
    if text is not None:
        text = text.upper()

        try:
            # hack: if model is passed and it's None, it trips up CellBudgetFile...
            if model is not None:
                bf = flopy.utils.CellBudgetFile(
                    bin_file, precision=precision, model=model
                )
                iscbc = True
            else:
                bf = flopy.utils.CellBudgetFile(bin_file, precision=precision)
                iscbc = True
        except Exception as e:
            try:
                if model is not None:
                    bf = flopy.utils.HeadFile(
                        bin_file, precision=precision, model=model, text=text
                    )
                else:
                    bf = flopy.utils.HeadFile(bin_file, precision=precision, text=text)
            except Exception as e1:
                raise Exception(
                    "error instantiating binary file as either CellBudgetFile:{0} or as HeadFile with text arg: {1}".format(
                        str(e), str(e1)
                    )
                )
        if iscbc:
            tl = [t.decode().strip() for t in bf.textlist]
            if text not in tl:
                raise Exception(
                    "'text' {0} not found in CellBudgetFile.textlist:{1}".format(
                        text, tl
                    )
                )
    elif bin_file.lower().endswith(".ucn"):
        try:
            bf = flopy.utils.UcnFile(bin_file, precision=precision)
        except Exception as e:
            raise Exception("error instantiating UcnFile:{0}".format(str(e)))
    else:
        try:
            bf = flopy.utils.HeadFile(bin_file, precision=precision)
        except Exception as e:
            raise Exception("error instantiating HeadFile:{0}".format(str(e)))

    if text is None:
        text = "none"

    nlay, nrow, ncol = bf.nlay, bf.nrow, bf.ncol

    # if include_path:
    #    pth = os.path.join(*[p for p in os.path.split(hds_file)[:-1]])
    #    config_file = os.path.join(pth,"{0}_timeseries.config".format(hds_file))
    # else:
    config_file = "{0}_timeseries.config".format(bin_file)
    print("writing config file to {0}".format(config_file))
    if fill is None:
        fill = "none"
    f_config = open(config_file, "w")
    if model is not None:
        if model.dis.itmuni != 4:
            warnings.warn(
                "setup_hds_timeseries only supports 'days' time units...", PyemuWarning
            )
        f_config.write(
            "{0},{1},d,{2},{3},{4},{5}\n".format(
                os.path.split(bin_file)[-1],
                model.start_datetime,
                text,
                fill,
                precision,
                iscbc,
            )
        )
        start = pd.to_datetime(model.start_datetime)
    else:
        f_config.write(
            "{0},none,none,{1},{2},{3},{4}\n".format(
                os.path.split(bin_file)[-1], text, fill, precision, iscbc
            )
        )
    f_config.write("site,k,i,j\n")
    dfs = []

    for site, (k, i, j) in kij_dict.items():
        assert k >= 0 and k < nlay, k
        assert i >= 0 and i < nrow, i
        assert j >= 0 and j < ncol, j
        site = site.lower().replace(" ", "")
        if iscbc:
            ts = bf.get_ts((k, i, j), text=text)
            # print(ts)
            df = pd.DataFrame(data=ts, columns=["totim", site])
        else:
            df = pd.DataFrame(data=bf.get_ts((k, i, j)), columns=["totim", site])

        if model is not None:
            dts = start + pd.to_timedelta(df.totim, unit="d")
            df.loc[:, "totim"] = dts
        # print(df)
        f_config.write("{0},{1},{2},{3}\n".format(site, k, i, j))
        df.index = df.pop("totim")
        dfs.append(df)

    f_config.close()
    df = pd.concat(dfs, axis=1).T
    df.to_csv(bin_file + "_timeseries.processed", sep=" ")
    if model is not None:
        t_str = df.columns.map(lambda x: x.strftime("%Y%m%d"))
    else:
        t_str = df.columns.map(lambda x: "{0:08.2f}".format(x))

    ins_file = bin_file + "_timeseries.processed.ins"
    print("writing instruction file to {0}".format(ins_file))
    with open(ins_file, "w") as f:
        f.write("pif ~\n")
        f.write("l1 \n")
        for site in df.index:
            # for t in t_str:
            f.write("l1 w ")
            # for site in df.columns:
            for t in t_str:
                if prefix is not None:
                    obsnme = "{0}_{1}_{2}".format(prefix, site, t)
                else:
                    obsnme = "{0}_{1}".format(site, t)
                f.write(" !{0}!".format(obsnme))
            f.write("\n")
    if postprocess_inact is not None:
        _setup_postprocess_hds_timeseries(
            bin_file, df, config_file, prefix=prefix, model=model
        )
    bd = "."
    if include_path:
        bd = os.getcwd()
        pth = os.path.join(*[p for p in os.path.split(bin_file)[:-1]])
        os.chdir(pth)
    config_file = os.path.split(config_file)[-1]
    try:
        df = apply_hds_timeseries(config_file, postprocess_inact=postprocess_inact)

    except Exception as e:
        os.chdir(bd)
        raise Exception("error in apply_hds_timeseries(): {0}".format(str(e)))
    os.chdir(bd)

    df = try_process_output_file(ins_file)
    if df is None:
        raise Exception("error processing {0} instruction file".format(ins_file))

    df.loc[:, "weight"] = 0.0
    if prefix is not None:
        df.loc[:, "obgnme"] = df.index.map(lambda x: "_".join(x.split("_")[:2]))
    else:
        df.loc[:, "obgnme"] = df.index.map(lambda x: x.split("_")[0])
    frun_line = "pyemu.gw_utils.apply_hds_timeseries('{0}',{1})\n".format(
        config_file, postprocess_inact
    )
    return frun_line, df
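
# Usage sketch for setup_hds_timeseries() (assumes a head save file named
# "model.hds" and a monitoring point in layer 1, row 2, column 3):
#
#   import pyemu
#   frun, obs_df = pyemu.gw_utils.setup_hds_timeseries(
#       "model.hds", {"well1": (0, 1, 2)}, prefix="hd", fill=-999.0
#   )
#   # frun is added to forward_run.py; "model.hds_timeseries.config" is written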


def apply_hds_timeseries(config_file=None, postprocess_inact=None):
    """process a modflow binary file using a previously written
    configuration file

    Args:
        config_file (`str`, optional): configuration file written by `pyemu.gw_utils.setup_hds_timeseries`.
            If `None`, looks for `hds_timeseries.config`
        postprocess_inact (`float`, optional): Inactive value in heads/ucn file e.g. mt.btn.cinit.  If `None`, no
            inactive value processing happens.  Default is `None`.

    Note:
        This is the companion function of `gw_utils.setup_hds_timeseries()`.

    """
    import flopy

    if config_file is None:
        config_file = "hds_timeseries.config"

    assert os.path.exists(config_file), config_file
    with open(config_file, "r") as f:
        line = f.readline()
        # older config files have six fields (no iscbc flag), so branch on the
        # field count before unpacking
        tokens = line.strip().split(",")
        if len(tokens) == 6:
            (
                bf_file,
                start_datetime,
                time_units,
                text,
                fill,
                precision,
            ) = tokens
            _iscbc = "false"
        else:
            (
                bf_file,
                start_datetime,
                time_units,
                text,
                fill,
                precision,
                _iscbc,
            ) = tokens
        site_df = pd.read_csv(f)
    text = text.upper()
    if _iscbc.lower().strip() == "false":
        iscbc = False
    elif _iscbc.lower().strip() == "true":
        iscbc = True
    else:
        raise Exception(
            "apply_hds_timeseries() error: unrecognized 'iscbc' string in config file: {0}".format(
                _iscbc
            )
        )
    assert os.path.exists(bf_file), "head save file not found"
    if iscbc:
        try:
            bf = flopy.utils.CellBudgetFile(bf_file, precision=precision)
        except Exception as e:
            raise Exception("error instantiating CellBudgetFile:{0}".format(str(e)))
    elif bf_file.lower().endswith(".ucn"):
        try:
            bf = flopy.utils.UcnFile(bf_file, precision=precision)
        except Exception as e:
            raise Exception("error instantiating UcnFile:{0}".format(str(e)))
    else:
        try:
            if text != "NONE":
                bf = flopy.utils.HeadFile(bf_file, text=text, precision=precision)
            else:
                bf = flopy.utils.HeadFile(bf_file, precision=precision)
        except Exception as e:
            raise Exception("error instantiating HeadFile:{0}".format(str(e)))

    nlay, nrow, ncol = bf.nlay, bf.nrow, bf.ncol

    dfs = []
    for site, k, i, j in zip(site_df.site, site_df.k, site_df.i, site_df.j):
        assert k >= 0 and k < nlay
        assert i >= 0 and i < nrow
        assert j >= 0 and j < ncol
        if iscbc:
            df = pd.DataFrame(
                data=bf.get_ts((k, i, j), text=text), columns=["totim", site]
            )
        else:
            df = pd.DataFrame(data=bf.get_ts((k, i, j)), columns=["totim", site])
        df.index = df.pop("totim")
        dfs.append(df)
    df = pd.concat(dfs, axis=1).T
    if df.shape != df.dropna().shape:
        warnings.warn("NANs in processed timeseries file", PyemuWarning)
        if fill.upper() != "NONE":
            fill = float(fill)
            df.fillna(fill, inplace=True)
    # print(df)
    df.to_csv(bf_file + "_timeseries.processed", sep=" ")
    if postprocess_inact is not None:
        _apply_postprocess_hds_timeseries(config_file, postprocess_inact)
    return df
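
# Usage sketch for apply_hds_timeseries(), the run-time companion call
# (assumes "model.hds_timeseries.config" was written by setup_hds_timeseries()):
#
#   import pyemu
#   df = pyemu.gw_utils.apply_hds_timeseries("model.hds_timeseries.config")
#   # rewrites "model.hds_timeseries.processed" from the current model outputs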


def _setup_postprocess_hds_timeseries(
    hds_file, df, config_file, prefix=None, model=None
):
    """Dirty function to setup post processing concentrations in inactive/dry cells"""

    warnings.warn(
        "Setting up post processing of hds or ucn timeseries obs. "
        "Prepending 'pp' to obs name may cause length to exceed 20 chars",
        PyemuWarning,
    )
    if model is not None:
        t_str = df.columns.map(lambda x: x.strftime("%Y%m%d"))
    else:
        t_str = df.columns.map(lambda x: "{0:08.2f}".format(x))
    if prefix is not None:
        prefix = "pp{0}".format(prefix)
    else:
        prefix = "pp"
    ins_file = hds_file + "_timeseries.post_processed.ins"
    print("writing instruction file to {0}".format(ins_file))
    with open(ins_file, "w") as f:
        f.write("pif ~\n")
        f.write("l1 \n")
        for site in df.index:
            f.write("l1 w ")
            # for site in df.columns:
            for t in t_str:
                obsnme = "{0}{1}_{2}".format(prefix, site, t)
                f.write(" !{0}!".format(obsnme))
            f.write("\n")
    frun_line = "pyemu.gw_utils._apply_postprocess_hds_timeseries('{0}')\n".format(
        config_file
    )
    return frun_line


def _apply_postprocess_hds_timeseries(config_file=None, cinact=1e30):
    """private function to post process binary files"""
    import flopy

    if config_file is None:
        config_file = "hds_timeseries.config"

    assert os.path.exists(config_file), config_file
    with open(config_file, "r") as f:
        line = f.readline()
        # as in apply_hds_timeseries(), branch on the field count so older
        # six-field config files (no iscbc flag) can still be read
        tokens = line.strip().split(",")
        if len(tokens) == 6:
            (
                hds_file,
                start_datetime,
                time_units,
                text,
                fill,
                precision,
            ) = tokens
            _iscbc = "false"
        else:
            (
                hds_file,
                start_datetime,
                time_units,
                text,
                fill,
                precision,
                _iscbc,
            ) = tokens
        site_df = pd.read_csv(f)

    # print(site_df)
    text = text.upper()
    assert os.path.exists(hds_file), "head save file not found"
    if hds_file.lower().endswith(".ucn"):
        try:
            hds = flopy.utils.UcnFile(hds_file, precision=precision)
        except Exception as e:
            raise Exception("error instantiating UcnFile:{0}".format(str(e)))
    else:
        try:
            if text != "NONE":
                hds = flopy.utils.HeadFile(hds_file, text=text, precision=precision)
            else:
                hds = flopy.utils.HeadFile(hds_file, precision=precision)
        except Exception as e:
            raise Exception("error instantiating HeadFile:{0}".format(str(e)))

    nlay, nrow, ncol = hds.nlay, hds.nrow, hds.ncol

    dfs = []
    for site, k, i, j in zip(site_df.site, site_df.k, site_df.i, site_df.j):
        assert k >= 0 and k < nlay
        assert i >= 0 and i < nrow
        assert j >= 0 and j < ncol
        if text.upper() != "NONE":
            df = pd.DataFrame(data=hds.get_ts((k, i, j)), columns=["totim", site])
        else:
            df = pd.DataFrame(data=hds.get_ts((k, i, j)), columns=["totim", site])
        df.index = df.pop("totim")
        inact_obs = df[site].apply(lambda x: np.isclose(x, cinact))
        if inact_obs.sum() > 0:
            assert k + 1 < nlay, "Inactive observation in lowest layer"
            df_lower = pd.DataFrame(
                data=hds.get_ts((k + 1, i, j)), columns=["totim", site]
            )
            df_lower.index = df_lower.pop("totim")
            df.loc[inact_obs] = df_lower.loc[inact_obs]
            print(
                "{0} observation(s) post-processed for site {1} at kij ({2},{3},{4})".format(
                    inact_obs.sum(), site, k, i, j
                )
            )
        dfs.append(df)
    df = pd.concat(dfs, axis=1).T
    # print(df)
    df.to_csv(hds_file + "_timeseries.post_processed", sep=" ")
    return df


def setup_hds_obs(
    hds_file,
    kperk_pairs=None,
    skip=None,
    prefix="hds",
    text="head",
    precision="single",
    include_path=False,
):
    """a function to setup using all values from a layer-stress period
    pair for observations.

    Args:
        hds_file (`str`): path and name of an existing MODFLOW head-save file.
            If the hds_file endswith 'ucn', then the file is treated as a UcnFile type.
        kperk_pairs ([(int,int)]): a list of len two tuples which are pairs of kper
            (zero-based stress period index) and k (zero-based layer index) to
            setup observations for.  If None, then all layers and stress period records
            found in the file will be used.  Caution: a shit-ton of observations may be produced!
        skip (variable, optional): a value or function used to determine which values
            to skip when setting up observations.  If np.scalar(skip)
            is True, then values equal to skip will not be used.
            skip can also be a np.ndarray with dimensions equal to the model.
            Observations are set up only for cells with non-zero values in the array.
            If not np.ndarray or np.scalar(skip), then skip will be treated as a lambda function that
            returns np.NaN if the value should be skipped.
        prefix (`str`): the prefix to use for the observation names. default is "hds".
        text (`str`): the text tag for the flopy HeadFile instance.  Default is "head"
        precision (`str`): the precision string for the flopy HeadFile instance.  Default is "single"
        include_path (`bool`, optional): flag to setup the binary file processing in the directory where the hds_file
            is located (if different from where python is running).  This is useful for setting up
            the process in a separate directory from where python is running.

    Returns:
        tuple containing

        - **str**: the forward run script line needed to execute the headsave file observation
          operation
        - **pandas.DataFrame**: a dataframe of pest control file information

    Note:
        Writes an instruction file and a _setup_ csv used to construct a control file.

        This is the companion function to `gw_utils.apply_hds_obs()`.

    """
    try:
        import flopy
    except Exception as e:
        print("error importing flopy, returning {0}".format(str(e)))
        return

    assert os.path.exists(hds_file), "head save file not found"
    if hds_file.lower().endswith(".ucn"):
        try:
            hds = flopy.utils.UcnFile(hds_file)
        except Exception as e:
            raise Exception("error instantiating UcnFile:{0}".format(str(e)))
    elif text.lower() == "headu":
        try:
            hds = flopy.utils.HeadUFile(hds_file, text=text, precision=precision)
        except Exception as e:
            raise Exception("error instantiating HeadFile:{0}".format(str(e)))
    else:
        try:
            hds = flopy.utils.HeadFile(hds_file, text=text, precision=precision)
        except Exception as e:
            raise Exception("error instantiating HeadFile:{0}".format(str(e)))

    if kperk_pairs is None:
        kperk_pairs = []
        for kstp, kper in hds.kstpkper:
            kperk_pairs.extend([(kper - 1, k) for k in range(hds.nlay)])
    if len(kperk_pairs) == 2:
        try:
            if len(kperk_pairs[0]) == 2:
                pass
        except:
            kperk_pairs = [kperk_pairs]

    # if start_datetime is not None:
    #    start_datetime = pd.to_datetime(start_datetime)
    #    dts = start_datetime + pd.to_timedelta(hds.times,unit='d')
    data = {}
    kpers = [kper - 1 for kstp, kper in hds.kstpkper]
    for kperk_pair in kperk_pairs:
        kper, k = kperk_pair
        assert kper in kpers, "kper not in hds:{0}".format(kper)
        assert k in range(hds.nlay), "k not in hds:{0}".format(k)
        kstp = last_kstp_from_kper(hds, kper)
        d = hds.get_data(kstpkper=(kstp, kper))[k]

        data["{0}_{1}".format(kper, k)] = d.flatten()
        # data[(kper,k)] = d.flatten()
    idx, iidx, jidx = [], [], []
    for _ in range(len(data)):
        for i in range(hds.nrow):
            iidx.extend([i for _ in range(hds.ncol)])
            jidx.extend([j for j in range(hds.ncol)])
            idx.extend(["i{0:04d}_j{1:04d}".format(i, j) for j in range(hds.ncol)])
    idx = idx[: hds.nrow * hds.ncol]

    df = pd.DataFrame(data, index=idx)
    data_cols = list(df.columns)
    data_cols.sort()
    # df.loc[:,"iidx"] = iidx
    # df.loc[:,"jidx"] = jidx
    if skip is not None:
        for col in data_cols:
            if np.isscalar(skip):
                df.loc[df.loc[:, col] == skip, col] = np.NaN
            elif isinstance(skip, np.ndarray):
                assert (
                    skip.ndim >= 2
                ), "skip passed as {}D array, At least 2D (<= 4D) array required".format(
                    skip.ndim
                )
                assert skip.shape[-2:] == (
                    hds.nrow,
                    hds.ncol,
                ), "Array dimensions of arg. skip needs to match model dimensions ({0},{1}). ({2},{3}) passed".format(
                    hds.nrow, hds.ncol, skip.shape[-2], skip.shape[-1]
                )
                if skip.ndim == 2:
                    print(
                        "2D array passed for skip, assuming constant for all layers and kper"
                    )
                    skip = np.tile(skip, (len(kpers), hds.nlay, 1, 1))
                if skip.ndim == 3:
                    print("3D array passed for skip, assuming constant for all kper")
                    skip = np.tile(skip, (len(kpers), 1, 1, 1))
                kper, k = [int(c) for c in col.split("_")]
                df.loc[
                    df.index.map(
                        lambda x: skip[
                            kper,
                            k,
                            int(x.split("_")[0].strip("i")),
                            int(x.split("_")[1].strip("j")),
                        ]
                        == 0
                    ),
                    col,
                ] = np.NaN
            else:
                df.loc[:, col] = df.loc[:, col].apply(skip)

    # melt to long form
    df = df.melt(var_name="kperk", value_name="obsval")
    # set row and col identifiers
    df.loc[:, "iidx"] = iidx
    df.loc[:, "jidx"] = jidx
    # drop nans from skip
    df = df.dropna()
    # set some additional identifiers
    df.loc[:, "kper"] = df.kperk.apply(lambda x: int(x.split("_")[0]))
    df.loc[:, "kidx"] = df.pop("kperk").apply(lambda x: int(x.split("_")[1]))

    # form obs names
    # def get_kper_str(kper):
    #    if start_datetime is not None:
    #        return  dts[int(kper)].strftime("%Y%m%d")
    #    else:
    #        return "kper{0:04.0f}".format(kper)
    fmt = prefix + "_{0:02.0f}_{1:03.0f}_{2:03.0f}_{3:03.0f}"
    # df.loc[:,"obsnme"] = df.apply(lambda x: fmt.format(x.kidx,x.iidx,x.jidx,
    #                                                   get_kper_str(x.kper)),axis=1)
    df.loc[:, "obsnme"] = df.apply(
        lambda x: fmt.format(x.kidx, x.iidx, x.jidx, x.kper), axis=1
    )

    df.loc[:, "ins_str"] = df.obsnme.apply(lambda x: "l1 w !{0}!".format(x))
    df.loc[:, "obgnme"] = prefix
    # write the instruction file
    with open(hds_file + ".dat.ins", "w") as f:
        f.write("pif ~\nl1\n")
        df.ins_str.to_string(f, index=False, header=False)

    # write the corresponding output file
    df.loc[:, ["obsnme", "obsval"]].to_csv(hds_file + ".dat", sep=" ", index=False)

    hds_path = os.path.dirname(hds_file)
    setup_file = os.path.join(
        hds_path, "_setup_{0}.csv".format(os.path.split(hds_file)[-1])
    )
    df.to_csv(setup_file)
    if not include_path:
        hds_file = os.path.split(hds_file)[-1]
    fwd_run_line = (
        "pyemu.gw_utils.apply_hds_obs('{0}',precision='{1}',text='{2}')\n".format(
            hds_file, precision, text
        )
    )
    df.index = df.obsnme
    return fwd_run_line, df
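
# Usage sketch for setup_hds_obs() (assumes "model.hds" exists; observes layer
# 1 of stress period 3 and skips cells carrying the inactive value -1e30):
#
#   import pyemu
#   frun, obs_df = pyemu.gw_utils.setup_hds_obs(
#       "model.hds", kperk_pairs=[(2, 0)], skip=-1.0e30, prefix="hd"
#   )
#   # writes "model.hds.dat" and "model.hds.dat.ins" and returns the frun line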


def last_kstp_from_kper(hds, kper):
    """function to find the last time step (kstp) for a
    given stress period (kper) in a modflow head save file.

    Args:
        hds (`flopy.utils.HeadFile`): head save file
        kper (`int`): the zero-based stress period number

    Returns:
        **int**: the zero-based last time step during stress period
        kper in the head save file

    """
    # find the last kstp with this kper
    kstp = -1
    for kkstp, kkper in hds.kstpkper:
        if kkper == kper + 1 and kkstp > kstp:
            kstp = kkstp
    if kstp == -1:
        raise Exception("kstp not found for kper {0}".format(kper))
    kstp -= 1
    return kstp
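
# Usage sketch for last_kstp_from_kper() (assumes flopy is installed and
# "model.hds" exists):
#
#   import flopy
#   hds = flopy.utils.HeadFile("model.hds")
#   kstp = last_kstp_from_kper(hds, kper=0)  # last time step of the first kper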


def apply_hds_obs(hds_file, inact_abs_val=1.0e20, precision="single", text="head"):
    """process a modflow head save file.  A companion function to
    `gw_utils.setup_hds_obs()` that is called during the forward run process

    Args:
        hds_file (`str`): a modflow head save filename. if hds_file ends with 'ucn',
            then the file is treated as a UcnFile type.
        inact_abs_val (`float`, optional): the value that marks the minimum and maximum
            active value.  values in the headsave file greater than `inact_abs_val` or less
            than -`inact_abs_val` are reset to `inact_abs_val`
        precision (`str`, optional): the precision of the binary file.  Default is "single"
        text (`str`, optional): the text tag for the flopy HeadFile instance.  Default is "head"
    Returns:
        **pandas.DataFrame**: a dataframe with extracted simulated values.
    Note:
        This is the companion function to `gw_utils.setup_hds_obs()`.

    """

    try:
        import flopy
    except Exception as e:
        raise Exception("apply_hds_obs(): error importing flopy: {0}".format(str(e)))
    from .. import pst_utils

    assert os.path.exists(hds_file)
    out_file = hds_file + ".dat"
    ins_file = out_file + ".ins"
    assert os.path.exists(ins_file)
    df = pd.DataFrame({"obsnme": pst_utils.parse_ins_file(ins_file)})
    df.index = df.obsnme

    # populate metadata
    items = ["k", "i", "j", "kper"]
    for i, item in enumerate(items):
        df.loc[:, item] = df.obsnme.apply(lambda x: int(x.split("_")[i + 1]))

    if hds_file.lower().endswith("ucn"):
        hds = flopy.utils.UcnFile(hds_file)
    elif text.lower() == "headu":
        hds = flopy.utils.HeadUFile(hds_file)
    else:
        hds = flopy.utils.HeadFile(hds_file, precision=precision, text=text)
    kpers = df.kper.unique()
    df.loc[:, "obsval"] = np.NaN
    for kper in kpers:
        kstp = last_kstp_from_kper(hds, kper)
        data = hds.get_data(kstpkper=(kstp, kper))
        # jwhite 15jan2018 fix for really large values that are getting some
        # trash added to them...
        if text.lower() != "headu":
            data[np.isnan(data)] = 0.0
            data[data > np.abs(inact_abs_val)] = np.abs(inact_abs_val)
            data[data < -np.abs(inact_abs_val)] = -np.abs(inact_abs_val)
            df_kper = df.loc[df.kper == kper, :]
            df.loc[df_kper.index, "obsval"] = data[df_kper.k, df_kper.i, df_kper.j]
        else:
            df_kper = df.loc[df.kper == kper, :]
            for k, d in enumerate(data):
                d[np.isnan(d)] = 0.0
                d[d > np.abs(inact_abs_val)] = np.abs(inact_abs_val)
                d[d < -np.abs(inact_abs_val)] = -np.abs(inact_abs_val)
                df_kperk = df_kper.loc[df_kper.k == k, :]
                df.loc[df_kperk.index, "obsval"] = d[df_kperk.i]

    assert df.dropna().shape[0] == df.shape[0]
    df.loc[:, ["obsnme", "obsval"]].to_csv(out_file, index=False, sep=" ")
    return df
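
# Usage sketch for apply_hds_obs(), the run-time companion call (assumes
# "model.hds" and the instruction file "model.hds.dat.ins" already exist):
#
#   import pyemu
#   df = pyemu.gw_utils.apply_hds_obs("model.hds")
#   # rewrites "model.hds.dat" with the simulated value for each observation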


def setup_sft_obs(sft_file, ins_file=None, start_datetime=None, times=None, ncomp=1):
    """writes a post-processor and instruction file for a mt3d-usgs sft output file

    Args:
        sft_file (`str`): path and name of an existing sft output file (ASCII)
        ins_file (`str`, optional): the name of the instruction file to create.
            If None, the name is `sft_file`+".ins".  Default is `None`.
        start_datetime (`str`): a pandas.to_datetime() compatible str.  If not None,
            then the resulting observation names have the datetime
            suffix.  If None, the suffix is the output totim.  Default
            is `None`.
        times ([`float`]): a list of times to make observations for.  If None, all times
            found in the file are used. Default is None.
        ncomp (`int`): number of components in transport model. Default is 1.

    Returns:
        **pandas.DataFrame**: a dataframe with observation names and values for the sft simulated
        concentrations.

    Note:
        This is the companion function to `gw_utils.apply_sft_obs()`.

    """

    df = pd.read_csv(sft_file, skiprows=1, delim_whitespace=True)
    df.columns = [c.lower().replace("-", "_") for c in df.columns]
    if times is None:
        times = df.time.unique()
    missing = []
    utimes = df.time.unique()
    for t in times:
        if t not in utimes:
            missing.append(str(t))
    if len(missing) > 0:
        print(df.time)
        raise Exception("the following times are missing:{0}".format(",".join(missing)))
    with open("sft_obs.config", "w") as f:
        f.write(sft_file + "\n")
        [f.write("{0:15.6E}\n".format(t)) for t in times]
    df = apply_sft_obs()
    utimes = df.time.unique()
    for t in times:
        assert t in utimes, "time {0} missing in processed dataframe".format(t)
    idx = df.time.apply(lambda x: x in times)
    if start_datetime is not None:
        start_datetime = pd.to_datetime(start_datetime)
        df["time_str"] = pd.to_timedelta(df.time, unit="d") + start_datetime
        df["time_str"] = df.time_str.dt.strftime("%Y%m%d")
        # print(df.time_str)
    else:
        df["time_str"] = df.time.apply(lambda x: f"{x:08.2f}")
    df["ins_str"] = "l1\n"
    # check for multiple components
    # df_times = df.loc[idx, :]
    df.loc[:, "icomp"] = 1
    # icomp_idx = list(df.columns).index("icomp")
    for t in times:
        df_time = df.loc[df.time == t, :].copy()
        vc = df_time.sfr_node.value_counts()
        ncomp = vc.max()
        assert np.all(vc.values == ncomp)
        nstrm = df_time.shape[0] / ncomp
        for icomp in range(ncomp):
            s = int(nstrm * (icomp))
            e = int(nstrm * (icomp + 1))
            idxs = df_time.iloc[s:e, :].index
            # df_time.iloc[nstrm*(icomp):nstrm*(icomp+1),icomp_idx.loc["icomp"] = int(icomp+1)
            df_time.loc[idxs, "icomp"] = int(icomp + 1)

        # df.loc[df_time.index,"ins_str"] = df_time.apply(lambda x: "l1 w w !sfrc{0}_{1}_{2}! !swgw{0}_{1}_{2}! !gwcn{0}_{1}_{2}!\n".\
        #                                 format(x.sfr_node,x.icomp,x.time_str),axis=1)
        df.loc[df_time.index, "ins_str"] = df_time.apply(
            lambda x: "l1 w w !sfrc{0}_{1}_{2}!\n".format(
                x.sfr_node, x.icomp, x.time_str
            ),
            axis=1,
        )
        # print(df)
        # print(df.ins_str)
    df.index = np.arange(df.shape[0])
    if ins_file is None:
        ins_file = sft_file + ".processed.ins"

    with open(ins_file, "w") as f:
        f.write("pif ~\nl1\n")
        [f.write(i) for i in df.ins_str]
    # df = try_process_ins_file(ins_file,sft_file+".processed")
    df = try_process_output_file(ins_file, sft_file + ".processed")
    return df

1362

1363
def apply_sft_obs():
9✔
1364
    """process an mt3d-usgs sft ASCII output file using a previous-written
1365
    config file
1366

1367
    Returns:
1368
        **pandas.DataFrame**: a dataframe of extracted simulated outputs
1369

1370
    Note:
1371
        This is the companion function to `gw_utils.setup_sft_obs()`.
1372

1373
    """
1374
    # this is for dealing with the missing 'e' problem
1375
    def try_cast(x):
8✔
1376
        try:
8✔
1377
            return float(x)
8✔
1378
        except:
×
1379
            return 0.0
×
1380

1381
    times = []
8✔
1382
    with open("sft_obs.config") as f:
8✔
1383
        sft_file = f.readline().strip()
8✔
1384
        for line in f:
8✔
1385
            times.append(float(line.strip()))
8✔
1386
    df = pd.read_csv(sft_file, skiprows=1, delim_whitespace=True)  # ,nrows=10000000)
8✔
1387
    df.columns = [c.lower().replace("-", "_") for c in df.columns]
8✔
1388
    df = df.loc[df.time.apply(lambda x: x in times), :]
8✔
1389
    # print(df.dtypes)
1390
    # normalize
1391
    for c in df.columns:
8✔
1392
        # print(c)
1393
        if not "node" in c:
8✔
1394
            df.loc[:, c] = df.loc[:, c].apply(try_cast)
8✔
1395
        # print(df.loc[df.loc[:,c].apply(lambda x : type(x) == str),:])
1396
        if df.dtypes[c] == float:
8✔
1397
            df.loc[df.loc[:, c] < 1e-30, c] = 0.0
8✔
1398
            df.loc[df.loc[:, c] > 1e30, c] = 1.0e30
8✔
1399
    df.loc[:, "sfr_node"] = df.sfr_node.apply(np.int64)
8✔
1400

1401
    df.to_csv(sft_file + ".processed", sep=" ", index=False)
8✔
1402
    return df
8✔
1403

1404

1405
def setup_sfr_seg_parameters(
9✔
1406
    nam_file, model_ws=".", par_cols=None, tie_hcond=True, include_temporal_pars=None
1407
):
1408
    """Setup multiplier parameters for SFR segment data.
1409

1410
    Args:
1411
        nam_file (`str`): MODFLOW name file.  DIS, BAS, and SFR must be
1412
            available as pathed in the nam_file.  Optionally, `nam_file` can be
1413
            an existing `flopy.modflow.Modflow`.
1414
        model_ws (`str`): model workspace for flopy to load the MODFLOW model from
1415
        par_cols ([`str`]): a list of segment data entries to parameterize
1416
        tie_hcond (`bool`):  flag to use same mult par for hcond1 and hcond2 for a
1417
            given segment.  Default is `True`.
1418
        include_temporal_pars ([`str`]):  list of spatially-global multipliers to set up for
1419
            each stress period.  Default is None
1420

1421
    Returns:
1422
        **pandas.DataFrame**: a dataframe with useful parameter setup information
1423

1424
    Note:
1425
        This function handles the standard input case, not all the cryptic SFR options.  Loads the
1426
        dis, bas, and sfr files with flopy using model_ws.
1427

1428
        This is the companion function to `gw_utils.apply_sfr_seg_parameters()` .
1429
        The number (and numbering) of segment data entries must be consistent across
1430
        all stress periods.
1431

1432
        Writes `nam_file` +"_backup_.sfr" as the backup of the original sfr file
1433
        Skips values = 0.0 since multipliers don't work for these
1434

1435
    """
1436

1437
    try:
9✔
1438
        import flopy
9✔
1439
    except Exception as e:
×
1440
        return
×
1441
    if par_cols is None:
9✔
1442
        par_cols = ["flow", "runoff", "hcond1", "pptsw"]
9✔
1443
    if tie_hcond:
9✔
1444
        if "hcond1" not in par_cols or "hcond2" not in par_cols:
9✔
1445
            tie_hcond = False
9✔
1446

1447
    if isinstance(nam_file, flopy.modflow.mf.Modflow) and nam_file.sfr is not None:
9✔
1448
        m = nam_file
9✔
1449
        nam_file = m.namefile
9✔
1450
        model_ws = m.model_ws
9✔
1451
    else:
1452
        # load MODFLOW model # is this needed? could we just pass the model if it has already been read in?
1453
        m = flopy.modflow.Modflow.load(
8✔
1454
            nam_file, load_only=["sfr"], model_ws=model_ws, check=False, forgive=False
1455
        )
1456
    if include_temporal_pars:
9✔
1457
        if include_temporal_pars is True:
8✔
1458
            tmp_par_cols = {col: range(m.dis.nper) for col in par_cols}
8✔
1459
        elif isinstance(include_temporal_pars, str):
8✔
1460
            tmp_par_cols = {include_temporal_pars: range(m.dis.nper)}
×
1461
        elif isinstance(include_temporal_pars, list):
8✔
1462
            tmp_par_cols = {col: range(m.dis.nper) for col in include_temporal_pars}
8✔
1463
        elif isinstance(include_temporal_pars, dict):
×
1464
            tmp_par_cols = include_temporal_pars
×
1465
        include_temporal_pars = True
8✔
1466
    else:
1467
        tmp_par_cols = {}
1✔
1468
        include_temporal_pars = False
1✔
1469

1470
    # make backup copy of sfr file
1471
    shutil.copy(
9✔
1472
        os.path.join(model_ws, m.sfr.file_name[0]),
1473
        os.path.join(model_ws, nam_file + "_backup_.sfr"),
1474
    )
1475

1476
    # get the segment data (dict)
1477
    segment_data = m.sfr.segment_data
9✔
1478
    shape = segment_data[list(segment_data.keys())[0]].shape
9✔
1479
    # check
1480
    for kper, seg_data in m.sfr.segment_data.items():
9✔
1481
        assert (
9✔
1482
            seg_data.shape == shape
1483
        ), "cannot use: seg data must have the same number of entires for all kpers"
1484
    seg_data_col_order = list(seg_data.dtype.names)
9✔
1485
    # convert segment_data dictionary to multi index df - this could get ugly
1486
    reform = {
9✔
1487
        (k, c): segment_data[k][c]
1488
        for k in segment_data.keys()
1489
        for c in segment_data[k].dtype.names
1490
    }
1491
    seg_data_all_kper = pd.DataFrame.from_dict(reform)
9✔
1492
    seg_data_all_kper.columns.names = ["kper", "col"]
9✔
1493

1494
    # extract the first seg data kper to a dataframe
1495
    seg_data = seg_data_all_kper[0].copy()  # pd.DataFrame.from_records(seg_data)
9✔
1496

1497
    # make sure all par cols are found and search of any data in kpers
1498
    missing = []
9✔
1499
    cols = par_cols.copy()
9✔
1500
    for par_col in set(par_cols + list(tmp_par_cols.keys())):
9✔
1501
        if par_col not in seg_data.columns:
9✔
1502
            if par_col in cols:
×
1503
                missing.append(cols.pop(cols.index(par_col)))
×
1504
            if par_col in tmp_par_cols.keys():
×
1505
                _ = tmp_par_cols.pop(par_col)
×
1506
        # look across all kper in multiindex df to check for values entry - fill with absmax should capture entries
1507
        else:
1508
            seg_data.loc[:, par_col] = (
9✔
1509
                seg_data_all_kper.loc[:, (slice(None), par_col)]
1510
                    .abs().groupby(level=1, axis=1).max()
1511
            )
1512
    if len(missing) > 0:
9✔
1513
        warnings.warn(
×
1514
            "the following par_cols were not found in segment data: {0}".format(
1515
                ",".join(missing)
1516
            ),
1517
            PyemuWarning,
1518
        )
1519
        if len(missing) >= len(par_cols):
×
1520
            warnings.warn(
×
1521
                "None of the passed par_cols ({0}) were found in segment data.".format(
1522
                    ",".join(par_cols)
1523
                ),
1524
                PyemuWarning,
1525
            )
1526
    seg_data = seg_data[seg_data_col_order]  # reset column orders to initial
9✔
1527
    seg_data_org = seg_data.copy()
9✔
1528
    seg_data.to_csv(os.path.join(model_ws, "sfr_seg_pars.dat"), sep=",")
9✔
1529

1530
    # the data cols not to parameterize
1531
    # better than a column indexer as pandas can change column orders
1532
    idx_cols = ["nseg", "icalc", "outseg", "iupseg", "iprior", "nstrpts"]
9✔
1533
    notpar_cols = [c for c in seg_data.columns if c not in cols + idx_cols]
9✔
1534

1535
    # process par cols
1536
    tpl_str, pvals = [], []
9✔
1537
    if include_temporal_pars:
9✔
1538
        tmp_pnames, tmp_tpl_str = [], []
8✔
1539
        tmp_df = pd.DataFrame(
8✔
1540
            data={c: 1.0 for c in tmp_par_cols.keys()},
1541
            index=list(m.sfr.segment_data.keys()),
1542
        )
1543
        tmp_df.sort_index(inplace=True)
8✔
1544
        tmp_df.to_csv(os.path.join(model_ws, "sfr_seg_temporal_pars.dat"))
8✔
1545
    for par_col in set(cols + list(tmp_par_cols.keys())):
9✔
1546
        print(par_col)
9✔
1547
        prefix = par_col
9✔
1548
        if tie_hcond and par_col == "hcond2":
9✔
1549
            prefix = "hcond1"
×
1550
        if seg_data.loc[:, par_col].sum() == 0.0:
9✔
1551
            print("all zeros for {0}...skipping...".format(par_col))
9✔
1552
            # seg_data.loc[:,par_col] = 1
1553
            # all zero so no need to set up
1554
            if par_col in cols:
9✔
1555
                # - add to notpar
1556
                notpar_cols.append(cols.pop(cols.index(par_col)))
9✔
1557
            if par_col in tmp_par_cols.keys():
9✔
1558
                _ = tmp_par_cols.pop(par_col)
8✔
1559
        if par_col in cols:
9✔
1560
            seg_data.loc[:, par_col] = seg_data.apply(
9✔
1561
                lambda x: "~    {0}_{1:04d}   ~".format(prefix, int(x.nseg))
1562
                if float(x[par_col]) != 0.0
1563
                else "1.0",
1564
                axis=1,
1565
            )
1566
            org_vals = seg_data_org.loc[seg_data_org.loc[:, par_col] != 0.0, par_col]
9✔
1567
            pnames = seg_data.loc[org_vals.index, par_col]
9✔
1568
            pvals.extend(list(org_vals.values))
9✔
1569
            tpl_str.extend(list(pnames.values))
9✔
1570
        if par_col in tmp_par_cols.keys():
9✔
1571
            parnme = tmp_df.index.map(
8✔
1572
                lambda x: "{0}_{1:04d}_tmp".format(par_col, int(x))
1573
                if x in tmp_par_cols[par_col]
1574
                else 1.0
1575
            )
1576
            sel = parnme != 1.0
8✔
1577
            tmp_df.loc[sel, par_col] = parnme[sel].map(lambda x: "~   {0}  ~".format(x))
8✔
1578
            tmp_tpl_str.extend(list(tmp_df.loc[sel, par_col].values))
8✔
1579
            tmp_pnames.extend(list(parnme[sel].values))
8✔
1580
    pnames = [t.replace("~", "").strip() for t in tpl_str]
9✔
1581
    df = pd.DataFrame(
9✔
1582
        {"parnme": pnames, "org_value": pvals, "tpl_str": tpl_str}, index=pnames
1583
    )
1584
    df.drop_duplicates(inplace=True)
9✔
1585
    if df.empty:
9✔
1586
        warnings.warn(
×
1587
            "No spatial sfr segment parameters have been set up, "
1588
            "either none of {0} were found or all were zero.".format(
1589
                ",".join(par_cols)
1590
            ),
1591
            PyemuWarning,
1592
        )
1593
        # return df
1594
    # set not par cols to 1.0
1595
    seg_data.loc[:, notpar_cols] = "1.0"
9✔
1596

1597
    # write the template file
1598
    _write_df_tpl(os.path.join(model_ws, "sfr_seg_pars.dat.tpl"), seg_data, sep=",")
9✔
1599

1600
    # make sure the tpl file exists and has the same num of pars
1601
    parnme = parse_tpl_file(os.path.join(model_ws, "sfr_seg_pars.dat.tpl"))
9✔
1602
    assert len(parnme) == df.shape[0]
9✔
1603

1604
    # set some useful par info
1605
    df["pargp"] = df.parnme.apply(lambda x: x.split("_")[0])
9✔
1606

1607
    if include_temporal_pars:
9✔
1608
        _write_df_tpl(
8✔
1609
            filename=os.path.join(model_ws, "sfr_seg_temporal_pars.dat.tpl"), df=tmp_df
1610
        )
1611
        pargp = [pname.split("_")[0] + "_tmp" for pname in tmp_pnames]
8✔
1612
        tmp_df = pd.DataFrame(
8✔
1613
            data={"parnme": tmp_pnames, "pargp": pargp}, index=tmp_pnames
1614
        )
1615
        if not tmp_df.empty:
8✔
1616
            tmp_df.loc[:, "org_value"] = 1.0
8✔
1617
            tmp_df.loc[:, "tpl_str"] = tmp_tpl_str
8✔
1618
            df = pd.concat([df, tmp_df[df.columns]])
8✔
1619
    if df.empty:
9✔
1620
        warnings.warn(
×
1621
            "No sfr segment parameters have been set up, "
1622
            "either none of {0} were found or all were zero.".format(
1623
                ",".join(set(par_cols + list(tmp_par_cols.keys())))
1624
            ),
1625
            PyemuWarning,
1626
        )
1627
        return df
×
1628

1629
    # write the config file used by apply_sfr_pars()
1630
    with open(os.path.join(model_ws, "sfr_seg_pars.config"), "w") as f:
9✔
1631
        f.write("nam_file {0}\n".format(nam_file))
9✔
1632
        f.write("model_ws {0}\n".format(model_ws))
9✔
1633
        f.write("mult_file sfr_seg_pars.dat\n")
9✔
1634
        f.write("sfr_filename {0}\n".format(m.sfr.file_name[0]))
9✔
1635
        if include_temporal_pars:
9✔
1636
            f.write("time_mult_file sfr_seg_temporal_pars.dat\n")
8✔
1637

1638
    # set some useful par info
1639
    df.loc[:, "parubnd"] = 1.25
9✔
1640
    df.loc[:, "parlbnd"] = 0.75
9✔
1641
    hpars = df.loc[df.pargp.apply(lambda x: x.startswith("hcond")), "parnme"]
9✔
1642
    df.loc[hpars, "parubnd"] = 100.0
9✔
1643
    df.loc[hpars, "parlbnd"] = 0.01
9✔
1644

1645
    return df
9✔
1646

1647

1648
def setup_sfr_reach_parameters(nam_file, model_ws=".", par_cols=["strhc1"]):
9✔
1649
    """Setup multiplier paramters for reach data, when reachinput option is specififed in sfr.
1650

1651

1652
    Args:
1653
        nam_file (`str`): MODFLOW name file.  DIS, BAS, and SFR must be
1654
            available as pathed in the nam_file.  Optionally, `nam_file` can be
1655
            an existing `flopy.modflow.Modflow`.
1656
        model_ws (`str`): model workspace for flopy to load the MODFLOW model from
1657
        par_cols ([`str`]): a list of reach data entries to parameterize.
            Default is ["strhc1"].
1662

1663
    Returns:
1664
        **pandas.DataFrame**: a dataframe with useful parameter setup information
1665

1666
    Note:
1667
        Similar to `gw_utils.setup_sfr_seg_parameters()`, but parameters are applied to the sfr reach data.
1668

1669
        Can load the dis, bas, and sfr files with flopy using model_ws. Or can pass a model object
1670
        (SFR loading can be slow)
1671

1672
        This is the companion function of `gw_utils.apply_sfr_reach_parameters()`
1673
        Skips values = 0.0 since multipliers don't work for these
1674

1675
    """
1676
    try:
8✔
1677
        import flopy
8✔
1678
    except Exception as e:
×
1679
        return
×
1680
    if par_cols is None:
8✔
1681
        par_cols = ["strhc1"]
×
1682
    if isinstance(nam_file, flopy.modflow.mf.Modflow) and nam_file.sfr is not None:
8✔
1683
        # flopy MODFLOW model has been passed and has SFR loaded
1684
        m = nam_file
8✔
1685
        nam_file = m.namefile
8✔
1686
        model_ws = m.model_ws
8✔
1687
    else:
1688
        # if model has not been passed or SFR not loaded # load MODFLOW model
1689
        m = flopy.modflow.Modflow.load(
×
1690
            nam_file, load_only=["sfr"], model_ws=model_ws, check=False, forgive=False
1691
        )
1692
    # get reachdata as dataframe
1693
    reach_data = pd.DataFrame.from_records(m.sfr.reach_data)
8✔
1694
    # write initial reach_data as csv
1695
    reach_data_orig = reach_data.copy()
8✔
1696
    reach_data.to_csv(os.path.join(m.model_ws, "sfr_reach_pars.dat"), sep=",")
8✔
1697

1698
    # generate template file with pars in par_cols
1699
    # process par cols
1700
    tpl_str, pvals = [], []
8✔
1701
    # par_cols=["strhc1"]
1702
    idx_cols = ["node", "k", "i", "j", "iseg", "ireach", "reachID", "outreach"]
8✔
1703
    # the data cols not to parameterize
1704
    notpar_cols = [c for c in reach_data.columns if c not in par_cols + idx_cols]
8✔
1705
    # make sure all par cols are found and search of any data in kpers
1706
    missing = []
8✔
1707
    cols = par_cols.copy()
8✔
1708
    for par_col in par_cols:
8✔
1709
        if par_col not in reach_data.columns:
8✔
1710
            missing.append(par_col)
×
1711
            cols.remove(par_col)
×
1712
    if len(missing) > 0:
8✔
1713
        warnings.warn(
×
1714
            "the following par_cols were not found in reach data: {0}".format(
1715
                ",".join(missing)
1716
            ),
1717
            PyemuWarning,
1718
        )
1719
        if len(missing) >= len(par_cols):
×
1720
            warnings.warn(
×
1721
                "None of the passed par_cols ({0}) were found in reach data.".format(
1722
                    ",".join(par_cols)
1723
                ),
1724
                PyemuWarning,
1725
            )
1726
    for par_col in cols:
8✔
1727
        if par_col == "strhc1":
8✔
1728
            prefix = "strk"  # shorten par
8✔
1729
        else:
1730
            prefix = par_col
×
1731
        reach_data.loc[:, par_col] = reach_data.apply(
8✔
1732
            lambda x: "~    {0}_{1:04d}   ~".format(prefix, int(x.reachID))
1733
            if float(x[par_col]) != 0.0
1734
            else "1.0",
1735
            axis=1,
1736
        )
1737
        org_vals = reach_data_orig.loc[reach_data_orig.loc[:, par_col] != 0.0, par_col]
8✔
1738
        pnames = reach_data.loc[org_vals.index, par_col]
8✔
1739
        pvals.extend(list(org_vals.values))
8✔
1740
        tpl_str.extend(list(pnames.values))
8✔
1741
    pnames = [t.replace("~", "").strip() for t in tpl_str]
8✔
1742
    df = pd.DataFrame(
8✔
1743
        {"parnme": pnames, "org_value": pvals, "tpl_str": tpl_str}, index=pnames
1744
    )
1745
    df.drop_duplicates(inplace=True)
8✔
1746
    if df.empty:
8✔
1747
        warnings.warn(
×
1748
            "No sfr reach parameters have been set up, either none of {0} were found or all were zero.".format(
1749
                ",".join(par_cols)
1750
            ),
1751
            PyemuWarning,
1752
        )
1753
    else:
1754
        # set not par cols to 1.0
1755
        reach_data.loc[:, notpar_cols] = "1.0"
8✔
1756

1757
        # write the template file
1758
        _write_df_tpl(
8✔
1759
            os.path.join(model_ws, "sfr_reach_pars.dat.tpl"), reach_data, sep=","
1760
        )
1761

1762
        # write the config file used by apply_sfr_pars()
1763
        with open(os.path.join(model_ws, "sfr_reach_pars.config"), "w") as f:
8✔
1764
            f.write("nam_file {0}\n".format(nam_file))
8✔
1765
            f.write("model_ws {0}\n".format(model_ws))
8✔
1766
            f.write("mult_file sfr_reach_pars.dat\n")
8✔
1767
            f.write("sfr_filename {0}".format(m.sfr.file_name[0]))
8✔
1768

1769
        # make sure the tpl file exists and has the same num of pars
1770
        parnme = parse_tpl_file(os.path.join(model_ws, "sfr_reach_pars.dat.tpl"))
8✔
1771
        assert len(parnme) == df.shape[0]
8✔
1772

1773
        # set some useful par info
1774
        df.loc[:, "pargp"] = df.parnme.apply(lambda x: x.split("_")[0])
8✔
1775
        df.loc[:, "parubnd"] = 1.25
8✔
1776
        df.loc[:, "parlbnd"] = 0.75
8✔
1777
        hpars = df.loc[df.pargp.apply(lambda x: x.startswith("strk")), "parnme"]
8✔
1778
        df.loc[hpars, "parubnd"] = 100.0
8✔
1779
        df.loc[hpars, "parlbnd"] = 0.01
8✔
1780
    return df
8✔
1781

1782

1783
def apply_sfr_seg_parameters(seg_pars=True, reach_pars=False):
9✔
1784
    """apply the SFR segement multiplier parameters.
1785

1786
    Args:
1787
        seg_pars (`bool`, optional): flag to apply segment-based parameters.
1788
            Default is True
1789
        reach_pars (`bool`, optional): flag to apply reach-based parameters.
1790
            Default is False
1791

1792
    Returns:
1793
        **flopy.modflow.ModflowSfr**: the modified SFR package instance
1794

1795
    Note:
1796
        Expects "sfr_seg_pars.config" to exist
1797

1798
        Expects `nam_file` +"_backup_.sfr" to exist
1799

1800

1801

1802
    """
1803

1804
    if not seg_pars and not reach_pars:
9✔
1805
        raise Exception(
×
1806
            "gw_utils.apply_sfr_pars() error: both seg_pars and reach_pars are False"
1807
        )
1808
    # if seg_pars and reach_pars:
1809
    #    raise Exception("gw_utils.apply_sfr_pars() error: both seg_pars and reach_pars are True")
1810

1811
    import flopy
9✔
1812

1813
    bak_sfr_file, pars = None, None
9✔
1814

1815
    if seg_pars:
9✔
1816
        assert os.path.exists("sfr_seg_pars.config")
9✔
1817

1818
        with open("sfr_seg_pars.config", "r") as f:
9✔
1819
            pars = {}
9✔
1820
            for line in f:
9✔
1821
                line = line.strip().split()
9✔
1822
                pars[line[0]] = line[1]
9✔
1823
        bak_sfr_file = pars["nam_file"] + "_backup_.sfr"
9✔
1824
        # m = flopy.modflow.Modflow.load(pars["nam_file"],model_ws=pars["model_ws"],load_only=["sfr"],check=False)
1825
        m = flopy.modflow.Modflow.load(pars["nam_file"], load_only=[], check=False)
9✔
1826
        sfr = flopy.modflow.ModflowSfr2.load(os.path.join(bak_sfr_file), m)
9✔
1827
        sfrfile = pars["sfr_filename"]
9✔
1828
        mlt_df = pd.read_csv(pars["mult_file"], delim_whitespace=False, index_col=0)
9✔
1829
        # time_mlt_df = None
1830
        # if "time_mult_file" in pars:
1831
        #     time_mult_file = pars["time_mult_file"]
1832
        #     time_mlt_df = pd.read_csv(pars["time_mult_file"], delim_whitespace=False,index_col=0)
1833

1834
        idx_cols = ["nseg", "icalc", "outseg", "iupseg", "iprior", "nstrpts"]
9✔
1835
        present_cols = [c for c in idx_cols if c in mlt_df.columns]
9✔
1836
        mlt_cols = mlt_df.columns.drop(present_cols)
9✔
1837
        for key, val in m.sfr.segment_data.items():
9✔
1838
            df = pd.DataFrame.from_records(val)
9✔
1839
            df.loc[:, mlt_cols] *= mlt_df.loc[:, mlt_cols]
9✔
1840
            val = df.to_records(index=False)
9✔
1841
            sfr.segment_data[key] = val
9✔
1842
    if reach_pars:
9✔
1843
        assert os.path.exists("sfr_reach_pars.config")
8✔
1844
        with open("sfr_reach_pars.config", "r") as f:
8✔
1845
            r_pars = {}
8✔
1846
            for line in f:
8✔
1847
                line = line.strip().split()
8✔
1848
                r_pars[line[0]] = line[1]
8✔
1849
        if bak_sfr_file is None:  # will be the case if seg_pars is False
8✔
1850
            bak_sfr_file = r_pars["nam_file"] + "_backup_.sfr"
×
1851
            # m = flopy.modflow.Modflow.load(pars["nam_file"],model_ws=pars["model_ws"],load_only=["sfr"],check=False)
1852
            m = flopy.modflow.Modflow.load(
×
1853
                r_pars["nam_file"], load_only=[], check=False
1854
            )
1855
            sfr = flopy.modflow.ModflowSfr2.load(os.path.join(bak_sfr_file), m)
×
1856
            sfrfile = r_pars["sfr_filename"]
×
1857
        r_mlt_df = pd.read_csv(r_pars["mult_file"], sep=",", index_col=0)
8✔
1858
        r_idx_cols = ["node", "k", "i", "j", "iseg", "ireach", "reachID", "outreach"]
8✔
1859
        r_mlt_cols = r_mlt_df.columns.drop(r_idx_cols)
8✔
1860
        r_df = pd.DataFrame.from_records(m.sfr.reach_data)
8✔
1861
        r_df.loc[:, r_mlt_cols] *= r_mlt_df.loc[:, r_mlt_cols]
8✔
1862
        sfr.reach_data = r_df.to_records(index=False)
8✔
1863

1864
    # m.remove_package("sfr")
1865
    if pars is not None and "time_mult_file" in pars:
9✔
1866
        time_mult_file = pars["time_mult_file"]
8✔
1867
        time_mlt_df = pd.read_csv(time_mult_file, delim_whitespace=False, index_col=0)
8✔
1868
        for kper, sdata in m.sfr.segment_data.items():
8✔
1869
            assert kper in time_mlt_df.index, (
8✔
1870
                "gw_utils.apply_sfr_seg_parameters() error: kper "
1871
                + "{0} not in time_mlt_df index".format(kper)
1872
            )
1873
            for col in time_mlt_df.columns:
8✔
1874
                sdata[col] *= time_mlt_df.loc[kper, col]
8✔
1875

1876
    sfr.write_file(filename=sfrfile)
9✔
1877
    return sfr
9✔
1878

1879

1880
def apply_sfr_parameters(seg_pars=True, reach_pars=False):
9✔
1881
    """thin wrapper around `gw_utils.apply_sfr_seg_parameters()`
1882

1883
    Args:
1884
        seg_pars (`bool`, optional): flag to apply segment-based parameters.
1885
            Default is True
1886
        reach_pars (`bool`, optional): flag to apply reach-based parameters.
1887
            Default is False
1888

1889
    Returns:
1890
        **flopy.modflow.ModflowSfr**: the modified SFR package instance
1891

1892
    Note:
1893
        Expects "sfr_seg_pars.config" to exist
1894

1895
        Expects `nam_file` +"_backup_.sfr" to exist
1896

1897

1898
    """
1899
    sfr = apply_sfr_seg_parameters(seg_pars=seg_pars, reach_pars=reach_pars)
1✔
1900
    return sfr
1✔
1901

1902

1903
def setup_sfr_obs(
9✔
1904
    sfr_out_file, seg_group_dict=None, ins_file=None, model=None, include_path=False
1905
):
1906
    """setup observations using the sfr ASCII output file.  Setups
1907
    the ability to aggregate flows for groups of segments.  Applies
1908
    only to flow to the aquifer and flow out.
1909

1910
    Args:
1911
        sfr_out_file (`str`): the name and path to an existing SFR output file
1912
        seg_group_dict (`dict`): a dictionary of SFR segments to aggregate together for a single obs.
1913
            the key value in the dict is the base observation name. If None, all segments
1914
            are used as individual observations. Default is None
1915
        model (`flopy.mbase`): a flopy model.  If passed, the observation names will have
1916
            the datetime of the observation appended to them.  If None, the observation names
1917
            will have the stress period appended to them. Default is None.
1918
        include_path (`bool`): flag to prepend sfr_out_file path to sfr_obs.config.  Useful for setting up
1919
            the process in a separate directory from where python is running.
1920

1921

1922
    Returns:
1923
        **pandas.DataFrame**: dataframe of observation name, simulated value and group.
1924

1925
    Note:
1926
        This is the companion function of `gw_utils.apply_sfr_obs()`.
1927

1928
        This function writes "sfr_obs.config" which must be kept in the dir where
1929
        "gw_utils.apply_sfr_obs()" is being called during the forward run
1930

1931
    """
1932

1933
    sfr_dict = load_sfr_out(sfr_out_file)
9✔
1934
    kpers = list(sfr_dict.keys())
9✔
1935
    kpers.sort()
9✔
1936

1937
    if seg_group_dict is None:
9✔
1938
        seg_group_dict = {"seg{0:04d}".format(s): s for s in sfr_dict[kpers[0]].segment}
9✔
1939
    else:
1940
        warnings.warn(
8✔
1941
            "Flow out (flout) of grouped segments will be aggregated... ", PyemuWarning
1942
        )
1943
    sfr_segs = set(sfr_dict[list(sfr_dict.keys())[0]].segment)
9✔
1944
    keys = ["sfr_out_file"]
9✔
1945
    if include_path:
9✔
1946
        values = [os.path.split(sfr_out_file)[-1]]
1✔
1947
    else:
1948
        values = [sfr_out_file]
8✔
1949
    for oname, segs in seg_group_dict.items():
9✔
1950
        if np.isscalar(segs):
9✔
1951
            segs_set = {segs}
9✔
1952
            segs = [segs]
9✔
1953
        else:
1954
            segs_set = set(segs)
8✔
1955
        diff = segs_set.difference(sfr_segs)
9✔
1956
        if len(diff) > 0:
9✔
1957
            raise Exception(
×
1958
                "the following segs listed with oname {0} where not found: {1}".format(
1959
                    oname, ",".join([str(s) for s in diff])
1960
                )
1961
            )
1962
        for seg in segs:
9✔
1963
            keys.append(oname)
9✔
1964
            values.append(seg)
9✔
1965

1966
    df_key = pd.DataFrame({"obs_base": keys, "segment": values})
9✔
1967
    if include_path:
9✔
1968
        pth = os.path.join(*[p for p in os.path.split(sfr_out_file)[:-1]])
1✔
1969
        config_file = os.path.join(pth, "sfr_obs.config")
1✔
1970
    else:
1971
        config_file = "sfr_obs.config"
8✔
1972
    print("writing 'sfr_obs.config' to {0}".format(config_file))
9✔
1973
    df_key.to_csv(config_file)
9✔
1974

1975
    bd = "."
9✔
1976
    if include_path:
9✔
1977
        bd = os.getcwd()
1✔
1978
        os.chdir(pth)
1✔
1979
    try:
9✔
1980
        df = apply_sfr_obs()
9✔
1981
    except Exception as e:
×
1982
        os.chdir(bd)
×
1983
        raise Exception("error in apply_sfr_obs(): {0}".format(str(e)))
×
1984
    os.chdir(bd)
9✔
1985
    if model is not None:
9✔
1986
        dts = (
9✔
1987
            pd.to_datetime(model.start_datetime)
1988
            + pd.to_timedelta(np.cumsum(model.dis.perlen.array), unit="d")
1989
        ).date
1990
        df.loc[:, "datetime"] = df.kper.apply(lambda x: dts[x])
9✔
1991
        df.loc[:, "time_str"] = df.datetime.apply(lambda x: x.strftime("%Y%m%d"))
9✔
1992
    else:
1993
        df.loc[:, "time_str"] = df.kper.apply(lambda x: "{0:04d}".format(x))
8✔
1994
    df.loc[:, "flaqx_obsnme"] = df.apply(
9✔
1995
        lambda x: "{0}_{1}_{2}".format("fa", x.obs_base, x.time_str), axis=1
1996
    )
1997
    df.loc[:, "flout_obsnme"] = df.apply(
9✔
1998
        lambda x: "{0}_{1}_{2}".format("fo", x.obs_base, x.time_str), axis=1
1999
    )
2000

2001
    if ins_file is None:
9✔
2002
        ins_file = sfr_out_file + ".processed.ins"
9✔
2003

2004
    with open(ins_file, "w") as f:
9✔
2005
        f.write("pif ~\nl1\n")
9✔
2006
        for fla, flo in zip(df.flaqx_obsnme, df.flout_obsnme):
9✔
2007
            f.write("l1 w w !{0}! !{1}!\n".format(fla, flo))
9✔
2008

2009
    df = None
9✔
2010
    pth = os.path.split(ins_file)[:-1]
9✔
2011
    pth = os.path.join(*pth)
9✔
2012
    if pth == "":
9✔
2013
        pth = "."
8✔
2014
    bd = os.getcwd()
9✔
2015
    os.chdir(pth)
9✔
2016
    df = try_process_output_file(
9✔
2017
        os.path.split(ins_file)[-1], os.path.split(sfr_out_file + ".processed")[-1]
2018
    )
2019
    os.chdir(bd)
9✔
2020
    if df is not None:
9✔
2021
        df.loc[:, "obsnme"] = df.index.values
9✔
2022
        df.loc[:, "obgnme"] = df.obsnme.apply(
9✔
2023
            lambda x: "flaqx" if x.startswith("fa") else "flout"
2024
        )
2025
        return df
9✔
2026

2027

2028
def apply_sfr_obs():
9✔
2029
    """apply the sfr observation process
2030

2031
    Args:
2032
        None
2033

2034
    Returns:
2035
        **pandas.DataFrame**: a dataframe of aggregated sfr segment flow-to-aquifer and flow-out values
2036

2037
    Note:
2038
        This is the companion function of `gw_utils.setup_sfr_obs()`.
2039

2040
        Requires `sfr_obs.config`.
2041

2042
        Writes `sfr_out_file`+".processed", where `sfr_out_file` is defined in "sfr_obs.config"
2043
    """
2044
    assert os.path.exists("sfr_obs.config")
9✔
2045
    df_key = pd.read_csv("sfr_obs.config", index_col=0)
9✔
2046

2047
    assert df_key.iloc[0, 0] == "sfr_out_file", df_key.iloc[0, :]
9✔
2048
    sfr_out_file = df_key.iloc[0, 1]
9✔
2049
    df_key = df_key.iloc[1:, :]
9✔
2050
    df_key.loc[:, "segment"] = df_key.segment.apply(np.int64)
9✔
2051
    df_key.index = df_key.segment
9✔
2052
    seg_group_dict = df_key.groupby(df_key.obs_base).groups
9✔
2053

2054
    sfr_kper = load_sfr_out(sfr_out_file)
9✔
2055
    kpers = list(sfr_kper.keys())
9✔
2056
    kpers.sort()
9✔
2057
    # results = {o:[] for o in seg_group_dict.keys()}
2058
    results = []
9✔
2059
    for kper in kpers:
9✔
2060
        df = sfr_kper[kper]
9✔
2061
        for obs_base, segs in seg_group_dict.items():
9✔
2062
            agg = df.loc[
9✔
2063
                segs.values, :
2064
            ].sum()  # still agg flout where seg groups are passed!
2065
            # print(obs_base,agg)
2066
            results.append([kper, obs_base, agg["flaqx"], agg["flout"]])
9✔
2067
    df = pd.DataFrame(data=results, columns=["kper", "obs_base", "flaqx", "flout"])
9✔
2068
    df.sort_values(by=["kper", "obs_base"], inplace=True)
9✔
2069
    df.to_csv(sfr_out_file + ".processed", sep=" ", index=False)
9✔
2070
    return df
9✔
2071

2072

2073
def load_sfr_out(sfr_out_file, selection=None):
9✔
2074
    """load an ASCII SFR output file into a dictionary of kper: dataframes.
2075

2076
    Args:
2077
        sfr_out_file (`str`): SFR ASCII output file
2078
        selection (`pandas.DataFrame`): a dataframe of `reach` and `segment` pairs to
2079
            load.  If `None`, all reach-segment pairs are loaded.  Default is `None`.
2080

2081
    Returns:
2082
        **dict**: dictionary of {kper:`pandas.DataFrame`} of SFR output.
2083

2084
    Note:
2085
        Aggregates flow to aquifer for segments and returns flow out at the
2086
        downstream end of segment.
2087

2088
    """
2089
    assert os.path.exists(sfr_out_file), "couldn't find sfr out file {0}".format(
9✔
2090
        sfr_out_file
2091
    )
2092
    tag = " stream listing"
9✔
2093
    lcount = 0
9✔
2094
    sfr_dict = {}
9✔
2095
    if selection is None:
9✔
2096
        pass
9✔
2097
    elif isinstance(selection, str):
8✔
2098
        assert (
8✔
2099
            selection == "all"
2100
        ), "If string passed as selection only 'all' allowed: " "{}".format(selection)
2101
    else:
2102
        assert isinstance(
8✔
2103
            selection, pd.DataFrame
2104
        ), "'selection needs to be pandas Dataframe. " "Type {} passed.".format(
2105
            type(selection)
2106
        )
2107
        assert np.all(
8✔
2108
            [sr in selection.columns for sr in ["segment", "reach"]]
2109
        ), "Either 'segment' or 'reach' not in selection columns"
2110
    with open(sfr_out_file) as f:
9✔
2111
        while True:
4✔
2112
            line = f.readline().lower()
9✔
2113
            lcount += 1
9✔
2114
            if line == "":
9✔
2115
                break
9✔
2116
            if line.startswith(tag):
9✔
2117
                raw = line.strip().split()
9✔
2118
                kper = int(raw[3]) - 1
9✔
2119
                kstp = int(raw[5]) - 1
9✔
2120
                [f.readline() for _ in range(4)]  # skip to where the data starts
9✔
2121
                lcount += 4
9✔
2122
                dlines = []
9✔
2123
                while True:
4✔
2124
                    dline = f.readline()
9✔
2125
                    lcount += 1
9✔
2126
                    if dline.strip() == "":
9✔
2127
                        break
9✔
2128
                    draw = dline.strip().split()
9✔
2129
                    dlines.append(draw)
9✔
2130
                df = pd.DataFrame(data=np.array(dlines)).iloc[:, [3, 4, 6, 7]]
9✔
2131
                df.columns = ["segment", "reach", "flaqx", "flout"]
9✔
2132
                df["segment"] = df.segment.astype(np.int64)
9✔
2133
                df["reach"] = df.reach.astype(np.int64)
9✔
2134
                df["flaqx"] = df.flaqx.astype(np.float64)
9✔
2135
                df["flout"] = df.flout.astype(np.float64)
9✔
2136
                df.index = [
9✔
2137
                    "{0:03d}_{1:03d}".format(s, r)
2138
                    for s, r in np.array([df.segment.values, df.reach.values]).T
2139
                ]
2140
                # df.index = df.apply(
2141
                # lambda x: "{0:03d}_{1:03d}".format(
2142
                # int(x.segment), int(x.reach)), axis=1)
2143
                if selection is None:  # setup for all segs, aggregate
9✔
2144
                    gp = df.groupby(df.segment)
9✔
2145
                    bot_reaches = (
9✔
2146
                        gp[["reach"]]
2147
                        .max()
2148
                        .apply(
2149
                            lambda x: "{0:03d}_{1:03d}".format(
2150
                                int(x.name), int(x.reach)
2151
                            ),
2152
                            axis=1,
2153
                        )
2154
                    )
2155
                    # only sum distributed output # take flow out of seg
2156
                    df2 = pd.DataFrame(
9✔
2157
                        {
2158
                            "flaqx": gp.flaqx.sum(),
2159
                            "flout": df.loc[bot_reaches, "flout"].values,
2160
                        },
2161
                        index=gp.groups.keys(),
2162
                    )
2163
                    # df = df.groupby(df.segment).sum()
2164
                    df2["segment"] = df2.index
9✔
2165
                elif isinstance(selection, str) and selection == "all":
8✔
2166
                    df2 = df
8✔
2167
                else:
2168
                    seg_reach_id = selection.apply(
8✔
2169
                        lambda x: "{0:03d}_{1:03d}".format(
2170
                            int(x.segment), int(x.reach)
2171
                        ),
2172
                        axis=1,
2173
                    ).values
2174
                    for sr in seg_reach_id:
8✔
2175
                        if sr not in df.index:
8✔
2176
                            s, r = [x.lstrip("0") for x in sr.split("_")]
8✔
2177
                            warnings.warn(
8✔
2178
                                "Requested segment reach pair ({0},{1}) "
2179
                                "is not in sfr output. Dropping...".format(
2180
                                    int(r), int(s)
2181
                                ),
2182
                                PyemuWarning,
2183
                            )
2184
                            seg_reach_id = np.delete(
8✔
2185
                                seg_reach_id, np.where(seg_reach_id == sr), axis=0
2186
                            )
2187
                    df2 = df.loc[seg_reach_id].copy()
8✔
2188
                if kper in sfr_dict.keys():
9✔
2189
                    print(
×
2190
                        "multiple entries found for kper {0}, "
2191
                        "replacing...".format(kper)
2192
                    )
2193
                sfr_dict[kper] = df2
9✔
2194
    return sfr_dict
9✔
2195

2196

2197
def setup_sfr_reach_obs(
9✔
2198
    sfr_out_file, seg_reach=None, ins_file=None, model=None, include_path=False
2199
):
2200
    """setup observations using the sfr ASCII output file.  Setups
2201
    sfr point observations using segment and reach numbers.
2202

2203
    Args:
2204
        sfr_out_file (`str`): the path and name of an existing SFR output file
2205
        seg_reach (varies): a dict, or list of SFR [segment,reach] pairs identifying
2206
            locations of interest.  If `dict`, the key value in the dict is the base
2207
            observation name. If None, all reaches are used as individual observations.
2208
            Default is None - THIS MAY SET UP A LOT OF OBS!
2209
        model (`flopy.mbase`): a flopy model.  If passed, the observation names will
2210
            have the datetime of the observation appended to them.  If None, the
2211
            observation names will have the stress period appended to them. Default is None.
2212
        include_path (`bool`): a flag to prepend sfr_out_file path to sfr_reach_obs.config.  Useful
2213
            for setting up the process in a separate directory from where python is running.
2214

2215

2216
    Returns:
2217
        `pd.DataFrame`: a dataframe of observation names, values, and groups
2218

2219
    Note:
2220
        This is the companion function of `gw_utils.apply_sfr_reach_obs()`.
2221

2222
        This function writes "sfr_reach_obs.config" which must be kept in the dir where
2223
        "apply_sfr_reach_obs()" is being called during the forward run
2224

2225
    """
2226
    if seg_reach is None:
8✔
2227
        warnings.warn("Obs will be set up for every reach", PyemuWarning)
8✔
2228
        seg_reach = "all"
8✔
2229
    elif isinstance(seg_reach, list) or isinstance(seg_reach, np.ndarray):
8✔
2230
        if np.ndim(seg_reach) == 1:
8✔
2231
            seg_reach = [seg_reach]
×
2232
        assert (
8✔
2233
            np.shape(seg_reach)[1] == 2
2234
        ), "varible seg_reach expected shape (n,2), received {0}".format(
2235
            np.shape(seg_reach)
2236
        )
2237
        seg_reach = pd.DataFrame(seg_reach, columns=["segment", "reach"])
8✔
2238
        seg_reach.index = seg_reach.apply(
8✔
2239
            lambda x: "s{0:03d}r{1:03d}".format(int(x.segment), int(x.reach)), axis=1
2240
        )
2241
    elif isinstance(seg_reach, dict):
8✔
2242
        seg_reach = pd.DataFrame.from_dict(
8✔
2243
            seg_reach, orient="index", columns=["segment", "reach"]
2244
        )
2245
    else:
2246
        assert isinstance(
8✔
2247
            seg_reach, pd.DataFrame
2248
        ), "'selection needs to be pandas Dataframe. Type {} passed.".format(
2249
            type(seg_reach)
2250
        )
2251
        assert np.all(
8✔
2252
            [sr in seg_reach.columns for sr in ["segment", "reach"]]
2253
        ), "Either 'segment' or 'reach' not in selection columns"
2254

2255
    sfr_dict = load_sfr_out(sfr_out_file, selection=seg_reach)
8✔
2256
    kpers = list(sfr_dict.keys())
8✔
2257
    kpers.sort()
8✔
2258

2259
    if isinstance(seg_reach, str) and seg_reach == "all":
8✔
2260
        seg_reach = sfr_dict[kpers[0]][["segment", "reach"]]
8✔
2261
        seg_reach.index = seg_reach.apply(
8✔
2262
            lambda x: "s{0:03d}r{1:03d}".format(int(x.segment), int(x.reach)), axis=1
2263
        )
2264
    keys = ["sfr_out_file"]
8✔
2265
    if include_path:
8✔
2266
        values = [os.path.split(sfr_out_file)[-1]]
×
2267
    else:
2268
        values = [sfr_out_file]
8✔
2269
    diff = seg_reach.loc[
8✔
2270
        seg_reach.apply(
2271
            lambda x: "{0:03d}_{1:03d}".format(int(x.segment), int(x.reach))
2272
            not in sfr_dict[list(sfr_dict.keys())[0]].index,
2273
            axis=1,
2274
        )
2275
    ]
2276

2277
    if len(diff) > 0:
8✔
2278
        for ob in diff.itertuples():
8✔
2279
            warnings.warn(
8✔
2280
                "segs,reach pair listed with onames {0} was not found: {1}".format(
2281
                    ob.Index, "({},{})".format(ob.segment, ob.reach)
2282
                ),
2283
                PyemuWarning,
2284
            )
2285
    seg_reach = seg_reach.drop(diff.index)
8✔
2286
    seg_reach["obs_base"] = seg_reach.index
8✔
2287
    df_key = pd.DataFrame({"obs_base": keys, "segment": 0, "reach": values})
8✔
2288
    df_key = pd.concat([df_key, seg_reach], sort=True).reset_index(drop=True)
8✔
2289
    if include_path:
8✔
2290
        pth = os.path.join(*[p for p in os.path.split(sfr_out_file)[:-1]])
×
2291
        config_file = os.path.join(pth, "sfr_reach_obs.config")
×
2292
    else:
2293
        config_file = "sfr_reach_obs.config"
8✔
2294
    print("writing 'sfr_reach_obs.config' to {0}".format(config_file))
8✔
2295
    df_key.to_csv(config_file)
8✔
2296

2297
    bd = "."
8✔
2298
    if include_path:
8✔
2299
        bd = os.getcwd()
×
2300
        os.chdir(pth)
×
2301
    try:
8✔
2302
        df = apply_sfr_reach_obs()
8✔
2303
    except Exception as e:
×
2304
        os.chdir(bd)
×
2305
        raise Exception("error in apply_sfr_reach_obs(): {0}".format(str(e)))
×
2306
    os.chdir(bd)
8✔
2307
    if model is not None:
8✔
2308
        dts = (
8✔
2309
            pd.to_datetime(model.start_datetime)
2310
            + pd.to_timedelta(np.cumsum(model.dis.perlen.array), unit="d")
2311
        ).date
2312
        df.loc[:, "datetime"] = df.kper.apply(lambda x: dts[x])
8✔
2313
        df.loc[:, "time_str"] = df.datetime.apply(lambda x: x.strftime("%Y%m%d"))
8✔
2314
    else:
2315
        df.loc[:, "time_str"] = df.kper.apply(lambda x: "{0:04d}".format(x))
8✔
2316
    df.loc[:, "flaqx_obsnme"] = df.apply(
8✔
2317
        lambda x: "{0}_{1}_{2}".format("fa", x.obs_base, x.time_str), axis=1
2318
    )
2319
    df.loc[:, "flout_obsnme"] = df.apply(
8✔
2320
        lambda x: "{0}_{1}_{2}".format("fo", x.obs_base, x.time_str), axis=1
2321
    )
2322

2323
    if ins_file is None:
8✔
2324
        ins_file = sfr_out_file + ".reach_processed.ins"
8✔
2325

2326
    with open(ins_file, "w") as f:
8✔
2327
        f.write("pif ~\nl1\n")
8✔
2328
        for fla, flo in zip(df.flaqx_obsnme, df.flout_obsnme):
8✔
2329
            f.write("l1 w w !{0}! !{1}!\n".format(fla, flo))
8✔
2330

2331
    df = None
8✔
2332
    pth = os.path.split(ins_file)[:-1]
8✔
2333
    pth = os.path.join(*pth)
8✔
2334
    if pth == "":
8✔
2335
        pth = "."
8✔
2336
    bd = os.getcwd()
8✔
2337
    os.chdir(pth)
8✔
2338
    try:
8✔
2339
        df = try_process_output_file(
8✔
2340
            os.path.split(ins_file)[-1], os.path.split(sfr_out_file + ".processed")[-1]
2341
        )
2342

2343
    except Exception as e:
×
2344
        pass
×
2345
    os.chdir(bd)
8✔
2346
    if df is not None:
8✔
2347
        df.loc[:, "obsnme"] = df.index.values
×
2348
        df.loc[:, "obgnme"] = df.obsnme.apply(
×
2349
            lambda x: "flaqx" if x.startswith("fa") else "flout"
2350
        )
2351
        return df
×
2352

2353

2354
def apply_sfr_reach_obs():
9✔
2355
    """apply the sfr reach observation process.
2356

2357
    Returns:
2358
        `pd.DataFrame`: a dataframe of sfr flow-to-aquifer and flow-out values at segment,reach locations
2359

2360
    Note:
2361
        This is the companion function of `gw_utils.setup_sfr_reach_obs()`.
2362

2363
        Requires sfr_reach_obs.config.
2364

2365
        Writes <sfr_out_file>.reach_processed, where <sfr_out_file> is defined in
2366
        "sfr_reach_obs.config"
2367

2368
    """
2369
    assert os.path.exists("sfr_reach_obs.config")
8✔
2370
    df_key = pd.read_csv("sfr_reach_obs.config", index_col=0)
8✔
2371

2372
    assert df_key.iloc[0, 0] == "sfr_out_file", df_key.iloc[0, :]
8✔
2373
    sfr_out_file = df_key.iloc[0].reach
8✔
2374
    df_key = df_key.iloc[1:, :].copy()
8✔
2375
    df_key.loc[:, "segment"] = df_key.segment.apply(np.int64)
8✔
2376
    df_key.loc[:, "reach"] = df_key.reach.apply(np.int64)
8✔
2377
    df_key = df_key.set_index("obs_base")
8✔
2378

2379
    sfr_kper = load_sfr_out(sfr_out_file, df_key)
8✔
2380
    kpers = list(sfr_kper.keys())
8✔
2381
    kpers.sort()
8✔
2382

2383
    results = []
8✔
2384
    for kper in kpers:
8✔
2385
        df = sfr_kper[kper]
8✔
2386
        for sr in df_key.itertuples():
8✔
2387
            ob = df.loc["{0:03d}_{1:03d}".format(sr.segment, sr.reach), :]
8✔
2388
            results.append([kper, sr.Index, ob["flaqx"], ob["flout"]])
8✔
2389
    df = pd.DataFrame(data=results, columns=["kper", "obs_base", "flaqx", "flout"])
8✔
2390
    df.sort_values(by=["kper", "obs_base"], inplace=True)
8✔
2391
    df.to_csv(sfr_out_file + ".reach_processed", sep=" ", index=False)
8✔
2392
    return df
8✔
2393

2394

2395
def modflow_sfr_gag_to_instruction_file(
9✔
2396
    gage_output_file, ins_file=None, parse_filename=False
2397
):
2398
    """writes an instruction file for an SFR gage output file to read Flow only at all times
2399

2400
    Args:
2401
        gage_output_file (`str`): the gage output filename (ASCII).
2402

2403
        ins_file (`str`, optional): the name of the instruction file to
2404
            create.  If None, the name is `gage_output_file` +".ins".
2405
            Default is None
2406

2407
        parse_filename (`bool`): if True, get the gage_num parameter by
2408
            parsing the gage output file filename if False, get the gage
2409
            number from the file itself
2410

2411
    Returns:
2412
        tuple containing
2413

2414
        - **pandas.DataFrame**: a dataframe with obsnme and obsval for the sfr simulated flows.
2415
        - **str**: file name of instructions file relating to gage output.
2416
        - **str**: file name of processed gage output for all times
2417

2418
    Note:
2419
        Sets up observations for gage outputs only for the Flow column.
2420

2421
        If `parse_filename` is True, only the text up to the first '.' is used as the gage_num
2422

2423

2424
    """
2425

2426
    if ins_file is None:
×
2427
        ins_file = gage_output_file + ".ins"
×
2428

2429
    # navigate the file to be sure the header makes sense
2430
    indat = [line.strip() for line in open(gage_output_file, "r").readlines()]
×
2431
    header = [i for i in indat if i.startswith('"')]
×
2432
    # yank out the gage number to identify the observation names
2433
    if parse_filename:
×
2434
        gage_num = os.path.basename(gage_output_file).split(".")[0]
×
2435
    else:
2436
        gage_num = re.sub(
×
2437
            "[^0-9]", "", indat[0].lower().split("gage no.")[-1].strip().split()[0]
2438
        )
2439

2440
    # get the column names
2441
    cols = (
×
2442
        [i.lower() for i in header if "data" in i.lower()][0]
2443
        .lower()
2444
        .replace('"', "")
2445
        .replace("data:", "")
2446
        .split()
2447
    )
2448

2449
    # make sure "Flow" is included in the columns
2450
    if "flow" not in cols:
×
2451
        raise Exception('Requested field "Flow" not in gage output columns')
×
2452

2453
    # find which column is for  "Flow"
2454
    flowidx = np.where(np.array(cols) == "flow")[0][0]
×
2455

2456
    # write out the instruction file lines
2457
    inslines = [
×
2458
        "l1 " + (flowidx + 1) * "w " + "!g{0}_{1:d}!".format(gage_num, j)
2459
        for j in range(len(indat) - len(header))
2460
    ]
2461
    inslines[0] = inslines[0].replace("l1", "l{0:d}".format(len(header) + 1))
×
2462

2463
    # write the instruction file
2464
    with open(ins_file, "w") as ofp:
×
2465
        ofp.write("pif ~\n")
×
2466
        [ofp.write("{0}\n".format(line)) for line in inslines]
×
2467

2468
    df = try_process_output_file(ins_file, gage_output_file)
×
2469
    return df, ins_file, gage_output_file
×
2470

2471

2472
def setup_gage_obs(gage_file, ins_file=None, start_datetime=None, times=None):
9✔
2473
    """setup a forward run post processor routine for the modflow gage file
2474

2475
    Args:
2476
        gage_file (`str`): the gage output file (ASCII)
2477
        ins_file (`str`, optional): the name of the instruction file to create.  If None, the name
2478
            is `gage_file`+".processed.ins".  Default is `None`
2479
        start_datetime (`str`): a `pandas.to_datetime()` compatible `str`.  If not `None`,
2480
            then the resulting observation names have the datetime suffix.  If `None`,
2481
            the suffix is the output totim.  Default is `None`.
2482
        times ([`float`]):  a container of times to make observations for.  If None,
2483
            all times are used. Default is None.
2484

2485
    Returns:
2486
        tuple containing
2487

2488
        - **pandas.DataFrame**: a dataframe with observation name and simulated values for the
2489
          values in the gage file.
2490
        - **str**: file name of instructions file that was created relating to gage output.
2491
        - **str**: file name of processed gage output (processed according to times passed above.)
2492

2493

2494
    Note:
2495
         Sets up observations for gage outputs (all columns).
2496

2497
         This is the companion function of `gw_utils.apply_gage_obs()`
2498
    """
2499

2500
    with open(gage_file, "r") as f:
8✔
2501
        line1 = f.readline()
8✔
2502
        gage_num = int(
8✔
2503
            re.sub("[^0-9]", "", line1.split("GAGE No.")[-1].strip().split()[0])
2504
        )
2505
        gage_type = line1.split("GAGE No.")[-1].strip().split()[1].lower()
8✔
2506
        obj_num = int(line1.replace('"', "").strip().split()[-1])
8✔
2507
        line2 = f.readline()
8✔
2508
        df = pd.read_csv(
8✔
2509
            f, delim_whitespace=True, names=line2.replace('"', "").split()[1:]
2510
        )
2511

2512
    df.columns = [
8✔
2513
        c.lower().replace("-", "_").replace(".", "_").strip("_") for c in df.columns
2514
    ]
2515
    # get unique observation ids
2516
    obs_ids = {
8✔
2517
        col: "" for col in df.columns[1:]
2518
    }  # empty dictionary for observation ids
2519
    for col in df.columns[1:]:  # exclude column 1 (TIME)
8✔
2520
        colspl = col.split("_")
8✔
2521
        if len(colspl) > 1:
8✔
2522
            # obs name built from "g" (for gage), "s" or "l" (for gage type), and 2 chars from the column name - date added later
2523
            obs_ids[col] = "g{0}{1}{2}".format(
8✔
2524
                gage_type[0], colspl[0][0], colspl[-1][0]
2525
            )
2526
        else:
2527
            obs_ids[col] = "g{0}{1}".format(gage_type[0], col[0:2])
8✔
2528
    with open(
8✔
2529
        "_gage_obs_ids.csv", "w"
2530
    ) as f:  # write file relating obs names to meaningful keys!
2531
        [f.write("{0},{1}\n".format(key, obs)) for key, obs in obs_ids.items()]
8✔
2532
    # find passed times in df
2533
    if times is None:
8✔
2534
        times = df.time.unique()
8✔
2535
    missing = []
8✔
2536
    utimes = df.time.unique()
8✔
2537
    for t in times:
8✔
2538
        if not np.isclose(t, utimes).any():
8✔
2539
            missing.append(str(t))
×
2540
    if len(missing) > 0:
8✔
2541
        print(df.time)
×
2542
        raise Exception("the following times are missing:{0}".format(",".join(missing)))
×
2543
    # write output times to config file
2544
    with open("gage_obs.config", "w") as f:
8✔
2545
        f.write(gage_file + "\n")
8✔
2546
        [f.write("{0:15.10E}\n".format(t)) for t in times]
8✔
2547
    # extract data for times: returns dataframe and saves a processed df - read by pest
2548
    df, obs_file = apply_gage_obs(return_obs_file=True)
8✔
2549
    utimes = df.time.unique()
8✔
2550
    for t in times:
8✔
2551
        assert np.isclose(
8✔
2552
            t, utimes
2553
        ).any(), "time {0} missing in processed dataframe".format(t)
2554
    idx = df.time.apply(
8✔
2555
        lambda x: np.isclose(x, times).any()
2556
    )  # boolean selector of desired times in df
2557
    if start_datetime is not None:
8✔
2558
        # convert times to usable observation times
2559
        start_datetime = pd.to_datetime(start_datetime)
8✔
2560
        df["time_str"] = pd.to_timedelta(df.time, unit="d") + start_datetime
8✔
2561
        df["time_str"] = df.time_str.dt.strftime("%Y%m%d")
8✔
2562
    else:
2563
        df["time_str"] = df.time.apply(lambda x: f"{x:08.2f}")
×
2564
    # set up instructions (line feed for lines without obs (not in time)
2565
    df["ins_str"] = "l1\n"
8✔
2566
    df_times = df.loc[idx, :]  # Slice by desired times
8✔
2567
    # TODO include GAGE No. in obs name (if permissible)
2568
    df.loc[df_times.index, "ins_str"] = df_times.apply(
8✔
2569
        lambda x: "l1 w {}\n".format(
2570
            " w ".join(
2571
                ["!{0}{1}!".format(obs, x.time_str) for key, obs in obs_ids.items()]
2572
            )
2573
        ),
2574
        axis=1,
2575
    )
2576
    df.index = np.arange(df.shape[0])
8✔
2577
    if ins_file is None:
8✔
2578
        ins_file = gage_file + ".processed.ins"
8✔
2579

2580
    with open(ins_file, "w") as f:
8✔
2581
        f.write("pif ~\nl1\n")
8✔
2582
        [f.write(i) for i in df.ins_str]
8✔
2583
    df = try_process_output_file(ins_file, gage_file + ".processed")
8✔
2584
    return df, ins_file, obs_file
8✔
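
A minimal usage sketch of `setup_gage_obs` (editor's addition, not pyemu source); the gage file name, start date, and times below are hypothetical, and the keyword arguments mirror the parameters used in the body above.

from pyemu.utils import gw_utils

# hypothetical gage output file and observation times (days since start_datetime)
df, ins_file, obs_file = gw_utils.setup_gage_obs(
    "my_gage.go",
    ins_file=None,              # defaults to "my_gage.go.processed.ins"
    start_datetime="1-1-2020",  # obs names get a YYYYMMDD suffix instead of raw times
    times=[365.0, 730.0],
)
# side effects in the cwd:
#   _gage_obs_ids.csv     maps gage-file columns to short obs-name prefixes
#   gage_obs.config       gage file name, then one requested time per line
#   my_gage.go.processed  the time-sliced table that the instruction file reads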
2585

2586

2587
def apply_gage_obs(return_obs_file=False):
9✔
2588
    """apply the modflow gage obs post-processor
2589

2590
    Args:
2591
        return_obs_file (`bool`): flag to return the processed
2592
            observation file.  Default is `False`.
2593

2594
    Note:
2595
        This is the companion function of `gw_utils.setup_gage_obs()`
2596

2597
    Returns:
2598
        **pandas.DataFrame**: the processed gage data (and the processed obs file name if `return_obs_file` is `True`)
2599
    """
2600
    times = []
8✔
2601
    with open("gage_obs.config") as f:
8✔
2602
        gage_file = f.readline().strip()
8✔
2603
        for line in f:
8✔
2604
            times.append(float(line.strip()))
8✔
2605
    obs_file = gage_file + ".processed"
8✔
2606
    with open(gage_file, "r") as f:
8✔
2607
        line1 = f.readline()
8✔
2608
        gage_num = int(
8✔
2609
            re.sub("[^0-9]", "", line1.split("GAGE No.")[-1].strip().split()[0])
2610
        )
2611
        gage_type = line1.split("GAGE No.")[-1].strip().split()[1].lower()
8✔
2612
        obj_num = int(line1.replace('"', "").strip().split()[-1])
8✔
2613
        line2 = f.readline()
8✔
2614
        df = pd.read_csv(
8✔
2615
            f, delim_whitespace=True, names=line2.replace('"', "").split()[1:]
2616
        )
2617
    df.columns = [c.lower().replace("-", "_").replace(".", "_") for c in df.columns]
8✔
2618
    df = df.loc[df.time.apply(lambda x: np.isclose(x, times).any()), :]
8✔
2619
    df.to_csv(obs_file, sep=" ", index=False)
8✔
2620
    if return_obs_file:
8✔
2621
        return df, obs_file
8✔
2622
    else:
2623
        return df
8✔
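
A sketch of the companion call in a forward run script (editor's addition, not pyemu source): `apply_gage_obs` re-reads `gage_obs.config` from the run directory, so no arguments are needed beyond the optional flag.

from pyemu.utils import gw_utils

# called after the model run so the freshly written gage file gets re-processed;
# rewrites <gage_file>.processed, which the PEST instruction file then reads
processed_df = gw_utils.apply_gage_obs()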
2624

2625

2626
def apply_hfb_pars(par_file="hfb6_pars.csv"):
9✔
2627
    """a function to apply HFB multiplier parameters.
2628

2629
    Args:
2630
        par_file (`str`): the HFB parameter info file.
2631
            Default is `hfb6_pars.csv`
2632

2633
    Note:
2634
        This is the companion function to
2635
        `gw_utils.write_hfb_zone_multipliers_template()`
2636

2637
        This accounts for the horrible HFB6 format, which differs from other
2638
        BCs and makes this a special case
2639

2640
        Requires "hfb6_pars.csv"
2641

2642
        Should be added to the forward_run.py script
2643
    """
2644
    hfb_pars = pd.read_csv(par_file)
8✔
2645

2646
    hfb_mults_contents = open(hfb_pars.mlt_file.values[0], "r").readlines()
8✔
2647
    skiprows = (
8✔
2648
        sum([1 if i.strip().startswith("#") else 0 for i in hfb_mults_contents]) + 1
2649
    )
2650
    header = hfb_mults_contents[:skiprows]
8✔
2651

2652
    # read in the multipliers
2653
    names = ["lay", "irow1", "icol1", "irow2", "icol2", "hydchr"]
8✔
2654
    hfb_mults = pd.read_csv(
8✔
2655
        hfb_pars.mlt_file.values[0],
2656
        skiprows=skiprows,
2657
        delim_whitespace=True,
2658
        names=names,
2659
    ).dropna()
2660

2661
    # read in the original file
2662
    hfb_org = pd.read_csv(
8✔
2663
        hfb_pars.org_file.values[0],
2664
        skiprows=skiprows,
2665
        delim_whitespace=True,
2666
        names=names,
2667
    ).dropna()
2668

2669
    # multiply it out
2670
    hfb_org.hydchr *= hfb_mults.hydchr
8✔
2671

2672
    for cn in names[:-1]:
8✔
2673
        hfb_mults[cn] = hfb_mults[cn].astype(np.int64)
8✔
2674
        hfb_org[cn] = hfb_org[cn].astype(np.int64)
8✔
2675
    # write the results
2676
    with open(hfb_pars.model_file.values[0], "w", newline="") as ofp:
8✔
2677
        [ofp.write("{0}\n".format(line.strip())) for line in header]
8✔
2678
        ofp.flush()
8✔
2679
        hfb_org[["lay", "irow1", "icol1", "irow2", "icol2", "hydchr"]].to_csv(
8✔
2680
            ofp, sep=" ", header=None, index=None
2681
        )
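
A sketch of the matching forward-run call (editor's addition, not pyemu source), assuming the default `hfb6_pars.csv` lookup written by `write_hfb_zone_multipliers_template()` below.

from pyemu.utils import gw_utils

# multiply the original hydchr values by the PEST-adjusted multipliers and
# rewrite the model HFB6 file; must run before MODFLOW in forward_run.py
gw_utils.apply_hfb_pars(par_file="hfb6_pars.csv")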
2682

2683

2684
def write_hfb_zone_multipliers_template(m):
9✔
2685
    """write a template file for an hfb using multipliers per zone (double yuck!)
2686

2687
    Args:
2688
        m (`flopy.modflow.Modflow`): a model instance with an HFB package
2689

2690
    Returns:
2691
        tuple containing
2692

2693
        - **dict**: a dictionary with original unique HFB conductivity values and their
2694
          corresponding parameter names
2695
        - **str**: the template filename that was created
2696

2697
    """
2698
    if m.hfb6 is None:
8✔
2699
        raise Exception("no HFB package found")
×
2700
    # find the model file
2701
    hfb_file = os.path.join(m.model_ws, m.hfb6.file_name[0])
8✔
2702

2703
    # this will use multipliers, so need to copy down the original
2704
    if not os.path.exists(os.path.join(m.model_ws, "hfb6_org")):
8✔
2705
        os.mkdir(os.path.join(m.model_ws, "hfb6_org"))
8✔
2706
    # copy down the original file
2707
    shutil.copy2(
8✔
2708
        os.path.join(m.model_ws, m.hfb6.file_name[0]),
2709
        os.path.join(m.model_ws, "hfb6_org", m.hfb6.file_name[0]),
2710
    )
2711

2712
    if not os.path.exists(os.path.join(m.model_ws, "hfb6_mlt")):
8✔
2713
        os.mkdir(os.path.join(m.model_ws, "hfb6_mlt"))
8✔
2714

2715
    # read in the model file
2716
    hfb_file_contents = open(hfb_file, "r").readlines()
8✔
2717

2718
    # navigate the header
2719
    skiprows = (
8✔
2720
        sum([1 if i.strip().startswith("#") else 0 for i in hfb_file_contents]) + 1
2721
    )
2722
    header = hfb_file_contents[:skiprows]
8✔
2723

2724
    # read in the data
2725
    names = ["lay", "irow1", "icol1", "irow2", "icol2", "hydchr"]
8✔
2726
    hfb_in = pd.read_csv(
8✔
2727
        hfb_file, skiprows=skiprows, delim_whitespace=True, names=names
2728
    ).dropna()
2729
    for cn in names[:-1]:
8✔
2730
        hfb_in[cn] = hfb_in[cn].astype(np.int64)
8✔
2731

2732
    # set up a multiplier for each unique conductivity value
2733
    unique_cond = hfb_in.hydchr.unique()
8✔
2734
    hfb_mults = dict(
8✔
2735
        zip(unique_cond, ["hbz_{0:04d}".format(i) for i in range(len(unique_cond))])
2736
    )
2737
    # set up the TPL line for each parameter and assign
2738
    hfb_in["tpl"] = "blank"
8✔
2739
    for cn, cg in hfb_in.groupby("hydchr"):
8✔
2740
        hfb_in.loc[hfb_in.hydchr == cn, "tpl"] = "~{0:^10s}~".format(hfb_mults[cn])
8✔
2741

2742
    assert "blank" not in hfb_in.tpl
8✔
2743

2744
    # write out the TPL file
2745
    tpl_file = os.path.join(m.model_ws, "hfb6.mlt.tpl")
8✔
2746
    with open(tpl_file, "w", newline="") as ofp:
8✔
2747
        ofp.write("ptf ~\n")
8✔
2748
        [ofp.write("{0}\n".format(line.strip())) for line in header]
8✔
2749
        ofp.flush()
8✔
2750
        hfb_in[["lay", "irow1", "icol1", "irow2", "icol2", "tpl"]].to_csv(
8✔
2751
            ofp, sep=" ", quotechar=" ", header=None, index=None, mode="a"
2752
        )
2753

2754
    # make a lookup for lining up the necessary files to
2755
    # perform multiplication with the helpers.apply_hfb_pars() function
2756
    # which must be added to the forward run script
2757
    with open(os.path.join(m.model_ws, "hfb6_pars.csv"), "w") as ofp:
8✔
2758
        ofp.write("org_file,mlt_file,model_file\n")
8✔
2759
        ofp.write(
8✔
2760
            "{0},{1},{2}\n".format(
2761
                os.path.join(m.model_ws, "hfb6_org", m.hfb6.file_name[0]),
2762
                os.path.join(
2763
                    m.model_ws,
2764
                    "hfb6_mlt",
2765
                    os.path.basename(tpl_file).replace(".tpl", ""),
2766
                ),
2767
                hfb_file,
2768
            )
2769
        )
2770

2771
    return hfb_mults, tpl_file
8✔
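
A usage sketch for the zone-multiplier template writer (editor's addition; the model name and workspace are hypothetical).

import flopy
from pyemu.utils import gw_utils

m = flopy.modflow.Modflow.load("model.nam", model_ws="template", check=False)
hfb_mults, tpl_file = gw_utils.write_hfb_zone_multipliers_template(m)
# hfb_mults maps each unique conductivity value to a parameter name like "hbz_0000";
# tpl_file ("hfb6.mlt.tpl") is added to the control file with the other templates,
# and apply_hfb_pars() goes in forward_run.py to do the multiplication at run time.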
2772

2773

2774
def write_hfb_template(m):
9✔
2775
    """write a template file for an hfb (yuck!)
2776

2777
    Args:
2778
         m (`flopy.modflow.Modflow`): a model instance with an HFB package
2779

2780
     Returns:
2781
         tuple containing
2782

2783
         - **str**: name of the template file that was created
2784

2785
         - **pandas.DataFrame**: a dataframe with control file info for the
2786
           HFB parameters
2787

2788
    """
2789

2790
    assert m.hfb6 is not None
8✔
2791
    hfb_file = os.path.join(m.model_ws, m.hfb6.file_name[0])
8✔
2792
    assert os.path.exists(hfb_file), "couldn't find hfb_file {0}".format(hfb_file)
8✔
2793
    f_in = open(hfb_file, "r")
8✔
2794
    tpl_file = hfb_file + ".tpl"
8✔
2795
    f_tpl = open(tpl_file, "w")
8✔
2796
    f_tpl.write("ptf ~\n")
8✔
2797
    parnme, parval1, xs, ys = [], [], [], []
8✔
2798
    iis, jjs, kks = [], [], []
8✔
2799
    try:
8✔
2800
        xc = m.sr.xcentergrid
8✔
2801
        yc = m.sr.ycentergrid
×
2802
    except AttributeError:
8✔
2803
        xc = m.modelgrid.xcellcenters
8✔
2804
        yc = m.modelgrid.ycellcenters
8✔
2805

2806
    while True:
4✔
2807
        line = f_in.readline()
8✔
2808
        if line == "":
8✔
2809
            break
×
2810
        f_tpl.write(line)
8✔
2811
        if not line.startswith("#"):
8✔
2812
            raw = line.strip().split()
8✔
2813
            nphfb = int(raw[0])
8✔
2814
            mxfb = int(raw[1])
8✔
2815
            nhfbnp = int(raw[2])
8✔
2816
            if nphfb > 0 or mxfb > 0:
8✔
2817
                raise Exception("not supporting terrible HFB pars")
×
2818
            for i in range(nhfbnp):
8✔
2819
                line = f_in.readline()
8✔
2820
                if line == "":
8✔
2821
                    raise Exception("EOF")
×
2822
                raw = line.strip().split()
8✔
2823
                k = int(raw[0]) - 1
8✔
2824
                i = int(raw[1]) - 1
8✔
2825
                j = int(raw[2]) - 1
8✔
2826
                pn = "hb{0:02}{1:04d}{2:04}".format(k, i, j)
8✔
2827
                pv = float(raw[5])
8✔
2828
                raw[5] = "~ {0}  ~".format(pn)
8✔
2829
                line = " ".join(raw) + "\n"
8✔
2830
                f_tpl.write(line)
8✔
2831
                parnme.append(pn)
8✔
2832
                parval1.append(pv)
8✔
2833
                xs.append(xc[i, j])
8✔
2834
                ys.append(yc[i, j])
8✔
2835
                iis.append(i)
8✔
2836
                jjs.append(j)
8✔
2837
                kks.append(k)
8✔
2838

2839
            break
8✔
2840

2841
    f_tpl.close()
8✔
2842
    f_in.close()
8✔
2843
    df = pd.DataFrame(
8✔
2844
        {
2845
            "parnme": parnme,
2846
            "parval1": parval1,
2847
            "x": xs,
2848
            "y": ys,
2849
            "i": iis,
2850
            "j": jjs,
2851
            "k": kks,
2852
        },
2853
        index=parnme,
2854
    )
2855
    df.loc[:, "pargp"] = "hfb_hydfac"
8✔
2856
    df.loc[:, "parubnd"] = df.parval1.max() * 10.0
8✔
2857
    df.loc[:, "parlbnd"] = df.parval1.min() * 0.1
8✔
2858
    return tpl_file, df
8✔
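
A sketch of the direct, one-parameter-per-barrier-cell variant (editor's addition; `m` is the same hypothetical flopy model as in the sketch above).

from pyemu.utils import gw_utils

tpl_file, hfb_df = gw_utils.write_hfb_template(m)
# hfb_df already carries parnme, parval1, pargp ("hfb_hydfac") and rough bounds,
# so it can be used to seed the parameter data section of a control file
print(hfb_df[["parnme", "parval1", "parlbnd", "parubnd", "pargp"]].head())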
2859

2860

2861
class GsfReader:
9✔
2862
    """
9✔
2863
    a helper class to read a standard modflow-usg gsf file
2864

2865
    Args:
2866
        gsffilename (`str`): filename
2867

2868

2869

2870
    """
2871

2872
    def __init__(self, gsffilename):
9✔
2873

2874
        with open(gsffilename, "r") as f:
8✔
2875
            self.read_data = f.readlines()
8✔
2876

2877
        self.nnode, self.nlay, self.iz, self.ic = [
8✔
2878
            int(n) for n in self.read_data[1].split()
2879
        ]
2880

2881
        self.nvertex = int(self.read_data[2])
8✔
2882

2883
    def get_vertex_coordinates(self):
9✔
2884
        """
2885

2886

2887
        Returns:
2888
            Dictionary containing list of x, y and z coordinates for each vertex
2889
        """
2890
        # vdata = self.read_data[3:self.nvertex+3]
2891
        vertex_coords = {}
×
2892
        for vert in range(self.nvertex):
×
2893
            x, y, z = self.read_data[3 + vert].split()
×
2894
            vertex_coords[vert + 1] = [float(x), float(y), float(z)]
×
2895
        return vertex_coords
×
2896

2897
    def get_node_data(self):
9✔
2898
        """
2899

2900
        Returns:
2901
            nodedf: a pd.DataFrame of node information with columns node, x, y, z, layer, numverts, vertidx
2902

2903
        """
2904

2905
        node_data = []
8✔
2906
        for node in range(self.nnode):
8✔
2907
            nid, x, y, z, lay, numverts = self.read_data[
8✔
2908
                self.nvertex + 3 + node
2909
            ].split()[:6]
2910

2911
            # vertidx = {'ivertex': [int(n) for n in self.read_data[self.nvertex+3 + node].split()[6:]]}
2912
            vertidx = [
8✔
2913
                int(n) for n in self.read_data[self.nvertex + 3 + node].split()[6:]
2914
            ]
2915

2916
            node_data.append(
8✔
2917
                [
2918
                    int(nid),
2919
                    float(x),
2920
                    float(y),
2921
                    float(z),
2922
                    int(lay),
2923
                    int(numverts),
2924
                    vertidx,
2925
                ]
2926
            )
2927

2928
        nodedf = pd.DataFrame(
8✔
2929
            node_data, columns=["node", "x", "y", "z", "layer", "numverts", "vertidx"]
2930
        )
2931
        return nodedf
8✔
2932

2933
    def get_node_coordinates(self, zcoord=False, zero_based=False):
9✔
2934
        """
2935
        Args:
2936
            zcoord (`bool`): flag to add z coord to coordinates.  Default is False
2937
            zero_based (`bool`): flag to subtract one from the node numbers in the returned
2938
                node_coords dict.  This is needed to support PstFrom.  Default is False
2939

2940

2941
        Returns:
2942
            node_coords: Dictionary containing x and y coordinates for each node
2943
        """
2944
        node_coords = {}
8✔
2945
        for node in range(self.nnode):
8✔
2946
            nid, x, y, z, lay, numverts = self.read_data[
8✔
2947
                self.nvertex + 3 + node
2948
            ].split()[:6]
2949
            nid = int(nid)
8✔
2950
            if zero_based:
8✔
2951
                nid -= 1
8✔
2952
            node_coords[nid] = [float(x), float(y)]
8✔
2953
            if zcoord:
8✔
2954
                node_coords[nid] += [float(z)]
×
2955

2956
        return node_coords
8✔
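
A short sketch of reading a MODFLOW-USG grid specification file with `GsfReader` (editor's addition; the .gsf file name is hypothetical).

from pyemu.utils.gw_utils import GsfReader

gsf = GsfReader("model.gsf")
print(gsf.nnode, gsf.nlay, gsf.nvertex)
node_df = gsf.get_node_data()                        # node, x, y, z, layer, numverts, vertidx
coords = gsf.get_node_coordinates(zero_based=True)   # {node - 1: [x, y]}, as needed by PstFrom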