• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

pypest / pyemu / 5887625428

17 Aug 2023 06:23AM UTC coverage: 79.857% (+1.5%) from 78.319%
5887625428

push

github

briochh
Merge branch 'develop'

11386 of 14258 relevant lines covered (79.86%)

6.77 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

85.22
/pyemu/en.py
1
import os
9✔
2
import copy
9✔
3
import warnings
9✔
4
import numpy as np
9✔
5
import pandas as pd
9✔
6

7
import pyemu
9✔
8
from .pyemu_warnings import PyemuWarning
9✔
9

10
SEED = 358183147  # from random.org on 5 Dec 2016
9✔
11
np.random.seed(SEED)
9✔
12

13

14
class Loc(object):
9✔
15
    """thin wrapper around `pandas.DataFrame.loc` to make sure returned type
9✔
16
    is `Ensemble` (instead of `pandas.DataFrame)`
17

18
    Args:
19
        ensemble (`pyemu.Ensemble`): an ensemble instance
20

21
    Note:
22
        Users do not need to mess with this class - it is added
23
        to each `Ensemble` instance
24

25
    """
26

27
    def __init__(self, ensemble):
9✔
28
        self._ensemble = ensemble
9✔
29

30
    def __getitem__(self, item):
9✔
31
        return type(self._ensemble)(
9✔
32
            self._ensemble.pst,
33
            self._ensemble._df.loc[item],
34
            istransformed=self._ensemble.istransformed,
35
        )
36

37
    def __setitem__(self, idx, value):
9✔
38
        self._ensemble._df.loc[idx] = value
8✔
39

40

41
class Iloc(object):
9✔
42
    """thin wrapper around `pandas.DataFrame.iloc` to make sure returned type
9✔
43
    is `Ensemble` (instead of `pandas.DataFrame)`
44

45
    Args:
46
        ensemble (`pyemu.Ensemble`): an ensemble instance
47

48
    Note:
49
        Users do not need to mess with this class - it is added
50
        to each `Ensemble` instance
51

52
    """
53

54
    def __init__(self, ensemble):
9✔
55
        self._ensemble = ensemble
9✔
56

57
    def __getitem__(self, item):
9✔
58
        return type(self._ensemble)(
9✔
59
            self._ensemble.pst,
60
            self._ensemble._df.iloc[item],
61
            istransformed=self._ensemble.istransformed,
62
        )
63

64
    def __setitem__(self, idx, value):
9✔
65
        self._ensemble._df.iloc[idx] = value
8✔
66

67

68
class Ensemble(object):
9✔
69
    """based class for handling ensembles of numeric values
9✔
70

71
    Args:
72
        pst (`pyemu.Pst`): a control file instance
73
        df (`pandas.DataFrame`): a pandas dataframe.  Columns
74
            should be parameter/observation names.  Index is
75
            treated as realization names
76
        istransformed (`bool`): flag to indicate parameter values
77
            are in log space.  Not used for `ObservationEnsemble`
78

79
    Example::
80

81
        pst = pyemu.Pst("my.pst")
82
        pe = pyemu.ParameterEnsemble.from_gaussian_draw(pst)
83

84
    """
85

86
    def __init__(self, pst, df, istransformed=False):
9✔
87
        self._df = df
9✔
88
        """`pandas.DataFrame`: the underlying dataframe that stores the realized values"""
4✔
89
        self.pst = pst
9✔
90
        """`pyemu.Pst`: control file instance"""
4✔
91
        self._istransformed = istransformed
9✔
92
        self.loc = Loc(self)
9✔
93
        self.iloc = Iloc(self)
9✔
94

95
    def __repr__(self):
9✔
96
        return self._df.__repr__()
×
97

98
    def __str__(self):
9✔
99
        return self._df.__str__()
1✔
100

101
    def __sub__(self, other):
9✔
102
        try:
8✔
103
            return self._df - other
8✔
104
        except:
8✔
105
            return self._df - other._df
8✔
106

107
    def __mul__(self, other):
9✔
108
        try:
8✔
109
            return self._df * other
8✔
110
        except:
×
111
            return self._df * other._df
×
112

113
    def __truediv__(self, other):
9✔
114
        try:
8✔
115
            return self._df / other
8✔
116
        except:
×
117
            return self._df / other._df
×
118

119
    def __add__(self, other):
9✔
120
        try:
8✔
121
            return self._df + other
8✔
122
        except:
×
123
            return self._df + other._df
×
124

125
    def __pow__(self, pow):
9✔
126
        return self._df ** pow
×
127

128
    @staticmethod
9✔
129
    def reseed():
9✔
130
        """reset the `numpy.random.seed`
131

132
        Note:
133
            reseeds using the pyemu.en.SEED global variable
134

135
            The pyemu.en.SEED value is set as the numpy.random.seed on import, so
136
            make sure you know what you are doing if you call this method...
137

138
        """
139
        np.random.seed(SEED)
8✔
140

141
    def copy(self):
9✔
142
        """get a copy of `Ensemble`
143

144
        Returns:
145
            `Ensemble`: copy of this `Ensemble`
146

147
        Note:
148
            copies both `Ensemble.pst` and `Ensemble._df`: can be expensive
149

150
        """
151
        return type(self)(
8✔
152
            pst=self.pst.get(), df=self._df.copy(), istransformed=self.istransformed
153
        )
154

155
    @property
9✔
156
    def istransformed(self):
9✔
157
        """the parameter transformation status
158

159
        Returns:
160
            `bool`: flag to indicate whether or not the `ParameterEnsemble` is
161
            transformed with respect to log_{10}.  Not used for (and has no effect
162
            on) `ObservationEnsemble`.
163

164
        Note:
165
            parameter transformation status is only related to log_{10} and does not
166
            include the effects of `scale` and/or `offset`
167

168
        """
169
        return copy.deepcopy(self._istransformed)
9✔
170

171
    def transform(self):
9✔
172
        """transform parameters with respect to `partrans` value.
173

174
        Note:
175
            operates in place (None is returned).
176

177
            Parameter transform is only related to log_{10} and does not
178
            include the effects of `scale` and/or `offset`
179

180
            `Ensemble.transform() is only provided for inheritance purposes.
181
            It only changes the `Ensemble._transformed` flag
182

183
        """
184
        if self.istransformed:
8✔
185
            return
×
186
        self._transformed = True
8✔
187
        return
8✔
188

189
    def back_transform(self):
9✔
190
        """back transform parameters with respect to `partrans` value.
191

192
        Note:
193
            operates in place (None is returned).
194

195
            Parameter transform is only related to log_{10} and does not
196
            include the effects of `scale` and/or `offset`
197

198
            `Ensemble.back_transform() is only provided for inheritance purposes.
199
            It only changes the `Ensemble._transformed` flag
200

201
        """
202
        if not self.istransformed:
8✔
203
            return
8✔
204
        self._transformed = False
×
205
        return
×
206

207
    def __getattr__(self, item):
9✔
208
        if item == "loc":
9✔
209
            return self.loc[item]
×
210
        elif item == "iloc":
9✔
211
            return self.iloc[item]
×
212
        elif item == "index":
9✔
213
            return self._df.index
8✔
214
        elif item == "columns":
9✔
215
            return self._df.columns
9✔
216
        elif item in set(dir(self)):
9✔
217
            return getattr(self, item)
×
218
        elif item in set(dir(self._df)):
9✔
219
            lhs = self._df.__getattr__(item)
9✔
220
            if type(lhs) == type(self._df):
9✔
221
                return type(self)(
8✔
222
                    pst=self.pst, df=lhs, istransformed=self.istransformed
223
                )
224
            elif "DataFrame" in str(lhs):
9✔
225
                warnings.warn(
8✔
226
                    "return type uncaught, losing Ensemble type, returning DataFrame",
227
                    PyemuWarning,
228
                )
229
                print("return type uncaught, losing Ensemble type, returning DataFrame")
8✔
230
                return lhs
8✔
231
            else:
232
                return lhs
9✔
233
        else:
234
            raise AttributeError(
8✔
235
                "Ensemble error: the following item was not"
236
                + "found in Ensemble or DataFrame attributes:{0}".format(item)
237
            )
238
        return
239

240
        # def plot(self,*args,**kwargs):
241
        # self._df.plot(*args,**kwargs)
242

243
    def plot(
9✔
244
        self,
245
        bins=10,
246
        facecolor="0.5",
247
        plot_cols=None,
248
        filename="ensemble.pdf",
249
        func_dict=None,
250
        **kwargs
251
    ):
252
        """plot ensemble histograms to multipage pdf
253

254
        Args:
255
            bins (`int`): number of bins for the histograms
256
            facecolor (`str`): matplotlib color (e.g. `r`,`g`, etc)
257
            plot_cols ([`str`]): list of subset of ensemble columns to plot.
258
                If None, all are plotted. Default is None
259
            filename (`str`): multipage pdf filename. Default is "ensemble.pdf"
260
            func_dict (`dict`): a dict of functions to apply to specific
261
                columns. For example: {"par1": np.log10}
262
            **kwargs (`dict`): addkeyword args to pass to `pyemu.plot_utils.ensemble_helper()`
263

264

265
        Example::
266

267
            pst = pyemu.Pst("my.pst")
268
            pe = pyemu.ParameterEnsemble.from_gaussian_draw(pst)
269
            pe.transform() # plot log space (if needed)
270
            pe.plot(bins=30)
271

272
        """
273
        pyemu.plot_utils.ensemble_helper(
8✔
274
            self, bins=bins, facecolor=facecolor, plot_cols=plot_cols, filename=filename
275
        )
276

277
    @classmethod
9✔
278
    def from_binary(cls, pst, filename):
9✔
279
        """create an `Ensemble` from a PEST-style binary file
280

281
        Args:
282
            pst (`pyemu.Pst`): a control file instance
283
            filename (`str`): filename containing binary ensemble
284

285
        Returns:
286
            `Ensemble`: the ensemble loaded from the binary file
287

288
        Example::
289

290
            pst = pyemu.Pst("my.pst")
291
            oe = pyemu.ObservationEnsemble.from_binary("obs.jcb")
292

293

294
        """
295

296
        df = pyemu.Matrix.from_binary(filename).to_dataframe()
8✔
297
        return cls(pst=pst, df=df)
8✔
298

299
    @classmethod
9✔
300
    def from_csv(cls, pst, filename, *args, **kwargs):
9✔
301
        """create an `Ensemble` from a CSV file
302

303
        Args:
304
            pst (`pyemu.Pst`): a control file instance
305
            filename (`str`): filename containing CSV ensemble
306
            *args ([`object`]: positional arguments to pass to
307
                `pandas.read_csv()`.
308
            **kwargs ({`str`:`object`}): keyword arguments to pass
309
                to `pandas.read_csv()`.
310
        Returns:
311
            `Ensemble`
312
        Note:
313
            uses `pandas.read_csv()` to load numeric values from
314
            CSV file
315

316
        Example::
317

318
            pst = pyemu.Pst("my.pst")
319
            oe = pyemu.ObservationEnsemble.from_csv("obs.csv")
320

321
        """
322

323
        if "index_col" not in kwargs:
8✔
324
            kwargs["index_col"] = 0
8✔
325
        if "low_memory" not in kwargs:
8✔
326
            kwargs["low_memory"] = False
8✔
327
        df = pd.read_csv(filename, *args, **kwargs)
8✔
328
        return cls(pst=pst, df=df)
8✔
329

330
    def to_csv(self, filename, *args, **kwargs):
9✔
331
        """write `Ensemble` to a CSV file
332

333
        Args:
334
            filename (`str`): file to write
335
            *args ([`object`]: positional arguments to pass to
336
                `pandas.DataFrame.to_csv()`.
337
            **kwargs ({`str`:`object`}): keyword arguments to pass
338
                to `pandas.DataFrame.to_csv()`.
339

340
        Example::
341

342
            pst = pyemu.Pst("my.pst")
343
            oe = pyemu.ObservationEnsemble.from_gaussian_draw(pst)
344
            oe.to_csv("obs.csv")
345

346
        Note:
347
            back transforms `ParameterEnsemble` before writing so that
348
            values are in arithmetic space
349

350
        """
351
        retrans = False
9✔
352
        if self.istransformed:
9✔
353
            self.back_transform()
8✔
354
            retrans = True
8✔
355
        if self._df.isnull().values.any():
9✔
356
            warnings.warn("NaN in ensemble", PyemuWarning)
×
357
        self._df.to_csv(filename, *args, **kwargs)
9✔
358
        if retrans:
9✔
359
            self.transform()
8✔
360

361
    def to_binary(self, filename):
9✔
362
        """write `Ensemble` to a PEST-style binary file
363

364
        Args:
365
            filename (`str`): file to write
366

367
        Example::
368

369
            pst = pyemu.Pst("my.pst")
370
            oe = pyemu.ObservationEnsemble.from_gaussian_draw(pst)
371
            oe.to_binary("obs.jcb")
372

373
        Note:
374
            back transforms `ParameterEnsemble` before writing so that
375
            values are in arithmetic space
376

377
        """
378

379
        retrans = False
9✔
380
        if self.istransformed:
9✔
381
            self.back_transform()
8✔
382
            retrans = True
8✔
383
        if self._df.isnull().values.any():
9✔
384
            warnings.warn("NaN in ensemble", PyemuWarning)
×
385
        pyemu.Matrix.from_dataframe(self._df).to_coo(filename)
9✔
386
        if retrans:
9✔
387
            self.transform()
8✔
388

389
    def to_dense(self, filename):
9✔
390
        """write `Ensemble` to a dense-format binary file
391

392
        Args:
393
            filename (`str`): file to write
394

395
        Example::
396

397
            pst = pyemu.Pst("my.pst")
398
            oe = pyemu.ObservationEnsemble.from_gaussian_draw(pst)
399
            oe.to_dense("obs.bin")
400

401
        Note:
402
            back transforms `ParameterEnsemble` before writing so that
403
            values are in arithmatic space
404

405
        """
406

407
        retrans = False
8✔
408
        if self.istransformed:
8✔
409
            self.back_transform()
×
410
            retrans = True
×
411
        if self._df.isnull().values.any():
8✔
412
            warnings.warn("NaN in ensemble", PyemuWarning)
×
413
        pyemu.Matrix.write_dense(
8✔
414
            filename,
415
            self._df.index.tolist(),
416
            self._df.columns.tolist(),
417
            self._df.values,
418
        )
419
        if retrans:
8✔
420
            self.transform()
×
421

422
    @classmethod
9✔
423
    def from_dataframe(cls, pst, df, istransformed=False):
9✔
424
        warnings.warn(
×
425
            "Ensemble.from_dataframe() is deprecated and has been "
426
            "replaced with the standard constructor, which takes"
427
            "the same arguments"
428
        )
429
        return cls(pst=pst, df=df, istransformed=istransformed)
×
430

431
    @staticmethod
9✔
432
    def _gaussian_draw(
9✔
433
        cov, mean_values, num_reals, grouper=None, fill=True, factor="eigen"
434
    ):
435

436
        factor = factor.lower()
9✔
437
        if factor not in ["eigen", "svd"]:
9✔
438
            raise Exception(
×
439
                "Ensemble._gaussian_draw() error: unrecognized"
440
                + "'factor': {0}".format(factor)
441
            )
442
        # make sure all cov names are found in mean_values
443
        cov_names = set(cov.row_names)
9✔
444
        mv_names = set(mean_values.index.values)
9✔
445
        missing = cov_names - mv_names
9✔
446
        if len(missing) > 0:
9✔
447
            raise Exception(
×
448
                "Ensemble._gaussian_draw() error: the following cov names are not in "
449
                "mean_values: {0}".format(",".join(missing))
450
            )
451
        if cov.isdiagonal:
9✔
452
            stds = {
9✔
453
                name: std for name, std in zip(cov.row_names, np.sqrt(cov.x.flatten()))
454
            }
455
            snv = np.random.randn(num_reals, mean_values.shape[0])
9✔
456
            reals = np.zeros_like(snv)
9✔
457
            reals[:, :] = np.NaN
9✔
458
            for i, name in enumerate(mean_values.index):
9✔
459
                if name in cov_names:
9✔
460
                    reals[:, i] = (snv[:, i] * stds[name]) + mean_values.loc[name]
9✔
461
                elif fill:
9✔
462
                    reals[:, i] = mean_values.loc[name]
8✔
463
        else:
464
            reals = np.zeros((num_reals, mean_values.shape[0]))
9✔
465
            reals[:, :] = np.NaN
9✔
466
            if fill:
9✔
467
                for i, v in enumerate(mean_values.values):
9✔
468
                    reals[:, i] = v
9✔
469
            #cov_map = {n: i for n, i in zip(cov.row_names, np.arange(cov.shape[0]))}
470
            mv_map = {
9✔
471
                n: i for n, i in zip(mean_values.index, np.arange(mean_values.shape[0]))
472
            }
473
            if grouper is not None:
9✔
474

475
                for grp_name, names in grouper.items():
9✔
476
                    print("drawing from group", grp_name)
9✔
477
                    # reorder names to be in cov matrix order
478
                    # cnames = names
479
                    cnames = []
9✔
480
                    snames = set(names)
9✔
481
                    for n in cov.names:
9✔
482
                        if n in snames:
9✔
483
                            cnames.append(n)
9✔
484
                    names = None
9✔
485
                    snames = None
9✔
486
                    idxs = [mv_map[name] for name in cnames]
9✔
487
                    snv = np.random.randn(num_reals, len(cnames))
9✔
488
                    cov_grp = cov.get(cnames)
9✔
489
                    if len(cnames) == 1:
9✔
490
                        std = np.sqrt(cov_grp.x)
9✔
491
                        reals[:, idxs] = mean_values.loc[cnames].values[0] + (snv * std)
9✔
492
                    else:
493
                        if factor == "eigen":
9✔
494
                            try:
9✔
495
                                cov_grp.inv
9✔
496
                            except:
×
497
                                covname = "trouble_{0}.cov".format(grp_name)
×
498
                                cov_grp.to_ascii(covname)
×
499
                                raise Exception(
×
500
                                    "error inverting cov for group '{0}',"
501
                                    + "saved trouble cov to {1}".format(
502
                                        grp_name, covname
503
                                    )
504
                                )
505

506
                            a, i = Ensemble._get_eigen_projection_matrix(cov_grp.as_2d)
9✔
507
                        elif factor == "svd":
×
508
                            a, i = Ensemble._get_svd_projection_matrix(cov_grp.as_2d)
×
509
                            snv[:, i:] = 0.0
×
510
                        # process each realization
511
                        group_mean_values = mean_values.loc[cnames]
9✔
512
                        for i in range(num_reals):
9✔
513
                            reals[i, idxs] = group_mean_values + np.dot(a, snv[i, :])
9✔
514

515
            else:
516
                snv = np.random.randn(num_reals, cov.shape[0])
9✔
517
                if factor == "eigen":
9✔
518
                    a, i = Ensemble._get_eigen_projection_matrix(cov.as_2d)
9✔
519
                elif factor == "svd":
×
520
                    a, i = Ensemble._get_svd_projection_matrix(cov.as_2d)
×
521
                    snv[:, i:] = 0.0
×
522
                cov_mean_values = mean_values.loc[cov.row_names].values
9✔
523
                idxs = [mv_map[name] for name in cov.row_names]
9✔
524
                for i in range(num_reals):
9✔
525
                    reals[i, idxs] = cov_mean_values + np.dot(a, snv[i, :])
9✔
526
        df = pd.DataFrame(reals, columns=mean_values.index.values)
9✔
527
        df.dropna(inplace=True, axis=1)
9✔
528
        return df
9✔
529

530
    @staticmethod
9✔
531
    def _get_svd_projection_matrix(x, maxsing=None, eigthresh=1.0e-7):
9✔
532
        if x.shape[0] != x.shape[1]:
×
533
            raise Exception("matrix not square")
×
534
        u, s, v = np.linalg.svd(x, full_matrices=True)
×
535
        v = v.transpose()
×
536

537
        if maxsing is None:
×
538
            maxsing = pyemu.Matrix.get_maxsing_from_s(s, eigthresh=eigthresh)
×
539
        u = u[:, :maxsing]
×
540
        s = s[:maxsing]
×
541
        v = v[:, :maxsing]
×
542

543
        # fill in full size svd component matrices
544
        s_full = np.zeros(x.shape)
×
545
        s_full[: s.shape[0], : s.shape[1]] = np.sqrt(
×
546
            s
547
        )  # sqrt since sing vals are eigvals**2
548
        v_full = np.zeros_like(s_full)
×
549
        v_full[: v.shape[0], : v.shape[1]] = v
×
550
        # form the projection matrix
551
        proj = np.dot(v_full, s_full)
×
552
        return proj, maxsing
×
553

554
    @staticmethod
9✔
555
    def _get_eigen_projection_matrix(x):
9✔
556
        # eigen factorization
557
        v, w = np.linalg.eigh(x)
9✔
558

559
        # check for near zero eig values
560
        for i in range(v.shape[0]):
9✔
561
            if v[i] > 1.0e-10:
9✔
562
                pass
9✔
563
            else:
564
                print(
8✔
565
                    "near zero eigen value found",
566
                    v[i],
567
                    "at index",
568
                    i,
569
                    " of ",
570
                    v.shape[0],
571
                )
572
                v[i] = 0.0
8✔
573

574
        # form the projection matrix
575
        vsqrt = np.sqrt(v)
9✔
576
        vsqrt[i + 1 :] = 0.0
9✔
577
        v = np.diag(vsqrt)
9✔
578
        a = np.dot(w, v)
9✔
579

580
        return a, i
9✔
581

582
    def get_deviations(self, center_on=None):
9✔
583
        """get the deviations of the realizations around a certain
584
        point in ensemble space
585

586
        Args:
587
            center_on (`str`, optional): a realization name to use as the centering
588
                point in ensemble space.  If `None`, the mean vector is
589
                treated as the centering point.  Default is None
590

591
        Returns:
592
            `Ensemble`: an ensemble of deviations around the centering point
593

594
        Note:
595
            deviations are the Euclidean distances from the `center_on` value to
596
            realized values for each column
597

598
            `center_on=None` yields the classic ensemble smoother/ensemble Kalman
599
            filter deviations from the mean vector
600

601
            Deviations respect log-transformation status.
602

603
        Example::
604

605
            pst = pyemu.Pst("my.pst")
606
            oe = pyemu.ObservationEnsemble.from_gaussian_draw(pst)
607
            oe.add_base()
608
            oe_dev = oe.get_deviations(center_on="base")
609
            oe.to_csv("obs_base_devs.csv")
610

611
        """
612

613
        retrans = False
8✔
614
        if not self.istransformed:
8✔
615
            self.transform()
8✔
616
            retrans = True
8✔
617
        mean_vec = self.mean()
8✔
618
        if center_on is not None:
8✔
619
            if center_on not in self.index:
8✔
620
                raise Exception(
×
621
                    "'center_on' realization {0} not found".format(center_on)
622
                )
623
            mean_vec = self._df.loc[center_on, :].copy()
8✔
624

625
        df = self._df.copy()
8✔
626
        for col in df.columns:
8✔
627
            df.loc[:, col] -= mean_vec[col]
8✔
628
        if retrans:
8✔
629
            self.back_transform()
8✔
630
        return type(self)(pst=self.pst, df=df, istransformed=self.istransformed)
8✔
631

632
    def as_pyemu_matrix(self, typ=None):
9✔
633
        """get a `pyemu.Matrix` instance of `Ensemble`
634

635
        Args:
636
            typ (`pyemu.Matrix` or `pyemu.Cov`): the type of matrix to return.
637
                Default is `pyemu.Matrix`
638

639
        Returns:
640
            `pyemu.Matrix`: a matrix instance
641

642
        Example::
643

644
            oe = pyemu.ObservationEnsemble.from_gaussian_draw(pst=pst,num_reals=100)
645
            dev_mat = oe.get_deviations().as_pyemu_matrix(typ=pyemu.Cov)
646
            obscov = dev_mat.T * dev_mat
647

648
        """
649
        if typ is None:
8✔
650
            typ = pyemu.Matrix
8✔
651
        return typ.from_dataframe(self._df)
8✔
652

653
    def covariance_matrix(self, localizer=None, center_on=None):
9✔
654
        """get a empirical covariance matrix implied by the
655
        correlations between realizations
656

657
        Args:
658
            localizer (`pyemu.Matrix`, optional): a matrix to localize covariates
659
                in the resulting covariance matrix.  Default is None
660
            center_on (`str`, optional): a realization name to use as the centering
661
                point in ensemble space.  If `None`, the mean vector is
662
                treated as the centering point.  Default is None
663

664
        Returns:
665
            `pyemu.Cov`: the empirical (and optionally localized) covariance matrix
666

667
        """
668

669
        devs = self.get_deviations(center_on=center_on).as_pyemu_matrix()
8✔
670
        devs *= 1.0 / np.sqrt(float(self.shape[0] - 1.0))
8✔
671

672
        if localizer is not None:
8✔
673
            devs = devs.T * devs
×
674
            return devs.hadamard_product(localizer)
×
675

676
        return pyemu.Cov((devs.T * devs).x, names=devs.col_names)
8✔
677

678
    def dropna(self, *args, **kwargs):
9✔
679
        """override of `pandas.DataFrame.dropna()`
680

681
        Args:
682
            *args ([`object`]: positional arguments to pass to
683
                `pandas.DataFrame.dropna()`.
684
            **kwargs ({`str`:`object`}): keyword arguments to pass
685
                to `pandas.DataFrame.dropna()`.
686

687
        """
688
        df = self._df.dropna(*args, **kwargs)
8✔
689
        return type(self)(pst=self.pst, df=df, istransformed=self.istransformed)
8✔
690

691

692
class ObservationEnsemble(Ensemble):
9✔
693
    """Observation noise ensemble in the PEST(++) realm
9✔
694

695
    Args:
696
        pst (`pyemu.Pst`): a control file instance
697
        df (`pandas.DataFrame`): a pandas dataframe.  Columns
698
            should be observation names.  Index is
699
            treated as realization names
700
        istransformed (`bool`): flag to indicate parameter values
701
            are in log space.  Not used for `ObservationEnsemble`
702

703
    Example::
704

705
        pst = pyemu.Pst("my.pst")
706
        oe = pyemu.ObservationEnsemble.from_gaussian_draw(pst)
707

708
    """
709

710
    def __init__(self, pst, df, istransformed=False):
9✔
711
        super(ObservationEnsemble, self).__init__(pst, df, istransformed)
8✔
712

713
    @classmethod
9✔
714
    def from_gaussian_draw(
9✔
715
        cls, pst, cov=None, num_reals=100, by_groups=True, fill=False, factor="eigen"
716
    ):
717
        """generate an `ObservationEnsemble` from a (multivariate) gaussian
718
        distribution
719

720
        Args:
721
            pst (`pyemu.Pst`): a control file instance.
722
            cov (`pyemu.Cov`): a covariance matrix describing the second
723
                moment of the gaussian distribution.  If None, `cov` is
724
                generated from the non-zero-weighted observation weights in `pst`.
725
                Only observations listed in `cov` are sampled.  Other observations are
726
                assigned the `obsval` value from `pst`.
727
            num_reals (`int`): number of stochastic realizations to generate.  Default
728
                is 100
729
            by_groups (`bool`): flag to generate realzations be observation group.  This
730
                assumes no correlation (covariates) between observation groups.
731
            fill (`bool`): flag to fill in zero-weighted observations with control file
732
                values.  Default is False.
733
            factor (`str`): how to factorize `cov` to form the projectin matrix.  Can
734
                be "eigen" or "svd". The "eigen" option is default and is faster.  But
735
                for (nearly) singular cov matrices (such as those generated empirically
736
                from ensembles), "svd" is the only way.  Ignored for diagonal `cov`.
737

738
        Returns:
739
            `ObservationEnsemble`: the realized `ObservationEnsemble` instance
740

741
        Note:
742
            Only observations named in `cov` are sampled. Additional, `cov` is processed prior
743
            to sampling to only include non-zero-weighted observations depending on the value of `fill`.
744
            So users must take care to make sure observations have been assigned non-zero weights even if `cov`
745
            is being passed
746

747
            The default `cov` is generated from `pyemu.Cov.from_observation_data`, which assumes
748
            observation noise standard deviations are the inverse of the weights listed in `pst`
749

750
        Example::
751

752
            pst = pyemu.Pst("my.pst")
753
            # the easiest way - just relying on weights in pst
754
            oe1 = pyemu.ObservationEnsemble.from_gaussian_draw(pst)
755

756
            # generate the cov explicitly
757
            cov = pyemu.Cov.from_observation_data(pst)
758
            oe2 = pyemu.ObservationEnsemble.from_gaussian_draw(pst,cov=cov)
759

760
            # give all but one observation zero weight.  This will
761
            # result in an oe with only one randomly sampled observation noise
762
            # vector since the cov is processed to remove any zero-weighted
763
            # observations before sampling
764
            pst.observation_data.loc[pst.nnz_obs_names[1:],"weight] = 0.0
765
            oe3 = pyemu.ObservationEnsemble.from_gaussian_draw(pst,cov=cov)
766

767
        """
768
        if cov is None:
8✔
769
            cov = pyemu.Cov.from_observation_data(pst)
8✔
770
        obs = pst.observation_data
8✔
771
        mean_values = obs.obsval.copy()
8✔
772
        if len(pst.nnz_obs_names) == 0:
8✔
773
            warnings.warn("ObservationEnsemble.from_gaussian_draw(): all zero weights",PyemuWarning)
8✔
774
        # only draw for non-zero weights, get a new cov
775
        if not fill:
8✔
776
            names = set(pst.nnz_obs_names).intersection(set(cov.row_names))
8✔
777
            nz_cov = cov.get(list(names))
8✔
778
        else:
779
            nz_cov = cov.copy()
8✔
780

781
        grouper = None
8✔
782
        if not cov.isdiagonal and by_groups:
8✔
783
            nz_obs = obs.loc[pst.nnz_obs_names, :].copy()
8✔
784
            grouper = nz_obs.groupby("obgnme").groups
8✔
785
            for grp in grouper.keys():
8✔
786
                grouper[grp] = list(grouper[grp])
8✔
787
        df = Ensemble._gaussian_draw(
8✔
788
            cov=nz_cov,
789
            mean_values=mean_values,
790
            num_reals=num_reals,
791
            grouper=grouper,
792
            fill=fill,
793
            factor=factor,
794
        )
795
        if fill:
8✔
796
            df.loc[:, pst.zero_weight_obs_names] = pst.observation_data.loc[
8✔
797
                pst.zero_weight_obs_names, "obsval"
798
            ].values
799
        return cls(pst, df, istransformed=False)
8✔
800

801
    @property
9✔
802
    def phi_vector(self):
9✔
803
        """vector of L2 norm (phi) for the realizations (rows) of `Ensemble`.
804

805
        Returns:
806
            `pandas.Series`: series of realization name (`Ensemble.index`) and phi values
807

808
        Note:
809
            The ObservationEnsemble.pst.weights can be updated prior to calling
810
            this method to evaluate new weighting strategies
811

812
        """
813
        cols = self._df.columns
8✔
814
        pst = self.pst
8✔
815
        weights = self.pst.observation_data.loc[cols, "weight"]
8✔
816
        obsval = self.pst.observation_data.loc[cols, "obsval"]
8✔
817
        phi_vec = []
8✔
818
        for idx in self._df.index.values:
8✔
819
            simval = self._df.loc[idx, cols]
8✔
820
            phi = (((simval - obsval) * weights) ** 2).sum()
8✔
821
            phi_vec.append(phi)
8✔
822
        return pd.Series(data=phi_vec, index=self.index)
8✔
823

824
    def add_base(self):
9✔
825
        """add the control file `obsval` values as a realization
826

827
        Note:
828
            replaces the last realization with the current `ObservationEnsemble.pst.observation_data.obsval` values
829
            as a new realization named "base"
830

831
            the PEST++ enemble tools will add this realization also if you dont wanna fool with it here...
832

833
        """
834
        if "base" in self.index:
8✔
835
            raise Exception("'base' already in ensemble")
8✔
836
        self._df = self._df.iloc[:-1, :]
8✔
837
        self._df.loc["base", :] = self.pst.observation_data.loc[self.columns, "obsval"]
8✔
838

839
    @property
9✔
840
    def nonzero(self):
9✔
841
        """get a new `ObservationEnsemble` of just non-zero weighted observations
842

843
        Returns:
844
            `ObservationEnsemble`: non-zero weighted observation ensemble.
845

846
        Note:
847
            The `pst` attribute of the returned `ObservationEnsemble` also only includes
848
            non-zero weighted observations (and is therefore not valid for running
849
            with PEST or PEST++)
850

851
        """
852
        df = self._df.loc[:, self.pst.nnz_obs_names]
8✔
853
        return ObservationEnsemble(
8✔
854
            pst=self.pst.get(obs_names=self.pst.nnz_obs_names), df=df
855
        )
856

857

858
class ParameterEnsemble(Ensemble):
9✔
859
    """Parameter ensembles in the PEST(++) realm
9✔
860

861
    Args:
862
        pst (`pyemu.Pst`): a control file instance
863
        df (`pandas.DataFrame`): a pandas dataframe.  Columns
864
            should be parameter names.  Index is
865
            treated as realization names
866
        istransformed (`bool`): flag to indicate parameter values
867
            are in log space (if `partrans` is "log" in `pst`)
868

869
    Example::
870

871
        pst = pyemu.Pst("my.pst")
872
        pe = pyemu.ParameterEnsemble.from_gaussian_draw(pst)
873

874
    """
875

876
    def __init__(self, pst, df, istransformed=False):
9✔
877
        super(ParameterEnsemble, self).__init__(pst, df, istransformed)
9✔
878

879
    @classmethod
9✔
880
    def from_gaussian_draw(
9✔
881
        cls, pst, cov=None, num_reals=100, by_groups=True, fill=True, factor="eigen"
882
    ):
883
        """generate a `ParameterEnsemble` from a (multivariate) (log) gaussian
884
        distribution
885

886
        Args:
887
            pst (`pyemu.Pst`): a control file instance.
888
            cov (`pyemu.Cov`): a covariance matrix describing the second
889
                moment of the gaussian distribution.  If None, `cov` is
890
                generated from the bounds of the adjustable parameters in `pst`.
891
                the (log) width of the bounds is assumed to represent a multiple of
892
                the parameter standard deviation (this is the `sigma_range` argument
893
                that can be passed to `pyemu.Cov.from_parameter_data`).
894
            num_reals (`int`): number of stochastic realizations to generate.  Default
895
                is 100
896
            by_groups (`bool`): flag to generate realizations be parameter group.  This
897
                assumes no correlation (covariates) between parameter groups.  For large
898
                numbers of parameters, this help prevent memories but is slower.
899
            fill (`bool`): flag to fill in fixed and/or tied parameters with control file
900
                values.  Default is True.
901
            factor (`str`): how to factorize `cov` to form the projection matrix.  Can
902
                be "eigen" or "svd". The "eigen" option is default and is faster.  But
903
                for (nearly) singular cov matrices (such as those generated empirically
904
                from ensembles), "svd" is the only way.  Ignored for diagonal `cov`.
905

906
        Returns:
907
            `ParameterEnsemble`: the parameter ensemble realized from the gaussian
908
            distribution
909

910
        Note:
911

912
            Only parameters named in `cov` are sampled. Missing parameters are assigned values of
913
            `pst.parameter_data.parval1` along the corresponding columns of `ParameterEnsemble`
914
            according to the value of `fill`.
915

916
            The default `cov` is generated from `pyemu.Cov.from_observation_data`, which assumes
917
            parameter bounds in `ParameterEnsemble.pst` represent some multiple of parameter
918
            standard deviations.  Additionally, the default Cov only includes adjustable
919
            parameters (`partrans` not "tied" or "fixed").
920

921
            "tied" parameters are not sampled.
922

923
        Example::
924

925
            pst = pyemu.Pst("my.pst")
926
            # the easiest way - just relying on weights in pst
927
            pe1 = pyemu.ParameterEnsemble.from_gaussian_draw(pst)
928

929
            # generate the cov explicitly with a sigma_range
930
            cov = pyemu.Cov.from_parameter_data(pst,sigma_range=6)
931
            [e2 = pyemu.ParameterEnsemble.from_gaussian_draw(pst,cov=cov)
932

933
        """
934
        if cov is None:
9✔
935
            cov = pyemu.Cov.from_parameter_data(pst)
9✔
936
        par = pst.parameter_data
9✔
937
        li = par.partrans == "log"
9✔
938
        mean_values = par.parval1.copy()
9✔
939
        mean_values.loc[li] = mean_values.loc[li].apply(np.log10)
9✔
940
        if len(pst.adj_par_names) == 0:
9✔
941
            warnings.warn("ParameterEnsemble.from_gaussian_draw(): no adj pars", PyemuWarning)
×
942
        grouper = None
9✔
943
        if not cov.isdiagonal and by_groups:
9✔
944
            adj_par = par.loc[pst.adj_par_names, :]
9✔
945
            grouper = adj_par.groupby("pargp").groups
9✔
946
            for grp in grouper.keys():
9✔
947
                grouper[grp] = list(grouper[grp])
9✔
948
        df = Ensemble._gaussian_draw(
9✔
949
            cov=cov,
950
            mean_values=mean_values,
951
            num_reals=num_reals,
952
            grouper=grouper,
953
            fill=fill,
954
        )
955
        df.loc[:, li] = 10.0 ** df.loc[:, li]
9✔
956
        return cls(pst, df, istransformed=False)
9✔
957

958
    @classmethod
9✔
959
    def from_triangular_draw(cls, pst, num_reals=100, fill=True):
9✔
960
        """generate a `ParameterEnsemble` from a (multivariate) (log) triangular distribution
961

962
        Args:
963
            pst (`pyemu.Pst`): a control file instance
964
            num_reals (`int`, optional): number of realizations to generate.  Default is 100
965
            fill (`bool`): flag to fill in fixed and/or tied parameters with control file
966
                values.  Default is True.
967

968
        Returns:
969
            `ParameterEnsemble`: a parameter ensemble drawn from the multivariate (log) triangular
970
            distribution defined by the parameter upper and lower bounds and initial parameter
971
            values in `pst`
972

973
        Note:
974
            respects transformation status in `pst`: fixed and tied parameters are not realized,
975
            log-transformed parameters are drawn in log space.  The returned `ParameterEnsemble`
976
            is back transformed (not in log space)
977

978
            uses numpy.random.triangular
979

980
        Example::
981

982
            pst = pyemu.Pst("my.pst")
983
            pe = pyemu.ParameterEnsemble.from_triangular_draw(pst)
984
            pe.to_csv("my_tri_pe.csv")
985

986
        """
987

988
        li = pst.parameter_data.partrans == "log"
8✔
989
        ub = pst.parameter_data.parubnd.copy()
8✔
990
        ub.loc[li] = ub.loc[li].apply(np.log10)
8✔
991

992
        lb = pst.parameter_data.parlbnd.copy()
8✔
993
        lb.loc[li] = lb.loc[li].apply(np.log10)
8✔
994

995
        pv = pst.parameter_data.parval1.copy()
8✔
996
        pv.loc[li] = pv[li].apply(np.log10)
8✔
997

998
        ub = ub.to_dict()
8✔
999
        lb = lb.to_dict()
8✔
1000
        pv = pv.to_dict()
8✔
1001

1002
        # set up some column names
1003
        # real_names = ["{0:d}".format(i)
1004
        #              for i in range(num_reals)]
1005
        real_names = np.arange(num_reals, dtype=np.int64)
8✔
1006
        arr = np.empty((num_reals, len(ub)))
8✔
1007
        arr[:, :] = np.NaN
8✔
1008
        adj_par_names = set(pst.adj_par_names)
8✔
1009
        for i, pname in enumerate(pst.parameter_data.parnme):
8✔
1010
            # print(pname, lb[pname], ub[pname])
1011
            if pname in adj_par_names:
8✔
1012
                arr[:, i] = np.random.triangular(
8✔
1013
                    lb[pname], pv[pname], ub[pname], size=num_reals
1014
                )
1015
            elif fill:
×
1016
                arr[:, i] = (
×
1017
                    np.zeros((num_reals)) + pst.parameter_data.loc[pname, "parval1"]
1018
                )
1019

1020
        df = pd.DataFrame(arr, index=real_names, columns=pst.par_names)
8✔
1021
        df.dropna(inplace=True, axis=1)
8✔
1022
        df.loc[:, li] = 10.0 ** df.loc[:, li]
8✔
1023
        new_pe = cls(pst=pst, df=df)
8✔
1024
        return new_pe
8✔
1025

1026
    @classmethod
9✔
1027
    def from_uniform_draw(cls, pst, num_reals, fill=True):
9✔
1028
        """generate a `ParameterEnsemble` from a (multivariate) (log) uniform
1029
        distribution
1030

1031
        Args:
1032
            pst (`pyemu.Pst`): a control file instance
1033
            num_reals (`int`, optional): number of realizations to generate.  Default is 100
1034
            fill (`bool`): flag to fill in fixed and/or tied parameters with control file
1035
                values.  Default is True.
1036

1037
        Returns:
1038
            `ParameterEnsemble`: a parameter ensemble drawn from the multivariate (log) uniform
1039
            distribution defined by the parameter upper and lower bounds `pst`
1040

1041
        Note:
1042
            respects transformation status in `pst`: fixed and tied parameters are not realized,
1043
            log-transformed parameters are drawn in log space.  The returned `ParameterEnsemble`
1044
            is back transformed (not in log space)
1045

1046
            uses numpy.random.uniform
1047

1048
        Example::
1049

1050
            pst = pyemu.Pst("my.pst")
1051
            pe = pyemu.ParameterEnsemble.from_uniform_draw(pst)
1052
            pe.to_csv("my_uni_pe.csv")
1053

1054

1055
        """
1056

1057
        li = pst.parameter_data.partrans == "log"
9✔
1058
        ub = pst.parameter_data.parubnd.copy()
9✔
1059
        ub.loc[li] = ub.loc[li].apply(np.log10)
9✔
1060
        ub = ub.to_dict()
9✔
1061
        lb = pst.parameter_data.parlbnd.copy()
9✔
1062
        lb.loc[li] = lb.loc[li].apply(np.log10)
9✔
1063
        lb = lb.to_dict()
9✔
1064

1065
        real_names = np.arange(num_reals, dtype=np.int64)
9✔
1066
        arr = np.empty((num_reals, len(ub)))
9✔
1067
        arr[:, :] = np.NaN
9✔
1068
        adj_par_names = set(pst.adj_par_names)
9✔
1069
        if len(adj_par_names) == 0:
9✔
1070
            warnings.warn("ParameterEnsemble.from_uniform_draw(): no adj pars",PyemuWarning)
×
1071
        for i, pname in enumerate(pst.parameter_data.parnme):
9✔
1072
            # print(pname,lb[pname],ub[pname])
1073
            if pname in adj_par_names:
9✔
1074
                arr[:, i] = np.random.uniform(lb[pname], ub[pname], size=num_reals)
9✔
1075
            elif fill:
×
1076
                arr[:, i] = (
×
1077
                    np.zeros((num_reals)) + pst.parameter_data.loc[pname, "parval1"]
1078
                )
1079

1080
        df = pd.DataFrame(arr, index=real_names, columns=pst.par_names)
9✔
1081
        df.dropna(inplace=True, axis=1)
9✔
1082
        df.loc[:, li] = 10.0 ** df.loc[:, li]
9✔
1083

1084
        new_pe = cls(pst=pst, df=df)
9✔
1085
        return new_pe
9✔
1086

1087
    @classmethod
9✔
1088
    def from_mixed_draws(
9✔
1089
        cls,
1090
        pst,
1091
        how_dict,
1092
        default="gaussian",
1093
        num_reals=100,
1094
        cov=None,
1095
        sigma_range=6,
1096
        enforce_bounds=True,
1097
        partial=False,
1098
        fill=True,
1099
    ):
1100
        """generate a `ParameterEnsemble` using a mixture of
1101
        distributions.  Available distributions include (log) "uniform", (log) "triangular",
1102
        and (log) "gaussian". log transformation is respected.
1103

1104
        Args:
1105
            pst (`pyemu.Pst`): a control file
1106
            how_dict (`dict`): a dictionary of parameter name keys and
1107
                "how" values, where "how" can be "uniform","triangular", or "gaussian".
1108
            default (`str`): the default distribution to use for parameter not listed
1109
                in how_dict.  Default is "gaussian".
1110
            num_reals (`int`): number of realizations to draw.  Default is 100.
1111
            cov (`pyemu.Cov`): an optional Cov instance to use for drawing from gaussian distribution.
1112
                If None, and "gaussian" is listed in `how_dict` (and/or `default`), then a diagonal
1113
                covariance matrix is constructed from the parameter bounds in `pst` (with `sigma_range`).
1114
                Default is None.
1115
            sigma_range (`float`): the number of standard deviations implied by the parameter bounds in the pst.
1116
                Only used if "gaussian" is in `how_dict` (and/or `default`) and `cov` is None.  Default is 6.
1117
            enforce_bounds (`bool`): flag to enforce parameter bounds in resulting `ParameterEnsemble`. Only
1118
                matters if "gaussian" is in values of `how_dict`.  Default is True.
1119
            partial (`bool`): flag to allow a partial ensemble (not all pars included).  If True, parameters
1120
                not name in `how_dict` will be sampled using the distribution named as `default`.
1121
                Default is `False`.
1122
            fill (`bool`): flag to fill in fixed and/or tied parameters with control file
1123
                values.  Default is True.
1124

1125
        Example::
1126

1127
            pst = pyemu.Pst("pest.pst")
1128
            # uniform for the fist 10 pars
1129
            how_dict = {p:"uniform" for p in pst.adj_par_names[:10]}
1130
            pe = pyemu.ParameterEnsemble(pst,how_dict=how_dict)
1131
            pe.to_csv("my_mixed_pe.csv")
1132

1133
        """
1134

1135
        # error checking
1136
        accept = {"uniform", "triangular", "gaussian"}
8✔
1137
        assert (
8✔
1138
            default in accept
1139
        ), "ParameterEnsemble.from_mixed_draw() error: 'default' must be in {0}".format(
1140
            accept
1141
        )
1142
        par_org = pst.parameter_data.copy()
8✔
1143
        pset = set(pst.adj_par_names)
8✔
1144
        hset = set(how_dict.keys())
8✔
1145
        missing = pset.difference(hset)
8✔
1146
        if not partial and len(missing) > 0:
8✔
1147
            print(
8✔
1148
                "{0} par names missing in how_dict, these parameters will be sampled using {1} (the 'default')".format(
1149
                    len(missing), default
1150
                )
1151
            )
1152
            for m in missing:
8✔
1153
                how_dict[m] = default
8✔
1154
        missing = hset.difference(pset)
8✔
1155
        assert len(missing) == 0, (
8✔
1156
            "ParameterEnsemble.from_mixed_draws() error: the following par names are not in "
1157
            + " in the pst: {0}".format(",".join(missing))
1158
        )
1159

1160
        unknown_draw = []
8✔
1161
        for pname, how in how_dict.items():
8✔
1162
            if how not in accept:
8✔
1163
                unknown_draw.append("{0}:{1}".format(pname, how))
8✔
1164
        if len(unknown_draw) > 0:
8✔
1165
            raise Exception(
8✔
1166
                "ParameterEnsemble.from_mixed_draws() error: the following hows are not recognized:{0}".format(
1167
                    ",".join(unknown_draw)
1168
                )
1169
            )
1170

1171
        # work out 'how' grouping
1172
        how_groups = {how: [] for how in accept}
8✔
1173
        for pname, how in how_dict.items():
8✔
1174
            how_groups[how].append(pname)
8✔
1175

1176
        # gaussian
1177
        pes = []
8✔
1178
        if len(how_groups["gaussian"]) > 0:
8✔
1179
            gset = how_groups["gaussian"]
8✔
1180
            par_gaussian = par_org.loc[gset, :]
8✔
1181
            # par_gaussian.sort_values(by="parnme", inplace=True)
1182
            par_gaussian.sort_index(inplace=True)
8✔
1183
            pst.parameter_data = par_gaussian
8✔
1184

1185
            if cov is not None:
8✔
1186
                cset = set(cov.row_names)
8✔
1187
                # gset = set(how_groups["gaussian"])
1188
                diff = set(gset).difference(cset)
8✔
1189
                assert len(diff) == 0, (
8✔
1190
                    "ParameterEnsemble.from_mixed_draws() error: the 'cov' is not compatible with "
1191
                    + " the parameters listed as 'gaussian' in how_dict, the following are not in the cov:{0}".format(
1192
                        ",".join(diff)
1193
                    )
1194
                )
1195
            else:
1196

1197
                cov = pyemu.Cov.from_parameter_data(pst, sigma_range=sigma_range)
8✔
1198
            pe_gauss = ParameterEnsemble.from_gaussian_draw(
8✔
1199
                pst, cov, num_reals=num_reals
1200
            )
1201
            pes.append(pe_gauss)
8✔
1202

1203
        if len(how_groups["uniform"]) > 0:
8✔
1204
            par_uniform = par_org.loc[how_groups["uniform"], :]
8✔
1205
            # par_uniform.sort_values(by="parnme",inplace=True)
1206
            par_uniform.sort_index(inplace=True)
8✔
1207
            pst.parameter_data = par_uniform
8✔
1208
            pe_uniform = ParameterEnsemble.from_uniform_draw(pst, num_reals=num_reals)
8✔
1209
            pes.append(pe_uniform)
8✔
1210

1211
        if len(how_groups["triangular"]) > 0:
8✔
1212
            par_tri = par_org.loc[how_groups["triangular"], :]
8✔
1213
            # par_tri.sort_values(by="parnme", inplace=True)
1214
            par_tri.sort_index(inplace=True)
8✔
1215
            pst.parameter_data = par_tri
8✔
1216
            pe_tri = ParameterEnsemble.from_triangular_draw(pst, num_reals=num_reals)
8✔
1217
            pes.append(pe_tri)
8✔
1218

1219
        df = pd.DataFrame(index=np.arange(num_reals), columns=par_org.parnme.values)
8✔
1220

1221
        df.loc[:, :] = np.NaN
8✔
1222
        if fill:
8✔
1223
            fixed_tied = par_org.loc[
8✔
1224
                par_org.partrans.apply(lambda x: x in ["fixed", "tied"]), "parval1"
1225
            ].to_dict()
1226
            for p, v in fixed_tied.items():
8✔
1227
                df.loc[:, p] = v
8✔
1228

1229
        for pe in pes:
8✔
1230
            df.loc[pe.index, pe.columns] = pe._df
8✔
1231

1232
        # this dropna covers both "fill" and "partial"
1233
        df = df.dropna(axis=1)
8✔
1234

1235
        pst.parameter_data = par_org
8✔
1236
        pe = ParameterEnsemble(df=df, pst=pst)
8✔
1237
        if enforce_bounds:
8✔
1238
            pe.enforce()
8✔
1239
        return pe
8✔
1240

1241
    @classmethod
9✔
1242
    def from_parfiles(cls, pst, parfile_names, real_names=None):
9✔
1243
        """create a parameter ensemble from PEST-style parameter value files.
1244
        Accepts parfiles with less than the parameters in the control
1245
        (get NaNs in the ensemble) or extra parameters in the
1246
        parfiles (get dropped)
1247

1248
        Args:
1249
            pst (`pyemu.Pst`): control file instance
1250
            parfile_names (`[str`]): par file names
1251
            real_names (`str`): optional list of realization names.
1252
                If None, a single integer counter is used
1253

1254
        Returns:
1255
            `ParameterEnsemble`: parameter ensemble loaded from par files
1256

1257

1258
        """
1259
        if isinstance(pst, str):
8✔
1260
            pst = pyemu.Pst(pst)
×
1261
        dfs = {}
8✔
1262
        if real_names is not None:
8✔
1263
            assert len(real_names) == len(parfile_names)
×
1264
        else:
1265
            real_names = np.arange(len(parfile_names))
8✔
1266

1267
        for rname, pfile in zip(real_names, parfile_names):
8✔
1268
            assert os.path.exists(pfile), (
8✔
1269
                "ParameterEnsemble.from_parfiles() error: "
1270
                + "file: {0} not found".format(pfile)
1271
            )
1272
            df = pyemu.pst_utils.read_parfile(pfile)
8✔
1273
            # check for scale differences - I don't who is dumb enough
1274
            # to change scale between par files and pst...
1275
            diff = df.scale - pst.parameter_data.scale
8✔
1276
            if diff.abs().sum() > 0.0:
8✔
1277
                warnings.warn(
×
1278
                    "differences in scale detected, applying scale in par file",
1279
                    PyemuWarning,
1280
                )
1281
                # df.loc[:,"parval1"] *= df.scale
1282

1283
            dfs[rname] = df.parval1.values
8✔
1284

1285
        df_all = pd.DataFrame(data=dfs).T
8✔
1286
        df_all.columns = df.index
8✔
1287

1288
        if len(pst.par_names) != df_all.shape[1]:
8✔
1289
            # if len(pst.par_names) < df_all.shape[1]:
1290
            #    raise Exception("pst is not compatible with par files")
1291
            pset = set(pst.par_names)
×
1292
            dset = set(df_all.columns)
×
1293
            diff = pset.difference(dset)
×
1294
            if len(diff) > 0:
×
1295
                warnings.warn(
×
1296
                    "the following parameters are not in the par files (getting NaNs) :{0}".format(
1297
                        ",".join(diff)
1298
                    ),
1299
                    PyemuWarning,
1300
                )
1301
                blank_df = pd.DataFrame(index=df_all.index, columns=diff)
×
1302

1303
                df_all = pd.concat([df_all, blank_df], axis=1)
×
1304

1305
            diff = dset.difference(pset)
×
1306
            if len(diff) > 0:
×
1307
                warnings.warn(
×
1308
                    "the following par file parameters are not in the control (being dropped):{0}".format(
1309
                        ",".join(diff)
1310
                    ),
1311
                    PyemuWarning,
1312
                )
1313
                df_all = df_all.loc[:, pst.par_names]
×
1314

1315
        return ParameterEnsemble(pst=pst, df=df_all)
8✔
1316

1317
    def back_transform(self):
9✔
1318
        """back transform parameters with respect to `partrans` value.
1319

1320
        Note:
1321
            operates in place (None is returned).
1322

1323
            Parameter transform is only related to log_{10} and does not
1324
            include the effects of `scale` and/or `offset`
1325

1326
        """
1327
        if not self.istransformed:
8✔
1328
            return
×
1329
        li = self.pst.parameter_data.loc[:, "partrans"] == "log"
8✔
1330
        self.loc[:, li] = 10.0 ** (self._df.loc[:, li])
8✔
1331
        # self.loc[:,:] = (self.loc[:,:] -\
1332
        #                 self.pst.parameter_data.offset)/\
1333
        #                 self.pst.parameter_data.scale
1334
        self._istransformed = False
8✔
1335

1336
    def transform(self):
9✔
1337
        """transform parameters with respect to `partrans` value.
1338

1339
        Note:
1340
            operates in place (None is returned).
1341

1342
            Parameter transform is only related to log_{10} and does not
1343
            include the effects of `scale` and/or `offset`
1344

1345
        """
1346
        if self.istransformed:
8✔
1347
            return
8✔
1348
        li = self.pst.parameter_data.loc[:, "partrans"] == "log"
8✔
1349
        df = self._df
8✔
1350
        # self.loc[:,:] = (self.loc[:,:] * self.pst.parameter_data.scale) +\
1351
        #                 self.pst.parameter_data.offset
1352
        df.loc[:, li] = df.loc[:, li].apply(np.log10)
8✔
1353
        self._istransformed = True
8✔
1354

1355
    def add_base(self):
9✔
1356
        """add the control file `obsval` values as a realization
1357

1358
        Note:
1359
            replaces the last realization with the current `ParameterEnsemble.pst.parameter_data.parval1` values
1360
            as a new realization named "base"
1361

1362
            The PEST++ ensemble tools will add this realization also if you dont wanna fool with it here...
1363

1364
        """
1365
        if "base" in self.index:
8✔
1366
            raise Exception("'base' realization already in ensemble")
8✔
1367
        self._df = self._df.iloc[:-1, :]
8✔
1368
        if self.istransformed:
8✔
1369
            self.pst.add_transform_columns()
8✔
1370
            self.loc["base", :] = self.pst.parameter_data.loc[
8✔
1371
                self.columns, "parval1_trans"
1372
            ]
1373
        else:
1374
            self.loc["base", :] = self.pst.parameter_data.loc[self.columns, "parval1"]
8✔
1375

1376
    @property
9✔
1377
    def adj_names(self):
9✔
1378
        """the names of adjustable parameters in `ParameterEnsemble`
1379

1380
        Returns:
1381
            [`str`]: adjustable parameter names
1382

1383
        """
1384
        return self.pst.parameter_data.parnme.loc[~self.fixed_indexer].to_list()
×
1385

1386
    @property
9✔
1387
    def ubnd(self):
9✔
1388
        """the upper bound vector while respecting current log transform status
1389

1390
        Returns:
1391
            `pandas.Series`: (log-transformed) upper parameter bounds listed in
1392
            `ParameterEnsemble.pst.parameter_data.parubnd`
1393

1394
        """
1395
        if not self.istransformed:
9✔
1396
            return self.pst.parameter_data.parubnd.copy()
9✔
1397
        else:
1398
            ub = self.pst.parameter_data.parubnd.copy()
8✔
1399
            ub[self.log_indexer] = np.log10(ub[self.log_indexer])
8✔
1400
            return ub
8✔
1401

1402
    @property
9✔
1403
    def lbnd(self):
9✔
1404
        """the lower bound vector while respecting current log transform status
1405

1406
        Returns:
1407
            `pandas.Series`: (log-transformed) lower parameter bounds listed in
1408
            `ParameterEnsemble.pst.parameter_data.parlbnd`
1409

1410
        """
1411
        if not self.istransformed:
9✔
1412
            return self.pst.parameter_data.parlbnd.copy()
9✔
1413
        else:
1414
            lb = self.pst.parameter_data.parlbnd.copy()
8✔
1415
            lb[self.log_indexer] = np.log10(lb[self.log_indexer])
8✔
1416
            return lb
8✔
1417

1418
    @property
9✔
1419
    def log_indexer(self):
9✔
1420
        """boolean indexer for log transform
1421

1422
        Returns:
1423
            `numpy.ndarray(bool)`: boolean array indicating which parameters are log
1424
            transformed
1425

1426
        """
1427
        istransformed = self.pst.parameter_data.partrans == "log"
8✔
1428
        return istransformed.values
8✔
1429

1430
    @property
9✔
1431
    def fixed_indexer(self):
9✔
1432
        """boolean indexer for non-adjustable parameters
1433

1434
        Returns:
1435
            `numpy.ndarray(bool)`: boolean array indicating which parameters have
1436
            `partrans` equal to "log" or "fixed"
1437

1438
        """
1439
        # isfixed = self.pst.parameter_data.partrans == "fixed"
1440
        tf = set(["tied", "fixed"])
×
1441
        isfixed = self.pst.parameter_data.partrans.apply(lambda x: x in tf)
×
1442
        return isfixed.values
×
1443

1444
    def project(
9✔
1445
        self, projection_matrix, center_on=None, log=None, enforce_bounds="reset"
1446
    ):
1447
        """project the ensemble using the null-space Monte Carlo method
1448

1449
        Args:
1450
            projection_matrix (`pyemu.Matrix`): null-space projection operator.
1451
            center_on (`str`): the name of the realization to use as the centering
1452
                point for the null-space differening operation.  If `center_on` is `None`,
1453
                the `ParameterEnsemble` mean vector is used.  Default is `None`
1454
            log (`pyemu.Logger`, optional): for logging progress
1455
            enforce_bounds (`str`): parameter bound enforcement option to pass to
1456
                `ParameterEnsemble.enforce()`.  Valid options are `reset`, `drop`,
1457
                `scale` or `None`.  Default is `reset`.
1458

1459
        Returns:
1460
            `ParameterEnsemble`: untransformed, null-space projected ensemble.
1461

1462
        Example::
1463

1464
            ev = pyemu.ErrVar(jco="my.jco") #assumes my.pst exists
1465
            pe = pyemu.ParameterEnsemble.from_gaussian_draw(ev.pst)
1466
            pe_proj = pe.project(ev.get_null_proj(maxsing=25))
1467
            pe_proj.to_csv("proj_par.csv")
1468

1469
        """
1470

1471
        retrans = False
8✔
1472
        if not self.istransformed:
8✔
1473
            self.transform()
8✔
1474
            retrans = True
8✔
1475

1476
        # base = self._df.mean()
1477
        self.pst.add_transform_columns()
8✔
1478
        base = self.pst.parameter_data.parval1_trans
8✔
1479
        if center_on is not None:
8✔
1480
            if isinstance(center_on, pd.Series):
×
1481
                base = center_on
×
1482
            elif center_on in self._df.index:
×
1483
                base = self._df.loc[center_on, :].copy()
×
1484
            elif isinstance(center_on, "str"):
×
1485
                try:
×
1486
                    base = pyemu.pst_utils.read_parfile(center_on)
×
1487
                except:
×
1488
                    raise Exception(
×
1489
                        "'center_on' arg not found in index and couldnt be loaded as a '.par' file"
1490
                    )
1491
            else:
1492
                raise Exception(
×
1493
                    "error processing 'center_on' arg.  should be realization names, par file, or series"
1494
                )
1495
        names = list(base.index)
8✔
1496
        projection_matrix = projection_matrix.get(names, names)
8✔
1497

1498
        new_en = self.copy()
8✔
1499

1500
        for real in new_en.index:
8✔
1501
            if log is not None:
8✔
1502
                log("projecting realization {0}".format(real))
×
1503

1504
            # null space projection of difference vector
1505
            pdiff = self._df.loc[real, names] - base
8✔
1506
            pdiff = np.dot(projection_matrix.x, pdiff.values)
8✔
1507
            new_en.loc[real, names] = base + pdiff
8✔
1508

1509
            if log is not None:
8✔
1510
                log("projecting realization {0}".format(real))
×
1511

1512
        new_en.enforce(enforce_bounds)
8✔
1513

1514
        new_en.back_transform()
8✔
1515
        if retrans:
8✔
1516
            self.transform()
8✔
1517
        return new_en
8✔
1518

1519
    def enforce(self, how="reset", bound_tol=0.0):
9✔
1520
        """entry point for bounds enforcement.
1521

1522
        Args:
1523
            enforce_bounds (`str`): can be 'reset' to reset offending values or 'drop' to drop
1524
                offending realizations.  Default is "reset"
1525

1526
        Note:
1527
            In very high dimensions, the "drop" and "scale" `how` types will result in either very few realizations
1528
            or very short realizations.
1529

1530
        Example::
1531

1532
            pst = pyemu.Pst("my.pst")
1533
            pe = pyemu.ParameterEnsemble.from_gaussian_draw()
1534
            pe.enforce(how="scale")
1535
            pe.to_csv("par.csv")
1536

1537

1538
        """
1539

1540
        if how.lower().strip() == "reset":
9✔
1541
            self._enforce_reset(bound_tol=bound_tol)
9✔
1542
        elif how.lower().strip() == "drop":
8✔
1543
            self._enforce_drop(bound_tol=bound_tol)
8✔
1544
        elif how.lower().strip() == "scale":
8✔
1545
            self._enforce_scale(bound_tol=bound_tol)
8✔
1546
        else:
1547
            raise Exception(
×
1548
                "unrecognized enforce_bounds arg:"
1549
                + "{0}, should be 'reset' or 'drop'".format(how)
1550
            )
1551

1552
    def _enforce_scale(self, bound_tol):
9✔
1553
        retrans = False
8✔
1554
        if self.istransformed:
8✔
1555
            self.back_transform()
×
1556
            retrans = True
×
1557
        ub = self.ubnd * (1.0 - bound_tol)
8✔
1558
        lb = self.lbnd * (1.0 + bound_tol)
8✔
1559
        base_vals = self.pst.parameter_data.loc[self._df.columns, "parval1"].copy()
8✔
1560
        ub_dist = (ub - base_vals).apply(np.abs)
8✔
1561
        lb_dist = (base_vals - lb).apply(np.abs)
8✔
1562

1563
        if ub_dist.min() <= 0.0:
8✔
1564
            raise Exception(
8✔
1565
                "Ensemble._enforce_scale() error: the following parameter"
1566
                + "are at or over ubnd: {0}".format(
1567
                    ub_dist.loc[ub_dist <= 0.0].index.values
1568
                )
1569
            )
1570
        if lb_dist.min() <= 0.0:
8✔
1571
            raise Exception(
8✔
1572
                "Ensemble._enforce_scale() error: the following parameter"
1573
                + "are at or under lbnd: {0}".format(
1574
                    lb_dist.loc[ub_dist <= 0.0].index.values
1575
                )
1576
            )
1577
        for ridx in self._df.index:
8✔
1578
            real = self._df.loc[ridx, :]
8✔
1579
            real_dist = (real - base_vals).apply(np.abs)
8✔
1580
            out_ubnd = real - ub
8✔
1581
            out_ubnd = out_ubnd.loc[out_ubnd > 0.0]
8✔
1582
            ubnd_facs = ub_dist.loc[out_ubnd.index] / real_dist.loc[out_ubnd.index]
8✔
1583

1584
            out_lbnd = lb - real
8✔
1585
            out_lbnd = out_lbnd.loc[out_lbnd > 0.0]
8✔
1586
            lbnd_facs = lb_dist.loc[out_lbnd.index] / real_dist.loc[out_lbnd.index]
8✔
1587

1588
            if ubnd_facs.shape[0] == 0 and lbnd_facs.shape[0] == 0:
8✔
1589
                continue
×
1590
            lmin = 1.0
8✔
1591
            umin = 1.0
8✔
1592
            sign = np.ones((self.pst.npar_adj))
8✔
1593
            sign[
8✔
1594
                real.loc[self.pst.adj_par_names] < base_vals.loc[self.pst.adj_par_names]
1595
            ] = -1.0
1596
            if ubnd_facs.shape[0] > 0:
8✔
1597
                umin = ubnd_facs.min()
8✔
1598
                umin_idx = ubnd_facs.idxmin()
8✔
1599
                print(
8✔
1600
                    "enforce_scale ubnd controlling parameter, scale factor, "
1601
                    + "current value for realization {0}: {1}, {2}, {3}".format(
1602
                        ridx, umin_idx, umin, real.loc[umin_idx]
1603
                    )
1604
                )
1605

1606
            if lbnd_facs.shape[0] > 0:
8✔
1607
                lmin = lbnd_facs.min()
8✔
1608
                # print(lbnd_facs)
1609
                lmin_idx = lbnd_facs.idxmin()
8✔
1610
                print(
8✔
1611
                    "enforce_scale lbnd controlling parameter, scale factor, "
1612
                    + "current value for realization {0}: {1}, {2}. {3}".format(
1613
                        ridx, lmin_idx, lmin, real.loc[lmin_idx]
1614
                    )
1615
                )
1616
            min_fac = min(umin, lmin)
8✔
1617

1618
            self._df.loc[ridx, :] = base_vals + (sign * real_dist * min_fac)
8✔
1619

1620
        if retrans:
8✔
1621
            self.transform()
×
1622

1623
    def _enforce_drop(self, bound_tol):
9✔
1624
        """enforce parameter bounds on the ensemble by dropping
1625
        violating realizations
1626

1627
        Note:
1628
            with a large (realistic) number of parameters, the
1629
            probability that any one parameter is out of
1630
            bounds is large, meaning most realization will
1631
            be dropped.
1632

1633
        """
1634
        ub = self.ubnd * (1.0 - bound_tol)
8✔
1635
        lb = self.lbnd * (1.0 + bound_tol)
8✔
1636
        drop = []
8✔
1637
        for ridx in self._df.index:
8✔
1638
            # mx = (ub - self.loc[id,:]).min()
1639
            # mn = (lb - self.loc[id,:]).max()
1640
            if (ub - self._df.loc[ridx, :]).min() < 0.0 or (
8✔
1641
                lb - self._df.loc[ridx, :]
1642
            ).max() > 0.0:
1643
                drop.append(ridx)
8✔
1644
        self.loc[drop, :] = np.NaN
8✔
1645
        self.dropna(inplace=True)
8✔
1646

1647
    def _enforce_reset(self, bound_tol):
9✔
1648
        """enforce parameter bounds on the ensemble by resetting
1649
        violating vals to bound
1650
        """
1651

1652
        ub = (self.ubnd * (1.0 - bound_tol)).to_dict()
9✔
1653
        lb = (self.lbnd * (1.0 + bound_tol)).to_dict()
9✔
1654

1655
        val_arr = self._df.values
9✔
1656
        for iname, name in enumerate(self.columns):
9✔
1657
            val_arr[val_arr[:, iname] > ub[name], iname] = ub[name]
9✔
1658
            val_arr[val_arr[:, iname] < lb[name], iname] = lb[name]
9✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc