pypest / pyemu coverage report, build 5887625428 (push, github, briochh, "Merge branch 'develop'", 17 Aug 2023 06:23AM UTC)
Coverage: 79.86% (11386 of 14258 relevant lines covered, +1.5% from 78.319%, 6.77 hits per line)
Source file: /pyemu/utils/geostats.py (80.97% covered)

"""Geostatistics in the PEST(++) realm
"""
from __future__ import print_function
import os
import copy
from datetime import datetime
import multiprocessing as mp
import warnings
import numpy as np
import pandas as pd
from pyemu.mat.mat_handler import Cov
from pyemu.utils.pp_utils import pp_file_to_dataframe
from ..pyemu_warnings import PyemuWarning

EPSILON = 1.0e-7

# class KrigeFactors(pd.DataFrame):
#     def __init__(self,*args,**kwargs):
#         super(KrigeFactors,self).__init__(*args,**kwargs)
#
#     def to_factors(self,filename,nrow,ncol,
#                    points_file="points.junk",
#                    zone_file="zone.junk"):
#         with open(filename,'w') as f:
#             f.write(points_file+'\n')
#             f.write(zone_file+'\n')
#             f.write("{0} {1}\n".format(ncol,nrow))
#             f.write("{0}\n".format(self.shape[0]))
#
#
#
#     def from_factors(self,filename):
#         raise NotImplementedError()


class GeoStruct(object):
    """a geostatistical structure object that mimics the behavior of a PEST
    geostatistical structure.  The object contains variogram instances and
    (optionally) nugget information.

    Args:
        nugget (`float` (optional)): nugget contribution. Default is 0.0
        variograms ([`pyemu.Vario2d`] (optional)): variogram(s) associated
            with this GeoStruct instance. Default is empty list
        name (`str` (optional)): name to assign the structure.  Default
            is "struct1".
        transform (`str` (optional)): the transformation to apply to
            the GeoStruct.  Can be "none" or "log", depending on the
            transformation of the property being represented by the `GeoStruct`.
            Default is "none"

    Example::

        v = pyemu.utils.geostats.ExpVario(a=1000,contribution=1.0)
        gs = pyemu.utils.geostats.GeoStruct(variograms=v,nugget=0.5)
        gs.plot()
        # get a covariance matrix implied by the geostruct for three points
        px = [0,1000,2000]
        py = [0,0,0]
        pnames = ["p1","p2","p3"]
        cov = gs.covariance_matrix(px,py,names=pnames)

    """

    def __init__(self, nugget=0.0, variograms=None, name="struct1", transform="none"):
        self.name = name
        self.nugget = float(nugget)
        """`float`: the nugget effect contribution"""
        # avoid a shared mutable default argument
        if variograms is None:
            variograms = []
        if not isinstance(variograms, list):
            variograms = [variograms]
        for vario in variograms:
            assert isinstance(vario, Vario2d)
        self.variograms = variograms
        """[`pyemu.utils.geostats.Vario2d`]: a list of variogram instances"""
        transform = transform.lower()
        assert transform in ["none", "log"]
        self.transform = transform
        """`str`: the transformation of the `GeoStruct`.  Can be 'log' or 'none'"""

    def __lt__(self, other):
        return self.name < other.name

    def __gt__(self, other):
        return self.name > other.name

    def same_as_other(self, other):
        """compare two geostructs for similar attributes

        Args:
            other (`pyemu.geostats.GeoStruct`): the other geostruct to compare to

        Returns:
            same (`bool`): True if `other` and `self` have the same characteristics

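        Example (an illustrative sketch, not from the original docs; assumes
        two structures built from identically parameterized variograms)::

            v1 = pyemu.geostats.ExpVario(a=1000,contribution=1.0)
            v2 = pyemu.geostats.ExpVario(a=1000,contribution=1.0)
            gs1 = pyemu.geostats.GeoStruct(variograms=v1)
            gs2 = pyemu.geostats.GeoStruct(variograms=v2)
            assert gs1.same_as_other(gs2)
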
        """
97
        if self.nugget != other.nugget:
×
98
            return False
×
99
        if len(self.variograms) != len(other.variograms):
×
100
            return False
×
101
        for sv, ov in zip(self.variograms, other.variograms):
×
102
            if not sv.same_as_other(ov):
×
103
                return False
×
104
        return True
×
105

106
    def to_struct_file(self, f):
9✔
107
        """write a PEST-style structure file
108

109
        Args:
110
            f (`str`): file to write the GeoStruct information in to.  Can
111
                also be an open file handle
112

113
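        Example (an illustrative sketch; "struct.dat" is a hypothetical
        output file name)::

            v = pyemu.geostats.ExpVario(a=1000,contribution=1.0)
            gs = pyemu.geostats.GeoStruct(variograms=v)
            gs.to_struct_file("struct.dat")
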
        """
114
        if isinstance(f, str):
8✔
115
            f = open(f, "w")
×
116
        f.write("STRUCTURE {0}\n".format(self.name))
8✔
117
        f.write("  NUGGET {0}\n".format(self.nugget))
8✔
118
        f.write("  NUMVARIOGRAM {0}\n".format(len(self.variograms)))
8✔
119
        for v in self.variograms:
8✔
120
            f.write("  VARIOGRAM {0} {1}\n".format(v.name, v.contribution))
8✔
121
        f.write("  TRANSFORM {0}\n".format(self.transform))
8✔
122
        f.write("END STRUCTURE\n\n")
8✔
123
        for v in self.variograms:
8✔
124
            v.to_struct_file(f)
8✔
125

    def covariance_matrix(self, x, y, names=None, cov=None):
        """build a `pyemu.Cov` instance from `GeoStruct`

        Args:
            x ([`float`]): x-coordinate locations
            y ([`float`]): y-coordinate locations
            names ([`str`] (optional)): names of locations. If None,
                cov must not be None.  Default is None.
            cov (`pyemu.Cov`): an existing Cov instance.  The contribution
                of this GeoStruct is added to cov.  If cov is None,
                names must not be None. Default is None

        Returns:
            `pyemu.Cov`: the covariance matrix implied by this
            GeoStruct for the x,y pairs. `cov` has row and column
            names supplied by the names argument unless the "cov"
            argument was passed.

        Note:
            either "names" or "cov" must be passed.  If "cov" is passed, cov.shape
            must equal len(x) and len(y).

        Example::

            pp_df = pyemu.pp_utils.pp_file_to_dataframe("hkpp.dat")
            cov = gs.covariance_matrix(pp_df.x,pp_df.y,pp_df.name)
            cov.to_binary("cov.jcb")


        """

        if not isinstance(x, np.ndarray):
            x = np.array(x)
        if not isinstance(y, np.ndarray):
            y = np.array(y)
        assert x.shape[0] == y.shape[0]

        if names is not None:
            assert x.shape[0] == len(names)
            c = np.zeros((len(names), len(names)))
            np.fill_diagonal(c, self.nugget)
            cov = Cov(x=c, names=names)
        elif cov is not None:
            assert cov.shape[0] == x.shape[0]
            names = cov.row_names
            c = np.zeros((len(names), 1))
            c += self.nugget
            cont = Cov(x=c, names=names, isdiagonal=True)
            cov += cont

        else:
            raise Exception(
                "GeoStruct.covariance_matrix() requires either names or cov arg"
            )
        for v in self.variograms:
            v.covariance_matrix(x, y, cov=cov)
        return cov

    def covariance(self, pt0, pt1):
        """get the covariance between two points implied by the `GeoStruct`.
        This is used during the ordinary kriging process to get the RHS

        Args:
            pt0 ([`float`]): xy-pair
            pt1 ([`float`]): xy-pair

        Returns:
            `float`: the covariance between pt0 and pt1 implied
            by the GeoStruct

        Example::

            p1 = [0,0]
            p2 = [1,1]
            v = pyemu.geostats.ExpVario(a=0.1,contribution=1.0)
            gs = pyemu.geostats.GeoStruct(variograms=v)
            c = gs.covariance(p1,p2)

        """
        # raise Exception()
        cov = self.nugget
        for vario in self.variograms:
            cov += vario.covariance(pt0, pt1)
        return cov

    def covariance_points(self, x0, y0, xother, yother):
        """Get the covariance between point (x0,y0) and the points
        contained in xother, yother.

        Args:
            x0 (`float`): x-coordinate
            y0 (`float`): y-coordinate
            xother ([`float`]): x-coordinates of other points
            yother ([`float`]): y-coordinates of other points

        Returns:
            `numpy.ndarray`: a 1-D array of covariance between point x0,y0 and the
            points contained in xother, yother.  len(cov) = len(xother) =
            len(yother)

        Example::

            x0,y0 = 1,1
            xother = [2,3,4,5]
            yother = [2,3,4,5]
            v = pyemu.geostats.ExpVario(a=0.1,contribution=1.0)
            gs = pyemu.geostats.GeoStruct(variograms=v)
            c = gs.covariance_points(x0,y0,xother,yother)


        """

        cov = np.zeros((len(xother))) + self.nugget
        for v in self.variograms:
            cov += v.covariance_points(x0, y0, xother, yother)
        return cov

    @property
    def sill(self):
        """get the sill of the `GeoStruct`

        Returns:
            `float`: the sill of the (nested) `GeoStruct`, including
            nugget and contribution from each variogram

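        Example (an illustrative sketch, not from the original docs; the sill
        is the nugget plus the contribution of each variogram, so this
        prints 1.5)::

            v = pyemu.geostats.ExpVario(a=1000,contribution=1.0)
            gs = pyemu.geostats.GeoStruct(variograms=v,nugget=0.5)
            print(gs.sill)
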
        """
252
        sill = self.nugget
9✔
253
        for v in self.variograms:
9✔
254
            sill += v.contribution
9✔
255
        return sill
9✔
256

257
    def plot(self, **kwargs):
        """make a cheap plot of the `GeoStruct`

        Args:
            **kwargs : (dict)
                keyword arguments to use for plotting.
        Returns:
            `matplotlib.pyplot.axis`: the axis with the GeoStruct plot

        Note:
            optional arguments include "ax" (an existing axis),
            "individuals" (plot each variogram on a separate axis),
            "legend" (add a legend to the plot(s)).  All other kwargs
            are passed to matplotlib.pyplot.plot()

        Example::

            v = pyemu.geostats.ExpVario(a=0.1,contribution=1.0)
            gs = pyemu.geostats.GeoStruct(variograms=v)
            gs.plot()

        """
        #
        if "ax" in kwargs:
            ax = kwargs.pop("ax")
        else:
            try:
                import matplotlib.pyplot as plt
            except Exception as e:
                raise Exception("error importing matplotlib: {0}".format(str(e)))

            ax = plt.subplot(111)
        legend = kwargs.pop("legend", False)
        individuals = kwargs.pop("individuals", False)
        xmx = max([v.a * 3.0 for v in self.variograms])
        x = np.linspace(0, xmx, 100)
        y = np.zeros_like(x)
        for v in self.variograms:
            yv = v.inv_h(x)
            if individuals:
                ax.plot(x, yv, label=v.name, **kwargs)
            y += yv
        y += self.nugget
        ax.plot(x, y, label=self.name, **kwargs)
        if legend:
            ax.legend()
        ax.set_xlabel("distance")
        ax.set_ylabel(r"$\gamma$")
        return ax

    def __str__(self):
        """the `str` representation of the `GeoStruct`

        Returns:
            `str`: the string representation of the GeoStruct
        """
        s = ""
        s += "name:{0},nugget:{1},structures:\n".format(self.name, self.nugget)
        for v in self.variograms:
            s += str(v)
        return s


class SpecSim2d(object):
    """2-D unconditional spectral simulation for regular grids

    Args:
        delx (`numpy.ndarray`): a 1-D array of x-dimension cell centers
            (or leading/trailing edges).  Only the distance between points
            is important
        dely (`numpy.ndarray`): a 1-D array of y-dimension cell centers
            (or leading/trailing edges).  Only the distance between points
            is important
        geostruct (`pyemu.geostats.GeoStruct`): geostatistical structure instance

    Example::

        v = pyemu.utils.geostats.ExpVario(a=100,contribution=1.0)
        gs = pyemu.utils.geostats.GeoStruct(variograms=v,nugget=0.5)
        delx,dely = np.ones(150), np.ones(50)
        ss = pyemu.utils.geostats.SpecSim2d(delx,dely,gs)
        arr = np.squeeze(ss.draw_arrays(num_reals=1))*.05 + .08
        plt.imshow(arr)
        plt.colorbar(shrink=.40)

    """

    def __init__(self, delx, dely, geostruct):

        self.geostruct = geostruct
        self.delx = delx
        self.dely = dely
        self.num_pts = np.NaN
        self.sqrt_fftc = np.NaN
        self.effective_variograms = None
        self.initialize()

    @staticmethod
    def grid_is_regular(delx, dely, tol=1.0e-4):
        """check that a grid is regular using delx and dely vectors

        Args:
            delx (`numpy.ndarray`): a 1-D array of x-dimension cell centers
                (or leading/trailing edges).  Only the distance between points
                is important
            dely (`numpy.ndarray`): a 1-D array of y-dimension cell centers
                (or leading/trailing edges).  Only the distance between points
                is important
            tol (`float` (optional)): tolerance to determine grid regularity.
                Default is 1.0e-4

        Returns:
            `bool`: flag indicating if the grid defined by `delx` and `dely` is regular

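        Example (an illustrative sketch, not from the original docs; a
        uniform unit grid passes while a strongly graded grid does not)::

            import numpy as np
            from pyemu.utils.geostats import SpecSim2d
            assert SpecSim2d.grid_is_regular(np.ones(10), np.ones(5))
            assert not SpecSim2d.grid_is_regular(np.linspace(1, 2, 10), np.ones(5))
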
        """
372
        if np.abs((delx.mean() - delx.min()) / delx.mean()) > tol:
9✔
373
            return False
8✔
374
        if (np.abs(dely.mean() - dely.min()) / dely.mean()) > tol:
9✔
375
            return False
8✔
376
        if (np.abs(delx.mean() - dely.mean()) / delx.mean()) > tol:
9✔
377
            return False
×
378
        return True
9✔
379

380
    def initialize(self):
        """prepare for spectral simulation.

        Note:
            `initialize()` prepares for simulation by undertaking
            the FFT on the wave number matrix and should be called
            if the `SpecSim2d.geostruct` is changed.
            This method is called by the constructor.


        """
        if not SpecSim2d.grid_is_regular(self.delx, self.dely):
            raise Exception("SpecSim2d() error: grid not regular")

        for v in self.geostruct.variograms:
            if v.bearing % 90.0 != 0.0:
                raise Exception("SpecSim2d only supports grid-aligned anisotropy...")

        # since we checked for grid regularity, we now can work in unit space
        # and use effective variograms:
        self.effective_variograms = []
        dist = self.delx[0]
        for v in self.geostruct.variograms:
            eff_v = type(v)(
                contribution=v.contribution,
                a=v.a / dist,
                bearing=v.bearing,
                anisotropy=v.anisotropy,
            )
            self.effective_variograms.append(eff_v)
        # pad the grid with 3X max range
        mx_a = -1.0e10
        for v in self.effective_variograms:
            mx_a = max(mx_a, v.a)
        mx_dim = max(self.delx.shape[0], self.dely.shape[0])
        freq_pad = int(np.ceil(mx_a * 3))
        freq_pad = int(np.ceil(freq_pad / 8.0) * 8.0)
        # use the max dimension so that simulation grid is square
        full_delx = np.ones((mx_dim + (2 * freq_pad)))
        full_dely = np.ones_like(full_delx)
        print(
            "SpecSim.initialize() summary: full_delx X full_dely: {0} X {1}".format(
                full_delx.shape[0], full_dely.shape[0]
            )
        )

        xdist = np.cumsum(full_delx)
        ydist = np.cumsum(full_dely)
        xdist -= xdist.min()
        ydist -= ydist.min()
        xgrid = np.zeros((ydist.shape[0], xdist.shape[0]))
        ygrid = np.zeros_like(xgrid)
        for j, d in enumerate(xdist):
            xgrid[:, j] = d
        for i, d in enumerate(ydist):
            ygrid[i, :] = d
        grid = np.array((xgrid, ygrid))
        domainsize = np.array((full_dely.shape[0], full_delx.shape[0]))
        for i in range(2):
            domainsize = domainsize[:, np.newaxis]
        grid = np.min((grid, np.array(domainsize) - grid), axis=0)
        # work out the contribution from each effective variogram and nugget
        c = np.zeros_like(xgrid)
        for v in self.effective_variograms:
            c += v._specsim_grid_contrib(grid)
        if self.geostruct.nugget > 0.0:
            h = ((grid ** 2).sum(axis=0)) ** 0.5
            c[np.where(h == 0)] += self.geostruct.nugget
        # fft components
        fftc = np.abs(np.fft.fftn(c))
        self.num_pts = np.prod(xgrid.shape)
        self.sqrt_fftc = np.sqrt(fftc / self.num_pts)

    def draw_arrays(self, num_reals=1, mean_value=1.0):
        """draw realizations

        Args:
            num_reals (`int`): number of realizations to generate
            mean_value (`float`): the mean value of the realizations

        Returns:
            `numpy.ndarray`: a 3-D array of realizations.  Shape
            is (num_reals,self.dely.shape[0],self.delx.shape[0])

        Note:
            log transformation is respected and the returned `reals` array is
            in linear space

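        Example (an illustrative sketch, not from the original docs; assumes
        `ss` is a `SpecSim2d` built on a 50-row by 150-column grid as in the
        class docstring)::

            reals = ss.draw_arrays(num_reals=10, mean_value=1.0)
            assert reals.shape == (10, 50, 150)
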
        """
468
        reals = []
9✔
469

470
        for ireal in range(num_reals):
9✔
471
            real = np.random.standard_normal(size=self.sqrt_fftc.shape)
9✔
472
            imag = np.random.standard_normal(size=self.sqrt_fftc.shape)
9✔
473
            epsilon = real + 1j * imag
9✔
474
            rand = epsilon * self.sqrt_fftc
9✔
475
            real = np.real(np.fft.ifftn(rand)) * self.num_pts
9✔
476
            real = real[: self.dely.shape[0], : self.delx.shape[0]]
9✔
477
            reals.append(real)
9✔
478
        reals = np.array(reals)
9✔
479
        if self.geostruct.transform == "log":
9✔
480
            reals += np.log10(mean_value)
9✔
481
            reals = 10 ** reals
9✔
482

483
        else:
484
            reals += mean_value
8✔
485
        return reals
9✔
486

487
    def grid_par_ensemble_helper(
        self, pst, gr_df, num_reals, sigma_range=6, logger=None
    ):
        """wrapper around `SpecSim2d.draw_arrays()` designed to support `PstFromFlopy`
        and `PstFrom` grid-based parameters

        Args:
            pst (`pyemu.Pst`): a control file instance
            gr_df (`pandas.DataFrame`): a dataframe listing `parval1`,
                `pargp`, `i`, `j` for each grid based parameter
            num_reals (`int`): number of realizations to generate
            sigma_range (`float` (optional)): number of standard deviations
                implied by parameter bounds in control file. Default is 6
            logger (`pyemu.Logger` (optional)): a logger instance for logging

        Returns:
            `pyemu.ParameterEnsemble`: an untransformed parameter ensemble of
            realized grid-parameter values

        Note:
            the method processes each unique `pargp` value in `gr_df` and resets the sill of `self.geostruct` by
            the maximum bounds-implied variance of each `pargp`.  This method makes repeated calls to
            `self.initialize()` to deal with the geostruct changes.

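        Example (an illustrative sketch, not from the original docs; "my.pst"
        and `gr_df` are hypothetical stand-ins for a control file and a
        grid-parameter dataframe with `parnme`, `parval1`, `pargp`, `i`, and
        `j` columns)::

            pst = pyemu.Pst("my.pst")
            pe = ss.grid_par_ensemble_helper(pst, gr_df, num_reals=100)
            pe.to_csv("grid_par_en.csv")
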
        """
512

513
        if "i" not in gr_df.columns:
9✔
514
            print(gr_df.columns)
×
515
            raise Exception(
×
516
                "SpecSim2d.grid_par_ensmeble_helper() error: 'i' not in gr_df"
517
            )
518
        if "j" not in gr_df.columns:
9✔
519
            print(gr_df.columns)
×
520
            raise Exception(
×
521
                "SpecSim2d.grid_par_ensmeble_helper() error: 'j' not in gr_df"
522
            )
523
        if len(self.geostruct.variograms) > 1:
9✔
524
            raise Exception(
×
525
                "SpecSim2D grid_par_ensemble_helper() error: only a single variogram can be used..."
526
            )
527
        gr_df.loc[:, ["i", "j"]] = gr_df[["i", "j"]].astype(int)
9✔
528

529
        # scale the total contrib
530
        org_var = self.geostruct.variograms[0].contribution
9✔
531
        org_nug = self.geostruct.nugget
9✔
532
        new_var = org_var
9✔
533
        new_nug = org_nug
9✔
534
        if self.geostruct.sill != 1.0:
9✔
535
            print(
×
536
                "SpecSim2d.grid_par_ensemble_helper() warning: scaling contribution and nugget to unity"
537
            )
538
            tot = org_var + org_nug
×
539
            new_var = org_var / tot
×
540
            new_nug = org_nug / tot
×
541
            self.geostruct.variograms[0].contribution = new_var
×
542
            self.geostruct.nugget = new_nug
×
543

544
        gr_grps = gr_df.pargp.unique()
9✔
545
        pst.add_transform_columns()
9✔
546
        par = pst.parameter_data
9✔
547

548
        # real and name containers
549
        real_arrs, names = [], []
9✔
550
        for gr_grp in gr_grps:
9✔
551

552
            gp_df = gr_df.loc[gr_df.pargp == gr_grp, :]
9✔
553

554
            gp_par = par.loc[gp_df.parnme, :]
9✔
555
            # use the parval1 as the mean
556
            mean_arr = np.zeros((self.dely.shape[0], self.delx.shape[0])) + np.NaN
9✔
557
            mean_arr[gp_df.i, gp_df.j] = gp_par.parval1
9✔
558
            # fill missing mean values
559
            mean_arr[np.isnan(mean_arr)] = gp_par.parval1.mean()
9✔
560

561
            # use the max upper and min lower (transformed) bounds for the variance
562
            mx_ubnd = gp_par.parubnd_trans.max()
9✔
563
            mn_lbnd = gp_par.parlbnd_trans.min()
9✔
564
            var = ((mx_ubnd - mn_lbnd) / sigma_range) ** 2
9✔
565

566
            # update the geostruct
567
            self.geostruct.variograms[0].contribution = var * new_var
9✔
568
            self.geostruct.nugget = var * new_nug
9✔
569
            # print(gr_grp, var,new_var,mx_ubnd,mn_lbnd)
570
            # reinitialize and draw
571
            if logger is not None:
9✔
572
                logger.log(
9✔
573
                    "SpecSim: drawing {0} realization for group {1} with {4} pars, (log) variance {2} (sill {3})".format(
574
                        num_reals, gr_grp, var, self.geostruct.sill, gp_df.shape[0]
575
                    )
576
                )
577
            self.initialize()
9✔
578
            reals = self.draw_arrays(num_reals=num_reals, mean_value=mean_arr)
9✔
579
            # put the pieces into the par en
580
            reals = reals[:, gp_df.i, gp_df.j].reshape(num_reals, gp_df.shape[0])
9✔
581
            real_arrs.append(reals)
9✔
582
            names.extend(list(gp_df.parnme.values))
9✔
583
            if logger is not None:
9✔
584
                logger.log(
9✔
585
                    "SpecSim: drawing {0} realization for group {1} with {4} pars, (log) variance {2} (sill {3})".format(
586
                        num_reals, gr_grp, var, self.geostruct.sill, gp_df.shape[0]
587
                    )
588
                )
589

590
        # get into a dataframe
591
        reals = real_arrs[0]
9✔
592
        for r in real_arrs[1:]:
9✔
593
            reals = np.append(reals, r, axis=1)
9✔
594
        pe = pd.DataFrame(data=reals, columns=names)
9✔
595
        # reset to org conditions
596
        self.geostruct.nugget = org_nug
9✔
597
        self.geostruct.variograms[0].contribution = org_var
9✔
598
        self.initialize()
9✔
599

600
        return pe
9✔
601

602
    def draw_conditional(
        self,
        seed,
        obs_points,
        sg,
        base_values_file,
        local=True,
        factors_file=None,
        num_reals=1,
        mean_value=1.0,
        R_factor=1.0,
    ):

        """Generate a conditional, correlated random field using the SpecSim2d
            object, a set of observation points, and a factors file.

            The conditional field is made by generating an unconditional correlated random
            field that captures the covariance in the variogram and conditioning it by kriging
            a second surface using the value of the random field as observations.
            This second conditioning surface provides an estimate of uncertainty (kriging error)
            away from the observation points. At the observation points, the kriged surface is
            equal to (less nugget effects) the observation. The conditioned correlated field
            is then generated using: T(x) = Z(x) + [S(x) − S∗(x)]
            where T(x) is the conditioned simulation, Z(x) is a kriging estimator of the
            unknown field, S(x) is an unconditioned random field with the same covariance
            structure as the desired field, and S∗(x) is a kriging estimate of the unconditioned
            random field using its values at the observation points (pilot points).
            [S(x) − S∗(x)] is an estimate of the kriging error.

            This approach makes T(x) match the observed values at the observation points
            (x_a, y_a), T(x_a) = Z(x_a), and have a structure away from the observation points
            that follows the variogram used to generate Z, S, and S∗.

            Chiles, J-P, and Delfiner, P., Geostatistics: Modeling Spatial Uncertainty: Wiley,
                London, 695 p.

        Args:
            seed (`int`): integer used for random seed.  If seed is used as a PEST parameter,
                then passing the same value for seed will yield the same
                conditioned random fields. This allows runs to be recreated
                given an ensemble of seeds.
            obs_points (`str` or `dataframe`): locations for observation points.
                Either a filename in pyemu pilot point file format:
                ["name","x","y","zone","parval1"] or a dataframe with these columns.
                Note that parval1 is not used.
            sg: flopy StructuredGrid object
            base_values_file (`str`): filename containing a 2-D array with the base
                parameter values from which the random field will depart (Z(x) above).
                Values of Z(x) are used for conditioning, not parval1 in the
                observation point file.
            local (`boolean`): whether coordinates in obs_points are in local (model) or map coordinates
            factors_file (`str`): name of the factors file generated using the
                locations of the observation points and the target grid.
                If None, this file will be generated and called conditional_factors.dat;
                but this is a slow step and should not generally be called for every simulation.
            num_reals (`int`): number of realizations to generate
            mean_value (`float`): the mean value of the realizations
            R_factor (`float`): a factor to scale the field, sometimes the variation from the
                geostruct parameters is larger or smaller than desired.

        Returns:
            `numpy.ndarray`: a 3-D array of realizations.  Shape is
                (num_reals, self.dely.shape[0], self.delx.shape[0])
        Note:
            log transformation is respected and the returned `reals`
                array is in arithmetic space
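
        Example (an illustrative sketch, not from the original docs; `sg` is
        a flopy structured grid, and "obs.dat" and "base_values.dat" are
        hypothetical input files)::

            reals = ss.draw_conditional(seed=42, obs_points="obs.dat", sg=sg,
                                        base_values_file="base_values.dat",
                                        num_reals=10)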
        """
669

670
        # get a dataframe for the observation points, from file unless passed
671
        if isinstance(obs_points, str):
×
672
            obs_points = pp_file_to_dataframe(obs_points)
×
673
        assert isinstance(obs_points, pd.DataFrame), "need a DataFrame, not {0}".format(
×
674
            type(obs_points)
675
        )
676

677
        # if factors_file is not passed, generate one from the geostruct associated
678
        # with the calling object and the observation points dataframe
679
        if factors_file is None:
×
680
            ok = OrdinaryKrige(self.geostruct, obs_points)
×
681
            ok.calc_factors_grid(sg, zone_array=None, var_filename=None)
×
682
            ok.to_grid_factors_file("conditional_factors.dat")
×
683
            factors_file = "conditional_factors.dat"
×
684

685
        # read in the base values, Z(x), assume these are not log-transformed
686
        values_krige = np.loadtxt(base_values_file)
×
687

688
        np.random.seed(int(seed))
×
689

690
        # draw random fields for num_reals
691
        unconditioned = self.draw_arrays(num_reals=num_reals, mean_value=mean_value)
×
692

693
        # If geostruct is log transformed, then work with log10 of field
694
        if self.geostruct.transform == "log":
×
695
            unconditioned = np.log10(unconditioned)
×
696

697
        # now do the conditioning by making another kriged surface with the
698
        # values of the unconditioned random fields at the pilot points
699
        conditioning_df = obs_points.copy()
×
700
        # need to row and column for the x and y values in the observation
701
        # dataframe, regular grid is tested when object is instantiated
702
        # so grid spacing can be used.
703
        conditioning_df["row"] = conditioning_df.apply(
×
704
            lambda row: sg.intersect(row["x"], row["y"], local=local)[0], axis=1
705
        )
706
        conditioning_df["col"] = conditioning_df.apply(
×
707
            lambda row: sg.intersect(row["x"], row["y"], local=local)[1], axis=1
708
        )
709
        reals = []
×
710
        for layer in range(0, num_reals):
×
711
            unconditioned[layer] = (
×
712
                unconditioned[layer] * R_factor
713
            )  # scale the unconditioned values
714
            conditioning_df["unconditioned"] = conditioning_df.apply(
×
715
                lambda row: unconditioned[layer][row["row"], row["col"]], axis=1
716
            )
717
            conditioning_df.to_csv(
×
718
                "unconditioned.dat",
719
                columns=["name", "x", "y", "zone", "unconditioned"],
720
                sep=" ",
721
                header=False,
722
                index=False,
723
            )
724
            # krige a surface using unconditioned observations to make the conditioning surface
725
            fac2real(
×
726
                pp_file="unconditioned.dat",
727
                factors_file=factors_file,
728
                out_file="conditioning.dat",
729
            )
730
            conditioning = np.loadtxt("conditioning.dat")
×
731

732
            if self.geostruct.transform == "log":
×
733
                conditioned = np.log10(values_krige) + (
×
734
                    unconditioned[layer] - conditioning
735
                )
736
                conditioned = np.power(10, conditioned)
×
737
            else:
738
                conditioned = values_krige + (unconditioned[layer] - conditioning)
×
739

740
            reals.append(conditioned)
×
741

742
        reals = np.array(reals)
×
743
        return reals
×
744

745

746
class OrdinaryKrige(object):
    """Ordinary Kriging using Pandas and Numpy.

    Args:
        geostruct (`GeoStruct`): a pyemu.geostats.GeoStruct to use for the kriging
        point_data (`pandas.DataFrame`): the conditioning points to use for kriging.
            `point_data` must contain columns "name", "x", "y".

    Note:
        if `point_data` is a `str`, then it is assumed to be a pilot points file
        and is loaded as such using `pyemu.pp_utils.pp_file_to_dataframe()`

        If zoned interpolation is used for grid-based interpolation, then
        `point_data` must also contain a "zone" column


    Example::

        import pyemu
        v = pyemu.utils.geostats.ExpVario(a=1000,contribution=1.0)
        gs = pyemu.utils.geostats.GeoStruct(variograms=v,nugget=0.5)
        pp_df = pyemu.pp_utils.pp_file_to_dataframe("hkpp.dat")
        ok = pyemu.utils.geostats.OrdinaryKrige(gs,pp_df)

    """

    def __init__(self, geostruct, point_data):
        if isinstance(geostruct, str):
            geostruct = read_struct_file(geostruct)
        assert isinstance(geostruct, GeoStruct), "need a GeoStruct, not {0}".format(
            type(geostruct)
        )
        self.geostruct = geostruct
        if isinstance(point_data, str):
            point_data = pp_file_to_dataframe(point_data)
        assert isinstance(point_data, pd.DataFrame)
        assert "name" in point_data.columns, "point_data missing 'name'"
        assert "x" in point_data.columns, "point_data missing 'x'"
        assert "y" in point_data.columns, "point_data missing 'y'"
        # check for duplicates in point data
        unique_name = point_data.name.unique()
        if len(unique_name) != point_data.shape[0]:
            warnings.warn(
                "duplicates detected in point_data... attempting to rectify", PyemuWarning
            )
            ux_std = point_data.groupby(point_data.name).x.std()
            if ux_std.max() > 0.0:
                raise Exception("duplicate point_info entries with different x values")
            uy_std = point_data.groupby(point_data.name).y.std()
            if uy_std.max() > 0.0:
                raise Exception("duplicate point_info entries with different y values")

            self.point_data = point_data.drop_duplicates(subset=["name"])
        else:
            self.point_data = point_data.copy()
        self.point_data.index = self.point_data.name
        self.check_point_data_dist()
        self.interp_data = None
        self.spatial_reference = None
        # X, Y = np.meshgrid(point_data.x,point_data.y)
        # self.point_data_dist = pd.DataFrame(data=np.sqrt((X - X.T) ** 2 + (Y - Y.T) ** 2),
        #                                    index=point_data.name,columns=point_data.name)
        self.point_cov_df = self.geostruct.covariance_matrix(
            self.point_data.x, self.point_data.y, self.point_data.name
        ).to_dataframe()
        # for name in self.point_cov_df.index:
        #    self.point_cov_df.loc[name,name] -= self.geostruct.nugget

    def check_point_data_dist(self, rectify=False):
        """check for point_data entries that are closer than
        EPSILON distance - this will cause a singular kriging matrix.

        Args:
            rectify (`bool`): flag to fix the problems with point_data
                by dropping additional points that are
                closer than EPSILON distance.  Default is False

        Note:
            this method will issue warnings for points that are closer
            than EPSILON distance

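        Example (an illustrative sketch, not from the original docs; drops
        any point that duplicates the location of an earlier point)::

            ok = pyemu.utils.geostats.OrdinaryKrige(gs, pp_df)
            ok.check_point_data_dist(rectify=True)
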
        """
828

829
        ptx_array = self.point_data.x.values
9✔
830
        pty_array = self.point_data.y.values
9✔
831
        ptnames = self.point_data.name.values
9✔
832
        drop = []
9✔
833
        for i in range(self.point_data.shape[0]):
9✔
834
            ix, iy, iname = ptx_array[i], pty_array[i], ptnames[i]
9✔
835
            dist = pd.Series(
9✔
836
                (ptx_array[i + 1 :] - ix) ** 2 + (pty_array[i + 1 :] - iy) ** 2,
837
                ptnames[i + 1 :],
838
            )
839
            if dist.min() < EPSILON ** 2:
9✔
840
                print(iname, ix, iy)
×
841
                warnings.warn(
×
842
                    "points {0} and {1} are too close. This will cause a singular kriging matrix ".format(
843
                        iname, dist.idxmin()
844
                    ),
845
                    PyemuWarning,
846
                )
847
                drop_idxs = dist.loc[dist <= EPSILON ** 2]
×
848
                drop.extend([pt for pt in list(drop_idxs.index) if pt not in drop])
×
849
        if rectify and len(drop) > 0:
9✔
850
            print(
×
851
                "rectifying point data by removing the following points: {0}".format(
852
                    ",".join(drop)
853
                )
854
            )
855
            print(self.point_data.shape)
×
856
            self.point_data = self.point_data.loc[
×
857
                self.point_data.index.map(lambda x: x not in drop), :
858
            ]
859
            print(self.point_data.shape)
×
860

861
    # def prep_for_ppk2fac(self,struct_file="structure.dat",pp_file="points.dat",):
862
    #    pass
863

864
    def calc_factors_grid(
        self,
        spatial_reference,
        zone_array=None,
        minpts_interp=1,
        maxpts_interp=20,
        search_radius=1.0e10,
        verbose=False,
        var_filename=None,
        forgive=False,
        num_threads=1,
    ):
        """calculate kriging factors (weights) for a structured grid.

        Args:
            spatial_reference (`flopy.utils.reference.SpatialReference`): a spatial
                reference that describes the orientation and
                spatial projection of the structured grid
            zone_array (`numpy.ndarray`): an integer array of zones to use for kriging.
                If not None, then `point_data` must also contain a "zone" column.  `point_data`
                entries with a zone value not found in zone_array will be skipped.
                If None, then all `point_data` will (potentially) be used for
                interpolating each grid node. Default is None
            minpts_interp (`int`): minimum number of `point_data` entries to use for interpolation at
                a given grid node.  grid nodes with less than `minpts_interp`
                `point_data` found will be skipped (assigned np.NaN).  Default is 1
            maxpts_interp (`int`): maximum number of `point_data` entries to use for interpolation at
                a given grid node.  A larger `maxpts_interp` will yield "smoother"
                interpolation, but using a large `maxpts_interp` will slow the
                (already) slow kriging solution process and may lead to
                memory errors. Default is 20.
            search_radius (`float`): the size of the region around a given grid node to search for
                `point_data` entries. Default is 1.0e+10
            verbose (`bool`): a flag to echo the process to stdout during interpolation.
                Default is False
            var_filename (`str`): a filename to save the kriging variance for each interpolated grid node.
                Default is None.
            forgive (`bool`): flag to continue if inversion of the kriging matrix fails at one or more
                grid nodes.  Inversion usually fails if the kriging matrix is singular,
                resulting from `point_data` entries closer than EPSILON distance.  If True,
                warnings are issued for each failed inversion.  If False, an exception
                is raised for failed matrix inversion.
            num_threads (`int`): number of multiprocessing workers to use to try to speed up
                kriging in python.  Default is 1.

        Returns:
            `pandas.DataFrame`: a dataframe with information summarizing the ordinary kriging
            process for each grid node

        Note:
            this method calls `OrdinaryKrige.calc_factors()`

            this method is the main entry point for grid-based kriging factor generation


        Example::

            import flopy
            v = pyemu.utils.geostats.ExpVario(a=1000,contribution=1.0)
            gs = pyemu.utils.geostats.GeoStruct(variograms=v,nugget=0.5)
            pp_df = pyemu.pp_utils.pp_file_to_dataframe("hkpp.dat")
            ok = pyemu.utils.geostats.OrdinaryKrige(gs,pp_df)
            m = flopy.modflow.Modflow.load("mymodel.nam")
            df = ok.calc_factors_grid(m.sr,zone_array=m.bas6.ibound[0].array,
                                      var_filename="ok_var.dat")
            ok.to_grid_factors_file("factors.dat")

        """

        self.spatial_reference = spatial_reference
        self.interp_data = None
        # assert isinstance(spatial_reference,SpatialReference)
        try:
            x = self.spatial_reference.xcentergrid.copy()
            y = self.spatial_reference.ycentergrid.copy()
        except Exception as e:
            raise Exception(
                "spatial_reference does not have proper attributes:{0}".format(str(e))
            )

        if var_filename is not None:
            if self.spatial_reference.grid_type == "vertex":
                arr = (
                    np.zeros((self.spatial_reference.ncpl, 1))
                    - 1.0e30
                )
            else:
                arr = (
                    np.zeros((self.spatial_reference.nrow, self.spatial_reference.ncol))
                    - 1.0e30
                )

        # the simple case of no zone array: ignore point_data zones
        if zone_array is None:

            df = self.calc_factors(
                x.ravel(),
                y.ravel(),
                minpts_interp=minpts_interp,
                maxpts_interp=maxpts_interp,
                search_radius=search_radius,
                verbose=verbose,
                forgive=forgive,
                num_threads=num_threads,
            )

            if var_filename is not None:
                arr = df.err_var.values.reshape(x.shape)
                np.savetxt(var_filename, arr, fmt="%15.6E")

        if zone_array is not None:
            if self.spatial_reference.grid_type == "vertex":
                assert zone_array.shape[0] == x.shape[0]
            else:
                assert zone_array.shape == x.shape
            if "zone" not in self.point_data.columns:
                warnings.warn(
                    "'zone' columns not in point_data, assigning generic zone",
                    PyemuWarning,
                )
                self.point_data.loc[:, "zone"] = 1
            pt_data_zones = self.point_data.zone.unique()
            dfs = []
            for pt_data_zone in pt_data_zones:
                if pt_data_zone not in zone_array:
                    warnings.warn(
                        "pt zone {0} not in zone array {1}, skipping".format(
                            pt_data_zone, np.unique(zone_array)
                        ),
                        PyemuWarning,
                    )
                    continue
                # cutting list of cell positions to just in zone
                if spatial_reference.grid_type == "vertex":
                    xzone = x[(zone_array == pt_data_zone).ravel()].copy()
                    yzone = y[(zone_array == pt_data_zone).ravel()].copy()
                else:
                    xzone = x[zone_array == pt_data_zone].copy()
                    yzone = y[zone_array == pt_data_zone].copy()

                idx = np.arange(
                    len(zone_array.ravel())
                )[(zone_array == pt_data_zone).ravel()]
                # xzone[zone_array != pt_data_zone] = np.NaN
                # yzone[zone_array != pt_data_zone] = np.NaN

                df = self.calc_factors(
                    xzone,
                    yzone,
                    idx_vals=idx,  # need to pass if xzone,yzone is not all x,y
                    minpts_interp=minpts_interp,
                    maxpts_interp=maxpts_interp,
                    search_radius=search_radius,
                    verbose=verbose,
                    pt_zone=pt_data_zone,
                    forgive=forgive,
                    num_threads=num_threads,
                )

                dfs.append(df)
                if var_filename is not None:
                    # rebuild full df so we can build array, as per
                    fulldf = pd.DataFrame(data={"x": x.ravel(), "y": y.ravel()})
                    fulldf[['idist', 'inames', 'ifacts', 'err_var']] = np.array(
                        [[[], [], [], np.nan]] * len(fulldf),
                        dtype=object)
                    fulldf = fulldf.set_index(['x', 'y'])
                    fulldf.loc[df.set_index(['x', 'y']).index] = df.set_index(['x', 'y'])
                    fulldf = fulldf.reset_index()
                    a = fulldf.err_var.values.reshape(x.shape)
                    na_idx = np.isfinite(a.astype(float))
                    if len(a.shape) == 1:
                        a = np.reshape(a, (a.shape[0], 1))
                    arr[na_idx] = a[na_idx]
            if self.interp_data is None or self.interp_data.dropna().shape[0] == 0:
                raise Exception("no interpolation took place...something is wrong")
            df = pd.concat(dfs)
        if var_filename is not None:
            np.savetxt(var_filename, arr, fmt="%15.6E")
        return df

    def _dist_calcs(self, ix, iy, ptx_array, pty_array, ptnames, sqradius):
        """private: find nearby points"""
        #  calc dist from this interp point to all point data...slow
        dist = pd.Series((ptx_array - ix) ** 2 + (pty_array - iy) ** 2, ptnames)
        dist.sort_values(inplace=True)
        dist = dist.loc[dist <= sqradius]
        return dist

    def _remove_neg_factors(self):
        """
        private function to remove negative kriging factors and
        renormalize remaining positive factors following the
        method of Deutsch (1996):
        https://doi.org/10.1016/0098-3004(96)00005-2

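        Example (illustrative arithmetic only, not from the original docs;
        a factor list [0.8, 0.4, -0.2] drops the negative weight and
        renormalizes the rest to sum to unity)::

            f = np.array([0.8, 0.4, -0.2])
            f = f[f > 0]
            f /= f.sum()  # -> array([0.66666667, 0.33333333])
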
        """
1061
        newd, newn, newf = (
9✔
1062
            [],
1063
            [],
1064
            [],
1065
        )
1066
        for d, n, f in zip(
9✔
1067
            self.interp_data.idist.values,
1068
            self.interp_data.inames.values,
1069
            self.interp_data.ifacts.values,
1070
        ):
1071
            # if the factor list is empty, no changes are made
1072
            # if the factor list has only one value, it is 1.0 so no changes
1073
            # if more than one factor, remove negatives and renormalize
1074
            if len(f) > 1:
9✔
1075
                # only keep dist, names, and factors of factor > 0
1076
                d = np.array(d)
9✔
1077
                n = np.array(n)
9✔
1078
                f = np.array(f)
9✔
1079
                d = d[f > 0]
9✔
1080
                n = n[f > 0]
9✔
1081
                f = f[f > 0]
9✔
1082
                f /= f.sum()  # renormalize to sum to unity
9✔
1083
            newd.append(d)
9✔
1084
            newn.append(n)
9✔
1085
            newf.append(f)
9✔
1086
        # update the interp_data dataframe
1087
        self.interp_data.idist = newd
9✔
1088
        self.interp_data.inames = newn
9✔
1089
        self.interp_data.ifacts = newf
9✔
1090

1091
    def _cov_points(self, ix, iy, pt_names):
        """private: get covariance between points"""
        interp_cov = self.geostruct.covariance_points(
            ix,
            iy,
            self.point_data.loc[pt_names, "x"],
            self.point_data.loc[pt_names, "y"],
        )

        return interp_cov

    def _form(self, pt_names, point_cov, interp_cov):
        """private: form the kriging equations"""

        # ordinary kriging system: the point-to-point covariance matrix is
        # bordered with a row/column of ones (and a zero in the corner) for
        # the lagrange multiplier that enforces unbiasedness
        d = len(pt_names) + 1  # +1 for lagrange mult
        A = np.ones((d, d))
        A[:-1, :-1] = point_cov.values
        A[-1, -1] = 0.0  # unbiasedness constraint
        rhs = np.ones((d, 1))
        rhs[:-1, 0] = interp_cov
        return A, rhs

    def _solve(self, A, rhs):
        """private: solve the kriging equations"""
        return np.linalg.solve(A, rhs)

    def calc_factors(
        self,
        x,
        y,
        minpts_interp=1,
        maxpts_interp=20,
        search_radius=1.0e10,
        verbose=False,
        pt_zone=None,
        forgive=False,
        num_threads=1,
        idx_vals=None,
        remove_negative_factors=True,
    ):
        """calculate ordinary kriging factors (weights) for the points
        represented by arguments x and y

        Args:
            x ([`float`]): x-coordinates to calculate kriging factors for
            y ([`float`]): y-coordinates to calculate kriging factors for
            minpts_interp (`int`): minimum number of point_data entries to use for interpolation at
                a given x,y interpolation point.  interpolation points with less
                than `minpts_interp` `point_data` found will be skipped
                (assigned np.NaN).  Default is 1
            maxpts_interp (`int`): maximum number of point_data entries to use for interpolation at
                a given x,y interpolation point.  A larger `maxpts_interp` will
                yield "smoother" interpolation, but using a large `maxpts_interp`
                will slow the (already) slow kriging solution process and may
                lead to memory errors. Default is 20.
            search_radius (`float`): the size of the region around a given x,y
                interpolation point to search for `point_data` entries. Default is 1.0e+10
            verbose (`bool`): a flag to echo the process to stdout during interpolation.
                Default is False
            pt_zone (optional): the `point_data` zone to use for interpolation.
                If None, all `point_data` entries are used. Default is None
            forgive (`bool`): flag to continue if inversion of the kriging matrix fails at one or more
                interpolation points.  Inversion usually fails if the kriging matrix is singular,
                resulting from `point_data` entries closer than EPSILON distance.  If True,
                warnings are issued for each failed inversion.  If False, an exception
                is raised for failed matrix inversion.
            num_threads (`int`): number of multiprocessing workers to use to try to speed up
                kriging in python.  Default is 1.
            idx_vals (iterable of `int`): optional index values to use in the interpolation dataframe.  This is
                used to set the proper node number in the factors file for unstructured grids.
            remove_negative_factors (`bool`): option to remove negative kriging factors, following the method of
                Deutsch (1996) https://doi.org/10.1016/0098-3004(96)00005-2. Default is True

        Returns:
            `pandas.DataFrame`: a dataframe with information summarizing the ordinary kriging
            process for each interpolation point

        Note:
            this method calls either `OrdinaryKrige._calc_factors_org()` or
            `OrdinaryKrige._calc_factors_mp()` depending on the value of `num_threads`

        Example::

            v = pyemu.utils.geostats.ExpVario(a=1000,contribution=1.0)
            gs = pyemu.utils.geostats.GeoStruct(variograms=v,nugget=0.5)
            pp_df = pyemu.pp_utils.pp_file_to_dataframe("hkpp.dat")
            ok = pyemu.utils.geostats.OrdinaryKrige(gs,pp_df)
            x = np.arange(100)
            y = np.ones_like(x)
            zone_array = y.copy()
            zone_array[:zone_array.shape[0]//2] = 2
            # only calc factors for the points in zone 1
            ok.calc_factors(x,y,pt_zone=1)
            ok.to_grid_factors_file("zone_1.fac",ncol=x.shape[0])

        """
        # can do this up here, as same between org and mp method
        assert len(x) == len(y)
        if idx_vals is not None and len(idx_vals) != len(x):
            raise Exception("len(idx_vals) != len(x)")
        # find the point data to use for each interp point
        df = pd.DataFrame(data={"x": x, "y": y})
        if idx_vals is not None:
            df.index = np.array(idx_vals).astype(int)
        # now can just pass df (contains x and y)
        # trunc to just deal with pp locations in zones
        pt_data = self.point_data
        if pt_zone is None:
            ptx_array = self.point_data.x.values
            pty_array = self.point_data.y.values
            ptnames = self.point_data.name.values
        else:
            ptx_array = pt_data.loc[pt_data.zone == pt_zone, "x"].values
            pty_array = pt_data.loc[pt_data.zone == pt_zone, "y"].values
            ptnames = pt_data.loc[pt_data.zone == pt_zone, "name"].values
            # pt_data = pt_data.loc[ptnames]
        if num_threads == 1:
            return self._calc_factors_org(
                df,
                ptx_array,
                pty_array,
                ptnames,
                minpts_interp,
                maxpts_interp,
                search_radius,
                verbose,
                pt_zone,
                forgive,
                remove_negative_factors,
            )
        else:
            return self._calc_factors_mp(
                df,
                ptx_array,
                pty_array,
                ptnames,
                minpts_interp,
                maxpts_interp,
                search_radius,
                verbose,
                pt_zone,
                forgive,
                num_threads,
                remove_negative_factors,
            )

    def _calc_factors_org(
        self,
        df,
        ptx_array,
        pty_array,
        ptnames,
        minpts_interp=1,
        maxpts_interp=20,
        search_radius=1.0e10,
        verbose=False,
        pt_zone=None,
        forgive=False,
        remove_negative_factors=True,
    ):
        inames, idist, ifacts, err_var = [], [], [], []
        sill = self.geostruct.sill
        # find the point data to use for each interp point
        sqradius = search_radius ** 2
        print("starting interp point loop for {0} points".format(df.shape[0]))
        start_loop = datetime.now()
        for idx, (ix, iy) in enumerate(zip(df.x, df.y)):
            if np.isnan(ix) or np.isnan(iy):  # if nans, skip
                inames.append([])
                idist.append([])
                ifacts.append([])
                err_var.append(np.NaN)
                continue
            if verbose:
                istart = datetime.now()
                print("processing interp point:{0} of {1}".format(idx, df.shape[0]))
            if verbose == 2:
                start = datetime.now()
                print("calc ipoint dist...", end='')

            # calc dist from this interp point to all point data...slow
            dist = self._dist_calcs(ix, iy, ptx_array, pty_array, ptnames,
                                    sqradius)

            # if too few points were found, skip
            if len(dist) < minpts_interp:
                inames.append([])
                idist.append([])
                ifacts.append([])
                err_var.append(sill)
                continue

            # only the maxpts_interp points
            pt_names = dist.index.values[:maxpts_interp]
            dist = np.sqrt(dist.values[:maxpts_interp])

            # if one of the points is super close, just use it and skip
            if dist.min() <= EPSILON:
                ifacts.append([1.0])
                idist.append([EPSILON])
                inames.append([pt_names[dist.argmin()]])
                err_var.append(self.geostruct.nugget)
                continue

            # extract the point-to-point covariance matrix
            point_cov = self.point_cov_df.loc[pt_names, pt_names]

            # calc the interp point to points covariance
            interp_cov = self._cov_points(ix, iy, pt_names)
            if verbose == 2:
                td = (datetime.now() - start).total_seconds()
                print("...took {0} seconds".format(td))
                print("forming lin alg components...", end="")

            # form the linear algebra parts and solve
            A, rhs = self._form(pt_names, point_cov, interp_cov)

            try:
                facs = self._solve(A, rhs)
            except Exception as e:
                print("error solving for factors: {0}".format(str(e)))
                print("point:", ix, iy)
                print("dist:", dist)
                print("A:", A)
                print("rhs:", rhs)
                if forgive:
                    inames.append([])
                    idist.append([])
                    ifacts.append([])
                    err_var.append(np.NaN)
                    continue
                else:
                    raise Exception("error solving for factors:{0}".format(str(e)))
            assert len(facs) - 1 == len(dist)

            err_var.append(
                float(
                    (sill
                     + facs[-1]
                     - sum([f * c for f, c in zip(facs[:-1], interp_cov)])
                     ).squeeze()
                )
            )
            inames.append(pt_names)

            idist.append(dist)
            ifacts.append(facs[:-1, 0])
            if verbose:
                td = (datetime.now() - istart).total_seconds()
                print("point took {0}".format(td))
        df["idist"] = idist
        df["inames"] = inames
        df["ifacts"] = ifacts
        df["err_var"] = err_var

        if pt_zone is None:
            self.interp_data = df
        else:
            if self.interp_data is None:
                self.interp_data = df
            else:
                self.interp_data = pd.concat([self.interp_data, df])
        # correct for negative kriging factors, if requested
        if remove_negative_factors == True:
            self._remove_neg_factors()
        td = (datetime.now() - start_loop).total_seconds()
        print("took {0} seconds".format(td))
        return df

    def _calc_factors_mp(
        self,
        df,
        ptx_array,
        pty_array,
        ptnames,
        minpts_interp=1,
        maxpts_interp=20,
        search_radius=1.0e10,
        verbose=False,
        pt_zone=None,
        forgive=False,
        num_threads=1,
        remove_negative_factors=True,
    ):
        sqradius = search_radius ** 2
        print("starting interp point loop for {0} points".format(df.shape[0]))
        start_loop = datetime.now()
        # ensure same order as point data and just pass array
        point_cov_data = self.point_cov_df.loc[ptnames, ptnames].values
        point_pairs = [(i, xx, yy) for i, (xx, yy) in enumerate(zip(df.x, df.y))]
        idist = [[]] * len(df.x)
        inames = [[]] * len(df.x)
        ifacts = [[]] * len(df.x)
        err_var = [np.NaN] * len(df.x)
        with mp.Manager() as manager:
            point_pairs = manager.list(point_pairs)
            idist = manager.list(idist)
            inames = manager.list(inames)
            ifacts = manager.list(ifacts)
            err_var = manager.list(err_var)
            lock = mp.Lock()
            procs = []
            for i in range(num_threads):
                print("starting", i)
                p = mp.Process(
                    target=OrdinaryKrige._worker,
                    args=(
                        ptx_array,
                        pty_array,
                        ptnames,
                        point_pairs,
                        inames,
                        idist,
                        ifacts,
                        err_var,
                        point_cov_data,
                        self.geostruct,
                        EPSILON,
                        sqradius,
                        minpts_interp,
                        maxpts_interp,
                        lock,
                    ),
                )
                p.start()
                procs.append(p)
            for p in procs:
                p.join()

            df[["idist", "inames", "ifacts"]] = pd.DataFrame(
                [[s[0], n[0], f[0]] for s, n, f in zip(idist, inames, ifacts)],
                columns=["idist", "inames", "ifacts"], index=df.index)

            df["err_var"] = [
                float(e[0]) if not isinstance(e[0], list) else float(e[0][0])
                for e in err_var
            ]

        if pt_zone is None:
            self.interp_data = df
        else:
            if self.interp_data is None:
                self.interp_data = df
            else:
                self.interp_data = pd.concat([self.interp_data, df])
        # correct for negative kriging factors, if requested
        if remove_negative_factors == True:
            self._remove_neg_factors()
        td = (datetime.now() - start_loop).total_seconds()
        print("took {0} seconds".format(td))
        return df

    @staticmethod
    def _worker(
        ptx_array,
        pty_array,
        ptnames,
        point_pairs,
        inames,
        idist,
        ifacts,
        err_var,
        full_point_cov,
        geostruct,
        epsilon,
        sqradius,
        minpts_interp,
        maxpts_interp,
        lock,
    ):
        sill = geostruct.sill
        while True:
            if len(point_pairs) == 0:
                return
            else:
                try:
                    idx, ix, iy = point_pairs.pop(0)
                except IndexError:
                    return

            if np.isnan(ix) or np.isnan(iy):  # if nans, skip
                ifacts[idx] = [[]]
                idist[idx] = [[]]
                inames[idx] = [[]]
                err_var[idx] = [np.NaN]
                continue

            # calc dist from this interp point to all point data
            dist = (ptx_array - ix) ** 2 + (pty_array - iy) ** 2
            sortorder = np.argsort(dist)
            dist = dist[sortorder]
            pt_names = ptnames[sortorder]
            trunc = dist <= sqradius
            dist = dist[trunc]
            pt_names = pt_names[trunc]
            sortorder = sortorder[trunc]

            # if too few points were found, skip
            if len(dist) < minpts_interp:
                ifacts[idx] = [[]]
                idist[idx] = [[]]
                inames[idx] = [[]]
                err_var[idx] = [sill]
                continue

            # only the maxpts_interp points
            dist = np.sqrt(dist[:maxpts_interp])
            pt_names = pt_names[:maxpts_interp]
            sortorder = sortorder[:maxpts_interp]

            # if one of the points is super close, just use it and skip
            if dist[0] <= epsilon:
                ifacts[idx] = [[1.0]]
                idist[idx] = [[epsilon]]
                inames[idx] = [[pt_names[0]]]
                err_var[idx] = [[geostruct.nugget]]
                continue

            # extract the point-to-point covariance matrix
            point_cov = full_point_cov[tuple([sortorder[:, None], sortorder])]
            # calc the interp point to points covariance
            interp_cov = geostruct.covariance_points(
                ix, iy, ptx_array[sortorder], pty_array[sortorder]
            )

            # form the linear algebra parts and solve
            d = len(pt_names) + 1  # +1 for lagrange mult
            A = np.ones((d, d))
            A[:-1, :-1] = point_cov
            A[-1, -1] = 0.0  # unbiased constraint
            rhs = np.ones((d, 1))
            rhs[:-1, 0] = interp_cov

            try:
                facs = np.linalg.solve(A, rhs)
            except Exception as e:
                print("error solving for factors: {0}".format(str(e)))
                print("point:", ix, iy)
                print("dist:", dist)
                print("A:", A)
                print("rhs:", rhs)
                continue

            assert len(facs) - 1 == len(dist)

            err_var[idx] = [
                float(
                    (sill
                     + facs[-1]
                     - sum([f * c for f, c in zip(facs[:-1], interp_cov)])
                     ).squeeze()
                )
            ]
            inames[idx] = [pt_names.tolist()]
            idist[idx] = [dist.tolist()]
            ifacts[idx] = [facs[:-1, 0].tolist()]

    def to_grid_factors_file(
        self, filename, points_file="points.junk", zone_file="zone.junk", ncol=None
    ):
        """write a PEST-style factors file.  This file can be used with
        the fac2real() method to write an interpolated structured or unstructured array

        Args:
            filename (`str`): factor filename
            points_file (`str`): points filename to add to the header of the factors file.
                This is not used by the fac2real() method.  Default is "points.junk"
            zone_file (`str`): zone filename to add to the header of the factors file.
                This is not used by the fac2real() method.  Default is "zone.junk"
            ncol (`int`): column value to write to factors file.  This is normally determined
                from the spatial reference and should only be passed for unstructured grids -
                it should be equal to the number of nodes in the current property file. Default is None.
                Required for unstructured grid models.

        Note:
            this method should be called after OrdinaryKrige.calc_factors_grid() for structured
            models or after OrdinaryKrige.calc_factors() for unstructured models.
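
        Example::

            # a minimal sketch, assuming ``ok`` is an OrdinaryKrige instance
            # for an unstructured grid with 100 nodes and that factors have
            # already been calculated for x,y
            ok.calc_factors(x, y)
            ok.to_grid_factors_file("factors.dat", ncol=100)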
        """
        if self.interp_data is None:
            raise Exception(
                "ok.interp_data is None, must call calc_factors_grid() first"
            )
        if self.spatial_reference is None:
            print(
                "OrdinaryKrige.to_grid_factors_file(): spatial_reference attr is None, assuming unstructured grid"
            )
            if ncol is None:
                raise Exception("'ncol' arg must be passed for unstructured grids")
            nrow = 1
            if ncol < self.interp_data.shape[0]:
                raise Exception(
                    "ncol ({0}) is less than the number of interpolation points ({1})".format(
                        ncol, self.interp_data.shape[0]
                    )
                )
        elif self.spatial_reference.grid_type == 'vertex':
            nrow = self.spatial_reference.ncpl
            ncol = 1
        else:
            nrow = self.spatial_reference.nrow
            ncol = self.spatial_reference.ncol
        with open(filename, "w") as f:
            f.write(points_file + "\n")
            f.write(zone_file + "\n")
            f.write("{0} {1}\n".format(ncol, nrow))
            f.write("{0}\n".format(self.point_data.shape[0]))
            [f.write("{0}\n".format(name)) for name in self.point_data.name]
            t = 0
            if self.geostruct.transform == "log":
                t = 1
            pt_names = list(self.point_data.name)
            for idx, names, facts in zip(
                self.interp_data.index, self.interp_data.inames, self.interp_data.ifacts
            ):
                if len(facts) == 0:
                    continue
                n_idxs = [pt_names.index(name) for name in names]
                f.write("{0} {1} {2} {3:8.5e} ".format(idx + 1, t, len(names), 0.0))
                [
                    f.write("{0} {1:12.8g} ".format(i + 1, w))
                    for i, w in zip(n_idxs, facts)
                ]
                f.write("\n")


class Vario2d(object):
    """base class for 2-D variograms.

    Args:
        contribution (`float`): sill of the variogram
        a (`float`): (practical) range of correlation
        anisotropy (`float`, optional): Anisotropy ratio. Default is 1.0
        bearing (`float`, optional): angle in degrees East of North corresponding
            to anisotropy ellipse. Default is 0.0
        name (`str`, optional): name of the variogram.  Default is "var1"

    Note:
        This base class should not be instantiated directly as it does not implement
        an h_function() method.
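
    Example::

        # Vario2d is abstract - instantiate a derived type instead.  A sketch
        # of an anisotropic variogram using the shared constructor args:
        v = pyemu.utils.geostats.ExpVario(a=1000,contribution=1.0,
                                          anisotropy=2.0,bearing=45.0)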
    """

    def __init__(self, contribution, a, anisotropy=1.0, bearing=0.0, name="var1"):
        self.name = name
        self.epsilon = EPSILON
        self.contribution = float(contribution)
        assert self.contribution > 0.0
        self.a = float(a)
        assert self.a > 0.0
        self.anisotropy = float(anisotropy)
        assert self.anisotropy > 0.0
        self.bearing = float(bearing)

    def same_as_other(self, other):
        if type(self) != type(other):
            return False
        if self.contribution != other.contribution:
            return False
        if self.anisotropy != other.anisotropy:
            return False
        if self.a != other.a:
            return False
        if self.bearing != other.bearing:
            return False
        return True

    def to_struct_file(self, f):
        """write the `Vario2d` to a PEST-style structure file

        Args:
            f (`str`): filename to write to.  `f` can also be an open
                file handle.
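
        Example::

            # a sketch: write this variogram's block to a structure file
            v = pyemu.utils.geostats.ExpVario(a=1000,contribution=1.0)
            v.to_struct_file("struct.dat")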
        """
        if isinstance(f, str):
            f = open(f, "w")
        f.write("VARIOGRAM {0}\n".format(self.name))
        f.write("  VARTYPE {0}\n".format(self.vartype))
        f.write("  A {0}\n".format(self.a))
        f.write("  ANISOTROPY {0}\n".format(self.anisotropy))
        f.write("  BEARING {0}\n".format(self.bearing))
        f.write("END VARIOGRAM\n\n")

9✔
1717
    def bearing_rads(self):
9✔
1718
        """get the bearing of the Vario2d in radians
1719

1720
        Returns:
1721
            `float`: the Vario2d bearing in radians
1722
        """
1723
        return (np.pi / 180.0) * (90.0 - self.bearing)
9✔
1724

1725
    @property
9✔
1726
    def rotation_coefs(self):
9✔
1727
        """get the rotation coefficents in radians
1728

1729
        Returns:
1730
            [`float`]: the rotation coefficients implied by `Vario2d.bearing`
1731

1732

1733
        """
1734
        return [
9✔
1735
            np.cos(self.bearing_rads),
1736
            np.sin(self.bearing_rads),
1737
            -1.0 * np.sin(self.bearing_rads),
1738
            np.cos(self.bearing_rads),
1739
        ]
1740

1741
    def inv_h(self, h):
9✔
1742
        """the inverse of the h_function.  Used for plotting
1743

1744
        Args:
1745
            h (`float`): the value of h_function to invert
1746

1747
        Returns:
1748
            `float`: the inverse of h
1749

1750
        """
1751
        return self.contribution - self._h_function(h)
1✔
1752

1753
    def plot(self, **kwargs):
        """get a cheap plot of the Vario2d

        Args:
            **kwargs (`dict`): keyword arguments to use for plotting

        Returns:
            `matplotlib.pyplot.axis`

        Note:
            optional arguments in kwargs include
            "ax" (existing `matplotlib.pyplot.axis`).  Other
            kwargs are passed to `matplotlib.pyplot.plot()`
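
        Example::

            # a sketch, assuming matplotlib is installed
            v = pyemu.utils.geostats.ExpVario(a=1000,contribution=1.0)
            ax = v.plot()
            ax.figure.savefig("vario.png")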
        """
        try:
            import matplotlib.pyplot as plt
        except Exception as e:
            raise Exception("error importing matplotlib: {0}".format(str(e)))

        ax = kwargs.pop("ax", plt.subplot(111))
        x = np.linspace(0, self.a * 3, 100)
        y = self.inv_h(x)
        ax.set_xlabel("distance")
        ax.set_ylabel(r"$\gamma$")
        ax.plot(x, y, **kwargs)
        return ax

    def covariance_matrix(self, x, y, names=None, cov=None):
        """build a pyemu.Cov instance implied by Vario2d

        Args:
            x ([`float`]): x-coordinate locations
            y ([`float`]): y-coordinate locations
            names ([`str`]): names of locations. If None, cov must not be None
            cov (`pyemu.Cov`): an existing Cov instance.  Vario2d contribution is
                added to cov in place

        Returns:
            `pyemu.Cov`: the covariance matrix for `x`, `y` implied by `Vario2d`

        Note:
            either `names` or `cov` must not be None.
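
        Example::

            # a small sketch: covariance among three collinear points
            v = pyemu.utils.geostats.ExpVario(a=1000,contribution=1.0)
            cov = v.covariance_matrix([0,500,1000],[0,0,0],
                                      names=["p1","p2","p3"])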
        """
        if not isinstance(x, np.ndarray):
            x = np.array(x)
        if not isinstance(y, np.ndarray):
            y = np.array(y)
        assert x.shape[0] == y.shape[0]

        if names is not None:
            assert x.shape[0] == len(names)
            c = np.zeros((len(names), len(names)))
            np.fill_diagonal(c, self.contribution)
            cov = Cov(x=c, names=names)
        elif cov is not None:
            assert cov.shape[0] == x.shape[0]
            names = cov.row_names
            c = np.zeros((len(names), 1)) + self.contribution
            cont = Cov(x=c, names=names, isdiagonal=True)
            cov += cont

        else:
            raise Exception(
                "Vario2d.covariance_matrix() requires either names or cov arg"
            )
        rc = self.rotation_coefs
        for i1, (n1, x1, y1) in enumerate(zip(names, x, y)):
            dx = x1 - x[i1 + 1 :]
            dy = y1 - y[i1 + 1 :]
            dxx, dyy = self._apply_rotation(dx, dy)
            h = np.sqrt(dxx * dxx + dyy * dyy)

            h[h < 0.0] = 0.0
            h = self._h_function(h)
            if np.any(np.isnan(h)):
                raise Exception("nans in h for i1 {0}".format(i1))
            cov.x[i1, i1 + 1 :] += h
        for i in range(len(names)):
            cov.x[i + 1 :, i] = cov.x[i, i + 1 :]
        return cov

    def _specsim_grid_contrib(self, grid):
        rot_grid = grid
        if self.bearing % 90.0 != 0:
            dx, dy = self._apply_rotation(grid[0, :, :], grid[1, :, :])
            rot_grid = np.array((dx, dy))
        h = ((rot_grid ** 2).sum(axis=0)) ** 0.5
        c = self._h_function(h)
        return c

    def _apply_rotation(self, dx, dy):
        """private method to rotate points
        according to Vario2d.bearing and Vario2d.anisotropy

        """
        if self.anisotropy == 1.0:
            return dx, dy
        rcoefs = self.rotation_coefs
        dxx = (dx * rcoefs[0]) + (dy * rcoefs[1])
        dyy = ((dx * rcoefs[2]) + (dy * rcoefs[3])) * self.anisotropy
        return dxx, dyy

    def covariance_points(self, x0, y0, xother, yother):
        """get the covariance between base point (x0,y0) and
        other points xother,yother implied by `Vario2d`

        Args:
            x0 (`float`): x-coordinate
            y0 (`float`): y-coordinate
            xother ([`float`]): x-coordinates of other points
            yother ([`float`]): y-coordinates of other points

        Returns:
            `numpy.ndarray`: a 1-D array of covariance between point x0,y0 and the
            points contained in xother, yother.  len(cov) = len(xother) =
            len(yother)
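
        Example::

            # a sketch: covariance between a base point and two others
            v = pyemu.utils.geostats.ExpVario(a=1000,contribution=1.0)
            cov = v.covariance_points(0.0,0.0,np.array([100.0,200.0]),
                                      np.array([0.0,0.0]))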
        """
        dxx = x0 - xother
        dyy = y0 - yother
        dxx, dyy = self._apply_rotation(dxx, dyy)
        h = np.sqrt(dxx * dxx + dyy * dyy)
        return self._h_function(h)

    def covariance(self, pt0, pt1):
        """get the covariance between two points implied by Vario2d

        Args:
            pt0 ([`float`]): first point x and y
            pt1 ([`float`]): second point x and y

        Returns:
            `float`: covariance between pt0 and pt1
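
        Example::

            # a sketch: covariance between two points 500 units apart
            v = pyemu.utils.geostats.ExpVario(a=1000,contribution=1.0)
            c = v.covariance([0.0,0.0],[500.0,0.0])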
        """

        x = np.array([pt0[0], pt1[0]])
        y = np.array([pt0[1], pt1[1]])
        names = ["n1", "n2"]
        return self.covariance_matrix(x, y, names=names).x[0, 1]

    def __str__(self):
        """get the str representation of Vario2d

        Returns:
            `str`: string rep
        """
        s = "name:{0},contribution:{1},a:{2},anisotropy:{3},bearing:{4}\n".format(
            self.name, self.contribution, self.a, self.anisotropy, self.bearing
        )
        return s


class ExpVario(Vario2d):
    """Exponential variogram derived type

    Args:
        contribution (`float`): sill of the variogram
        a (`float`): (practical) range of correlation
        anisotropy (`float`, optional): Anisotropy ratio. Default is 1.0
        bearing (`float`, optional): angle in degrees East of North corresponding
            to anisotropy ellipse. Default is 0.0
        name (`str`, optional): name of the variogram.  Default is "var1"

    Example::

        v = pyemu.utils.geostats.ExpVario(a=1000,contribution=1.0)

    """

    def __init__(self, contribution, a, anisotropy=1.0, bearing=0.0, name="var1"):
        super(ExpVario, self).__init__(
            contribution, a, anisotropy=anisotropy, bearing=bearing, name=name
        )
        self.vartype = 2

    def _h_function(self, h):
        """private method exponential variogram "h" function"""
1935
        return self.contribution * np.exp(-1.0 * h / self.a)
9✔
1936

1937

1938
class GauVario(Vario2d):
    """Gaussian variogram derived type

    Args:
        contribution (`float`): sill of the variogram
        a (`float`): (practical) range of correlation
        anisotropy (`float`, optional): Anisotropy ratio. Default is 1.0
        bearing (`float`, optional): angle in degrees East of North corresponding
            to anisotropy ellipse. Default is 0.0
        name (`str`, optional): name of the variogram.  Default is "var1"

    Example::

        v = pyemu.utils.geostats.GauVario(a=1000,contribution=1.0)

    Note:
        the Gaussian variogram can be unstable (not invertible) for long ranges.

    """

    def __init__(self, contribution, a, anisotropy=1.0, bearing=0.0, name="var1"):
        super(GauVario, self).__init__(
            contribution, a, anisotropy=anisotropy, bearing=bearing, name=name
        )
        self.vartype = 3

    def _h_function(self, h):
        """private method for the gaussian variogram "h" function"""
1966

1967
        hh = -1.0 * (h * h) / (self.a * self.a)
8✔
1968
        return self.contribution * np.exp(hh)
8✔
1969

1970

1971
class SphVario(Vario2d):
    """Spherical variogram derived type

    Args:
        contribution (`float`): sill of the variogram
        a (`float`): (practical) range of correlation
        anisotropy (`float`, optional): Anisotropy ratio. Default is 1.0
        bearing (`float`, optional): angle in degrees East of North corresponding
            to anisotropy ellipse. Default is 0.0
        name (`str`, optional): name of the variogram.  Default is "var1"

    Example::

        v = pyemu.utils.geostats.SphVario(a=1000,contribution=1.0)

    """

    def __init__(self, contribution, a, anisotropy=1.0, bearing=0.0, name="var1"):
        super(SphVario, self).__init__(
            contribution, a, anisotropy=anisotropy, bearing=bearing, name=name
        )
        self.vartype = 1

    def _h_function(self, h):
        """private method for the spherical variogram "h" function"""
        hh = h / self.a
        h = self.contribution * (1.0 - (hh * (1.5 - (0.5 * hh * hh))))
        h[hh > 1.0] = 0.0
        return h


def read_struct_file(struct_file, return_type=GeoStruct):
    """read an existing PEST-type structure file into a GeoStruct instance

    Args:
        struct_file (`str`): existing pest-type structure file
        return_type (`object`): the instance type to return.  Default is GeoStruct

    Returns:
        [`pyemu.GeoStruct`]: list of `GeoStruct` instances.  If
        only one `GeoStruct` is in the file, then a `GeoStruct` is returned

    Example::

        gs = pyemu.utils.geostats.read_struct_file("struct.dat")

    """

    VARTYPE = {1: SphVario, 2: ExpVario, 3: GauVario, 4: None}
    assert os.path.exists(struct_file)
    structures = []
    variograms = []
    with open(struct_file, "r") as f:
        while True:
            line = f.readline()
            if line == "":
                break
            line = line.strip().lower()
            if line.startswith("structure"):
                name = line.strip().split()[1]
                nugget, transform, variogram_info = _read_structure_attributes(f)
                s = return_type(nugget=nugget, transform=transform, name=name)
                s.variogram_info = variogram_info
                # deep copy here so the structures don't end up sharing
                # the same variogram instances later
                structures.append(copy.deepcopy(s))
            elif line.startswith("variogram"):
                name = line.strip().split()[1].lower()
                vartype, bearing, a, anisotropy = _read_variogram(f)
                if name in variogram_info:
                    v = VARTYPE[vartype](
                        variogram_info[name],
                        a,
                        anisotropy=anisotropy,
                        bearing=bearing,
                        name=name,
                    )
                    variograms.append(v)

    for i, st in enumerate(structures):
        for vname in st.variogram_info:
            vfound = None
            for v in variograms:
                if v.name == vname:
                    vfound = v
                    break
            if vfound is None:
                raise Exception(
                    "variogram {0} not found for structure {1}".format(vname, st.name)
                )

            st.variograms.append(vfound)
    if len(structures) == 1:
        return structures[0]
    return structures


def _read_variogram(f):
    """Function to instantiate a Vario2d from a PEST-style structure file"""

    line = ""
    vartype = None
    bearing = 0.0
    a = None
    anisotropy = 1.0
    while "end variogram" not in line:
        line = f.readline()
        if line == "":
            raise Exception("EOF while reading variogram")
        line = line.strip().lower().split()
        if line[0].startswith("#"):
            continue
        if line[0] == "vartype":
            vartype = int(line[1])
        elif line[0] == "bearing":
            bearing = float(line[1])
        elif line[0] == "a":
            a = float(line[1])
        elif line[0] == "anisotropy":
            anisotropy = float(line[1])
        elif line[0] == "end":
            break
        else:
            raise Exception("unrecognized arg in variogram:{0}".format(line[0]))
    return vartype, bearing, a, anisotropy


def _read_structure_attributes(f):
    """function to read information from a PEST-style structure file"""

    line = ""
    variogram_info = {}
    while "end structure" not in line:
        line = f.readline()
        if line == "":
            raise Exception("EOF while reading structure")
        line = line.strip().lower().split()
        if line[0].startswith("#"):
            continue
        if line[0] == "nugget":
            nugget = float(line[1])
        elif line[0] == "transform":
            transform = line[1]
        elif line[0] == "numvariogram":
            numvariograms = int(line[1])
        elif line[0] == "variogram":
            variogram_info[line[1]] = float(line[2])
        elif line[0] == "end":
            break
        elif line[0] == "mean":
            warnings.warn("'mean' attribute not supported, skipping", PyemuWarning)
        else:
            raise Exception(
                "unrecognized line in structure definition:{0}".format(line[0])
            )
    assert numvariograms == len(variogram_info)
    return nugget, transform, variogram_info


def read_sgems_variogram_xml(xml_file, return_type=GeoStruct):
    """function to read an SGEMS-type variogram XML file into
    a `GeoStruct`

    Args:
        xml_file (`str`): SGEMS variogram XML file
        return_type (`object`): the instance type to return.  Default is `GeoStruct`

    Returns:
        `GeoStruct`: the `GeoStruct` instance implied by the XML file

    Example::

        gs = pyemu.utils.geostats.read_sgems_variogram_xml("sgems.xml")

    """
    try:
        import xml.etree.ElementTree as ET

    except Exception as e:
        print("error importing elementtree, skipping...")
    VARTYPE = {1: SphVario, 2: ExpVario, 3: GauVario, 4: None}
    assert os.path.exists(xml_file)
    tree = ET.parse(xml_file)
    gs_model = tree.getroot()
    structures = []
    variograms = []
    nugget = 0.0
    num_struct = 0
    for key, val in gs_model.items():
        if str(key).lower() == "nugget":
            if len(val) > 0:
                nugget = float(val)
        if str(key).lower() == "structures_count":
            num_struct = int(val)
    if num_struct == 0:
        raise Exception("no structures found")
    if num_struct != 1:
        raise NotImplementedError()
    for structure in gs_model:
        vtype, contribution = None, None
        mx_range, mn_range = None, None
        x_angle, y_angle = None, None
        for key, val in structure.items():
            key = str(key).lower()
            if key == "type":
                vtype = str(val).lower()
                if vtype.startswith("sph"):
                    vtype = SphVario
                elif vtype.startswith("exp"):
                    vtype = ExpVario
                elif vtype.startswith("gau"):
                    vtype = GauVario
                else:
                    raise Exception("unrecognized variogram type:{0}".format(vtype))

            elif key == "contribution":
                contribution = float(val)
            for item in structure:
                if item.tag.lower() == "ranges":
                    mx_range = float(item.attrib["max"])
                    mn_range = float(item.attrib["min"])
                elif item.tag.lower() == "angles":
                    x_angle = float(item.attrib["x"])
                    y_angle = float(item.attrib["y"])

        assert contribution is not None
        assert mn_range is not None
        assert mx_range is not None
        assert x_angle is not None
        assert y_angle is not None
        assert vtype is not None
        v = vtype(
            contribution=contribution,
            a=mx_range,
            anisotropy=mx_range / mn_range,
            bearing=(180.0 / np.pi) * np.arctan2(x_angle, y_angle),
            name=structure.tag,
        )
        return GeoStruct(nugget=nugget, variograms=[v])


def gslib_2_dataframe(filename, attr_name=None, x_idx=0, y_idx=1):
    """function to read a GSLIB point data file into a pandas.DataFrame

    Args:
        filename (`str`): GSLIB file
        attr_name (`str`): the column name in the dataframe for the attribute.
            If None, GSLIB file can have only 3 columns.  `attr_name` must be in
            the GSLIB file header
        x_idx (`int`): the index of the x-coordinate information in the GSLIB file. Default is
            0 (first column)
        y_idx (`int`): the index of the y-coordinate information in the GSLIB file.
            Default is 1 (second column)

    Returns:
        `pandas.DataFrame`: a dataframe of info from the GSLIB file

    Note:
        assigns generic point names ("pt0", "pt1", etc.)

    Example::

        df = pyemu.utils.geostats.gslib_2_dataframe("prop.gslib",attr_name="hk")

    """
    with open(filename, "r") as f:
        title = f.readline().strip()
        num_attrs = int(f.readline().strip())
        attrs = [f.readline().strip() for _ in range(num_attrs)]
        if attr_name is not None:
            assert attr_name in attrs, "{0} not in attrs:{1}".format(
                attr_name, ",".join(attrs)
            )
        else:
            assert (
                len(attrs) == 3
            ), "attr_name is None but more than 3 attrs in gslib file"
            attr_name = attrs[2]
        assert len(attrs) > x_idx
        assert len(attrs) > y_idx
        a_idx = attrs.index(attr_name)
        x, y, a = [], [], []
        while True:
            line = f.readline()
            if line == "":
                break
            raw = line.strip().split()
            try:
                x.append(float(raw[x_idx]))
                y.append(float(raw[y_idx]))
                a.append(float(raw[a_idx]))
            except Exception as e:
                raise Exception("error parsing line {0}: {1}".format(line, str(e)))
    df = pd.DataFrame({"x": x, "y": y, "value": a})
    df.loc[:, "name"] = ["pt{0}".format(i) for i in range(df.shape[0])]
    df.index = df.name
    return df


# class ExperimentalVariogram(object):
#    def __init__(self,na)


def load_sgems_exp_var(filename):
    """read an SGEMS experimental variogram into a sequence of
    pandas.DataFrames

    Args:
        filename (`str`): an SGEMS experimental variogram XML file

    Returns:
        `dict`: a dictionary of pandas.DataFrames of x, y, pairs, keyed by
        the title of each division in the experimental variogram
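
    Example::

        # a sketch: load and inspect each division's experimental variogram
        dfs = pyemu.utils.geostats.load_sgems_exp_var("exp_var.xml")
        for title, df in dfs.items():
            print(title, df.shape)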
    """

    assert os.path.exists(filename)
    import xml.etree.ElementTree as etree

    tree = etree.parse(filename)
    root = tree.getroot()
    dfs = {}
    for variogram in root:
        for attrib in variogram:
            if attrib.tag == "title":
                title = attrib.text.split(",")[0].split("=")[-1]
            elif attrib.tag == "x":
                x = [float(i) for i in attrib.text.split()]
            elif attrib.tag == "y":
                y = [float(i) for i in attrib.text.split()]
            elif attrib.tag == "pairs":
                pairs = [int(i) for i in attrib.text.split()]

            for item in attrib:
                print(item, item.tag)
        df = pd.DataFrame({"x": x, "y": y, "pairs": pairs})
        df.loc[df.y < 0.0, "y"] = np.NaN
        dfs[title] = df
    return dfs


def fac2real(
    pp_file=None,
    factors_file="factors.dat",
    out_file="test.ref",
    upper_lim=1.0e30,
    lower_lim=-1.0e30,
    fill_value=1.0e30,
):
    """A python replication of the PEST fac2real utility for creating a
    structured grid array from previously calculated kriging factors (weights)

    Args:
        pp_file (`str`): PEST-type pilot points file
        factors_file (`str`): PEST-style factors file
        out_file (`str`): filename of array to write.  If None, array is returned, else
            value of out_file is returned.  Default is "test.ref".
        upper_lim (`float`): maximum interpolated value in the array.  Values greater than
            `upper_lim` are set to fill_value
        lower_lim (`float`): minimum interpolated value in the array.  Values less than
            `lower_lim` are set to fill_value
        fill_value (`float`): the value to assign array nodes that are not interpolated

    Returns:
        `numpy.ndarray`: if out_file is None

        `str`: if out_file is not None

    Example::

        pyemu.utils.geostats.fac2real("hkpp.dat",out_file="hk_layer_1.ref")
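
        # or, as a sketch, get the interpolated array back directly
        # instead of writing it to a file
        arr = pyemu.utils.geostats.fac2real("hkpp.dat",out_file=None)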
    """

    if pp_file is not None and isinstance(pp_file, str):
        assert os.path.exists(pp_file)
        pp_data = pp_file_to_dataframe(pp_file)
        pp_data.loc[:, "name"] = pp_data.name.apply(lambda x: x.lower())
    elif pp_file is not None and isinstance(pp_file, pd.DataFrame):
        assert "name" in pp_file.columns
        assert "parval1" in pp_file.columns
        pp_data = pp_file
    else:
        raise Exception(
            "unrecognized pp_file arg: must be str or pandas.DataFrame, not {0}".format(
                type(pp_file)
            )
        )
    assert os.path.exists(factors_file), "factors file not found"
    f_fac = open(factors_file, "r")
    fpp_file = f_fac.readline()
    if pp_file is None and pp_data is None:
        pp_data = pp_file_to_dataframe(fpp_file)
        pp_data.loc[:, "name"] = pp_data.name.apply(lambda x: x.lower())

    fzone_file = f_fac.readline()
    ncol, nrow = [int(i) for i in f_fac.readline().strip().split()]
    npp = int(f_fac.readline().strip())
    pp_names = [f_fac.readline().strip().lower() for _ in range(npp)]

    # check that pp_names is sync'd with pp_data
    diff = set(list(pp_data.name)).symmetric_difference(set(pp_names))
    if len(diff) > 0:
        raise Exception(
            "the following pilot point names are not common "
            + "between the factors file and the pilot points file "
            + ",".join(list(diff))
        )

    arr = np.zeros((nrow, ncol), dtype=np.float64) + fill_value
    pp_dict = {int(name): val for name, val in zip(pp_data.index, pp_data.parval1)}
    try:
        pp_dict_log = {
            name: np.log10(val) for name, val in zip(pp_data.index, pp_data.parval1)
        }
    except:
        pp_dict_log = {}
    while True:
        line = f_fac.readline()
        if len(line) == 0:
            break
        try:
            inode, itrans, fac_data = _parse_factor_line(line)
        except Exception as e:
            raise Exception("error parsing factor line {0}:{1}".format(line, str(e)))
        if itrans == 0:
            fac_sum = sum([pp_dict[pp] * fac_data[pp] for pp in fac_data])
        else:
            fac_sum = sum([pp_dict_log[pp] * fac_data[pp] for pp in fac_data])
        if itrans != 0:
            fac_sum = 10 ** fac_sum
        row = ((inode - 1) // ncol) + 1
        col = inode - ((row - 1) * ncol)
        arr[row - 1, col - 1] = fac_sum
    arr[arr < lower_lim] = lower_lim
    arr[arr > upper_lim] = upper_lim

    if out_file is not None:
        np.savetxt(out_file, arr, fmt="%15.6E", delimiter="")
        return out_file
    return arr


def _parse_factor_line(line):
    """function to parse a factor file line.  Used by fac2real()"""

    raw = line.strip().split()
    inode, itrans, nfac = [int(i) for i in raw[:3]]
    # point numbers are shifted to zero-based to sync with pandas
    fac_data = {
        int(raw[ifac]) - 1: float(raw[ifac + 1]) for ifac in range(4, 4 + nfac * 2, 2)
    }
    return inode, itrans, fac_data