• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

mcuntz / hesseflux / 9537697060

16 Jun 2024 05:21PM UTC coverage: 3.779% (+0.03%) from 3.746%
9537697060

push

github

mcuntz
Removed all warning with new pandas and numpy versions

1 of 161 new or added lines in 4 files covered. (0.62%)

9 existing lines in 2 files now uncovered.

39 of 1032 relevant lines covered (3.78%)

0.42 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

6.03
/src/hesseflux/madspikes.py
1
#!/usr/bin/env python
2
"""
11✔
3
Spike detection using a moving median absolute difference filter
4

5
This module was original written by Tino Rau and Matthias Cuntz, and
6
maintained by Arndt Piayda while at Department of Computational
7
Hydrosystems, Helmholtz Centre for Environmental Research - UFZ,
8
Leipzig, Germany, and continued by Matthias Cuntz while at Institut
9
National de Recherche pour l'Agriculture, l'Alimentation et
10
l'Environnement (INRAE), Nancy, France.
11

12
:copyright: Copyright 2008-2022 Matthias Cuntz, see AUTHORS.rst for details.
13
:license: MIT License, see LICENSE for details.
14

15
.. moduleauthor:: Matthias Cuntz, Arndt Piayda, Tino Rau
16

17
The following functions are provided
18

19
.. autosummary::
20
   madspikes
21

22
History
23
    * Written 2008 by Tino Rau and Matthias Cuntz - mc (at) macu (dot) de
24
    * Maintained by Arndt Piayda since Aug 2014.
25
    * Input can be pandas Dataframe or numpy array(s), Apr 2020, Matthias Cuntz
26
    * Removed iteration, Apr 2020, Matthias Cuntz
27
    * Using numpy docstring format, May 2020, Matthias Cuntz
28
    * Improved flake8 and numpy docstring, Oct 2021, Matthias Cuntz
29
    * Removed np.float and np.bool, Jun 2024, Matthias Cuntz
30

31
"""
32
from __future__ import division, absolute_import, print_function
11✔
33
import numpy as np
11✔
34
import pandas as pd
11✔
35
from pyjams import mad
11✔
36

37

38
__all__ = ['madspikes']
11✔
39

40

41
def madspikes(dfin, flag=None, isday=None,
11✔
42
              colhead=None, undef=-9999,
43
              nscan=15 * 48, nfill=1 * 48,
44
              z=7, deriv=2, swthr=10.,
45
              plot=False):
46
    """
47
    Spike detection using a moving median absolute difference filter
48

49
    Used with Eddy vovariance data in Papale et al. (Biogeosciences, 2006).
50

51
    Parameters
52
    ----------
53
    dfin : pandas.Dataframe or numpy.array
54
        time series of data where spike detection with MAD should be applied.
55
        *dfin* can be a pandas.Dataframe.
56
        *dfin* can also me a numpy array. In this case *colhead* must be given.
57
        MAD will be applied along axis=0, i.e. on each column of axis=1.
58
    flag : pandas.Dataframe or numpy.array, optional
59
        Dataframe or array has the same shape as *dfin*. Non-zero values in
60
        *flag* will be treated as missing values in *dfin*.
61
        If *flag* is numpy array, *df.columns.values* will be used as column
62
        heads.
63
    isday : array_like of bool, optional
64
        True when it is day, False when night. Must have the same length as
65
        `dfin.shape[0]`. If *isday* is not given, *dfin* must have a column
66
        with head 'SW_IN' or starting with 'SW_IN'. *isday* will then be
67
        `dfin['SW_IN'] > swthr`.
68
    colhed : array_like of str, optional
69
        column names if *dfin* is numpy array.
70
    undef : float, optional
71
        values having *undef* value are treated as missing values in *dfin*
72
        (default: -9999). np.nan as *undef* is not allowed (not working).
73
    nscan : int, optional
74
        size of moving window to calculate mad in time steps (default: 15*48)
75
    nfill : int, optional
76
        step size of moving window to calculate mad in time steps
77
        (default: 1*48). MAD will be calculated in *nscan* time window.
78
        Resulting mask will be applied only in *nfill* window in the middle
79
        of the *nscan* window. Then *nscan* window will be moved by *nfill*
80
        time steps.
81
    z : float, optional
82
        Input is allowed to deviate maximum *z* standard deviations from the
83
        median (default: 7)
84
    deriv : int, optional
85
        0: Act on raw input.
86

87
        1: Use first derivatives.
88

89
        2: Use 2nd derivatives (default).
90
    swthr : float, optional
91
        Threshold to determine daytime from incoming shortwave radiation if
92
        *isday* not given (default: 10).
93
    plot : bool, optional
94
        True: data and spikes are plotted into madspikes.pdf (default: False).
95

96
    Returns
97
    -------
98
    pandas.Dataframe or numpy array
99
        flags, 0 everywhere except detected spikes set to 2
100

101
    """
102
    # numpy or panda
103
    if isinstance(dfin, (np.ndarray, np.ma.MaskedArray)):
×
104
        isnumpy = True
×
105
        istrans = False
×
106
        astr = 'colhead must be given if input is numpy.ndarray.'
×
107
        assert colhead is not None, astr
×
108
        if dfin.shape[0] == len(colhead):
×
109
            istrans = True
×
110
            df = pd.DataFrame(dfin.T, columns=colhead)
×
111
        elif dfin.shape[1] == len(colhead):
×
112
            df = pd.DataFrame(dfin, columns=colhead)
×
113
        else:
114
            estr = ('Length of colhead must be number of columns in input'
×
115
                    'array. len(colhead)=' + str(len(colhead)) +
116
                    ' shape(input)=(' + str(dfin.shape[0]) + ',' +
117
                    str(dfin.shape[1]) + ').')
118
            raise ValueError(estr)
×
119
    else:
120
        isnumpy = False
×
121
        istrans = False
×
122
        astr = 'Input must be either numpy.ndarray or pandas.DataFrame.'
×
123
        assert isinstance(dfin, pd.core.frame.DataFrame), astr
×
124
        df = dfin.copy(deep=True)
×
125

126
    # Incoming flags
127
    if flag is not None:
×
128
        if isinstance(flag, (np.ndarray, np.ma.MaskedArray)):
×
129
            fisnumpy = True
×
130
            fistrans = False
×
131
            if flag.shape[0] == len(df):
×
132
                ff = pd.DataFrame(flag, columns=df.columns.values)
×
133
            elif flag.shape[1] == len(df):
×
134
                fistrans = True
×
135
                ff = pd.DataFrame(flag.T, columns=df.columns.values)
×
136
            else:
137
                estr = ('flag must have same shape as data array. data:'
×
138
                        ' ({:d},{:d}); flag: ({:d},{:d})'.format(
139
                            dfin.shape[0], dfin.shape[1], flag.shape[0],
140
                            flag.shape[1]))
141
                raise ValueError(estr)
×
142
            ff = ff.set_index(df.index)
×
143
        else:
144
            fisnumpy = False
×
145
            fistrans = False
×
146
            astr = 'Flag must be either numpy.ndarray or pandas.DataFrame.'
×
147
            assert isinstance(flag, pd.core.frame.DataFrame), astr
×
148
            ff = flag.copy(deep=True)
×
149
    else:
150
        fisnumpy = isnumpy
×
151
        fistrans = istrans
×
152
        # flags: 0: good; 1: input flagged; 2: output flagged
153
        ff = df.copy(deep=True).astype(int)
×
154
        ff[:]           = 0
×
155
        ff[df == undef] = 1
×
156
        ff[df.isna()]   = 1
×
157

158
    # day or night
159
    if isday is None:
×
160
        sw_id = ''
×
161
        for cc in df.columns:
×
162
            if cc.startswith('SW_IN'):
×
163
                sw_id = cc
×
164
                break
×
165
        astr = ('Global radiation with name SW or starting with SW_ must be'
×
166
                ' in input if isday not given.')
167
        assert sw_id, astr
×
168
        # Papale et al. (Biogeosciences, 2006): 20; REddyProc: 10
169
        isday = df[sw_id] > swthr
×
170
    if isinstance(isday, (pd.core.series.Series, pd.core.frame.DataFrame)):
×
171
        isday = isday.to_numpy()
×
172
    isday[isday == undef] = np.nan
×
173
    ff[np.isnan(isday)] = 1
×
174

175
    # parameters
176
    nrow, ncol = df.shape
×
NEW
177
    half_scan_win = nscan // 2
×
NEW
178
    half_fill_win = nfill // 2
×
179

180
    # calculate dusk and dawn times and separate in day and night
NEW
181
    isdawn = np.zeros(nrow, dtype=bool)
×
NEW
182
    isdusk = np.zeros(nrow, dtype=bool)
×
NEW
183
    dis    = isday.astype(int) - np.roll(isday, -1).astype(int)
×
184
    isdawn[:-1]    = np.where(dis[:-1] == -1, True, False)
×
NEW
185
    isdusk[:-1]    = np.where(dis[:-1] == 1, True, False)
×
186
    isddday        = isdawn
×
187
    tmp            = np.roll(isdusk, 1)
×
188
    isddday[1:]   += tmp[1:]  # start and end of day
×
189
    isddnight      = isdusk
×
190
    tmp            = np.roll(isdawn, 1)
×
191
    isddnight[1:] += tmp[1:]  # start and end of night
×
192

193
    # iterate over each column of data
194
    if plot:
×
195
        import matplotlib.pyplot as plt
×
196
        import matplotlib.backends.backend_pdf as pdf
×
197
        pd.plotting.register_matplotlib_converters()
×
198
        pp = pdf.PdfPages('madspikes.pdf')
×
199

200
    cols = list(df.columns)
×
201
    for hcol in df.columns:
×
202

203
        if hcol.startswith == 'SW_IN':
×
204
            continue
×
205

206
        data  = df[hcol]
×
207
        dflag = ff[hcol]
×
208

209
        # get day and night data
210
        data_day = data.copy(deep=True)
×
211
        data_day[~(isday | isddday) | (dflag != 0) | (data == undef)] = np.nan
×
212
        data_night = data.copy(deep=True)
×
213
        data_night[~(~isday | isddnight) | (dflag != 0) | (data == undef)] = (
×
214
            np.nan)
215

216
        # iterate over fill window
NEW
217
        for j in range(half_fill_win, nrow - 1, 2 * half_fill_win):
×
218
            j1 = max(j - half_scan_win - 1, 0)
×
219
            j2 = min(j + half_scan_win + 1, nrow)
×
220
            fill_start = max(j - half_fill_win, 1)
×
NEW
221
            fill_end   = min(j + half_fill_win, nrow - 1)
×
222

223
            dd = data_day[j1:j2].to_numpy()
×
224
            day_flag = mad(np.ma.masked_array(data=dd, mask=np.isnan(dd)),
×
225
                           z=z, deriv=deriv)
226
            ff.iloc[fill_start:fill_end, cols.index(hcol)] += (
×
227
                np.where(day_flag[fill_start - j1 - 1:fill_end - j1 - 1],
228
                         2, 0))
229

230
            nn = data_night[j1:j2]
×
231
            night_flag = mad(np.ma.masked_array(data=nn, mask=np.isnan(nn)),
×
232
                             z=z, deriv=deriv)
233
            ff.iloc[fill_start:fill_end, cols.index(hcol)] += (
×
234
                np.where(night_flag[fill_start - j1 - 1:fill_end - j1 - 1],
235
                         2, 0))
236

237
        if plot:
×
238
            fig = plt.figure(1)
×
239
            sub = fig.add_subplot(111)
×
240
            valid = ff[hcol] == 0
×
241
            l1 = sub.plot(data[valid], 'ob')
×
242
            l3 = sub.plot(data[ff[hcol] == 2], 'or')
×
243
            plt.title(hcol)
×
244
            pp.savefig(fig)
×
245
            plt.close(fig)
×
246

247
    # Finish
248

249
    if plot:
×
250
        pp.close()
×
251

252
    if fisnumpy:
×
253
        if fistrans:
×
254
            return ff.to_numpy().T
×
255
        else:
256
            return ff.to_numpy()
×
257
    else:
258
        return ff
×
259

260

261
if __name__ == '__main__':
262
    import doctest
263
    doctest.testmod()
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc