9537697060

Committed 16 Jun 2024 05:21PM UTC coverage: 3.779% (+0.03%) from 3.746%

Build # 9537697060

Build Type

push

github

Committed by

mcuntz

Commit Message

Removed all warning with new pandas and numpy versions

Run Details

1 of 161 new or added lines in 4 files covered. (0.62%)

9 existing lines in 2 files now uncovered.

39 of 1032 relevant lines covered (3.78%)

0.42 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

2.08

/src/hesseflux/ustarfilter.py

#!/usr/bin/env python
"""
Filter Eddy Covariance data with friction velocity

This module was original written by Tino Rau and Matthias Cuntz, and
maintained by Arndt Piayda while at Department of Computational
Hydrosystems, Helmholtz Centre for Environmental Research - UFZ,
Leipzig, Germany, and continued by Matthias Cuntz while at Institut
National de Recherche pour l'Agriculture, l'Alimentation et
l'Environnement (INRAE), Nancy, France.

:copyright: Copyright 2008-2022 Matthias Cuntz, see AUTHORS.rst for details.
:license: MIT License, see LICENSE for details.

.. moduleauthor:: Matthias Cuntz, Arndt Piayda, Tino Rau

The following functions are provided

.. autosummary::
   ustarfilter

History
    * Written 2008 by Tino Rau and Matthias Cuntz - mc (at) macu (dot) de
    * Maintained by Arndt Piayda since Aug 2014.
    * Input can be pandas Dataframe or numpy array(s), Apr 2020, Matthias Cuntz
    * Using numpy docstring format, May 2020, Matthias Cuntz
    * No bootstrap by default, Jul 2020, Matthias Cuntz
    * Optionally return thresholds and flags for each season,
      Jul 2020, Matthias Cuntz
    * Bugfix if no threshold found, and for multi-year flags,
      Jul 2020, Matthias Cuntz
    * Improved flake8 and numpy docstring, Oct 2021, Matthias Cuntz
    * Use all ustar data for 90% quantile if no threshold found, instead of
      only ustar data when NEE and Ta are valid, Jan 2023, Matthias Cuntz
    * Use 90% of ustar if no threshold found also for seasonout,
      Jan 2023, Matthias Cuntz
    * Removed np.float and np.bool, Jun 2024, Matthias Cuntz
    * do not register pandas platting backend, Jun 2024, Matthias Cuntz

"""
import numpy as np
import pandas as pd


__all__ = ['ustarfilter']


def ustarfilter(dfin, flag=None, isday=None, date=None,
                timeformat='%Y-%m-%d %H:%M:%S', colhead=None, ustarmin=0.01,
                nboot=1, undef=-9999, plot=False, seasonout=False, nmon=3,
                ntaclasses=7, corrcheck=0.5, nustarclasses=20,
                plateaucrit=0.95, swthr=10.):
    """
    Filter Eddy Covariance data with friction velocity

    Flag Eddy Covariance data using a threshold of friction velocity (ustar)
    below which ustar correlates with a reduction in CO2 flux. The algorithm
    follows the method presented in Papale et al. (Biogeosciences, 2006).

    Parameters
    ----------
    dfin : pandas.Dataframe or numpy.array
        time series of CO2 fluxes and friction velocity as well as air
        temperature. `dfin` can be a pandas.Dataframe with the columns

           'FC' or 'NEE' (or starting with `FC_` or `NEE_`) for observed
           CO2 flux [umol(CO2) m-2 s-1]

           'USTAR' (or starting with 'USTAR') for friction velocity [m s-1]

           'TA'    (or starting with `TA_`) for air temperature [deg C]

        The index is taken as date variable.

        *dfin* can also me a numpy array with the same columns. In this case
        *colhead*, *date*, and possibly *dateformat* must be given.
    flag : pandas.Dataframe or numpy.array, optional
        Dataframe or array has the same shape as *dfin*. Non-zero values in
        *flag* will be treated as missing values in *dfin*.
        *flag* must follow the same rules as *dfin* if pandas.Dataframe.
        If *flag* is numpy array, *df.columns.values* will be used as column
        heads and the index of *dfin* will be copied to *flag*.
    isday : array_like of bool, optional
        True when it is day, False when night. Must have the same length as
        `dfin.shape[0]`.
        If *isday* is not given, *dfin* must have a column with head 'SW_IN' or
        starting with 'SW_IN'. *isday* will then be `dfin['SW_IN'] > swthr`.
    date : array_like of string, optional
        1D-array_like of calendar dates in format given in `timeformat`.
        *date* must be given if *dfin* is numpy array.
    timeformat : str, optional
        Format of dates in *date*, if given (default: '%Y-%m-%d %H:%M:%S').
        See strftime documentation of Python's datetime module:
        https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior
    colhead : array_like of str, optional
        column names if *dfin* is numpy array. See *dfin* for mandatory column
        names.
    ustarmin : float, optional
        minimum ustar threshold (default: 0.01).
        Papale et al. (Biogeosciences, 2006) take 0.1 for forest and 0.01
        otherwise.
    nboot : int, optional
        number of boot straps for estimate of confidence interval of ustar
        threshold (default: 1)
    undef : float, optional
        values having *undef* value are treated as missing values in *dfin*
        (default: -9999)
    plot : bool, optional
        True: data and ustar thresholds are plotted into ustarfilter.pdf
        (default: False)
    seasonout : bool, optional
        True: return ustar thresholds for each season (default: False)
    nmon : int, optional
        Number of months to combine for a season (default: 3).
    ntaclasses : int, optional
        Number of temperature classes per *nmon* months (default: 7).
    corrcheck : float, optional
        Skip temperature class if absolute of correlation coefficient between
        air temperature and ustar is greater equal *corrcheck* (default: 0.5).
    nustarclasses : int, optional
        Number of ustar classes per temperature class (default: 20).
    plateaucrit : float, optional
        The ustar threshold is the smallest ustar class that has an average CO2
        flux, which is higher than *plateaucrit* times the mean CO2 flux of all
        ustar classes above this class (default: 0.95).
    swthr : float, optional
        Threshold to determine daytime from incoming shortwave radiation if
        *isday* not given (default: 10).

    Returns
    -------
    tuple of numpy arrays, pandas.Dataframe or numpy array
        numpy array with 5, 50 and 95 percentiles of ustar thresholds,

        flags: 0 everywhere except set to 2 where ustar < ustar-threshold.
        Either maximum threshold of all seasons or thresholds for each season,
        i.e. threshold array is `array(3, nseason)` if *seasonout* and
        `array(3)` otherwise.

    Notes
    -----
    Works ONLY for a data set of at least one full year.

    """
    # Check input
    # numpy or panda
    if isinstance(dfin, (np.ndarray, np.ma.MaskedArray)):
        isnumpy = True
        istrans = False
        astr = 'colhead must be given if input is numpy.ndarray.'
        assert colhead is not None, astr
        if dfin.shape[0] == len(colhead):
            istrans = True
            df = pd.DataFrame(dfin.T, columns=colhead)
        elif dfin.shape[1] == len(colhead):
            df = pd.DataFrame(dfin, columns=colhead)
        else:
            estr = ('Length of colhead must be number of columns in input'
                    ' array. len(colhead)=' + str(len(colhead)) +
                    ' shape(input)=(' + str(dfin.shape[0]) + ',' +
                    str(dfin.shape[1]) + ').')
            raise ValueError(estr)
        assert date is not None, 'Date must be given if input is numpy arrary.'
        df['Datetime'] = pd.to_datetime(date, format=timeformat)
        df.set_index('Datetime', drop=True, inplace=True)
    else:
        isnumpy = False
        istrans = False
        astr = 'Input must be either numpy.ndarray or pandas.DataFrame.'
        assert isinstance(dfin, pd.core.frame.DataFrame), astr
        df = dfin.copy(deep=True)

    # Incoming flags
    if flag is not None:
        if isinstance(flag, (np.ndarray, np.ma.MaskedArray)):
            fisnumpy = True
            fistrans = False
            if flag.shape[0] == len(df):
                ff = pd.DataFrame(flag, columns=df.columns.values)
            elif flag.shape[1] == len(df):
                fistrans = True
                ff = pd.DataFrame(flag.T, columns=df.columns.values)
            else:
                estr = ('flag must have same shape as data array. data:'
                        ' ({:d},{:d}); flag: ({:d},{:d})'.format(
                            dfin.shape[0], dfin.shape[1], flag.shape[0],
                            flag.shape[1]))
                raise ValueError(estr)
            ff = ff.set_index(df.index)
        else:
            fisnumpy = False
            fistrans = False
            astr = 'Flag must be either numpy.ndarray or pandas.DataFrame.'
            assert isinstance(flag, pd.core.frame.DataFrame), astr
            ff = flag.copy(deep=True)
    else:
        fisnumpy = isnumpy
        fistrans = istrans
        # flags: 0: good; 1: input flagged; 2: output flagged
        ff = df.copy(deep=True).astype(int)
        ff[:]           = 0
        ff[df == undef] = 1
        ff[df.isna()]   = 1

    # day or night
    if isday is None:
        sw_id = ''
        for cc in df.columns:
            if cc.startswith('SW_IN'):
                sw_id = cc
                break
        astr = ('Global radiation with name SW or starting with SW_ must be'
                ' in input if isday not given.')
        assert sw_id, astr
        # Papale et al. (Biogeosciences, 2006): 20; REddyProc: 10
        isday = df[sw_id] > swthr
    if isinstance(isday, (pd.core.series.Series, pd.core.frame.DataFrame)):
        # otherwise time indexes could not match anymore after shifting times
        isday = isday.to_numpy()
    isday[isday == undef] = np.nan
    ff[np.isnan(isday)] = 1

    # data and flags
    fc_id = ''
    for cc in df.columns:
        if ( cc.startswith('FC_') or (cc == 'FC') or cc.startswith('NEE_') or
             (cc == 'NEE') ):
            fc_id = cc
            break
    ustar_id = ''
    for cc in df.columns:
        if cc.startswith('USTAR_') or (cc == 'USTAR'):
            ustar_id = cc
            break
    ta_id = ''
    for cc in df.columns:
        if cc.startswith('TA_') or (cc == 'TA'):
            ta_id = cc
            break
    astr = ('Carbon net flux with name FC or NEE or starting with FC_ or'
            ' NEE_ must be in input.')
    assert fc_id, astr
    astr = ('Friction velocity u* with name USTAR or starting with USTAR_'
            ' must be in input.')
    assert ustar_id, astr
    astr = ('Air temperature with name TA or starting with TA_ must be in'
            ' input.')
    assert ta_id, astr

    # Move time steps by half interval if end times
    # save for output with original DatetimeIndex
    ustar_in = df[ustar_id].copy(deep=True)
    times = df.index.minute.sort_values().unique()
    indx  = df.index.values
    nindx = indx.copy()
    if len(times) == 1:
        if 0 in times:
            # hourly time steps, shift by 30 min
            dt = pd.Timedelta('30 minute')
            if df.index[0].hour == 0:
                nindx += dt  # beginning of time step
            else:
                nindx -= dt  # end of time step
    elif len(times) == 2:
        if 0 in times:
            # half-hourly time steps, shift by 15 min
            dt = pd.Timedelta('15 minute')
            if df.index[0].minute == 0:
                nindx += dt  # beginning of time step
            else:
                nindx -= dt  # end of time step
    else:
        estr = ('Could not analyse time steps. Known time steps are hourly'
                ' or half-hourly.')
        raise ValueError(estr)
    df['DateTime'] = nindx
    df = df.set_index('DateTime', drop=True)
    ff = ff.set_index(df.index)

    # Check time span
    yrmax  = df.index.max().year
    yrmin  = df.index.min().year
    nyears = yrmax - yrmin + 1
    ndays  = (df.index.max() - df.index.min()).days + 1
    assert ndays // nyears > 360, 'Full years must be given.'

    # calculate thresholds
    nperiod = 12 // nmon  # number of nmon periods per year
    if seasonout:
        bustars = np.ones((nboot, nyears, nperiod)) * undef
    else:
        bustars = np.ones((nboot, nyears)) * undef
    for y in range(nyears):
        yy = yrmin + y
        iiyr    = df.index.year == yy
        iidf    = df.index[iiyr]
        df_f    = df.loc[iidf]
        ff_f    = ff.loc[iidf]
        isday_f = isday[iiyr]

        # bootstrap per year
        nstepsyear = len(df_f)
        for b in range(nboot):
            if b == 0:
                df_b    = df_f
                ff_b    = ff_f
                isday_b = isday_f
            else:
                iiboot  = np.random.randint(0, nstepsyear, size=nstepsyear)
                iidf    = df_f.index[iiboot]
                df_b    = df_f.loc[iidf]
                ff_b    = ff_f.loc[iidf]
                isday_b = isday_f[iiboot]

            # periods / seasons
            pustars = np.ones(nperiod) * undef
            for p in range(nperiod):
                flag_p   = ( (~isday_b) &
                             (ff_b[fc_id] == 0) & (ff_b[ustar_id] == 0) &
                             (ff_b[ta_id] == 0) &
                             (df_b.index.month > p * nmon) &
                             (df_b.index.month <= (p + 1) * nmon) )
                fc_p    = df_b.loc[flag_p, fc_id]
                ustar_p = df_b.loc[flag_p, ustar_id]
                ta_p    = df_b.loc[flag_p, ta_id]

                # temperature classes
                custars = []
                if len(ta_p) < (ntaclasses + 1):
                    continue
                ta_q = np.quantile(
                    ta_p,
                    np.arange(ntaclasses + 1, dtype=float) /
                    float(ntaclasses))
                ta_q[0] -= 0.1  # 1st include min
                for t in range(ntaclasses):
                    iita    = (ta_p > ta_q[t]) & (ta_p <= ta_q[t + 1])
                    fc_t    = fc_p[iita]
                    ustar_t = ustar_p[iita]
                    ta_t    = ta_p[iita]

                    # discard temperature class if correlation is strong
                    r_abs = np.abs(np.corrcoef(ta_t, ustar_t)[0, 1])
                    if r_abs >= corrcheck:
                        continue

                    # ustar classes
                    ustar_q = np.quantile(
                        ustar_t,
                        np.arange(nustarclasses + 1, dtype=float) /
                        float(nustarclasses))
                    ustar_q[0] -= 0.01  # 1st include min
                    for u in range(nustarclasses - 1):
                        iiustar = ((ustar_t > ustar_q[u]) &
                                   (ustar_t <= ustar_q[u + 1]))
                        fc_u    = fc_t[iiustar]
                        fc_a    = fc_t[ustar_t > ustar_q[u + 1]]

                        if abs(fc_u.mean()) >= abs(plateaucrit * fc_a.mean()):
                            custars.append(ustar_q[u + 1])
                            break

                # median of thresholds of all temperature classes =
                # threshold of period
                if len(custars) > 0:
                    pustars[p] = np.median(custars)
                elif seasonout:
                    # Set threshold to 90% of data per season if no threshold
                    # found
                    pustars[p] = np.quantile(ustar_p, 0.9)
            # Take maximum of periods if any thresholds found,
            # otherwise set threshold to 90% of data
            ii = np.where(pustars != undef)[0]
            if ii.size > 0:
                if seasonout:
                    bustars[b, y, :] = pustars
                else:
                    bustars[b, y] = pustars[ii].max()
            else:
                if seasonout:
                    # raise ValueError('Should not be here.')
                    for p in range(nperiod):
                        # Set threshold to 90% of data per season
                        # if not enough data
                        flag_b = ( (~isday_b) &
                                   (ff_b[ustar_id] == 0) &
                                   (df_b.index.month > p * nmon) &
                                   (df_b.index.month <= (p + 1) * nmon) )
                        ustar_b = df_b.loc[flag_b, ustar_id]
                        if len(ustar_b) > 0:
                            bustars[b, y, p] = np.quantile(ustar_b, 0.9)
                else:
                    flag_b = ( (~isday_b) &
                               (ff_b[ustar_id] == 0) )
                    bustars[b, y] = np.quantile(df_b.loc[flag_b, ustar_id],
                                                0.9)

    # set minimum ustar threshold
    bustars = np.maximum(bustars, ustarmin)

    # report 5, 50 and 95 percentile
    oustars = np.quantile(bustars, (0.05, 0.5, 0.95), axis=0)

    # flag out with original DatetimeIndex
    off    = ustar_in.astype(int)
    off[:] = 0
    ii     = np.zeros(len(off), dtype=bool)
    if seasonout:
        for y in range(nyears):
            yy = yrmin + y
            for p in range(nperiod):
                iiyr = ( (df.index.year == yy) &   # df DatetimeIndex
                         (df.index.month > p * nmon) &
                         (df.index.month <= (p + 1) * nmon) )
                ii[iiyr] = ustar_in[iiyr] < oustars[1, y, p]
    else:
        for y in range(nyears):
            yy = yrmin + y
            iiyr = df.index.year == yy  # df DatetimeIndex
            ii[iiyr] = ustar_in[iiyr] < oustars[1, y]
    off[ii] = 2  # original DatetimeIndex

    if plot:
        import matplotlib as mpl
        mpl.use('PDF')  # set directly after import matplotlib
        from matplotlib.backends.backend_pdf import PdfPages
        import matplotlib.pyplot as plt
        # import matplotlib.backends.backend_pdf as PdfPages
        # pd.plotting.register_matplotlib_converters()

        pp = PdfPages('ustarfilter.pdf')
        if seasonout:
            for y in range(nyears):
                yy = yrmin + y

                iiyr    = df.index.year == yy
                iidf    = df.index[iiyr]
                df_f    = df.loc[iidf]
                ff_f    = ff.loc[iidf]
                isday_f = isday[iiyr]

                for p in range(nperiod):
                    flag_p = ( (~isday_f) &
                               (ff_f[fc_id] == 0) & (ff_f[ustar_id] == 0) &
                               (ff_f[ta_id] == 0) &
                               (df_f.index.month > p * nmon) &
                               (df_f.index.month <= (p + 1) * nmon) )
                    fc_p    = df_f.loc[flag_p, fc_id]
                    ustar_p = df_f.loc[flag_p, ustar_id]

                    fig  = plt.figure(1)
                    sub  = fig.add_subplot(111)
                    sub.plot(ustar_p, fc_p, 'bo', markersize=1., alpha=0.2)
                    sub.axvline(x=oustars[1, y, p], linewidth=1.5, color='r')
                    plt.ylabel('F CO2')
                    plt.xlabel('ustar')
                    plt.title('ustar thresh for season'
                              ' {:d} of year {:d}: {:5.3f}'.format(
                                  p, yy, oustars[1, y, p]))

                    pp.savefig(fig)
                    plt.close(fig)
        else:
            for y in range(nyears):
                yy = yrmin + y

                iiyr    = (df.index.year == yy) & (~isday)
                fc_y    = df.loc[iiyr, fc_id]
                ffc_y   = ff.loc[iiyr, fc_id]
                ustar_y = df.loc[iiyr, ustar_id]
                ffu_y   = ff.loc[iiyr, ustar_id]
                # off_y   = off[iiyr & (isday == False)]

                fig  = plt.figure(1)
                sub  = fig.add_subplot(111)
                flag_p = (ffu_y == 0) & (ffc_y == 0)
                sub.plot(ustar_y[flag_p], fc_y[flag_p], 'bo',
                         markersize=1., alpha=0.2)
                sub.axvline(x=oustars[1, y], linewidth=1.5, color='r')
                plt.ylabel('F CO2')
                plt.xlabel('ustar')
                plt.title('ustar thresh of year ${:d}: {:5.3f}'.format(
                    yy, oustars[1, y]))

                pp.savefig(fig)
                plt.close(fig)
        pp.close()

    # out
    oustars = oustars.squeeze()

    if isnumpy:
        return oustars, off.to_numpy()
    else:
        return oustars, off


if __name__ == '__main__':
    import doctest
    doctest.testmod()

1	#!/usr/bin/env python
2	"""	11✔
3	Filter Eddy Covariance data with friction velocity
4
5	This module was original written by Tino Rau and Matthias Cuntz, and
6	maintained by Arndt Piayda while at Department of Computational
7	Hydrosystems, Helmholtz Centre for Environmental Research - UFZ,
8	Leipzig, Germany, and continued by Matthias Cuntz while at Institut
9	National de Recherche pour l'Agriculture, l'Alimentation et
10	l'Environnement (INRAE), Nancy, France.
11
12	:copyright: Copyright 2008-2022 Matthias Cuntz, see AUTHORS.rst for details.
13	:license: MIT License, see LICENSE for details.
14
15	.. moduleauthor:: Matthias Cuntz, Arndt Piayda, Tino Rau
16
17	The following functions are provided
18
19	.. autosummary::
20	ustarfilter
21
22	History
23	* Written 2008 by Tino Rau and Matthias Cuntz - mc (at) macu (dot) de
24	* Maintained by Arndt Piayda since Aug 2014.
25	* Input can be pandas Dataframe or numpy array(s), Apr 2020, Matthias Cuntz
26	* Using numpy docstring format, May 2020, Matthias Cuntz
27	* No bootstrap by default, Jul 2020, Matthias Cuntz
28	* Optionally return thresholds and flags for each season,
29	Jul 2020, Matthias Cuntz
30	* Bugfix if no threshold found, and for multi-year flags,
31	Jul 2020, Matthias Cuntz
32	* Improved flake8 and numpy docstring, Oct 2021, Matthias Cuntz
33	* Use all ustar data for 90% quantile if no threshold found, instead of
34	only ustar data when NEE and Ta are valid, Jan 2023, Matthias Cuntz
35	* Use 90% of ustar if no threshold found also for seasonout,
36	Jan 2023, Matthias Cuntz
37	* Removed np.float and np.bool, Jun 2024, Matthias Cuntz
38	* do not register pandas platting backend, Jun 2024, Matthias Cuntz
39
40	"""
41	import numpy as np	11✔
42	import pandas as pd	11✔
43
44
45	__all__ = ['ustarfilter']	11✔
46
47
48	def ustarfilter(dfin, flag=None, isday=None, date=None,	11✔
49	timeformat='%Y-%m-%d %H:%M:%S', colhead=None, ustarmin=0.01,
50	nboot=1, undef=-9999, plot=False, seasonout=False, nmon=3,
51	ntaclasses=7, corrcheck=0.5, nustarclasses=20,
52	plateaucrit=0.95, swthr=10.):
53	"""
54	Filter Eddy Covariance data with friction velocity
55
56	Flag Eddy Covariance data using a threshold of friction velocity (ustar)
57	below which ustar correlates with a reduction in CO2 flux. The algorithm
58	follows the method presented in Papale et al. (Biogeosciences, 2006).
59
60	Parameters
61	----------
62	dfin : pandas.Dataframe or numpy.array
63	time series of CO2 fluxes and friction velocity as well as air
64	temperature. `dfin` can be a pandas.Dataframe with the columns
65
66	'FC' or 'NEE' (or starting with `FC_` or `NEE_`) for observed
67	CO2 flux [umol(CO2) m-2 s-1]
68
69	'USTAR' (or starting with 'USTAR') for friction velocity [m s-1]
70
71	'TA' (or starting with `TA_`) for air temperature [deg C]
72
73	The index is taken as date variable.
74
75	dfin can also me a numpy array with the same columns. In this case
76	colhead, date, and possibly dateformat must be given.
77	flag : pandas.Dataframe or numpy.array, optional
78	Dataframe or array has the same shape as dfin. Non-zero values in
79	flag will be treated as missing values in dfin.
80	flag must follow the same rules as dfin if pandas.Dataframe.
81	If flag is numpy array, df.columns.values will be used as column
82	heads and the index of dfin will be copied to flag.
83	isday : array_like of bool, optional
84	True when it is day, False when night. Must have the same length as
85	`dfin.shape[0]`.
86	If isday is not given, dfin must have a column with head 'SW_IN' or
87	starting with 'SW_IN'. isday will then be `dfin['SW_IN'] > swthr`.
88	date : array_like of string, optional
89	1D-array_like of calendar dates in format given in `timeformat`.
90	date must be given if dfin is numpy array.
91	timeformat : str, optional
92	Format of dates in date, if given (default: '%Y-%m-%d %H:%M:%S').
93	See strftime documentation of Python's datetime module:
94	https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior
95	colhead : array_like of str, optional
96	column names if dfin is numpy array. See dfin for mandatory column
97	names.
98	ustarmin : float, optional
99	minimum ustar threshold (default: 0.01).
100	Papale et al. (Biogeosciences, 2006) take 0.1 for forest and 0.01
101	otherwise.
102	nboot : int, optional
103	number of boot straps for estimate of confidence interval of ustar
104	threshold (default: 1)
105	undef : float, optional
106	values having undef value are treated as missing values in dfin
107	(default: -9999)
108	plot : bool, optional
109	True: data and ustar thresholds are plotted into ustarfilter.pdf
110	(default: False)
111	seasonout : bool, optional
112	True: return ustar thresholds for each season (default: False)
113	nmon : int, optional
114	Number of months to combine for a season (default: 3).
115	ntaclasses : int, optional
116	Number of temperature classes per nmon months (default: 7).
117	corrcheck : float, optional
118	Skip temperature class if absolute of correlation coefficient between
119	air temperature and ustar is greater equal corrcheck (default: 0.5).
120	nustarclasses : int, optional
121	Number of ustar classes per temperature class (default: 20).
122	plateaucrit : float, optional
123	The ustar threshold is the smallest ustar class that has an average CO2
124	flux, which is higher than plateaucrit times the mean CO2 flux of all
125	ustar classes above this class (default: 0.95).
126	swthr : float, optional
127	Threshold to determine daytime from incoming shortwave radiation if
128	isday not given (default: 10).
129
130	Returns
131	-------
132	tuple of numpy arrays, pandas.Dataframe or numpy array
133	numpy array with 5, 50 and 95 percentiles of ustar thresholds,
134
135	flags: 0 everywhere except set to 2 where ustar < ustar-threshold.
136	Either maximum threshold of all seasons or thresholds for each season,
137	i.e. threshold array is `array(3, nseason)` if seasonout and
138	`array(3)` otherwise.
139
140	Notes
141	-----
142	Works ONLY for a data set of at least one full year.
143
144	"""
145	# Check input
146	# numpy or panda
147	if isinstance(dfin, (np.ndarray, np.ma.MaskedArray)):	×
148	isnumpy = True	×
149	istrans = False	×
150	astr = 'colhead must be given if input is numpy.ndarray.'	×
151	assert colhead is not None, astr	×
152	if dfin.shape[0] == len(colhead):	×
153	istrans = True	×
154	df = pd.DataFrame(dfin.T, columns=colhead)	×
155	elif dfin.shape[1] == len(colhead):	×
156	df = pd.DataFrame(dfin, columns=colhead)	×
157	else:
158	estr = ('Length of colhead must be number of columns in input'	×
159	' array. len(colhead)=' + str(len(colhead)) +
160	' shape(input)=(' + str(dfin.shape[0]) + ',' +
161	str(dfin.shape[1]) + ').')
162	raise ValueError(estr)	×
163	assert date is not None, 'Date must be given if input is numpy arrary.'	×
164	df['Datetime'] = pd.to_datetime(date, format=timeformat)	×
165	df.set_index('Datetime', drop=True, inplace=True)	×
166	else:
167	isnumpy = False	×
168	istrans = False	×
169	astr = 'Input must be either numpy.ndarray or pandas.DataFrame.'	×
170	assert isinstance(dfin, pd.core.frame.DataFrame), astr	×
171	df = dfin.copy(deep=True)	×
172
173	# Incoming flags
174	if flag is not None:	×
175	if isinstance(flag, (np.ndarray, np.ma.MaskedArray)):	×
176	fisnumpy = True	×
177	fistrans = False	×
178	if flag.shape[0] == len(df):	×
179	ff = pd.DataFrame(flag, columns=df.columns.values)	×
180	elif flag.shape[1] == len(df):	×
181	fistrans = True	×
182	ff = pd.DataFrame(flag.T, columns=df.columns.values)	×
183	else:
184	estr = ('flag must have same shape as data array. data:'	×
185	' ({:d},{:d}); flag: ({:d},{:d})'.format(
186	dfin.shape[0], dfin.shape[1], flag.shape[0],
187	flag.shape[1]))
188	raise ValueError(estr)	×
189	ff = ff.set_index(df.index)	×
190	else:
191	fisnumpy = False	×
192	fistrans = False	×
193	astr = 'Flag must be either numpy.ndarray or pandas.DataFrame.'	×
194	assert isinstance(flag, pd.core.frame.DataFrame), astr	×
195	ff = flag.copy(deep=True)	×
196	else:
197	fisnumpy = isnumpy	×
198	fistrans = istrans	×
199	# flags: 0: good; 1: input flagged; 2: output flagged
200	ff = df.copy(deep=True).astype(int)	×
201	ff[:] = 0	×
202	ff[df == undef] = 1	×
203	ff[df.isna()] = 1	×
204
205	# day or night
206	if isday is None:	×
207	sw_id = ''	×
208	for cc in df.columns:	×
209	if cc.startswith('SW_IN'):	×
210	sw_id = cc	×
211	break	×
212	astr = ('Global radiation with name SW or starting with SW_ must be'	×
213	' in input if isday not given.')
214	assert sw_id, astr	×
215	# Papale et al. (Biogeosciences, 2006): 20; REddyProc: 10
216	isday = df[sw_id] > swthr	×
217	if isinstance(isday, (pd.core.series.Series, pd.core.frame.DataFrame)):	×
218	# otherwise time indexes could not match anymore after shifting times
219	isday = isday.to_numpy()	×
220	isday[isday == undef] = np.nan	×
221	ff[np.isnan(isday)] = 1	×
222
223	# data and flags
224	fc_id = ''	×
225	for cc in df.columns:	×
226	if ( cc.startswith('FC_') or (cc == 'FC') or cc.startswith('NEE_') or	×
227	(cc == 'NEE') ):
228	fc_id = cc	×
229	break	×
230	ustar_id = ''	×
231	for cc in df.columns:	×
232	if cc.startswith('USTAR_') or (cc == 'USTAR'):	×
233	ustar_id = cc	×
234	break	×
235	ta_id = ''	×
236	for cc in df.columns:	×
237	if cc.startswith('TA_') or (cc == 'TA'):	×
238	ta_id = cc	×
239	break	×
240	astr = ('Carbon net flux with name FC or NEE or starting with FC_ or'	×
241	' NEE_ must be in input.')
242	assert fc_id, astr	×
243	astr = ('Friction velocity u* with name USTAR or starting with USTAR_'	×
244	' must be in input.')
245	assert ustar_id, astr	×
246	astr = ('Air temperature with name TA or starting with TA_ must be in'	×
247	' input.')
248	assert ta_id, astr	×
249
250	# Move time steps by half interval if end times
251	# save for output with original DatetimeIndex
252	ustar_in = df[ustar_id].copy(deep=True)	×
253	times = df.index.minute.sort_values().unique()	×
254	indx = df.index.values	×
255	nindx = indx.copy()	×
256	if len(times) == 1:	×
257	if 0 in times:	×
258	# hourly time steps, shift by 30 min
259	dt = pd.Timedelta('30 minute')	×
260	if df.index[0].hour == 0:	×
261	nindx += dt # beginning of time step	×
262	else:
263	nindx -= dt # end of time step	×
264	elif len(times) == 2:	×
265	if 0 in times:	×
266	# half-hourly time steps, shift by 15 min
267	dt = pd.Timedelta('15 minute')	×
268	if df.index[0].minute == 0:	×
269	nindx += dt # beginning of time step	×
270	else:
271	nindx -= dt # end of time step	×
272	else:
273	estr = ('Could not analyse time steps. Known time steps are hourly'	×
274	' or half-hourly.')
275	raise ValueError(estr)	×
276	df['DateTime'] = nindx	×
277	df = df.set_index('DateTime', drop=True)	×
278	ff = ff.set_index(df.index)	×
279
280	# Check time span
281	yrmax = df.index.max().year	×
282	yrmin = df.index.min().year	×
283	nyears = yrmax - yrmin + 1	×
284	ndays = (df.index.max() - df.index.min()).days + 1	×
NEW 285	assert ndays // nyears > 360, 'Full years must be given.'	×
286
287	# calculate thresholds
NEW 288	nperiod = 12 // nmon # number of nmon periods per year	×
289	if seasonout:	×
290	bustars = np.ones((nboot, nyears, nperiod)) * undef	×
291	else:
292	bustars = np.ones((nboot, nyears)) * undef	×
293	for y in range(nyears):	×
294	yy = yrmin + y	×
295	iiyr = df.index.year == yy	×
296	iidf = df.index[iiyr]	×
297	df_f = df.loc[iidf]	×
298	ff_f = ff.loc[iidf]	×
299	isday_f = isday[iiyr]	×
300
301	# bootstrap per year
302	nstepsyear = len(df_f)	×
303	for b in range(nboot):	×
304	if b == 0:	×
305	df_b = df_f	×
306	ff_b = ff_f	×
307	isday_b = isday_f	×
308	else:
309	iiboot = np.random.randint(0, nstepsyear, size=nstepsyear)	×
310	iidf = df_f.index[iiboot]	×
311	df_b = df_f.loc[iidf]	×
312	ff_b = ff_f.loc[iidf]	×
313	isday_b = isday_f[iiboot]	×
314
315	# periods / seasons
316	pustars = np.ones(nperiod) * undef	×
317	for p in range(nperiod):	×
318	flag_p = ( (~isday_b) &	×
319	(ff_b[fc_id] == 0) & (ff_b[ustar_id] == 0) &
320	(ff_b[ta_id] == 0) &
321	(df_b.index.month > p * nmon) &
322	(df_b.index.month <= (p + 1) * nmon) )
323	fc_p = df_b.loc[flag_p, fc_id]	×
324	ustar_p = df_b.loc[flag_p, ustar_id]	×
325	ta_p = df_b.loc[flag_p, ta_id]	×
326
327	# temperature classes
328	custars = []	×
329	if len(ta_p) < (ntaclasses + 1):	×
330	continue	×
331	ta_q = np.quantile(	×
332	ta_p,
333	np.arange(ntaclasses + 1, dtype=float) /
334	float(ntaclasses))
335	ta_q[0] -= 0.1 # 1st include min	×
336	for t in range(ntaclasses):	×
NEW 337	iita = (ta_p > ta_q[t]) & (ta_p <= ta_q[t + 1])	×
338	fc_t = fc_p[iita]	×
339	ustar_t = ustar_p[iita]	×
340	ta_t = ta_p[iita]	×
341
342	# discard temperature class if correlation is strong
343	r_abs = np.abs(np.corrcoef(ta_t, ustar_t)[0, 1])	×
344	if r_abs >= corrcheck:	×
345	continue	×
346
347	# ustar classes
348	ustar_q = np.quantile(	×
349	ustar_t,
350	np.arange(nustarclasses + 1, dtype=float) /
351	float(nustarclasses))
352	ustar_q[0] -= 0.01 # 1st include min	×
NEW 353	for u in range(nustarclasses - 1):	×
354	iiustar = ((ustar_t > ustar_q[u]) &	×
355	(ustar_t <= ustar_q[u + 1]))
356	fc_u = fc_t[iiustar]	×
NEW 357	fc_a = fc_t[ustar_t > ustar_q[u + 1]]	×
358
NEW 359	if abs(fc_u.mean()) >= abs(plateaucrit * fc_a.mean()):	×
NEW 360	custars.append(ustar_q[u + 1])	×
UNCOV 361	break	×
362
363	# median of thresholds of all temperature classes =
364	# threshold of period
365	if len(custars) > 0:	×
366	pustars[p] = np.median(custars)	×
367	elif seasonout:	×
368	# Set threshold to 90% of data per season if no threshold
369	# found
370	pustars[p] = np.quantile(ustar_p, 0.9)	×
371	# Take maximum of periods if any thresholds found,
372	# otherwise set threshold to 90% of data
373	ii = np.where(pustars != undef)[0]	×
374	if ii.size > 0:	×
375	if seasonout:	×
376	bustars[b, y, :] = pustars	×
377	else:
378	bustars[b, y] = pustars[ii].max()	×
379	else:
380	if seasonout:	×
381	# raise ValueError('Should not be here.')
382	for p in range(nperiod):	×
383	# Set threshold to 90% of data per season
384	# if not enough data
385	flag_b = ( (~isday_b) &	×
386	(ff_b[ustar_id] == 0) &
387	(df_b.index.month > p * nmon) &
388	(df_b.index.month <= (p + 1) * nmon) )
389	ustar_b = df_b.loc[flag_b, ustar_id]	×
390	if len(ustar_b) > 0:	×
391	bustars[b, y, p] = np.quantile(ustar_b, 0.9)	×
392	else:
393	flag_b = ( (~isday_b) &	×
394	(ff_b[ustar_id] == 0) )
NEW 395	bustars[b, y] = np.quantile(df_b.loc[flag_b, ustar_id],	×
396	0.9)
397
398	# set minimum ustar threshold
399	bustars = np.maximum(bustars, ustarmin)	×
400
401	# report 5, 50 and 95 percentile
402	oustars = np.quantile(bustars, (0.05, 0.5, 0.95), axis=0)	×
403
404	# flag out with original DatetimeIndex
405	off = ustar_in.astype(int)	×
406	off[:] = 0	×
NEW 407	ii = np.zeros(len(off), dtype=bool)	×
408	if seasonout:	×
409	for y in range(nyears):	×
410	yy = yrmin + y	×
411	for p in range(nperiod):	×
412	iiyr = ( (df.index.year == yy) & # df DatetimeIndex	×
413	(df.index.month > p * nmon) &
414	(df.index.month <= (p + 1) * nmon) )
UNCOV 415	ii[iiyr] = ustar_in[iiyr] < oustars[1, y, p]	×
416	else:
417	for y in range(nyears):	×
418	yy = yrmin + y	×
419	iiyr = df.index.year == yy # df DatetimeIndex	×
420	ii[iiyr] = ustar_in[iiyr] < oustars[1, y]	×
421	off[ii] = 2 # original DatetimeIndex	×
422
423	if plot:	×
NEW 424	import matplotlib as mpl	×
NEW 425	mpl.use('PDF') # set directly after import matplotlib	×
NEW 426	from matplotlib.backends.backend_pdf import PdfPages	×
427	import matplotlib.pyplot as plt	×
428	# import matplotlib.backends.backend_pdf as PdfPages
429	# pd.plotting.register_matplotlib_converters()
430
NEW 431	pp = PdfPages('ustarfilter.pdf')	×
432	if seasonout:	×
433	for y in range(nyears):	×
434	yy = yrmin + y	×
435
436	iiyr = df.index.year == yy	×
437	iidf = df.index[iiyr]	×
438	df_f = df.loc[iidf]	×
439	ff_f = ff.loc[iidf]	×
440	isday_f = isday[iiyr]	×
441
442	for p in range(nperiod):	×
443	flag_p = ( (~isday_f) &	×
444	(ff_f[fc_id] == 0) & (ff_f[ustar_id] == 0) &
445	(ff_f[ta_id] == 0) &
446	(df_f.index.month > p * nmon) &
447	(df_f.index.month <= (p + 1) * nmon) )
448	fc_p = df_f.loc[flag_p, fc_id]	×
449	ustar_p = df_f.loc[flag_p, ustar_id]	×
450
451	fig = plt.figure(1)	×
452	sub = fig.add_subplot(111)	×
453	sub.plot(ustar_p, fc_p, 'bo', markersize=1., alpha=0.2)	×
454	sub.axvline(x=oustars[1, y, p], linewidth=1.5, color='r')	×
455	plt.ylabel('F CO2')	×
456	plt.xlabel('ustar')	×
457	plt.title('ustar thresh for season'	×
458	' {:d} of year {:d}: {:5.3f}'.format(
459	p, yy, oustars[1, y, p]))
460
461	pp.savefig(fig)	×
462	plt.close(fig)	×
463	else:
464	for y in range(nyears):	×
465	yy = yrmin + y	×
466
467	iiyr = (df.index.year == yy) & (~isday)	×
468	fc_y = df.loc[iiyr, fc_id]	×
469	ffc_y = ff.loc[iiyr, fc_id]	×
470	ustar_y = df.loc[iiyr, ustar_id]	×
471	ffu_y = ff.loc[iiyr, ustar_id]	×
472	# off_y = off[iiyr & (isday == False)]
473
474	fig = plt.figure(1)	×
475	sub = fig.add_subplot(111)	×
476	flag_p = (ffu_y == 0) & (ffc_y == 0)	×
477	sub.plot(ustar_y[flag_p], fc_y[flag_p], 'bo',	×
478	markersize=1., alpha=0.2)
479	sub.axvline(x=oustars[1, y], linewidth=1.5, color='r')	×
480	plt.ylabel('F CO2')	×
481	plt.xlabel('ustar')	×
482	plt.title('ustar thresh of year ${:d}: {:5.3f}'.format(	×
483	yy, oustars[1, y]))
484
485	pp.savefig(fig)	×
486	plt.close(fig)	×
487	pp.close()	×
488
489	# out
490	oustars = oustars.squeeze()	×
491
492	if isnumpy:	×
493	return oustars, off.to_numpy()	×
494	else:
495	return oustars, off	×
496
497
498	if __name__ == '__main__':
499	import doctest
500	doctest.testmod()

mcuntz / hesseflux / 9537697060

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous