• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

pysat / pysatSpaceWeather / 5144805366

pending completion
5144805366

Pull #116

github

web-flow
Merge 2fa98e443 into 0c2adccf8
Pull Request #116: RC v0.0.10

19 of 19 new or added lines in 5 files covered. (100.0%)

1807 of 2035 relevant lines covered (88.8%)

7.1 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

90.73
/pysatSpaceWeather/instruments/methods/f107.py
1
#!/usr/bin/env python
2
# -*- coding: utf-8 -*-.
3
# Full license can be found in License.md
4
# Full author list can be found in .zenodo.json file
5
# DOI:10.5281/zenodo.3986138
6
# ----------------------------------------------------------------------------
7

8
"""Routines for the F10.7 solar index."""
8✔
9

10
import datetime as dt
8✔
11
import numpy as np
8✔
12
from packaging.version import Version
8✔
13

14
import pandas as pds
8✔
15
import pysat
8✔
16

17
import pysatSpaceWeather as pysat_sw
8✔
18

19

20
def acknowledgements(tag):
    """Define the acknowledgements for the F10.7 data.

    Parameters
    ----------
    tag : str
        Tag of the space weather index

    Returns
    -------
    ackn : str
        Acknowledgements string associated with the appropriate F10.7 tag.

    """
    # LISIRD supplies the historic data; SWPC supplies all other products
    lisird_ackn = 'NOAA radio flux obtained through LISIRD'
    swpc_ackn = ('Prepared by the U.S. Dept. of Commerce, NOAA, Space '
                 'Weather Prediction Center')

    tag_ackn = {'historic': lisird_ackn,
                'prelim': swpc_ackn,
                'daily': swpc_ackn,
                'forecast': swpc_ackn,
                '45day': swpc_ackn}

    return tag_ackn[tag]
42

43

44
def references(tag):
    """Define the references for the F10.7 data.

    Parameters
    ----------
    tag : str
        Instrument tag for the F10.7 data.

    Returns
    -------
    refs : str
        Reference string associated with the appropriate F10.7 tag.

    """
    noaa_desc = ''.join(['Dataset description: ',
                         'https://www.ngdc.noaa.gov/stp/space-weather/',
                         'solar-data/solar-features/solar-radio/noontime-flux',
                         '/penticton/documentation/dataset-description',
                         '_penticton.pdf, accessed Dec 2020'])

    # Original measurement reference.  The article title was accidentally
    # duplicated in a previous version of this string; it appears once here.
    orig_ref = ''.join(["Covington, A.E. (1948), Solar noise observations on",
                        " 10.7 centimeters, Proceedings of the IRE, 36(44), ",
                        "p 454-457."])
    swpc_desc = ''.join(['Dataset description: https://www.swpc.noaa.gov/',
                         'sites/default/files/images/u2/Usr_guide.pdf'])

    # All SWPC products share the same dataset description; the historic
    # data uses the NOAA/LISIRD description instead
    refs = {'historic': "\n".join([noaa_desc, orig_ref]),
            'prelim': "\n".join([swpc_desc, orig_ref]),
            'daily': "\n".join([swpc_desc, orig_ref]),
            'forecast': "\n".join([swpc_desc, orig_ref]),
            '45day': "\n".join([swpc_desc, orig_ref])}

    return refs[tag]
77

78

79
def combine_f107(standard_inst, forecast_inst, start=None, stop=None):
    """Combine the output from the measured and forecasted F10.7 sources.

    Parameters
    ----------
    standard_inst : pysat.Instrument or NoneType
        Instrument object containing data for the 'sw' platform, 'f107' name,
        and 'historic', 'prelim', or 'daily' tag
    forecast_inst : pysat.Instrument or NoneType
        Instrument object containing data for the 'sw' platform, 'f107' name,
        and 'prelim', '45day' or 'forecast' tag
    start : dt.datetime or NoneType
        Starting time for combining data, or None to use earliest loaded
        date from the pysat Instruments (default=None)
    stop : dt.datetime or NoneType
        Ending time for combining data, or None to use the latest loaded date
        from the pysat Instruments (default=None)

    Returns
    -------
    f107_inst : pysat.Instrument
        Instrument object containing F10.7 observations for the desired period
        of time, merging the standard, 45day, and forecasted values based on
        their reliability

    Raises
    ------
    ValueError
        If appropriate time data is not supplied, or if the date range is badly
        formed.

    Notes
    -----
    Merging prioritizes the standard data, then the 45day data, and finally
    the forecast data

    Will not attempt to download any missing data, but will load data

    """

    # Initialize metadata and flags
    notes = "Combines data from"
    stag = standard_inst.tag if len(standard_inst.tag) > 0 else 'default'
    tag = 'combined_{:s}_{:s}'.format(stag, forecast_inst.tag)
    inst_flag = 'standard'

    # If the start or stop times are not defined, get them from the Instruments
    if start is None:
        stimes = [inst.index.min() for inst in [standard_inst, forecast_inst]
                  if len(inst.index) > 0]
        start = min(stimes) if len(stimes) > 0 else None

    if stop is None:
        stimes = [inst.index.max() for inst in [standard_inst, forecast_inst]
                  if len(inst.index) > 0]
        stop = max(stimes) + pds.DateOffset(days=1) if len(stimes) > 0 else None

    if start is None or stop is None:
        raise ValueError(' '.join(("must either load in Instrument objects or",
                                   "provide starting and ending times")))

    if start >= stop:
        raise ValueError("date range is zero or negative")

    # Initialize the output instrument
    f107_inst = pysat.Instrument()
    f107_inst.inst_module = pysat_sw.instruments.sw_f107
    f107_inst.tag = tag
    f107_inst.date = start
    f107_inst.doy = np.int64(start.strftime("%j"))
    fill_val = None

    f107_times = list()
    f107_values = list()

    # Cycle through the desired time range
    itime = start

    while itime < stop and inst_flag is not None:
        # Load and save the standard data for as many times as possible
        if inst_flag == 'standard':
            # Test to see if data loading is needed
            if not np.any(standard_inst.index == itime):
                # Set the load kwargs, which vary by pysat version and tag
                load_kwargs = {'date': itime}

                if Version(pysat.__version__) > Version('3.0.1'):
                    load_kwargs['use_header'] = True

                if standard_inst.tag == 'daily':
                    # Add 30 days
                    load_kwargs['date'] += dt.timedelta(days=30)

                standard_inst.load(**load_kwargs)

            if standard_inst.empty:
                good_times = [False]
            else:
                good_times = ((standard_inst.index >= itime)
                              & (standard_inst.index < stop))

            # Only start the notes entry for this source once
            if notes.find("standard") < 0:
                notes += " the {:} source ({:} to ".format(inst_flag,
                                                           itime.date())

            if np.any(good_times):
                # Take the metadata (and fill value) from the first source
                # that provides data
                if fill_val is None:
                    f107_inst.meta = standard_inst.meta
                    fill_val = f107_inst.meta['f107'][
                        f107_inst.meta.labels.fill_val]

                # Save only the non-fill values, then advance the cycle time
                # past the last saved observation
                good_vals = standard_inst['f107'][good_times] != fill_val
                new_times = list(standard_inst.index[good_times][good_vals])
                f107_times.extend(new_times)
                new_vals = list(standard_inst['f107'][good_times][good_vals])
                f107_values.extend(new_vals)
                itime = f107_times[-1] + pds.DateOffset(days=1)
            else:
                # No more standard data; fall through to the forecast source
                inst_flag = 'forecast'
                notes += "{:})".format(itime.date())

        # Load and save the forecast data for as many times as possible
        if inst_flag == "forecast":
            # Determine which files should be loaded
            if len(forecast_inst.index) == 0:
                files = np.unique(forecast_inst.files.files[itime:stop])
            else:
                files = [None]  # No load needed, if already initialized

            # Cycle through all possible files of interest, saving relevant
            # data
            for filename in files:
                if filename is not None:
                    load_kwargs = {'fname': filename}
                    if Version(pysat.__version__) > Version('3.0.1'):
                        load_kwargs['use_header'] = True

                    forecast_inst.load(**load_kwargs)

                if notes.find("forecast") < 0:
                    notes += " the {:} source ({:} to ".format(inst_flag,
                                                               itime.date())

                # Check in case there was a problem with the standard data
                if fill_val is None:
                    f107_inst.meta = forecast_inst.meta
                    fill_val = f107_inst.meta['f107'][
                        f107_inst.meta.labels.fill_val]

                # Determine which times to save
                if forecast_inst.empty:
                    good_vals = []
                else:
                    good_times = ((forecast_inst.index >= itime)
                                  & (forecast_inst.index < stop))
                    good_vals = forecast_inst['f107'][good_times] != fill_val

                # Save desired data and cycle time
                if len(good_vals) > 0:
                    new_times = list(
                        forecast_inst.index[good_times][good_vals])
                    f107_times.extend(new_times)
                    new_vals = list(
                        forecast_inst['f107'][good_times][good_vals])
                    f107_values.extend(new_vals)
                    itime = f107_times[-1] + pds.DateOffset(days=1)

            notes += "{:})".format(itime.date())

            inst_flag = None

    # Close the notes entry if the loop exited with a source still active
    if inst_flag is not None:
        notes += "{:})".format(itime.date())

    # Determine if the beginning or end of the time series needs to be padded
    if len(f107_times) >= 2:
        freq = pysat.utils.time.calc_freq(f107_times)
    else:
        freq = None
    end_date = stop - pds.DateOffset(days=1)
    date_range = pds.date_range(start=start, end=end_date, freq=freq)

    if len(f107_times) == 0:
        f107_times = date_range

    # NOTE: a second, identical `pds.date_range` call was removed here; none
    # of its inputs (start, end_date, freq) change after the first call.

    if date_range[0] < f107_times[0]:
        # Extend the time and value arrays from their beginning with fill
        # values
        itime = abs(date_range - f107_times[0]).argmin()
        f107_times.reverse()
        f107_values.reverse()
        extend_times = list(date_range[:itime])
        extend_times.reverse()
        f107_times.extend(extend_times)
        f107_values.extend([fill_val for kk in extend_times])
        f107_times.reverse()
        f107_values.reverse()

    if date_range[-1] > f107_times[-1]:
        # Extend the time and value arrays from their end with fill values
        itime = abs(date_range - f107_times[-1]).argmin() + 1
        extend_times = list(date_range[itime:])
        f107_times.extend(extend_times)
        f107_values.extend([fill_val for kk in extend_times])

    # Save output data
    f107_inst.data = pds.DataFrame(f107_values, columns=['f107'],
                                   index=f107_times)

    # Resample the output data, filling missing values
    if (date_range.shape != f107_inst.index.shape
            or abs(date_range - f107_inst.index).max().total_seconds() > 0.0):
        # NOTE(review): `fillna(method=None)` depends on older pandas
        # Resampler behavior -- confirm against the supported pandas versions
        f107_inst.data = f107_inst.data.resample(freq).fillna(method=None)
        if np.isfinite(fill_val):
            f107_inst.data[np.isnan(f107_inst.data)] = fill_val

    # Update the metadata notes for this procedure
    notes += ", in that order"
    f107_inst.meta['f107'] = {f107_inst.meta.labels.notes: notes}

    return f107_inst
302

303

304
def parse_45day_block(block_lines):
    """Parse the data blocks used in the 45-day Ap and F10.7 Flux Forecast file.

    Parameters
    ----------
    block_lines : list
        List of lines containing data in this data block

    Returns
    -------
    dates : list
        List of dates for each date/data pair in this block
    values : list
        List of values for each date/data pair in this block

    """
    # Each line alternates date tokens (e.g., 01Jan21) and integer values
    dates = []
    values = []

    for data_line in block_lines:
        tokens = data_line.split()

        # Even-indexed tokens are dates, odd-indexed tokens are values
        dates += [dt.datetime.strptime(tok, "%d%b%y") for tok in tokens[::2]]
        values += [np.int64(tok) for tok in tokens[1::2]]

    return dates, values
338

339

340
def rewrite_daily_file(year, outfile, lines):
    """Rewrite the SWPC Daily Solar Data files.

    Parameters
    ----------
    year : int
        Year of data file (format changes based on date)
    outfile : str
        Output filename
    lines : str
        String containing all output data (result of 'read')

    """

    # Get to the solar index data
    if year > 2000:
        raw_data = lines.split('#---------------------------------')[-1]
        raw_data = raw_data.split('\n')[1:-1]
        optical = True
    else:
        raw_data = lines.split('# ')[-1]
        raw_data = raw_data.split('\n')

        # Optical data is unavailable when the file says so, or for 1994.
        # BUG FIX: `str.find` returns -1 (truthy) when the substring is
        # absent, so the previous test `if raw_data[0].find('Not Available')`
        # evaluated True almost always; test the index explicitly instead.
        optical = not (raw_data[0].find('Not Available') >= 0
                       or year == 1994)

        istart = 7 if year < 2000 else 1
        raw_data = raw_data[istart:-1]

    # Parse the data
    solar_times, data_dict = parse_daily_solar_data(raw_data, year, optical)

    # Collect into DataFrame
    data = pds.DataFrame(data_dict, index=solar_times,
                         columns=data_dict.keys())

    # Write out as a file
    data.to_csv(outfile, header=True)

    return
378

379

380
def parse_daily_solar_data(data_lines, year, optical):
    """Parse the data in the SWPC daily solar index file.

    Parameters
    ----------
    data_lines : list
        List of lines containing data
    year : list
        Year of file
    optical : boolean
        Flag denoting whether or not optical data is available

    Returns
    -------
    dates : list
        List of dates for each date/data pair in this block
    values : dict
        Dict of lists of values, where each key is the value name

    """

    # Initialize the output
    dates = list()
    val_keys = ['f107', 'ssn', 'ss_area', 'new_reg', 'smf', 'goes_bgd_flux',
                'c_flare', 'm_flare', 'x_flare', 'o1_flare', 'o2_flare',
                'o3_flare']
    optical_keys = ['o1_flare', 'o2_flare', 'o3_flare']
    xray_keys = ['c_flare', 'm_flare', 'x_flare']
    values = {kk: list() for kk in val_keys}

    # Older files use a different date format
    date_fmt = "%Y %m %d" if year > 1996 else "%d %b %y"

    # Cycle through each line in this file
    for raw_line in data_lines:
        fields = raw_line.split()

        # The first three fields hold the date
        dates.append(dt.datetime.strptime(" ".join(fields[0:3]), date_fmt))

        # The remaining fields hold the data values; columns absent from
        # older files are filled with flag values instead of consuming a field
        icol = 0
        for ikey, data_key in enumerate(val_keys):
            if year == 1994 and data_key == 'new_reg':
                # New regions only in files after 1994
                data_val = -999
            elif ((year == 1994 and data_key in xray_keys)
                  or (not optical and data_key in optical_keys)):
                # X-ray flares in files after 1994, optical flares come later
                data_val = -1
            else:
                data_val = fields[icol + 3]
                icol += 1

            if data_key != 'goes_bgd_flux':
                # Every column but the GOES background flux is an integer,
                # with '*' flagging missing data
                if data_val == "*":
                    data_val = -999 if ikey < 5 else -1
                else:
                    data_val = np.int64(data_val)

            values[data_key].append(data_val)

    return dates, values
441

442

443
def calc_f107a(f107_inst, f107_name='f107', f107a_name='f107a', min_pnts=41):
    """Calculate the 81 day mean F10.7.

    Parameters
    ----------
    f107_inst : pysat.Instrument
        pysat Instrument holding the F10.7 data
    f107_name : str
        Data column name for the F10.7 data (default='f107')
    f107a_name : str
        Data column name for the F10.7a data (default='f107a')
    min_pnts : int
        Minimum number of points required to calculate an average (default=41)

    Raises
    ------
    ValueError
        If the input column is missing or the output column already exists.

    Note
    ----
    Will not pad data on its own

    """

    # Test to see that the input data is present
    if f107_name not in f107_inst.data.columns:
        raise ValueError("unknown input data column: " + f107_name)

    # Test to see that the output data does not already exist
    if f107a_name in f107_inst.data.columns:
        raise ValueError("output data column already exists: " + f107a_name)

    # Use the instrument's fill value if metadata is available, NaN otherwise
    if f107_name in f107_inst.meta:
        fill_val = f107_inst.meta[f107_name][f107_inst.meta.labels.fill_val]
    else:
        fill_val = np.nan

    # Calculate the rolling mean.  Since these values are centered but rolling
    # function doesn't allow temporal windows to be calculated this way, create
    # a hack for this.
    #
    # Ensure the data are evenly sampled at a daily frequency, since this is
    # how often F10.7 is calculated.
    # NOTE(review): `fillna(method=None)` relies on older pandas Resampler
    # behavior -- confirm against the supported pandas versions.
    f107_fill = f107_inst.data.resample('1D').fillna(method=None)

    # Replace the time index with an ordinal so the 81-sample rolling window
    # below corresponds to an 81-day window on the daily-sampled data
    time_ind = f107_fill.index
    f107_fill['ord'] = pds.Series([tt.toordinal() for tt in time_ind],
                                  index=time_ind)
    f107_fill.set_index('ord', inplace=True)

    # Calculate the mean over a centered 81-day window, requiring at least
    # `min_pnts` finite samples
    f107_fill[f107a_name] = f107_fill[f107_name].rolling(window=81,
                                                         min_periods=min_pnts,
                                                         center=True).mean()

    # Replace the ordinal index with the time
    f107_fill['time'] = pds.Series(time_ind, index=f107_fill.index)
    f107_fill.set_index('time', inplace=True)

    # Resample to the original frequency, if it is not equal to 1 day
    freq = pysat.utils.time.calc_freq(f107_inst.index)
    if freq != "86400S":
        # Resample to the desired frequency
        f107_fill = f107_fill.resample(freq).ffill()

        # Save the output in a list
        f107a = list(f107_fill[f107a_name])

        # Fill any dates that fall after the resampled index but within the
        # original Instrument's time range
        time_ind = pds.date_range(f107_fill.index[0], f107_inst.index[-1],
                                  freq=freq)
        for itime in time_ind[f107_fill.index.shape[0]:]:
            if (itime - f107_fill.index[-1]).total_seconds() < 86400.0:
                # Within one day of the last resampled time: carry the last
                # average forward
                f107a.append(f107a[-1])
            else:
                f107a.append(fill_val)

        # Redefine the Series
        f107_fill = pds.DataFrame({f107a_name: f107a}, index=time_ind)

    # There may be missing days in the output data, remove these
    if f107_inst.index.shape < f107_fill.index.shape:
        f107_fill = f107_fill.loc[f107_inst.index]

    # Save the data
    f107_inst[f107a_name] = f107_fill[f107a_name]

    # Update the metadata
    meta_dict = {f107_inst.meta.labels.units: 'SFU',
                 f107_inst.meta.labels.name: 'F10.7a',
                 f107_inst.meta.labels.desc: "81-day centered average of F10.7",
                 f107_inst.meta.labels.min_val: 0.0,
                 f107_inst.meta.labels.max_val: np.nan,
                 f107_inst.meta.labels.fill_val: fill_val,
                 f107_inst.meta.labels.notes:
                 ' '.join(('Calculated using data between',
                           '{:} and {:}'.format(f107_inst.index[0],
                                                f107_inst.index[-1])))}

    f107_inst.meta[f107a_name] = meta_dict

    return
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc