8614163418

Committed 09 Apr 2024 10:24AM CUT coverage: 81.03% (+0.2%) from 80.813%

Build # 8614163418

Build Type

Pull #343

github

Committed by

web-flow

Commit Message

Merge 1fd684f5b into 93062a244

Pull Request #343: Apply SAITS embedding strategy to new added models

Run Details

79 of 80 new or added lines in 10 files covered. (98.75%)

2 existing lines in 1 file now uncovered.

6847 of 8450 relevant lines covered (81.03%)

4.85 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

15.79

/pypots/data/load_preprocessing.py

"""
Preprocessing functions to load supported open-source time-series datasets.
"""

# Created by Wenjie Du <wenjay.du@gmail.com>
# License: BSD-3-Clause

import pandas as pd


def preprocess_physionet2012(data: dict) -> dict:
    """The preprocessing function for dataset PhysioNet-2012.

    Every record (rows sharing one ``RecordID``) is padded with empty rows for
    the time steps in ``0..47`` it does not have, sorted by ``Time`` and then
    truncated to its first 48 rows. All static features except ``ICUType`` are
    dropped; ``ICUType`` is split out of ``X`` and returned separately.

    The input dict is not modified: ``data["static_features"]`` keeps all of
    its entries, so the function can safely be called again on the same dict.

    Parameters
    ----------
    data :
        A data dict from tsdb.load_dataset(). It must provide the keys
        ``"X"`` (a dataframe with at least the columns ``RecordID``, ``Time``
        and ``ICUType``), ``"y"`` and ``"static_features"`` (a list of
        column names of ``X``).

    Returns
    -------
    dataset :
        A dict containing processed data, including:
            X : pandas.DataFrame,
                A dataframe containing all time series vectors from 11988 patients, distinguished by column `RecordID`.
            y : pandas.Series
                The 11988 classification labels of all patients, indicating whether they were deceased.
                Passed through unchanged from ``data["y"]``.
            ICUType : pandas.DataFrame
                The non-null ``ICUType`` values, indexed by ``RecordID``.
    """
    n_steps = 48  # every sample is padded/truncated to this many time steps
    all_steps = set(range(n_steps))

    # keep ICUType for now and remove the other static features, e.g. age, gender.
    # Build a new list rather than calling `.remove()` on the caller's list, which
    # would alter the input dict and raise ValueError on a second call.
    static_to_drop = [
        feature for feature in data["static_features"] if feature != "ICUType"
    ]
    X = data["X"].drop(static_to_drop, axis=1)

    def apply_func(df_temp):  # pad and truncate to set the max length of samples as 48
        missing = sorted(all_steps.difference(set(df_temp["Time"])))
        if missing:
            # pad the sample's length to 48 if it doesn't have enough time steps;
            # skipped when nothing is missing so that no empty frame is concatenated
            missing_part = pd.DataFrame({"Time": missing})
            df_temp = pd.concat(
                [df_temp, missing_part], ignore_index=False, sort=False
            )
        df_temp = df_temp.set_index("Time").sort_index().reset_index()
        return df_temp.iloc[:n_steps]  # truncate

    X = X.groupby("RecordID").apply(apply_func)
    # The padded rows carry NaN in the `RecordID` column, so the column is dropped
    # and rebuilt from the group keys stored in the index. `errors="ignore"` covers
    # pandas versions whose groupby.apply does not hand the grouping column over.
    X = X.drop("RecordID", axis=1, errors="ignore")
    X = X.reset_index()
    ICUType = X[["RecordID", "ICUType"]].set_index("RecordID").dropna()
    # `level_1` is the unnamed per-group row counter produced by reset_index()
    X = X.drop(["level_1", "ICUType"], axis=1)

    dataset = {
        "X": X,
        "y": data["y"],
        "ICUType": ICUType,
    }

    return dataset

1	"""	6✔
2	Preprocessing functions to load supported open-source time-series datasets.
3	"""
4
5	# Created by Wenjie Du <wenjay.du@gmail.com>
6	# License: BSD-3-Clause
7
8	import pandas as pd	6✔
9
10
11	def preprocess_physionet2012(data: dict) -> dict:	6✔
12	"""The preprocessing function for dataset PhysioNet-2012.
13
14	Parameters
15	----------
16	data :
17	A data dict from tsdb.load_dataset().
18
19	Returns
20	-------
21	dataset :
22	A dict containing processed data, including:
23	X : pandas.DataFrame,
24	A dataframe contains all time series vectors from 11988 patients, distinguished by column `RecordID`.
25	y : pandas.Series
26	The 11988 classification labels of all patients, indicating whether they were deceased.
27	"""
28	data["static_features"].remove("ICUType") # keep ICUType for now	×
29	# remove the other static features, e.g. age, gender
30	X = data["X"].drop(data["static_features"], axis=1)	×
31
32	def apply_func(df_temp): # pad and truncate to set the max length of samples as 48	×
33	missing = list(set(range(0, 48)).difference(set(df_temp["Time"])))	×
34	missing_part = pd.DataFrame({"Time": missing})	×
35	df_temp = pd.concat(	×
36	[df_temp, missing_part], ignore_index=False, sort=False
37	) # pad the sample's length to 48 if it doesn't have enough time steps
38	df_temp = df_temp.set_index("Time").sort_index().reset_index()	×
39	df_temp = df_temp.iloc[:48] # truncate	×
40	return df_temp	×
41
42	X = X.groupby("RecordID").apply(apply_func)	×
43	X = X.drop("RecordID", axis=1)	×
44	X = X.reset_index()	×
45	ICUType = X[["RecordID", "ICUType"]].set_index("RecordID").dropna()	×
46	X = X.drop(["level_1", "ICUType"], axis=1)	×
47
48	dataset = {	×
49	"X": X,
50	"y": data["y"],
51	"ICUType": ICUType,
52	}
53
54	return dataset	×

WenjieDu / PyPOTS / 8614163418

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous