WenjieDu / PyPOTS · build 3911954423 · push (via GitHub) · pending completion
Commit by Wenjie Du: "fix: add the dependencies of PyPOTS into the doc building requirement file;"

2110 of 2800 relevant lines covered (75.36%)
0.76 hits per line

Source file: /pypots/data/load_specific_datasets.py · 28.57% of relevant lines covered
Per the original report's per-line markers, only the eight module-level statements (the docstring, the two imports, SUPPORTED_DATASETS, PREPROCESSING, and the three top-level def lines) are hit; the twenty statements inside the function bodies are all uncovered.
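
Both percentages are plain line ratios; a quick sanity check (the 8-of-28 count for this file is tallied from the report's markers summarized above, so treat it as an inference):

# Sanity check of the report's ratios, using counts from the report itself.
print(f"{2110 / 2800:.2%}")  # repo-wide: 75.36%
print(f"{8 / 28:.2%}")       # this file: 28.57%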
"""
Functions to load supported open-source time-series datasets.
"""

# Created by Wenjie Du <wenjay.du@gmail.com>
# License: GPL-v3

import pandas as pd
import tsdb

SUPPORTED_DATASETS = [
    "physionet_2012",
]


def list_supported_datasets():
    """

    Returns
    -------
    SUPPORTED_DATASETS : list
        A list including all supported datasets.

    """
    return SUPPORTED_DATASETS


def preprocess_physionet2012(data):
    """
    Parameters
    ----------
    data : dict,
        A data dict from tsdb.load_dataset().

    Returns
    -------
    dict :
        A dict containing the processed data.

    """
    X = data["X"].drop(data["static_features"], axis=1)

    def apply_func(df_temp):  # pad and truncate to set the max length of samples as 48
        missing = list(set(range(0, 48)).difference(set(df_temp["Time"])))
        missing_part = pd.DataFrame({"Time": missing})
        df_temp = pd.concat([df_temp, missing_part], ignore_index=False, sort=False)  # pad
        df_temp = df_temp.set_index("Time").sort_index().reset_index()
        df_temp = df_temp.iloc[:48]  # truncate
        return df_temp

    X = X.groupby("RecordID").apply(apply_func)
    X = X.drop("RecordID", axis=1)  # the group index already carries RecordID, so drop the duplicated column
    X = X.reset_index()
    X = X.drop(["level_1", "Time"], axis=1)  # drop the within-group index; each record keeps 48 ordered rows
    return {"X": X, "y": data["y"]}
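
As a quick illustration of apply_func's pad-and-truncate step, here is a toy single-record frame (column name and values hypothetical) pushed through the same operations:

# Illustration only, not part of load_specific_datasets.py.
demo = pd.DataFrame({"Time": [0, 2, 50], "HR": [80.0, 82.0, 90.0]})

missing = list(set(range(0, 48)).difference(set(demo["Time"])))  # every step in 0..47 not observed
demo = pd.concat([demo, pd.DataFrame({"Time": missing})], sort=False)  # pad with NaN rows
demo = demo.set_index("Time").sort_index().reset_index()
demo = demo.iloc[:48]  # truncate: the observation at Time=50 is discarded

print(demo.shape)  # (48, 2): exactly 48 time steps, NaN in "HR" where padded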

PREPROCESSING = {"physionet_2012": preprocess_physionet2012}
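
PREPROCESSING acts as a small dispatch table: load_specific_dataset below looks up the preprocessing function by dataset name rather than branching on it. A hypothetical new entry would be wired in like this (the dataset name and function are illustrative only, not part of PyPOTS):

# Illustration only: "example_dataset" and its preprocessor do not exist in PyPOTS.
def preprocess_example_dataset(data):
    return {"X": data["X"], "y": data["y"]}  # dataset-specific cleaning would go here

SUPPORTED_DATASETS.append("example_dataset")
PREPROCESSING["example_dataset"] = preprocess_example_dataset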

def load_specific_dataset(dataset_name, use_cache=True):
    """Load specific datasets supported by PyPOTS.
    Different from tsdb.load_dataset(), which returns only the raw data,
    load_specific_dataset here also runs some preprocessing operations,
    like truncating time series to generate samples with the same length.

    Parameters
    ----------
    dataset_name : str,
        The name of the dataset to be loaded, which should be supported, i.e. in SUPPORTED_DATASETS.

    use_cache : bool,
        Whether to use the cache. This is an argument of tsdb.load_dataset().

    Returns
    -------
    data : dict,
        A dict containing the preprocessed dataset.
        Users only need to continue the preprocessing steps to generate the data they want,
        e.g. standardizing and splitting.

    """
    print(
        f"Loading the dataset {dataset_name} with TSDB (https://github.com/WenjieDu/Time_Series_Database)..."
    )
    assert dataset_name in SUPPORTED_DATASETS, (
        f"Dataset {dataset_name} is not supported. "
        f"If you believe this dataset is valuable to be supported by PyPOTS, "
        f"please create an issue on GitHub: "
        f"https://github.com/WenjieDu/PyPOTS/issues"
    )
    print(f"Starting preprocessing {dataset_name}...")
    data = tsdb.load_dataset(dataset_name, use_cache)
    data = PREPROCESSING[dataset_name](data)
    return data
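
A minimal usage sketch, assuming network access on first use so that tsdb can download and cache the raw PhysioNet-2012 data:

# Usage sketch, not part of load_specific_datasets.py.
from pypots.data.load_specific_datasets import load_specific_dataset

data = load_specific_dataset("physionet_2012")
X, y = data["X"], data["y"]
# Every record contributes exactly 48 rows to X after the pad-and-truncate
# step, with NaNs marking the time steps that were never observed.
print(X.shape)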