SPF-OST / pytrnsys_process / build 11970144474

22 Nov 2024 09:39AM UTC · coverage: 94.037% (+1.1%) from 92.933%

Build 11970144474 · push · github · web-flow
Merge pull request #14 from SPF-OST/13-create-initial-pipeline-for-processing-one-simulation
added sim processing:

183 of 189 new or added lines in 8 files covered (96.83%).
1 existing line in 1 file is now uncovered.
410 of 436 relevant lines covered (94.04%).
1.87 hits per line.
Source File: /pytrnsys_process/process_sim/process_sim.py (96.1% covered)

import pathlib as _pl
from dataclasses import dataclass

import pandas as _pd

from pytrnsys_process import file_matcher as fm
from pytrnsys_process import readers, utils
from pytrnsys_process.logger import logger


# TODO: test whether overlapping columns are allowed if the values are the same  # pylint: disable=fixme


@dataclass
class Simulation:
    monthly: _pd.DataFrame
    hourly: _pd.DataFrame
    timestep: _pd.DataFrame
    # TODO: Add results data here. Not sure yet what this will look like  # pylint: disable=fixme


def handle_duplicate_columns(df: _pd.DataFrame) -> _pd.DataFrame:
    """
    Process duplicate columns in a DataFrame, ensuring they contain consistent data.

    This function checks for duplicate column names and verifies that:
    1. If one duplicate column has NaN values, the other(s) must also have NaN at the same indices
    2. All non-NaN values must be identical across duplicate columns

    Parameters
    ----------
    df : pandas.DataFrame
        Input DataFrame to process

    Returns
    -------
    pandas.DataFrame
        DataFrame with duplicate columns removed, keeping only the first occurrence

    Raises
    ------
    ValueError
        If duplicate columns have:
        - NaN values in one column while having actual values in another at the same index
        - Different non-NaN values at the same index

    https://stackoverflow.com/questions/14984119/python-pandas-remove-duplicate-columns
    """
    for col in df.columns[df.columns.duplicated(keep=False)]:
        duplicate_cols = df.iloc[:, df.columns == col]

        nan_mask = duplicate_cols.isna()
        value_mask = ~nan_mask
        if ((nan_mask.sum(axis=1) > 0) & (value_mask.sum(axis=1) > 0)).any():
            raise ValueError(
                f"Column '{col}' has NaN values in one column while having actual values in another"
            )

        if not duplicate_cols.apply(lambda x: x.nunique() <= 1, axis=1).all():
            raise ValueError(
                f"Column '{col}' has conflicting values at same indices"
            )

    df = df.iloc[:, ~df.columns.duplicated()].copy()
    return df
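
For illustration, a minimal usage sketch of handle_duplicate_columns (the column names and values are made up, and the import path is only inferred from the file path shown above): duplicates that agree are collapsed to a single column, while conflicting duplicates raise ValueError.

import pandas as pd

from pytrnsys_process.process_sim.process_sim import handle_duplicate_columns

# Two frames that both report a column named "QSnk60P" with identical values.
left = pd.DataFrame({"QSnk60P": [1.0, 2.0], "Tamb": [10.0, 11.0]})
right = pd.DataFrame({"QSnk60P": [1.0, 2.0]})
merged = pd.concat([left, right], axis=1)        # "QSnk60P" now appears twice

deduplicated = handle_duplicate_columns(merged)  # keeps the first "QSnk60P" only
assert list(deduplicated.columns) == ["QSnk60P", "Tamb"]

# Conflicting duplicates are rejected:
conflicting = pd.concat([left, pd.DataFrame({"QSnk60P": [1.0, 99.0]})], axis=1)
# handle_duplicate_columns(conflicting)          # would raise ValueError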

def process_sim_prt(
        sim_folder: _pl.Path,
) -> Simulation:
    sim_files = utils.get_files([sim_folder])
    prt_reader = readers.PrtReader()
    hourly = []
    monthly = []
    timestep = []

    for sim_file in sim_files:
        if fm.has_pattern(sim_file.name, fm.FileType.MONTHLY):
            monthly.append(prt_reader.read_monthly(sim_file))
        elif fm.has_pattern(sim_file.name, fm.FileType.HOURLY):
            hourly.append(prt_reader.read_hourly(sim_file))
        elif fm.has_pattern(sim_file.name, fm.FileType.TIMESTEP):
            timestep.append(prt_reader.read_step(sim_file))
        else:
            logger.warning("Unknown file type: %s", sim_file.name)  # uncovered in this build

    monthly_df = handle_duplicate_columns(_pd.concat(monthly, axis=1))
    hourly_df = handle_duplicate_columns(_pd.concat(hourly, axis=1))
    timestep_df = handle_duplicate_columns(_pd.concat(timestep, axis=1))
    return Simulation(monthly_df, hourly_df, timestep_df)
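
A hedged sketch of how process_sim_prt might be called from user code (the folder path is hypothetical and the import path is inferred from the file path above; the file naming rules themselves live in file_matcher):

import pathlib as _pl

from pytrnsys_process.process_sim import process_sim

# Hypothetical folder containing TRNSYS printer (.prt) result files whose names
# match the monthly/hourly/timestep patterns checked by fm.has_pattern above.
sim = process_sim.process_sim_prt(_pl.Path("results/sim-1"))

print(sim.monthly.shape)   # one concatenated, de-duplicated monthly DataFrame
print(sim.hourly.shape, sim.timestep.shape)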

def process_sim_using_file_content_prt(
        sim_folder: _pl.Path,
) -> Simulation:
    sim_files = utils.get_files([sim_folder])
    prt_reader = readers.PrtReader()
    hourly = []
    monthly = []
    step = []

    for sim_file in sim_files:
        file_type = fm.get_file_type_using_file_content(sim_file)
        if file_type == fm.FileType.MONTHLY:
            monthly.append(prt_reader.read_monthly(sim_file))
        elif file_type == fm.FileType.HOURLY:
            hourly.append(prt_reader.read_hourly(sim_file))
        elif file_type == fm.FileType.TIMESTEP:
            step.append(prt_reader.read_step(sim_file))
        else:
            logger.warning("Unknown file type: %s", sim_file.name)  # uncovered in this build

    monthly_df = handle_duplicate_columns(_pd.concat(monthly, axis=1))
    hourly_df = handle_duplicate_columns(_pd.concat(hourly, axis=1))
    timestep_df = handle_duplicate_columns(_pd.concat(step, axis=1))
    return Simulation(monthly_df, hourly_df, timestep_df)

def process_sim_csv(
        sim_folder: _pl.Path,
) -> Simulation:
    sim_files = utils.get_files([sim_folder], results_folder_name="converted")
    csv_reader = readers.CsvReader()
    hourly = []
    monthly = []
    timestep = []

    for sim_file in sim_files:
        if fm.has_pattern(sim_file.name, fm.FileType.MONTHLY):
            monthly.append(csv_reader.read_csv(sim_file))
        elif fm.has_pattern(sim_file.name, fm.FileType.HOURLY):
            hourly.append(csv_reader.read_csv(sim_file))
        elif fm.has_pattern(sim_file.name, fm.FileType.TIMESTEP):
            timestep.append(csv_reader.read_csv(sim_file))
        else:
            logger.warning("Unknown file type: %s", sim_file.name)  # uncovered in this build

    monthly_df = handle_duplicate_columns(_pd.concat(monthly, axis=1))
    hourly_df = handle_duplicate_columns(_pd.concat(hourly, axis=1))
    timestep_df = handle_duplicate_columns(_pd.concat(timestep, axis=1))

    return Simulation(monthly_df, hourly_df, timestep_df)
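
process_sim_csv is the only variant that passes results_folder_name="converted" to utils.get_files, so it presumably reads previously converted CSV files rather than raw .prt output. A hedged sketch under that assumption (the folder layout beyond the "converted" name is made up):

import pathlib as _pl

from pytrnsys_process.process_sim import process_sim

# Assumed layout: results/sim-1/converted/*.csv, where the CSV file names still
# follow the monthly/hourly/timestep patterns used by fm.has_pattern.
sim = process_sim.process_sim_csv(_pl.Path("results/sim-1"))

print(sim.monthly.head())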