• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

SPF-OST / pytrnsys_process / 12050814130

27 Nov 2024 12:44PM UTC coverage: 90.52% (-3.5%) from 94.037%
12050814130

push

github

web-flow
Merge pull request #18 from SPF-OST/15-create-example-script-for-per-sim-interaction

15 create example script for per sim interaction

119 of 124 new or added lines in 5 files covered. (95.97%)

20 existing lines in 3 files now uncovered.

487 of 538 relevant lines covered (90.52%)

1.8 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

74.36
/pytrnsys_process/process_sim/process_sim.py
1
import pathlib as _pl
2✔
2
from dataclasses import dataclass
2✔
3

4
import pandas as _pd
2✔
5

6
from pytrnsys_process import file_matcher as fm
2✔
7
from pytrnsys_process import readers, utils
2✔
8
from pytrnsys_process.logger import logger
2✔
9

10

11
# TODO test if overlapping columns are allowed if the values are the same # pylint: disable=fixme
12

13

14
@dataclass
class Simulation:
    """Bundle of the data frames read for a single simulation folder.

    The ``process_sim_*`` functions in this module build instances of this
    class from the folder they are given and the per-resolution data frames
    they assemble.
    """

    # Folder the simulation was read from (the ``sim_folder`` argument of the
    # ``process_sim_*`` functions in this module).
    path: _pl.Path
    # Column-wise concatenation of all files classified as monthly.
    monthly: _pd.DataFrame
    # Column-wise concatenation of all files classified as hourly.
    hourly: _pd.DataFrame
    # Column-wise concatenation of all files classified as timestep.
    timestep: _pd.DataFrame
    # TODO: Add results data here. Not sure yet, what this will look like # pylint: disable=fixme
21

22

23
def handle_duplicate_columns(df: _pd.DataFrame) -> _pd.DataFrame:
    """
    Process duplicate columns in a DataFrame, ensuring they contain consistent data.

    This function checks for duplicate column names and verifies that:
    1. If one duplicate column has NaN values, the other(s) must also have NaN at the same indices
    2. All non-NaN values must be identical across duplicate columns

    Each duplicated column name is validated exactly once, regardless of how
    many times it occurs.

    Parameters
    ----------
    df : pandas.DataFrame
        Input DataFrame to process

    Returns
    -------
    pandas.DataFrame
        Copy of the DataFrame with duplicate columns removed, keeping only the
        first occurrence

    Raises
    ------
    ValueError
        If duplicate columns have:
        - NaN values in one column while having actual values in another at the same index
        - Different non-NaN values at the same index

    https://stackoverflow.com/questions/14984119/python-pandas-remove-duplicate-columns
    """
    # ``duplicated(keep=False)`` flags every occurrence of a repeated name, so
    # reduce to unique names to avoid re-validating the same group repeatedly.
    duplicated_names = df.columns[df.columns.duplicated(keep=False)].unique()

    for col in duplicated_names:
        duplicate_cols = df.iloc[:, df.columns == col]

        nan_mask = duplicate_cols.isna()
        row_has_nan = nan_mask.any(axis=1)
        row_has_value = (~nan_mask).any(axis=1)
        # A row mixing NaN and real values means the duplicates disagree.
        if (row_has_nan & row_has_value).any():
            raise ValueError(
                f"Column '{col}' has NaN values in one column while having actual values in another"
            )

        # ``nunique`` ignores NaN, so rows that are entirely NaN count as 0
        # distinct values and pass this check.
        if (duplicate_cols.nunique(axis=1) > 1).any():
            raise ValueError(
                f"Column '{col}' has conflicting values at same indices"
            )

    return df.iloc[:, ~df.columns.duplicated()].copy()
2✔
67

68

69
def process_sim_prt(
    sim_folder: _pl.Path,
) -> Simulation:
    """Read the files of one simulation folder with ``readers.PrtReader``.

    Files returned by ``utils.get_files`` are classified by name via
    ``fm.has_pattern`` as monthly, hourly or timestep (checked in that order;
    the first match wins). Files matching none of the patterns are logged with
    a warning and skipped.

    Parameters
    ----------
    sim_folder : pathlib.Path
        Folder of the simulation to process

    Returns
    -------
    Simulation
        The folder together with one DataFrame per resolution. A resolution
        for which no file was found yields an empty DataFrame.

    Raises
    ------
    ValueError
        Propagated from ``handle_duplicate_columns`` when files of the same
        resolution contain inconsistent duplicate columns.
    """
    candidate_files = utils.get_files([sim_folder])
    reader = readers.PrtReader()

    monthly_frames = []
    hourly_frames = []
    step_frames = []

    # Order matters: a file goes to the first file type whose pattern matches.
    dispatch = (
        (fm.FileType.MONTHLY, reader.read_monthly, monthly_frames),
        (fm.FileType.HOURLY, reader.read_hourly, hourly_frames),
        (fm.FileType.TIMESTEP, reader.read_step, step_frames),
    )

    for candidate in candidate_files:
        for file_type, read, bucket in dispatch:
            if fm.has_pattern(candidate.name, file_type):
                bucket.append(read(candidate))
                break
        else:
            # No pattern matched this file name.
            logger.warning("Unknown file type: %s", candidate.name)

    def _merge(frames):
        # ``_pd.concat`` rejects an empty list, so fall back to an empty frame.
        if not frames:
            return _pd.DataFrame()
        return handle_duplicate_columns(_pd.concat(frames, axis=1))

    monthly_df = _merge(monthly_frames)
    hourly_df = _merge(hourly_frames)
    timestep_df = _merge(step_frames)
    return Simulation(sim_folder, monthly_df, hourly_df, timestep_df)
2✔
104

105

106
def process_sim_using_file_content_prt(
    sim_folder: _pl.Path,
) -> Simulation:
    """Read the files of one simulation folder, classifying them by content.

    Each file returned by ``utils.get_files`` is classified with
    ``fm.get_file_type_using_file_content`` and read with the matching
    ``readers.PrtReader`` method. Files of any other type are logged with a
    warning and skipped.

    Parameters
    ----------
    sim_folder : pathlib.Path
        Folder of the simulation to process

    Returns
    -------
    Simulation
        The folder together with one DataFrame per resolution. A resolution
        for which no file was found yields an empty DataFrame.

    Raises
    ------
    ValueError
        Propagated from ``handle_duplicate_columns`` when files of the same
        resolution contain inconsistent duplicate columns.
    """
    sim_files = utils.get_files([sim_folder])
    prt_reader = readers.PrtReader()
    hourly = []
    monthly = []
    timestep = []

    for sim_file in sim_files:
        file_type = fm.get_file_type_using_file_content(sim_file)
        if file_type == fm.FileType.MONTHLY:
            monthly.append(prt_reader.read_monthly(sim_file))
        elif file_type == fm.FileType.HOURLY:
            hourly.append(prt_reader.read_hourly(sim_file))
        elif file_type == fm.FileType.TIMESTEP:
            timestep.append(prt_reader.read_step(sim_file))
        else:
            logger.warning("Unknown file type: %s", sim_file.name)

    # ``_pd.concat`` raises ValueError on an empty list, so a resolution
    # without any files falls back to an empty DataFrame (same behavior as
    # ``process_sim_prt``).
    monthly_df = (
        handle_duplicate_columns(_pd.concat(monthly, axis=1))
        if monthly
        else _pd.DataFrame()
    )
    hourly_df = (
        handle_duplicate_columns(_pd.concat(hourly, axis=1))
        if hourly
        else _pd.DataFrame()
    )
    timestep_df = (
        handle_duplicate_columns(_pd.concat(timestep, axis=1))
        if timestep
        else _pd.DataFrame()
    )
    return Simulation(sim_folder, monthly_df, hourly_df, timestep_df)
×
130

131

132
def process_sim_csv(
    sim_folder: _pl.Path,
) -> Simulation:
    """Read the CSV files of one simulation folder with ``readers.CsvReader``.

    Files are collected via ``utils.get_files`` with
    ``results_folder_name="converted"`` and classified by name via
    ``fm.has_pattern`` as monthly, hourly or timestep (checked in that order).
    Files matching none of the patterns are logged with a warning and skipped.

    Parameters
    ----------
    sim_folder : pathlib.Path
        Folder of the simulation to process

    Returns
    -------
    Simulation
        The folder together with one DataFrame per resolution. A resolution
        for which no file was found yields an empty DataFrame.

    Raises
    ------
    ValueError
        Propagated from ``handle_duplicate_columns`` when files of the same
        resolution contain inconsistent duplicate columns.
    """
    sim_files = utils.get_files([sim_folder], results_folder_name="converted")
    csv_reader = readers.CsvReader()
    hourly = []
    monthly = []
    timestep = []

    for sim_file in sim_files:
        if fm.has_pattern(sim_file.name, fm.FileType.MONTHLY):
            monthly.append(csv_reader.read_csv(sim_file))
        elif fm.has_pattern(sim_file.name, fm.FileType.HOURLY):
            hourly.append(csv_reader.read_csv(sim_file))
        elif fm.has_pattern(sim_file.name, fm.FileType.TIMESTEP):
            timestep.append(csv_reader.read_csv(sim_file))
        else:
            logger.warning("Unknown file type: %s", sim_file.name)

    # ``_pd.concat`` raises ValueError on an empty list, so a resolution
    # without any files falls back to an empty DataFrame (same behavior as
    # ``process_sim_prt``).
    monthly_df = (
        handle_duplicate_columns(_pd.concat(monthly, axis=1))
        if monthly
        else _pd.DataFrame()
    )
    hourly_df = (
        handle_duplicate_columns(_pd.concat(hourly, axis=1))
        if hourly
        else _pd.DataFrame()
    )
    timestep_df = (
        handle_duplicate_columns(_pd.concat(timestep, axis=1))
        if timestep
        else _pd.DataFrame()
    )

    return Simulation(sim_folder, monthly_df, hourly_df, timestep_df)
2✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc