SPF-OST / pytrnsys_process / build 12275970811

11 Dec 2024 11:55AM UTC · coverage: 95.245% (first build 12275970811)
Triggered by a push via GitHub (committer: web-flow):
Merge pull request #37 from SPF-OST/22-add-support-for-step-files
("22 add support for step files")

129 of 136 new or added lines in 9 files covered (94.85%)
641 of 673 relevant lines covered (95.25%)
1.9 hits per line
Source file: /pytrnsys_process/process_sim/process_sim.py (97.18% of relevant lines covered)
The two uncovered lines, both newly added, are the logger.error call inside process_sim's except block and the "Unsupported file extension" ValueError raised at the end of _read_file.

import pathlib as _pl
from collections import abc as _abc
from dataclasses import dataclass, field

import pandas as _pd

from pytrnsys_process import constants as const
from pytrnsys_process import file_type_detector as ftd
from pytrnsys_process import readers
from pytrnsys_process import settings as sett
from pytrnsys_process.logger import logger


@dataclass
class Simulation:
    """Class representing a TRNSYS simulation with its associated data.

    This class holds the simulation data organized in different time resolutions (monthly, hourly, timestep)
    along with the path to the simulation files.

    Attributes
    ----------
    path : pathlib.Path
        Path to the simulation folder containing the input files
    monthly : pandas.DataFrame
        Monthly aggregated simulation data. Each column represents a different variable
        and each row represents a month.
    hourly : pandas.DataFrame
        Hourly simulation data. Each column represents a different variable
        and each row represents an hour.
    step : pandas.DataFrame
        Simulation data at the smallest timestep resolution. Each column represents
        a different variable and each row represents a timestep.
    """

    path: _pl.Path
    monthly: _pd.DataFrame
    hourly: _pd.DataFrame
    step: _pd.DataFrame
    # TODO: Add results data here. Not sure yet, what this will look like # pylint: disable=fixme


def process_sim(
        sim_files: _abc.Sequence[_pl.Path], sim_folder: _pl.Path
) -> Simulation:
    simulation_data_collector = _SimulationDataCollector()
    for sim_file in sim_files:
        try:
            _process_file(
                simulation_data_collector,
                sim_file,
                _determine_file_type(sim_file),
            )
        except ValueError as e:
            logger.error(
                "Error reading file %s it will not be available for processing: %s",
                sim_file,
                str(e),
                exc_info=True,
            )

    return _merge_dataframes_into_simulation(
        simulation_data_collector, sim_folder
    )
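
As an illustration only, and not part of the file under coverage, here is a minimal sketch of how process_sim might be called. The folder layout, the file names, and the import path (derived from the source file location shown above) are assumptions:

import pathlib as pl

from pytrnsys_process.process_sim import process_sim as ps  # assumed module path

# Hypothetical simulation folder containing TRNSYS printer (.prt) and CSV outputs.
sim_folder = pl.Path("simulations/sim-1")
sim_files = sorted(sim_folder.rglob("*.prt")) + sorted(sim_folder.rglob("*.csv"))

sim = ps.process_sim(sim_files, sim_folder)
print(sim.monthly.shape, sim.hourly.shape, sim.step.shape)

Files that cannot be read are logged and skipped, so the call still returns a Simulation; resolutions for which no files were collected come back as empty data frames.
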
def handle_duplicate_columns(df: _pd.DataFrame) -> _pd.DataFrame:
    """
    Process duplicate columns in a DataFrame, ensuring they contain consistent data.

    This function checks for duplicate column names and verifies that:
    1. If one duplicate column has NaN values, the other(s) must also have NaN at the same indices
    2. All non-NaN values must be identical across duplicate columns

    Parameters
    ----------
    df : pandas.DataFrame
        Input DataFrame to process

    Returns
    -------
    pandas.DataFrame
        DataFrame with duplicate columns removed, keeping only the first occurrence

    Raises
    ------
    ValueError
        If duplicate columns have:
        - NaN values in one column while having actual values in another at the same index
        - Different non-NaN values at the same index

    https://stackoverflow.com/questions/14984119/python-pandas-remove-duplicate-columns
    """
    for col in df.columns[df.columns.duplicated(keep=False)]:
        duplicate_cols = df.iloc[:, df.columns == col]

        nan_mask = duplicate_cols.isna()
        value_mask = ~nan_mask
        if ((nan_mask.sum(axis=1) > 0) & (value_mask.sum(axis=1) > 0)).any():
            raise ValueError(
                f"Column '{col}' has NaN values in one column while having actual values in another"
            )

        if not duplicate_cols.apply(lambda x: x.nunique() <= 1, axis=1).all():
            raise ValueError(
                f"Column '{col}' has conflicting values at same indices"
            )

    df = df.iloc[:, ~df.columns.duplicated()].copy()
    return df
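
For illustration, again not part of the file, a small self-contained demonstration of the behaviour documented above: identical duplicate columns collapse to a single column, while conflicting duplicates raise ValueError. The import path and the column names are assumptions:

import pandas as pd

# Assumed import path, derived from the source file location.
from pytrnsys_process.process_sim.process_sim import handle_duplicate_columns

# Two identical "QSol" columns: the duplicate is dropped, "QAux" is kept as-is.
df_ok = pd.DataFrame(
    [[1.0, 1.0, 5.0], [2.0, 2.0, 6.0]], columns=["QSol", "QSol", "QAux"]
)
print(handle_duplicate_columns(df_ok).columns.tolist())  # ['QSol', 'QAux']

# Two "QSol" columns that disagree in the first row: rejected.
df_bad = pd.DataFrame([[1.0, 9.0], [2.0, 2.0]], columns=["QSol", "QSol"])
try:
    handle_duplicate_columns(df_bad)
except ValueError as exc:
    print(exc)  # Column 'QSol' has conflicting values at same indices
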
def _determine_file_type(sim_file: _pl.Path) -> const.FileType:
    """Determine the file type using name and content."""
    try:
        return ftd.get_file_type_using_file_name(sim_file)
    except ValueError:
        return ftd.get_file_type_using_file_content(sim_file)


@dataclass
class _SimulationDataCollector:
    hourly: list[_pd.DataFrame] = field(default_factory=list)
    monthly: list[_pd.DataFrame] = field(default_factory=list)
    step: list[_pd.DataFrame] = field(default_factory=list)


def _read_file(
        file_path: _pl.Path, file_type: const.FileType
) -> _pd.DataFrame:
    """
    Factory method to read data from a file using the appropriate reader.

    Parameters
    ----------
    file_path : pathlib.Path
        Path to the file to be read
    file_type : const.FileType
        Type of data in the file (MONTHLY, HOURLY, or TIMESTEP)

    Returns
    -------
    pandas.DataFrame
        Data read from the file

    Raises
    ------
    ValueError
        If file extension is not supported
    """
    extension = file_path.suffix.lower()
    if extension == ".prt":
        reader = readers.PrtReader()
        if file_type == const.FileType.MONTHLY:
            return reader.read_monthly(file_path)
        if file_type == const.FileType.HOURLY:
            return reader.read_hourly(file_path)
        if file_type == const.FileType.TIMESTEP:
            return reader.read_step(file_path)
    elif extension == ".csv":
        return readers.CsvReader().read_csv(file_path)

    raise ValueError(f"Unsupported file extension: {extension}")
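
_read_file dispatches on the file extension first and then on the detected FileType. For a one-off read outside of process_sim, the underlying readers can also be used directly; a sketch with a hypothetical file name, using the same reader classes and methods as above:

import pathlib as pl

from pytrnsys_process import readers

# Hypothetical monthly printer output file.
monthly_df = readers.PrtReader().read_monthly(pl.Path("ENERGY_BALANCE_MO.prt"))
print(monthly_df.columns.tolist())
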
def _process_file(
        simulation_data_collector: _SimulationDataCollector,
        file_path: _pl.Path,
        file_type: const.FileType,
) -> bool:
    if file_type == const.FileType.MONTHLY:
        simulation_data_collector.monthly.append(
            _read_file(file_path, const.FileType.MONTHLY)
        )
    elif file_type == const.FileType.HOURLY:
        simulation_data_collector.hourly.append(
            _read_file(file_path, const.FileType.HOURLY)
        )
    elif (
            file_type == const.FileType.TIMESTEP
            and sett.settings.reader.read_step_files
    ):
        simulation_data_collector.step.append(
            _read_file(file_path, const.FileType.TIMESTEP)
        )
    else:
        return False

    return True
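
Note that _process_file only collects TIMESTEP data when sett.settings.reader.read_step_files is enabled; this is the switch behind the "add support for step files" change referenced in the build header. A hedged sketch of turning it on before processing, assuming read_step_files is a plain writable boolean on the reader settings:

from pytrnsys_process import settings as sett

# Assumption: read_step_files is a writable boolean attribute.
sett.settings.reader.read_step_files = True  # include timestep (.prt) files

With the flag left off, TIMESTEP files are skipped and Simulation.step ends up as an empty DataFrame after merging.
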
def _merge_dataframes_into_simulation(
        simulation_data_collector: _SimulationDataCollector, sim_folder: _pl.Path
) -> Simulation:
    monthly_df = (
        handle_duplicate_columns(
            _pd.concat(simulation_data_collector.monthly, axis=1)
        )
        if simulation_data_collector.monthly
        else _pd.DataFrame()
    )
    hourly_df = (
        handle_duplicate_columns(
            _pd.concat(simulation_data_collector.hourly, axis=1)
        )
        if simulation_data_collector.hourly
        else _pd.DataFrame()
    )
    timestep_df = (
        handle_duplicate_columns(
            _pd.concat(simulation_data_collector.step, axis=1)
        )
        if simulation_data_collector.step
        else _pd.DataFrame()
    )
    return Simulation(sim_folder, monthly_df, hourly_df, timestep_df)