• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

SPF-OST / pytrnsys_process / 16748222285

05 Aug 2025 11:03AM UTC coverage: 49.518% (-46.5%) from 95.968%
16748222285

Pull #126

github

ahobeost
Reduce linux job to just test.
Pull Request #126: 125 bug step file not read when step used with type 25

5 of 6 new or added lines in 2 files covered. (83.33%)

578 existing lines in 11 files now uncovered.

616 of 1244 relevant lines covered (49.52%)

0.99 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

90.82
/pytrnsys_process/process/process_sim.py
1
import logging as _logging
2✔
2
import pathlib as _pl
2✔
3
from collections import abc as _abc
2✔
4
from dataclasses import dataclass, field
2✔
5

6
import pandas as _pd
2✔
7

8
from pytrnsys_process import config as conf
2✔
9
from pytrnsys_process import deck, log, read, util
2✔
10
from pytrnsys_process.process import data_structures as ds
2✔
11
from pytrnsys_process.process import file_type_detector as ftd
2✔
12

13

14
def process_sim(
    sim_files: _abc.Sequence[_pl.Path], sim_folder: _pl.Path
) -> ds.Simulation:
    """Read all result files of one simulation folder into a Simulation.

    Each file is read according to its detected type and collected per
    type; the collected dataframes are then concatenated into a single
    Simulation object. A file that fails to read is logged and skipped
    rather than aborting the whole simulation.

    Parameters
    ----------
    sim_files: Sequence[pathlib.Path]
        Paths of the files belonging to this simulation.
    sim_folder: pathlib.Path
        Folder of the simulation; used for the simulation logger and as
        the Simulation's path.

    Returns
    -------
    ds.Simulation
        The merged simulation data.
    """
    # Used to store the array of dataframes for each file type.
    # Later used to concatenate all into one dataframe and saving as Sim object
    simulation_data_collector = _SimulationDataCollector()

    sim_logger = log.get_simulation_logger(sim_folder)
    for sim_file in sim_files:
        try:
            _process_file(
                simulation_data_collector,
                sim_file,
                _determine_file_type(sim_file, sim_logger),
            )
        # ValueError and KeyError both mean "file could not be read";
        # the two handlers were byte-identical, so catch them together.
        except (ValueError, KeyError) as e:
            sim_logger.error(
                "Error reading file %s it will not be available for processing: %s",
                sim_file,
                str(e),
                exc_info=True,
            )

    return _merge_dataframes_into_simulation(
        simulation_data_collector, sim_folder
    )
47

48

49
def handle_duplicate_columns(df: _pd.DataFrame) -> _pd.DataFrame:
    """
    Process duplicate columns in a DataFrame, ensuring they contain consistent data.

    This function checks for duplicate column names and verifies that:
    1. If one duplicate column has NaN values, the other(s) must also have NaN at the same indices
    2. All non-NaN values must be identical across duplicate columns

    Parameters
    ----------
    df: pandas.DataFrame
        Input DataFrame to process

    Returns
    -------
    df: pandas.DataFrame
        DataFrame with duplicate columns removed, keeping only the first occurrence.
        Conflicting duplicate "Time" columns are tolerated but dropped entirely.

    Raises
    ------
    ValueError
        If duplicate columns have:
        1. NaN values in one column while having actual values in another at the same index, or
        2. Different non-NaN values at the same index (except for "Time")

    Note
    ----
    https://stackoverflow.com/questions/14984119/python-pandas-remove-duplicate-columns
    """
    remove_time_as_well = False
    for col in df.columns[df.columns.duplicated(keep=False)]:
        duplicate_cols = df.iloc[:, df.columns == col]

        nan_mask = duplicate_cols.isna()
        value_mask = ~nan_mask
        # A row where one duplicate holds NaN while another holds a value
        # means the duplicates disagree about missingness.
        if ((nan_mask.sum(axis=1) > 0) & (value_mask.sum(axis=1) > 0)).any():
            raise ValueError(
                f"Column '{col}' has NaN values in one column while having actual values in another"
            )

        if not duplicate_cols.apply(lambda x: x.nunique() <= 1, axis=1).all():
            if col == "Time":
                # Conflicting "Time" columns are tolerated: drop every
                # occurrence below instead of raising.
                remove_time_as_well = True
                continue

            raise ValueError(
                f"Column '{col}' has conflicting values at same indices"
            )

    columns_to_be_removed = df.columns.duplicated()
    if remove_time_as_well:
        # Bug fix: the previous `+= df.columns.get_loc("Time")` broke when
        # the duplicated "Time" columns were adjacent, because Index.get_loc
        # then returns a slice (not a boolean mask), which cannot be added
        # to the boolean array. A direct equality comparison always yields
        # a boolean mask marking every "Time" column; OR it in explicitly.
        columns_to_be_removed |= df.columns == "Time"

    df = df.iloc[:, ~columns_to_be_removed].copy()

    return df
105

106

107
def _determine_file_type(
    sim_file: _pl.Path, logger: _logging.Logger
) -> conf.FileType:
    """Detect a file's type: first by its name, then by its content.

    The name-based detector raises ValueError when the name is not
    conclusive; in that case the content-based detector decides.
    """
    try:
        file_type = ftd.get_file_type_using_file_name(sim_file, logger)
    except ValueError:
        file_type = ftd.get_file_type_using_file_content(sim_file, logger)
    return file_type
115

116

117
@dataclass
class _SimulationDataCollector:
    # Accumulates one dataframe per successfully read file, grouped by
    # file type; each list is later concatenated into a single dataframe
    # per type when building the Simulation object.
    hourly: list[_pd.DataFrame] = field(default_factory=list)
    monthly: list[_pd.DataFrame] = field(default_factory=list)
    step: list[_pd.DataFrame] = field(default_factory=list)
    # Single-row dataframe of constants parsed from the deck file;
    # stays empty until a deck file is processed.
    parsed_deck: _pd.DataFrame = field(default_factory=_pd.DataFrame)
123

124

125
def _read_file(file_path: _pl.Path, file_type: conf.FileType) -> _pd.DataFrame:
    """
    Factory method to read data from a file using the appropriate reader.

    Parameters
    ----------
    file_path: pathlib.Path
        Path to the file to be read

    file_type: conf.FileType
        Type of data in the file (MONTHLY, HOURLY, or TIMESTEP)

    Returns
    -------
    pandas.DataFrame
        Data read from the file

    Raises
    ------
    ValueError
        If file extension is not supported
    """
    starting_year = conf.global_settings.reader.starting_year
    extension = file_path.suffix.lower()
    # NOTE(review): the simulation logger is keyed off the grandparent
    # directory here (parents[1]), unlike _get_deck_as_df which uses the
    # parent — presumably because these files live in a subfolder; confirm.
    logger = log.get_simulation_logger(file_path.parents[1])

    if extension in (".prt", ".hr"):
        prt_reader = read.PrtReader()
        if file_type == conf.FileType.MONTHLY:
            return prt_reader.read_monthly(
                file_path, logger=logger, starting_year=starting_year
            )
        if file_type == conf.FileType.HOURLY:
            return prt_reader.read_hourly(
                file_path, logger=logger, starting_year=starting_year
            )
        if file_type == conf.FileType.TIMESTEP:
            # Fixed offsets for this step-file layout: one extra header
            # line and a 23-line footer to skip.
            return prt_reader.read_step(
                file_path, starting_year=starting_year, skipfooter=23, header=1
            )
        if file_type == conf.FileType.HYDRAULIC:
            return prt_reader.read_step(file_path, starting_year=starting_year)
    elif extension == ".csv":
        return read.CsvReader().read_csv(file_path)

    # Unknown extension, or a .prt/.hr file whose type matched no branch.
    raise ValueError(f"Unsupported file extension: {extension}")
170

171

172
def _process_file(
    simulation_data_collector: _SimulationDataCollector,
    file_path: _pl.Path,
    file_type: conf.FileType,
) -> bool:
    """Read one file and store its dataframe in the collector.

    Returns True when the file was handled, False when its type is not
    processed (or is disabled via the reader settings).
    """
    reader_settings = conf.global_settings.reader

    if file_type == conf.FileType.MONTHLY:
        monthly_df = _read_file(file_path, conf.FileType.MONTHLY)
        simulation_data_collector.monthly.append(monthly_df)
        return True

    if file_type == conf.FileType.HOURLY:
        hourly_df = _read_file(file_path, conf.FileType.HOURLY)
        simulation_data_collector.hourly.append(hourly_df)
        return True

    if file_type == conf.FileType.TIMESTEP and reader_settings.read_step_files:
        # A step file can be produced two ways (type 25 or type 46), and
        # both may be named '_step.prt'.  As a temporary measure until
        # file reading is fully refactored, try one layout and fall back
        # to the other.
        try:
            step_df = _read_file(file_path, conf.FileType.TIMESTEP)
        except KeyError:
            step_df = _read_file(file_path, conf.FileType.HYDRAULIC)
        simulation_data_collector.step.append(step_df)
        return True

    if file_type == conf.FileType.HYDRAULIC and reader_settings.read_step_files:
        hydraulic_df = _read_file(file_path, conf.FileType.HYDRAULIC)
        simulation_data_collector.step.append(hydraulic_df)
        return True

    if file_type == conf.FileType.DECK and reader_settings.read_deck_files:
        simulation_data_collector.parsed_deck = _get_deck_as_df(file_path)
        return True

    return False
214

215

216
def _get_deck_as_df(
    file_path: _pl.Path,
) -> _pd.DataFrame:
    """Parse a deck file into a single-row dataframe of its constants."""
    deck_content = util.get_file_content_as_string(file_path)
    constants: dict[str, float] = deck.parse_deck_for_constant_expressions(
        deck_content, log.get_simulation_logger(file_path.parent)
    )
    return _pd.DataFrame([constants])
225

226

227
def _merge_dataframes_into_simulation(
    simulation_data_collector: _SimulationDataCollector, sim_folder: _pl.Path
) -> ds.Simulation:
    """Concatenate the collected per-type dataframes into one Simulation."""
    collector = simulation_data_collector
    # Arguments are evaluated in monthly/hourly/step order, matching the
    # original sequence of concatenations.
    return ds.Simulation(
        sim_folder.as_posix(),
        _get_df_without_duplicates(collector.monthly),
        _get_df_without_duplicates(collector.hourly),
        _get_df_without_duplicates(collector.step),
        collector.parsed_deck,
    )
238

239

240
def _get_df_without_duplicates(
    dfs: _abc.Sequence[_pd.DataFrame],
) -> _pd.DataFrame:
    """Concatenate *dfs* column-wise and drop consistent duplicate columns.

    Parameters
    ----------
    dfs: Sequence[pandas.DataFrame]
        Dataframes to combine; may be empty.

    Returns
    -------
    pandas.DataFrame
        The combined, de-duplicated dataframe, or an empty dataframe
        when *dfs* is empty (added return annotation for consistency
        with the rest of the module).

    Raises
    ------
    ValueError
        Propagated from handle_duplicate_columns when duplicate columns
        hold conflicting data.
    """
    # Guard first: pd.concat raises on an empty sequence.
    if not dfs:
        return _pd.DataFrame()

    return handle_duplicate_columns(_pd.concat(dfs, axis=1))
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc