SPF-OST / pytrnsys_process / 13730861558

07 Mar 2025 11:02PM UTC coverage: 94.613% (-2.9%) from 97.522%

Build 13730861558 · push · github · ahobeost
Commit: "Final cleanup, invalid data is no longer invalid, tests commented out for now."

1 of 2 new or added lines in 1 file covered. (50.0%)
32 existing lines in 5 files now uncovered.
1089 of 1151 relevant lines covered (94.61%)
0.95 hits per line

Source File: /pytrnsys_process/process/process_sim.py (96.55% covered)
import logging as _logging
import pathlib as _pl
from collections import abc as _abc
from dataclasses import dataclass, field

import pandas as _pd

from pytrnsys_process import config as conf
from pytrnsys_process import deck, log, read, util
from pytrnsys_process.process import data_structures as ds
from pytrnsys_process.process import file_type_detector as ftd


def process_sim(
    sim_files: _abc.Sequence[_pl.Path], sim_folder: _pl.Path
) -> ds.Simulation:
    # Collects the dataframes for each file type; they are later
    # concatenated into one dataframe per type and stored on the
    # resulting Simulation object.
    simulation_data_collector = _SimulationDataCollector()

    sim_logger = log.get_simulation_logger(sim_folder)
    for sim_file in sim_files:
        try:
            _process_file(
                simulation_data_collector,
                sim_file,
                _determine_file_type(sim_file, sim_logger),
            )
        except ValueError as e:
            sim_logger.error(
                "Error reading file %s; it will not be available for processing: %s",
                sim_file,
                str(e),
                exc_info=True,
            )
        except KeyError as e:  # uncovered in this build
            sim_logger.error(
                "Error reading file %s; it will not be available for processing: %s",
                sim_file,
                str(e),
                exc_info=True,
            )

    return _merge_dataframes_into_simulation(
        simulation_data_collector, sim_folder
    )


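# Editor's sketch (not part of the source file): a minimal, hypothetical
# call to process_sim. The folder layout and the idea of passing every
# file under the simulation folder are assumptions made for illustration.
#
#     sim_folder = _pl.Path("simulations/sim-1")
#     sim_files = [p for p in sim_folder.rglob("*") if p.is_file()]
#     simulation = process_sim(sim_files, sim_folder)

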
def handle_duplicate_columns(df: _pd.DataFrame) -> _pd.DataFrame:
    """
    Process duplicate columns in a DataFrame, ensuring they contain consistent data.

    This function checks for duplicate column names and verifies that:
    1. If one duplicate column has NaN values, the other(s) must also have NaN at the same indices
    2. All non-NaN values must be identical across duplicate columns

    Parameters
    ----------
    df: pandas.DataFrame
        Input DataFrame to process

    Returns
    -------
    df: pandas.DataFrame
        DataFrame with duplicate columns removed, keeping only the first occurrence

    Raises
    ------
    ValueError
        If duplicate columns have:
        1. NaN values in one column while having actual values in another at the same index, or
        2. Different non-NaN values at the same index

    Note
    ----
    https://stackoverflow.com/questions/14984119/python-pandas-remove-duplicate-columns
    """
    for col in df.columns[df.columns.duplicated(keep=False)]:
        duplicate_cols = df.iloc[:, df.columns == col]

        # A row where one duplicate is NaN while another holds a value is
        # ambiguous: there is no way to tell which entry is correct.
        nan_mask = duplicate_cols.isna()
        value_mask = ~nan_mask
        if ((nan_mask.sum(axis=1) > 0) & (value_mask.sum(axis=1) > 0)).any():
            raise ValueError(
                f"Column '{col}' has NaN values in one column while having actual values in another"
            )

        # All duplicates must agree row by row.
        if not duplicate_cols.apply(lambda x: x.nunique() <= 1, axis=1).all():
            raise ValueError(
                f"Column '{col}' has conflicting values at same indices"
            )

    df = df.iloc[:, ~df.columns.duplicated()].copy()
    return df


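# Editor's sketch (not part of the source file): duplicate columns with
# identical values collapse to a single column; conflicting values would
# raise ValueError instead. The column name "QHeat" is hypothetical.
#
#     frame = _pd.DataFrame(
#         [[1.0, 1.0], [2.0, 2.0]], columns=["QHeat", "QHeat"]
#     )
#     deduped = handle_duplicate_columns(frame)
#     assert list(deduped.columns) == ["QHeat"]

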
def _determine_file_type(
    sim_file: _pl.Path, logger: _logging.Logger
) -> conf.FileType:
    """Determine the file type using name and content."""
    try:
        return ftd.get_file_type_using_file_name(sim_file, logger)
    except ValueError:
        return ftd.get_file_type_using_file_content(sim_file, logger)


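# Editor's sketch (not part of the source file): name-based detection is
# tried first; if the name yields no match (ValueError), detection falls
# back to inspecting the file's content. The path is hypothetical.
#
#     file_type = _determine_file_type(
#         _pl.Path("sim-1/temp/unknown_output.prt"),
#         log.get_simulation_logger(_pl.Path("sim-1")),
#     )

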
@dataclass
class _SimulationDataCollector:
    hourly: list[_pd.DataFrame] = field(default_factory=list)
    monthly: list[_pd.DataFrame] = field(default_factory=list)
    step: list[_pd.DataFrame] = field(default_factory=list)
    parsed_deck: _pd.DataFrame = field(default_factory=_pd.DataFrame)


def _read_file(file_path: _pl.Path, file_type: conf.FileType) -> _pd.DataFrame:
    """
    Read data from a file using the appropriate reader for its extension.

    Parameters
    ----------
    file_path: pathlib.Path
        Path to the file to be read

    file_type: conf.FileType
        Type of data in the file (MONTHLY, HOURLY, TIMESTEP, or HYDRAULIC)

    Returns
    -------
    pandas.DataFrame
        Data read from the file

    Raises
    ------
    ValueError
        If the file extension is not supported
    """
    starting_year = conf.global_settings.reader.starting_year
    extension = file_path.suffix.lower()
    logger = log.get_simulation_logger(file_path.parents[1])
    if extension in [".prt", ".hr"]:
        reader = read.PrtReader()
        if file_type == conf.FileType.MONTHLY:
            return reader.read_monthly(
                file_path, logger=logger, starting_year=starting_year
            )
        if file_type == conf.FileType.HOURLY:
            return reader.read_hourly(
                file_path, logger=logger, starting_year=starting_year
            )
        if file_type == conf.FileType.TIMESTEP:
            return reader.read_step(
                file_path, starting_year=starting_year, skipfooter=23, header=1
            )
        if file_type == conf.FileType.HYDRAULIC:
            return reader.read_step(file_path, starting_year=starting_year)
    elif extension == ".csv":
        return read.CsvReader().read_csv(file_path)

    # uncovered in this build
    raise ValueError(f"Unsupported file extension: {extension}")


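# Editor's sketch (not part of the source file): reading one monthly
# printer file directly. The path is hypothetical; note that the
# simulation logger is derived from file_path.parents[1].
#
#     monthly_df = _read_file(
#         _pl.Path("sim-1/temp/ENERGY_BALANCE_MO.prt"),
#         conf.FileType.MONTHLY,
#     )

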
def _process_file(
    simulation_data_collector: _SimulationDataCollector,
    file_path: _pl.Path,
    file_type: conf.FileType,
) -> bool:
    if file_type == conf.FileType.MONTHLY:
        simulation_data_collector.monthly.append(
            _read_file(file_path, conf.FileType.MONTHLY)
        )
    elif file_type == conf.FileType.HOURLY:
        simulation_data_collector.hourly.append(
            _read_file(file_path, conf.FileType.HOURLY)
        )
    elif (
        file_type == conf.FileType.TIMESTEP
        and conf.global_settings.reader.read_step_files
    ):
        simulation_data_collector.step.append(
            _read_file(file_path, conf.FileType.TIMESTEP)
        )
    elif (
        file_type == conf.FileType.HYDRAULIC
        and conf.global_settings.reader.read_step_files
    ):
        simulation_data_collector.step.append(
            _read_file(file_path, conf.FileType.HYDRAULIC)
        )
    elif (
        file_type == conf.FileType.DECK
        and conf.global_settings.reader.read_deck_files
    ):
        simulation_data_collector.parsed_deck = _get_deck_as_df(file_path)
    else:
        return False
    return True


def _get_deck_as_df(
    file_path: _pl.Path,
) -> _pd.DataFrame:
    deck_file_as_string = util.get_file_content_as_string(file_path)
    parsed_deck: dict[str, float] = deck.parse_deck_for_constant_expressions(
        deck_file_as_string, log.get_simulation_logger(file_path.parent)
    )
    # Single-row frame: each deck constant becomes a column.
    deck_as_df = _pd.DataFrame([parsed_deck])
    return deck_as_df


def _merge_dataframes_into_simulation(
    simulation_data_collector: _SimulationDataCollector, sim_folder: _pl.Path
) -> ds.Simulation:
    monthly_df = _get_df_without_duplicates(simulation_data_collector.monthly)
    hourly_df = _get_df_without_duplicates(simulation_data_collector.hourly)
    timestep_df = _get_df_without_duplicates(simulation_data_collector.step)
    parsed_deck = simulation_data_collector.parsed_deck

    return ds.Simulation(
        sim_folder.as_posix(), monthly_df, hourly_df, timestep_df, parsed_deck
    )


def _get_df_without_duplicates(
    dfs: _abc.Sequence[_pd.DataFrame],
) -> _pd.DataFrame:
    if len(dfs) > 0:
        return handle_duplicate_columns(_pd.concat(dfs, axis=1))

    return _pd.DataFrame()
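

# Editor's sketch (not part of the source file): two partial result
# frames sharing an identical column are concatenated column-wise and
# deduplicated in one step. Column names are hypothetical.
#
#     a = _pd.DataFrame({"QHeat": [1.0], "QLoss": [0.2]})
#     b = _pd.DataFrame({"QHeat": [1.0]})
#     merged = _get_df_without_duplicates([a, b])
#     assert list(merged.columns) == ["QHeat", "QLoss"]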