SPF-OST / pytrnsys_process / 13394719474

18 Feb 2025 03:55PM UTC · coverage: 96.919% (-1.0%) from 97.93%
push · github · sebastian-swob · "increased positional arguments to 7"
1164 of 1201 relevant lines covered (96.92%) · 1.93 hits per line

Source File: /pytrnsys_process/process_sim/process_sim.py (98.8% covered)
import logging as _logging
import pathlib as _pl
from collections import abc as _abc
from dataclasses import dataclass, field

import pandas as _pd

from pytrnsys_process import constants as const
from pytrnsys_process import data_structures as ds
from pytrnsys_process import file_type_detector as ftd
from pytrnsys_process import logger as log
from pytrnsys_process import readers
from pytrnsys_process import settings as sett
from pytrnsys_process import utils
from pytrnsys_process.deck import extractor


def process_sim(
        sim_files: _abc.Sequence[_pl.Path], sim_folder: _pl.Path
) -> ds.Simulation:
    # Collects one list of dataframes per file type; the lists are later
    # concatenated into a single dataframe each and stored on the Simulation object.
    simulation_data_collector = _SimulationDataCollector()

    sim_logger = log.get_simulation_logger(sim_folder)
    for sim_file in sim_files:
        try:
            _process_file(
                simulation_data_collector,
                sim_file,
                _determine_file_type(sim_file, sim_logger),
            )
        except ValueError as e:
            sim_logger.error(
                "Error reading file %s; it will not be available for processing: %s",
                sim_file,
                str(e),
                exc_info=True,
            )

    return _merge_dataframes_into_simulation(
        simulation_data_collector, sim_folder
    )
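

# Usage sketch for process_sim (illustrative only; the folder layout and the
# way result files are collected below are assumptions, not documented API):
#
#   sim_folder = _pl.Path("results/sim-1")  # hypothetical path
#   sim_files = [p for p in sim_folder.iterdir() if p.is_file()]
#   simulation = process_sim(sim_files, sim_folder)  # -> ds.Simulation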


def handle_duplicate_columns(df: _pd.DataFrame) -> _pd.DataFrame:
    """
    Process duplicate columns in a DataFrame, ensuring they contain consistent data.

    This function checks for duplicate column names and verifies that:
    1. If one duplicate column has NaN values, the other(s) must also have NaN at the same indices
    2. All non-NaN values must be identical across duplicate columns

    Parameters
    ----------
    df : pandas.DataFrame
        Input DataFrame to process

    Returns
    -------
    pandas.DataFrame
        DataFrame with duplicate columns removed, keeping only the first occurrence

    Raises
    ------
    ValueError
        If duplicate columns have:
        - NaN values in one column while having actual values in another at the same index
        - Different non-NaN values at the same index

    https://stackoverflow.com/questions/14984119/python-pandas-remove-duplicate-columns
    """
    for col in df.columns[df.columns.duplicated(keep=False)]:
        duplicate_cols = df.iloc[:, df.columns == col]

        nan_mask = duplicate_cols.isna()
        value_mask = ~nan_mask
        if ((nan_mask.sum(axis=1) > 0) & (value_mask.sum(axis=1) > 0)).any():
            raise ValueError(
                f"Column '{col}' has NaN values in one column while having actual values in another"
            )

        if not duplicate_cols.apply(lambda x: x.nunique() <= 1, axis=1).all():
            raise ValueError(
                f"Column '{col}' has conflicting values at same indices"
            )

    df = df.iloc[:, ~df.columns.duplicated()].copy()
    return df
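

# Contract sketch for handle_duplicate_columns (the column name "qOut" is
# hypothetical; behaviour follows the docstring above):
#
#   ok = _pd.DataFrame([[1, 1], [2, 2]], columns=["qOut", "qOut"])
#   handle_duplicate_columns(ok)    # duplicates agree -> collapsed to one column
#
#   bad = _pd.DataFrame([[1, 3], [2, 2]], columns=["qOut", "qOut"])
#   handle_duplicate_columns(bad)   # -> ValueError: conflicting values at same indices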


def _determine_file_type(
        sim_file: _pl.Path, logger: _logging.Logger
) -> const.FileType:
    """Determine the file type from the file name, falling back to the file content."""
    try:
        return ftd.get_file_type_using_file_name(sim_file, logger)
    except ValueError:
        return ftd.get_file_type_using_file_content(sim_file, logger)


@dataclass
class _SimulationDataCollector:
    hourly: list[_pd.DataFrame] = field(default_factory=list)
    monthly: list[_pd.DataFrame] = field(default_factory=list)
    step: list[_pd.DataFrame] = field(default_factory=list)
    deck: _pd.DataFrame = field(default_factory=_pd.DataFrame)


def _read_file(
        file_path: _pl.Path, file_type: const.FileType
) -> _pd.DataFrame:
    """
    Factory method to read data from a file using the appropriate reader.

    Parameters
    ----------
    file_path : pathlib.Path
        Path to the file to be read
    file_type : const.FileType
        Type of data in the file (MONTHLY, HOURLY, or TIMESTEP)

    Returns
    -------
    pandas.DataFrame
        Data read from the file

    Raises
    ------
    ValueError
        If the file extension is not supported
    """
    extension = file_path.suffix.lower()
    if extension in [".prt", ".hr"]:
        reader = readers.PrtReader()
        if file_type == const.FileType.MONTHLY:
            return reader.read_monthly(file_path)
        if file_type == const.FileType.HOURLY:
            return reader.read_hourly(file_path)
        if file_type == const.FileType.TIMESTEP:
            return reader.read_step(file_path)
    elif extension == ".csv":
        return readers.CsvReader().read_csv(file_path)

    raise ValueError(f"Unsupported file extension: {extension}")
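

# Dispatch sketch for _read_file (file names hypothetical). Per the coverage
# report, the final `raise` is this file's only uncovered line (0 hits):
#
#   _read_file(_pl.Path("ENERGY_MO.prt"), const.FileType.MONTHLY)  # PrtReader.read_monthly
#   _read_file(_pl.Path("loads.csv"), const.FileType.HOURLY)       # CsvReader; file_type unused
#   _read_file(_pl.Path("notes.txt"), const.FileType.HOURLY)       # raises ValueError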


def _process_file(
        simulation_data_collector: _SimulationDataCollector,
        file_path: _pl.Path,
        file_type: const.FileType,
) -> bool:
    if file_type == const.FileType.MONTHLY:
        simulation_data_collector.monthly.append(
            _read_file(file_path, const.FileType.MONTHLY)
        )
    elif file_type == const.FileType.HOURLY:
        simulation_data_collector.hourly.append(
            _read_file(file_path, const.FileType.HOURLY)
        )
    elif (
            file_type == const.FileType.TIMESTEP
            and sett.settings.reader.read_step_files
    ):
        simulation_data_collector.step.append(
            _read_file(file_path, const.FileType.TIMESTEP)
        )
    elif (
            file_type == const.FileType.DECK
            and sett.settings.reader.read_deck_files
    ):
        simulation_data_collector.deck = _get_deck_as_df(file_path)
    else:
        return False
    return True
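

# Timestep and deck files are only collected when the corresponding reader
# settings are enabled; a sketch, assuming the settings object is mutable:
#
#   sett.settings.reader.read_step_files = True
#   sett.settings.reader.read_deck_files = True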


def _get_deck_as_df(
        file_path: _pl.Path,
) -> _pd.DataFrame:
    deck_file_as_string = utils.get_file_content_as_string(file_path)
    deck: dict[str, float] = extractor.parse_deck_for_constant_expressions(
        deck_file_as_string, log.get_simulation_logger(file_path.parent)
    )
    deck_as_df = _pd.DataFrame([deck])
    return deck_as_df


def _merge_dataframes_into_simulation(
        simulation_data_collector: _SimulationDataCollector, sim_folder: _pl.Path
) -> ds.Simulation:
    monthly_df = _get_df_without_duplicates(simulation_data_collector.monthly)
    hourly_df = _get_df_without_duplicates(simulation_data_collector.hourly)
    timestep_df = _get_df_without_duplicates(simulation_data_collector.step)
    deck = simulation_data_collector.deck

    return ds.Simulation(sim_folder, monthly_df, hourly_df, timestep_df, deck)


def _get_df_without_duplicates(dfs: _abc.Sequence[_pd.DataFrame]) -> _pd.DataFrame:
    if len(dfs) > 0:
        return handle_duplicate_columns(_pd.concat(dfs, axis=1))

    return _pd.DataFrame()
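

# Concat-then-dedup sketch for one file category (column names hypothetical):
#
#   dfs = [_pd.DataFrame({"a": [1.0]}), _pd.DataFrame({"a": [1.0], "b": [2.0]})]
#   _get_df_without_duplicates(dfs)   # duplicate "a" columns agree -> kept once, plus "b"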