SPF-OST / pytrnsys_process / build 13560850870

27 Feb 2025 07:08AM UTC · coverage: 98.182% (+0.02%) from 98.165%
Triggered by a push via GitHub · committer: ahobeost · "CI adjustments"

7 of 8 new or added lines in 3 files covered (87.5%)
4 existing lines in 2 files now uncovered
1188 of 1210 relevant lines covered (98.18%)
1.95 hits per line

Source File: /pytrnsys_process/process_sim/process_sim.py (98.82% covered)
import logging as _logging
import pathlib as _pl
from collections import abc as _abc
from dataclasses import dataclass, field

import pandas as _pd

from pytrnsys_process import constants as const
from pytrnsys_process import data_structures as ds
from pytrnsys_process import file_type_detector as ftd
from pytrnsys_process import logger as log
from pytrnsys_process import readers
from pytrnsys_process import settings as sett
from pytrnsys_process import utils
from pytrnsys_process.deck import extractor
from pytrnsys_process.settings import settings


def process_sim(
    sim_files: _abc.Sequence[_pl.Path], sim_folder: _pl.Path
) -> ds.Simulation:
    # Collects a list of dataframes per file type; these are later
    # concatenated into one dataframe each and stored on the Simulation object.
    simulation_data_collector = _SimulationDataCollector()

    sim_logger = log.get_simulation_logger(sim_folder)
    for sim_file in sim_files:
        try:
            _process_file(
                simulation_data_collector,
                sim_file,
                _determine_file_type(sim_file, sim_logger),
            )
        except ValueError as e:
            sim_logger.error(
                "Error reading file %s; it will not be available for processing: %s",
                sim_file,
                str(e),
                exc_info=True,
            )

    return _merge_dataframes_into_simulation(
        simulation_data_collector, sim_folder
    )

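# A minimal usage sketch for process_sim (paths are hypothetical; assumes the
# folder contains TRNSYS printer/deck files that _determine_file_type can
# classify):
#
#     sim_folder = _pl.Path("results/sim-1")
#     sim_files = sorted(p for p in sim_folder.iterdir() if p.is_file())
#     simulation = process_sim(sim_files, sim_folder)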

def handle_duplicate_columns(df: _pd.DataFrame) -> _pd.DataFrame:
    """
    Process duplicate columns in a DataFrame, ensuring they contain consistent data.

    This function checks for duplicate column names and verifies that:
    1. If one duplicate column has NaN values, the other(s) must also have NaN at the same indices
    2. All non-NaN values must be identical across duplicate columns

    Parameters
    ----------
    df: pandas.DataFrame
        Input DataFrame to process

    Returns
    -------
    df: pandas.DataFrame
        DataFrame with duplicate columns removed, keeping only the first occurrence

    Raises
    ------
    ValueError
        If duplicate columns have:
        1. NaN values in one column while having actual values in another at the same index, or
        2. Different non-NaN values at the same index

    Notes
    -----
    https://stackoverflow.com/questions/14984119/python-pandas-remove-duplicate-columns
    """
    for col in df.columns[df.columns.duplicated(keep=False)]:
        duplicate_cols = df.iloc[:, df.columns == col]

        nan_mask = duplicate_cols.isna()
        value_mask = ~nan_mask
        if ((nan_mask.sum(axis=1) > 0) & (value_mask.sum(axis=1) > 0)).any():
            raise ValueError(
                f"Column '{col}' has NaN values in one column while having actual values in another"
            )

        if not duplicate_cols.apply(lambda x: x.nunique() <= 1, axis=1).all():
            raise ValueError(
                f"Column '{col}' has conflicting values at same indices"
            )

    df = df.iloc[:, ~df.columns.duplicated()].copy()
    return df

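# Worked example for handle_duplicate_columns ("QBoiler" is a made-up column
# name): identical duplicates collapse to a single column, while conflicting
# duplicates raise ValueError.
#
#     ok = _pd.DataFrame([[1.0, 1.0], [2.0, 2.0]], columns=["QBoiler", "QBoiler"])
#     handle_duplicate_columns(ok).columns.tolist()  # -> ["QBoiler"]
#
#     bad = _pd.DataFrame([[1.0, 9.0]], columns=["QBoiler", "QBoiler"])
#     handle_duplicate_columns(bad)  # raises ValueError: conflicting values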

def _determine_file_type(
    sim_file: _pl.Path, logger: _logging.Logger
) -> const.FileType:
    """Determine the file type from the file name, falling back to the file content."""
    try:
        return ftd.get_file_type_using_file_name(sim_file, logger)
    except ValueError:
        return ftd.get_file_type_using_file_content(sim_file, logger)

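# Sketch of the fallback order (file name hypothetical): a recognisable name
# pattern decides the type; if name-based detection raises ValueError, the
# detector sniffs the file's content instead.
#
#     file_type = _determine_file_type(
#         _pl.Path("results/sim-1/ENERGY_BALANCE_MO.prt"), sim_logger
#     )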

@dataclass
class _SimulationDataCollector:
    hourly: list[_pd.DataFrame] = field(default_factory=list)
    monthly: list[_pd.DataFrame] = field(default_factory=list)
    step: list[_pd.DataFrame] = field(default_factory=list)
    deck: _pd.DataFrame = field(default_factory=_pd.DataFrame)


def _read_file(
    file_path: _pl.Path, file_type: const.FileType
) -> _pd.DataFrame:
    """
    Read data from a file using the appropriate reader for its extension and file type.

    Parameters
    ----------
    file_path: pathlib.Path
        Path to the file to be read

    file_type: const.FileType
        Type of data in the file (MONTHLY, HOURLY, or TIMESTEP)

    Returns
    -------
    pandas.DataFrame
        Data read from the file

    Raises
    ------
    ValueError
        If the file extension is not supported
    """
    starting_year = settings.reader.starting_year
    extension = file_path.suffix.lower()
    if extension in [".prt", ".hr"]:
        reader = readers.PrtReader()
        if file_type == const.FileType.MONTHLY:
            return reader.read_monthly(file_path, starting_year)
        if file_type == const.FileType.HOURLY:
            return reader.read_hourly(file_path, starting_year)
        if file_type == const.FileType.TIMESTEP:
            return reader.read_step(file_path, starting_year)
    elif extension == ".csv":
        return readers.CsvReader().read_csv(file_path)

    raise ValueError(f"Unsupported file extension: {extension}")

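# Dispatch sketch (file names hypothetical): .prt/.hr files go through
# PrtReader with the configured starting year, .csv files through CsvReader,
# and any other extension raises ValueError.
#
#     monthly = _read_file(_pl.Path("out_mo.prt"), const.FileType.MONTHLY)
#     hourly = _read_file(_pl.Path("out.hr"), const.FileType.HOURLY)
#     table = _read_file(_pl.Path("out.csv"), const.FileType.TIMESTEP)
#     # for .csv the file_type argument is ignored: the extension decides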

def _process_file(
    simulation_data_collector: _SimulationDataCollector,
    file_path: _pl.Path,
    file_type: const.FileType,
) -> bool:
    if file_type == const.FileType.MONTHLY:
        simulation_data_collector.monthly.append(
            _read_file(file_path, const.FileType.MONTHLY)
        )
    elif file_type == const.FileType.HOURLY:
        simulation_data_collector.hourly.append(
            _read_file(file_path, const.FileType.HOURLY)
        )
    elif (
        file_type == const.FileType.TIMESTEP
        and sett.settings.reader.read_step_files
    ):
        simulation_data_collector.step.append(
            _read_file(file_path, const.FileType.TIMESTEP)
        )
    elif (
        file_type == const.FileType.DECK
        and sett.settings.reader.read_deck_files
    ):
        simulation_data_collector.deck = _get_deck_as_df(file_path)
    else:
        return False
    return True

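# Behaviour sketch: the return value signals whether the file was consumed.
# Step and deck files are only read when the corresponding reader settings
# are enabled (the settings fields are the ones used above; paths are
# hypothetical):
#
#     collector = _SimulationDataCollector()
#     _process_file(collector, _pl.Path("out.hr"), const.FileType.HOURLY)
#     # -> True: the dataframe was appended to collector.hourly
#
#     # With sett.settings.reader.read_step_files disabled, timestep files
#     # are skipped and the function reports False:
#     _process_file(collector, _pl.Path("out.prt"), const.FileType.TIMESTEP)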

def _get_deck_as_df(
    file_path: _pl.Path,
) -> _pd.DataFrame:
    deck_file_as_string = utils.get_file_content_as_string(file_path)
    deck: dict[str, float] = extractor.parse_deck_for_constant_expressions(
        deck_file_as_string, log.get_simulation_logger(file_path.parent)
    )
    deck_as_df = _pd.DataFrame([deck])
    return deck_as_df

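# Sketch: the deck's constant expressions are parsed into a dict and wrapped
# in a single-row DataFrame, one column per constant (file name hypothetical):
#
#     deck_df = _get_deck_as_df(_pl.Path("results/sim-1/sim-1.dck"))
#     deck_df.shape[0]  # -> 1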

def _merge_dataframes_into_simulation(
    simulation_data_collector: _SimulationDataCollector, sim_folder: _pl.Path
) -> ds.Simulation:
    monthly_df = _get_df_without_duplicates(simulation_data_collector.monthly)
    hourly_df = _get_df_without_duplicates(simulation_data_collector.hourly)
    timestep_df = _get_df_without_duplicates(simulation_data_collector.step)
    deck = simulation_data_collector.deck

    return ds.Simulation(
        sim_folder.as_posix(), monthly_df, hourly_df, timestep_df, deck
    )


def _get_df_without_duplicates(dfs: _abc.Sequence[_pd.DataFrame]) -> _pd.DataFrame:
    if len(dfs) > 0:
        return handle_duplicate_columns(_pd.concat(dfs, axis=1))

    return _pd.DataFrame()