
SPF-OST / pytrnsys_process / 12050239394

27 Nov 2024 12:24PM UTC coverage: 90.52% (-3.5%) from 94.037%
Pull Request #18: 15 create example script for per sim interaction
Commit by sebastian-swob (github):
added batch processing,
added initial example script on how to use the api,
disabled some step tests until requirements are clear

119 of 124 new or added lines in 5 files covered. (95.97%)

20 existing lines in 3 files now uncovered.

487 of 538 relevant lines covered (90.52%)

0.91 hits per line

Source File
/pytrnsys_process/process_sim/process_sim.py (74.36% covered)
import pathlib as _pl
from dataclasses import dataclass

import pandas as _pd

from pytrnsys_process import file_matcher as fm
from pytrnsys_process import readers, utils
from pytrnsys_process.logger import logger


# TODO test if overlapping columns are allowed if the values are the same # pylint: disable=fixme


@dataclass
class Simulation:
    path: _pl.Path
    monthly: _pd.DataFrame
    hourly: _pd.DataFrame
    timestep: _pd.DataFrame
    # TODO: Add results data here. Not sure yet what this will look like # pylint: disable=fixme


def handle_duplicate_columns(df: _pd.DataFrame) -> _pd.DataFrame:
    """
    Process duplicate columns in a DataFrame, ensuring they contain consistent data.

    This function checks for duplicate column names and verifies that:
    1. If one duplicate column has NaN values, the other(s) must also have NaN at the same indices
    2. All non-NaN values must be identical across duplicate columns

    Parameters
    ----------
    df : pandas.DataFrame
        Input DataFrame to process

    Returns
    -------
    pandas.DataFrame
        DataFrame with duplicate columns removed, keeping only the first occurrence

    Raises
    ------
    ValueError
        If duplicate columns have:
        - NaN values in one column while having actual values in another at the same index
        - Different non-NaN values at the same index

    https://stackoverflow.com/questions/14984119/python-pandas-remove-duplicate-columns
    """
    for col in df.columns[df.columns.duplicated(keep=False)]:
        duplicate_cols = df.iloc[:, df.columns == col]

        nan_mask = duplicate_cols.isna()
        value_mask = ~nan_mask
        if ((nan_mask.sum(axis=1) > 0) & (value_mask.sum(axis=1) > 0)).any():
            raise ValueError(
                f"Column '{col}' has NaN values in one column while having actual values in another"
            )

        if not duplicate_cols.apply(lambda x: x.nunique() <= 1, axis=1).all():
            raise ValueError(
                f"Column '{col}' has conflicting values at same indices"
            )

    df = df.iloc[:, ~df.columns.duplicated()].copy()
    return df
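
# Illustration (not part of the module): expected behaviour of handle_duplicate_columns
# for a hypothetical duplicated column name "QSnk60P".
#
#   ok = _pd.DataFrame([[1.0, 1.0], [2.0, 2.0]], columns=["QSnk60P", "QSnk60P"])
#   handle_duplicate_columns(ok)    # one "QSnk60P" column is kept
#
#   bad = _pd.DataFrame([[1.0, 9.0]], columns=["QSnk60P", "QSnk60P"])
#   handle_duplicate_columns(bad)   # raises ValueError: conflicting values at same indices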


def process_sim_prt(
    sim_folder: _pl.Path,
) -> Simulation:
    sim_files = utils.get_files([sim_folder])
    prt_reader = readers.PrtReader()
    hourly = []
    monthly = []
    timestep = []

    for sim_file in sim_files:
        if fm.has_pattern(sim_file.name, fm.FileType.MONTHLY):
            monthly.append(prt_reader.read_monthly(sim_file))
        elif fm.has_pattern(sim_file.name, fm.FileType.HOURLY):
            hourly.append(prt_reader.read_hourly(sim_file))
        elif fm.has_pattern(sim_file.name, fm.FileType.TIMESTEP):
            timestep.append(prt_reader.read_step(sim_file))
        else:
            logger.warning("Unknown file type: %s", sim_file.name)

    monthly_df = (
        handle_duplicate_columns(_pd.concat(monthly, axis=1))
        if monthly
        else _pd.DataFrame()
    )
    hourly_df = (
        handle_duplicate_columns(_pd.concat(hourly, axis=1))
        if hourly
        else _pd.DataFrame()
    )
    timestep_df = (
        handle_duplicate_columns(_pd.concat(timestep, axis=1))
        if timestep
        else _pd.DataFrame()
    )
    return Simulation(sim_folder, monthly_df, hourly_df, timestep_df)


# NOTE from the coverage report: the body of this function is currently uncovered by tests.
def process_sim_using_file_content_prt(
    sim_folder: _pl.Path,
) -> Simulation:
    sim_files = utils.get_files([sim_folder])
    prt_reader = readers.PrtReader()
    hourly = []
    monthly = []
    step = []

    for sim_file in sim_files:
        file_type = fm.get_file_type_using_file_content(sim_file)
        if file_type == fm.FileType.MONTHLY:
            monthly.append(prt_reader.read_monthly(sim_file))
        elif file_type == fm.FileType.HOURLY:
            hourly.append(prt_reader.read_hourly(sim_file))
        elif file_type == fm.FileType.TIMESTEP:
            step.append(prt_reader.read_step(sim_file))
        else:
            logger.warning("Unknown file type: %s", sim_file.name)

    monthly_df = handle_duplicate_columns(_pd.concat(monthly, axis=1))
    hourly_df = handle_duplicate_columns(_pd.concat(hourly, axis=1))
    timestep_df = handle_duplicate_columns(_pd.concat(step, axis=1))
    return Simulation(sim_folder, monthly_df, hourly_df, timestep_df)


def process_sim_csv(
    sim_folder: _pl.Path,
) -> Simulation:
    sim_files = utils.get_files([sim_folder], results_folder_name="converted")
    csv_reader = readers.CsvReader()
    hourly = []
    monthly = []
    timestep = []

    for sim_file in sim_files:
        if fm.has_pattern(sim_file.name, fm.FileType.MONTHLY):
            monthly.append(csv_reader.read_csv(sim_file))
        elif fm.has_pattern(sim_file.name, fm.FileType.HOURLY):
            hourly.append(csv_reader.read_csv(sim_file))
        elif fm.has_pattern(sim_file.name, fm.FileType.TIMESTEP):
            timestep.append(csv_reader.read_csv(sim_file))
        else:
            logger.warning("Unknown file type: %s", sim_file.name)

    monthly_df = handle_duplicate_columns(_pd.concat(monthly, axis=1))
    hourly_df = handle_duplicate_columns(_pd.concat(hourly, axis=1))
    timestep_df = handle_duplicate_columns(_pd.concat(timestep, axis=1))

    return Simulation(sim_folder, monthly_df, hourly_df, timestep_df)
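
The pull request's stated goal is an example script for per-simulation interaction. The sketch below is a minimal usage example based only on the functions listed above; the import path is inferred from the file location, and the results folder path is made up for illustration.

    import pathlib as _pl

    from pytrnsys_process.process_sim import process_sim as ps

    # Point at one simulation's results folder (hypothetical path).
    sim_folder = _pl.Path("results/sim-1")

    # Read the printer (.prt) output files, dispatching on file-name patterns,
    # and bundle them into a Simulation object.
    sim = ps.process_sim_prt(sim_folder)

    # Monthly, hourly and timestep data are plain pandas DataFrames.
    print(sim.monthly.columns)
    print(sim.hourly.head())

    # process_sim_csv is analogous but reads CSV files via
    # utils.get_files(..., results_folder_name="converted").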