12007709399

Committed 25 Nov 2024 10:07AM UTC coverage: 92.793% (-1.2%) from 94.037%

Build # 12007709399

Build Type

push

github

Committed by

sebastian-swob

Commit Message

adding exception handling to prt reader
initial example script (unfinished)

Run Details

9 of 15 new or added lines in 1 file covered. (60.0%)

412 of 444 relevant lines covered (92.79%)

1.85 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

87.5

/pytrnsys_process/readers.py

import datetime as _dt
import pathlib as _pl
from dataclasses import dataclass

import pandas as _pd

from pytrnsys_process.logger import logger as log


# TODO: Adjust architecture to separate reading and conversion.  # pylint: disable=fixme
# TODO: Base reader with PRT and CSV as children.  # pylint: disable=fixme
# TODO: Describe what to do when file name does not match any known patterns.  # pylint: disable=fixme
# TODO: Convert single file according to keyword suggestion, and/or automatically?  # pylint: disable=fixme
# TODO: timestep from first two rows -> if 1 hour, use hourly, otherwise convert to timestep  # pylint: disable=fixme
# TODO: Message to user about automatic conversion when file name does not match any known patterns.  # pylint: disable=fixme


@dataclass
class ReaderBase:
    """Parsing settings plus the file-loading routine shared by all readers.

    The upper-case attributes are annotated and therefore dataclass fields: they can be
    passed to the constructor or given new defaults by a subclass.
    """

    # pylint: disable=invalid-name
    # Pylint and PEP8 disagree on how these CONSTANTS should be named, see
    # https://stackoverflow.com/questions/25184097/pylint-invalid-constant-name/51975811#51975811
    SKIPFOOTER: int = 24  # forwarded to ``read_csv`` as ``skipfooter``
    HEADER: int = 1  # forwarded to ``read_csv`` as ``header``
    DELIMITER: str = r"\s+"  # forwarded to ``read_csv`` as ``delimiter``

    def read(self, file_path: _pl.Path) -> _pd.DataFrame:
        """Load ``file_path`` into a DataFrame using this reader's settings.

        Args:
            file_path: Location of the file to parse.

        Returns:
            The frame produced by ``pandas.read_csv``; no further processing is applied.
        """
        parser_options = {
            "skipfooter": self.SKIPFOOTER,
            "header": self.HEADER,
            "delimiter": self.DELIMITER,
            # The python engine is requested explicitly for every reader.
            "engine": "python",
        }
        return _pd.read_csv(file_path, **parser_options)


class PrtReader(ReaderBase):
    """Reader for TRNSYS ``prt`` output files.

    Every ``read_*`` method loads the file with the inherited ``read``, lower-cases the
    label of the second column (which is then looked up as ``"time"``), interprets that
    column as hours elapsed since 1 January of ``starting_year`` and returns the data
    indexed by the resulting ``"Timestamp"`` index.
    """

    def read_hourly(
        self, hourly_file: _pl.Path, starting_year: int = 1990
    ) -> _pd.DataFrame:
        """Read hourly TRNSYS output data from a file.

        Args:
            hourly_file: Path to the hourly TRNSYS output file
            starting_year: Year to use as the start of the simulation (default: 1990)

        Returns:
            DataFrame with hourly data indexed by timestamp, with 'Period' and 'time' columns removed

        Raises:
            ValueError: If the timestamps are not exactly on the hour (minutes or seconds != 0)
            KeyError: If the 'time' or 'Period' column is missing

        Both exception types are logged before being re-raised.
        """
        try:
            df = self._process_dataframe(self.read(hourly_file), starting_year)
            self._validate_hourly(df)
            return df.drop(columns=["Period", "time"])
        except (ValueError, KeyError) as e:
            log.error("Error reading hourly file %s: %s", hourly_file, e)
            raise

    def read_monthly(
        self,
        monthly_file: _pl.Path,
        starting_year: int = 1990,
    ) -> _pd.DataFrame:
        """Read monthly TRNSYS output data from a file.

        Args:
            monthly_file: Path to the monthly TRNSYS output file
            starting_year: Year to use as the start of the simulation (default: 1990)

        Returns:
            DataFrame with monthly data indexed by timestamp, with 'Month' and 'time' columns removed

        Raises:
            ValueError: If the timestamps are not at the start of each month at midnight
                      (not month start or hours/minutes/seconds != 0)
            KeyError: If the 'time' or 'Month' column is missing

        Both exception types are logged before being re-raised.
        """
        try:
            df = self._process_dataframe(self.read(monthly_file), starting_year)
            self._validate_monthly(df)
            return df.drop(columns=["Month", "time"])
        except (ValueError, KeyError) as e:
            log.error("Error reading monthly file %s: %s", monthly_file, e)
            raise

    def read_step(
        self, step_file: _pl.Path, starting_year: int = 1990
    ) -> _pd.DataFrame:
        """Read TRNSYS output data written at an arbitrary time step.

        Unlike the hourly and monthly variants, the timestamps are not validated.

        Args:
            step_file: Path to the TRNSYS output file
            starting_year: Year to use as the start of the simulation (default: 1990)

        Returns:
            DataFrame indexed by timestamp, with 'Period' and 'time' columns removed

        Raises:
            ValueError: If the time column cannot be converted to float
            KeyError: If the 'time' or 'Period' column is missing

        Both exception types are logged before being re-raised, matching the other readers.
        """
        try:
            df = self._process_dataframe(self.read(step_file), starting_year)
            return df.drop(columns=["Period", "time"])
        except (ValueError, KeyError) as e:
            log.error("Error reading step file %s: %s", step_file, e)
            raise

    def _process_dataframe(
        self, df: _pd.DataFrame, starting_year: int
    ) -> _pd.DataFrame:
        """Lower-case the second column label and index the data by timestamp.

        The input frame is left untouched; a new frame is returned.

        Args:
            df: Frame as returned by ``read``; its second column holds the time values.
            starting_year: Year whose 1 January 00:00 corresponds to time 0.

        Returns:
            A frame with the same columns (second label lower-cased) and a
            ``"Timestamp"`` index.

        Raises:
            KeyError: If no column is called ``"time"`` after the rename.
        """
        # Build a fresh label list instead of writing into ``df.columns.values``:
        # pandas treats an Index as immutable, so an in-place write can leave cached
        # label lookups stale and silently alters the caller's frame.
        labels = df.columns.tolist()
        labels[1] = labels[1].lower()
        renamed = df.set_axis(labels, axis="columns")

        timestamps = self._create_timestamps(
            renamed["time"].astype(float), starting_year
        )
        return renamed.assign(Timestamp=timestamps).set_index("Timestamp")

    def _create_timestamps(
        self, time_series: _pd.Series, starting_year: int
    ) -> _pd.Series:
        """Convert hour offsets into timestamps counted from 1 January of ``starting_year``.

        The arithmetic uses ``datetime.timedelta``. The returned Series has a fresh
        default index, not the index of ``time_series``.
        """
        start_of_year = _dt.datetime(year=starting_year, month=1, day=1)
        return _pd.Series(
            [start_of_year + _dt.timedelta(hours=float(h)) for h in time_series]
        )

    def _validate_hourly(self, df: _pd.DataFrame) -> None:
        """Validate that timestamps are exactly on the hour.

        Only the minute and second components of the index are inspected.

        Raises:
            ValueError: If any timestamp has a non-zero minute or second.
        """
        index = _pd.to_datetime(df.index)
        on_the_hour = (index.minute == 0) & (index.second == 0)
        if not on_the_hour.all():
            raise ValueError(
                "Timestamps must be exactly on the hour (minutes and seconds must be 0)"
            )

    def _validate_monthly(self, df: _pd.DataFrame) -> None:
        """Validate that timestamps are at the start of each month at midnight.

        Raises:
            ValueError: If any timestamp is not a month start or has a non-zero hour,
                minute or second.
        """
        index = _pd.to_datetime(df.index)
        at_month_start_midnight = (
            index.is_month_start
            & (index.hour == 0)
            & (index.minute == 0)
            & (index.second == 0)
        )
        if not at_month_start_midnight.all():
            raise ValueError(
                "Timestamps must be at the start of each month at midnight"
            )


@dataclass
class HeaderReader(ReaderBase):
    """Reader that extracts only the column labels of a simulation output file."""

    # Unannotated on purpose: plain class attributes, not dataclass fields.
    NUMBER_OF_ROWS_TO_SKIP = 1  # forwarded to ``read_csv`` as ``skiprows``
    NUMBER_OF_ROWS = 0  # forwarded to ``read_csv`` as ``nrows``

    def read_headers(self, sim_file: _pl.Path) -> list[str]:
        """Return the column labels found in ``sim_file``.

        The file is parsed with this reader's ``DELIMITER`` after skipping
        ``NUMBER_OF_ROWS_TO_SKIP`` leading rows; ``NUMBER_OF_ROWS`` limits how many
        data rows are parsed.

        Args:
            sim_file: Path to the simulation output file.

        Returns:
            The column labels as a list.
        """
        labels_only = _pd.read_csv(
            sim_file,
            delimiter=self.DELIMITER,
            skiprows=self.NUMBER_OF_ROWS_TO_SKIP,
            nrows=self.NUMBER_OF_ROWS,
        )
        return labels_only.columns.tolist()


@dataclass
class CsvReader(ReaderBase):
    """Reader for comma-separated files that contain a ``Timestamp`` column.

    Overrides the base defaults: no footer rows are skipped, the header is the first
    row and fields are separated by commas.
    """

    SKIPFOOTER: int = 0
    HEADER: int = 0
    DELIMITER: str = ","

    def read_csv(self, csv_file: _pl.Path) -> _pd.DataFrame:
        """Load ``csv_file`` and use its ``Timestamp`` column as the index.

        No date parsing is requested here, so the index keeps whatever type the
        parser inferred for that column.

        Args:
            csv_file: Path to the CSV file.

        Returns:
            The parsed data indexed by ``Timestamp``.

        Raises:
            KeyError: If the file has no ``Timestamp`` column.
        """
        return self.read(csv_file).set_index("Timestamp")

1	import datetime as _dt	2✔
2	import pathlib as _pl	2✔
3	from dataclasses import dataclass	2✔
4
5	import pandas as _pd	2✔
6
7	from pytrnsys_process.logger import logger as log	2✔
8
9
10	# TODO: Adjust architecture to separate reading and conversion. # pylint: disable=fixme
11	# TODO: Base reader with PRT and CSV as children. # pylint: disable=fixme
12	# TODO: Describe what to do when file name does not match any known patterns. # pylint: disable=fixme
13	# TODO: Convert single file according to keyword suggestion, and/or automatically? # pylint: disable=fixme
14	# TODO: timestep from first two rows -> if 1 hour, use hourly, otherwise convert to timestep # pylint: disable=fixme
15	# TODO: Message to user about automatic conversion when file name does not match any known patterns. # pylint: disable=fixme
16
17
18	@dataclass	2✔
19	class ReaderBase:	2✔
20	# ===================================
21	# pylint: disable=invalid-name
22	SKIPFOOTER: int = 24	2✔
23	HEADER: int = 1	2✔
24	DELIMITER: str = r"\s+"	2✔
25
26	# Pylint complains about these CONSTANTS, because pylint differs with PEP8 on this topic.
27	# https://stackoverflow.com/questions/25184097/pylint-invalid-constant-name/51975811#51975811
28	# ===================================
29
30	def read(self, file_path: _pl.Path) -> _pd.DataFrame:	2✔
31	"""Common read function for all readers"""
32	df = _pd.read_csv(	2✔
33	file_path,
34	skipfooter=self.SKIPFOOTER,
35	header=self.HEADER,
36	delimiter=self.DELIMITER,
37	engine="python",
38	)
39	return df	2✔
40
41
42	class PrtReader(ReaderBase):	2✔
43
44	def read_hourly(	2✔
45	self, hourly_file: _pl.Path, starting_year: int = 1990
46	) -> _pd.DataFrame:
47	"""Read hourly TRNSYS output data from a file.
48
49	Args:
50	hourly_file: Path to the hourly TRNSYS output file
51	starting_year: Year to use as the start of the simulation (default: 1990)
52
53	Returns:
54	DataFrame with hourly data indexed by timestamp, with 'Period' and 'time' columns removed
55
56	Raises:
57	ValueError: If the timestamps are not exactly on the hour (minutes or seconds != 0)
58	"""
59	try:	2✔
60	df = self._process_dataframe(self.read(hourly_file), starting_year)	2✔
61	self._validate_hourly(df)	2✔
62	return df.drop(columns=["Period", "time"])	2✔
NEW 63	except (ValueError, KeyError) as e:	×
NEW 64	log.error("Error reading hourly file %s: %s", hourly_file, e)	×
NEW 65	raise	×
66
67	def read_monthly(	2✔
68	self,
69	monthly_file: _pl.Path,
70	starting_year: int = 1990,
71	) -> _pd.DataFrame:
72	"""Read monthly TRNSYS output data from a file.
73
74	Args:
75	monthly_file: Path to the monthly TRNSYS output file
76	starting_year: Year to use as the start of the simulation (default: 1990)
77
78	Returns:
79	DataFrame with monthly data indexed by timestamp, with 'Month' and 'time' columns removed
80
81	Raises:
82	ValueError: If the timestamps are not at the start of each month at midnight
83	(not month start or hours/minutes/seconds != 0)
84	"""
85	try:	2✔
86	df = self._process_dataframe(self.read(monthly_file), starting_year)	2✔
87	self._validate_monthly(df)	2✔
88	return df.drop(columns=["Month", "time"])	2✔
NEW 89	except (ValueError, KeyError) as e:	×
NEW 90	log.error("Error reading monthly file %s: %s", monthly_file, e)	×
NEW 91	raise	×
92
93	def read_step(self, step_file: _pl.Path, starting_year: int = 1990):	2✔
94	df = self._process_dataframe(self.read(step_file), starting_year)	2✔
95	return df.drop(columns=["Period", "time"])	2✔
96
97	def _process_dataframe(	2✔
98	self, df: _pd.DataFrame, starting_year: int
99	) -> _pd.DataFrame:
100	"""Process the dataframe by formatting column names and creating timestamps."""
101	df.columns.values[1] = df.columns[1].lower()	2✔
102	df["Timestamp"] = self._create_timestamps(	2✔
103	df["time"].astype(float), starting_year
104	)
105
106	return df.set_index("Timestamp")	2✔
107
108	def _create_timestamps(	2✔
109	self, time_series: _pd.Series, starting_year: int
110	) -> _pd.Series:
111	"""Create timestamps from time series and starting year."""
112	hours = [_dt.timedelta(hours=float(h)) for h in time_series]	2✔
113	start_of_year = _dt.datetime(day=1, month=1, year=starting_year)	2✔
114	return _pd.Series([start_of_year + h for h in hours])	2✔
115
116	def _validate_hourly(self, df: _pd.DataFrame) -> None:	2✔
117	"""Validate that timestamps are exactly on the hour."""
118	index = _pd.to_datetime(df.index)	2✔
119	if not ((index.minute == 0) & (index.second == 0)).all():	2✔
120	raise ValueError(	×
121	"Timestamps must be exactly on the hour (minutes and seconds must be 0)"
122	)
123
124	def _validate_monthly(self, df: _pd.DataFrame) -> None:	2✔
125	"""Validate that timestamps are at the start of each month at midnight."""
126	index = _pd.to_datetime(df.index)	2✔
127	if not (	2✔
128	index.is_month_start
129	& (index.hour == 0)
130	& (index.minute == 0)
131	& (index.second == 0)
132	).all():
133	raise ValueError(	×
134	"Timestamps must be at the start of each month at midnight"
135	)
136
137
138	@dataclass	2✔
139	class HeaderReader(ReaderBase):	2✔
140	NUMBER_OF_ROWS_TO_SKIP = 1	2✔
141	NUMBER_OF_ROWS = 0	2✔
142
143	def read_headers(self, sim_file: _pl.Path) -> list[str]:	2✔
144	df = _pd.read_csv(	2✔
145	sim_file,
146	nrows=self.NUMBER_OF_ROWS,
147	skiprows=self.NUMBER_OF_ROWS_TO_SKIP,
148	delimiter=self.DELIMITER,
149	)
150	return df.columns.tolist()	2✔
151
152
153	@dataclass	2✔
154	class CsvReader(ReaderBase):	2✔
155	SKIPFOOTER: int = 0	2✔
156	HEADER: int = 0	2✔
157	DELIMITER: str = ","	2✔
158
159	def read_csv(self, csv_file: _pl.Path) -> _pd.DataFrame:	2✔
160	df = self.read(csv_file)	2✔
161	return df.set_index("Timestamp")	2✔

SPF-OST / pytrnsys_process / 12007709399

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous