12737093410

Committed 12 Jan 2025 09:08PM UTC coverage: 92.047% (+0.2%) from 91.801%

Build # 12737093410

Build Type

Pull #437

github

Committed by

web-flow

Commit Message

Merge pull request #542 from OpenCOMPES/more-broken-file-fixes

add further exceptions for completely empty files, and exceptions

Pull Request Pull Request #437: Upgrade to V1

Run Details

2103 of 2238 new or added lines in 53 files covered. (93.97%)

4 existing lines in 1 file now uncovered.

7581 of 8236 relevant lines covered (92.05%)

0.92 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

81.25

/src/sed/loader/base/loader.py

"""The abstract class off of which to implement loaders.
"""
from __future__ import annotations

import os
from abc import ABC
from abc import abstractmethod
from collections.abc import Sequence
from copy import deepcopy
from typing import Any

import dask.dataframe as ddf
import numpy as np

from sed.loader.utils import gather_files


class BaseLoader(ABC):
    """
    The abstract class off of which to implement loaders.

    The reader's folder name is the identifier.
    For this BaseLoader with filename base/loader.py the ID becomes 'base'

    Concrete loaders must override all abstract methods. The implementations
    provided here are usable through ``super()``: ``read_dataframe`` resolves
    and stores the list of input files, the other methods only return
    placeholders or raise.

    Args:
        config (dict, optional): Config dictionary. Defaults to None, in which
            case an empty dictionary is stored.
        verbose (bool, optional): Option to print out diagnostic information.
            Defaults to True.
    """

    # Class-level name attribute; instances have no ``__name__`` of their own.
    __name__ = "BaseLoader"

    # File extensions a concrete loader can read; empty for the abstract base.
    supported_file_types: list[str] = []

    def __init__(
        self,
        config: dict = None,
        verbose: bool = True,
    ):
        self._config = config if config is not None else {}

        self.files: list[str] = []
        self.runs: list[str] = []
        self.metadata: dict[Any, Any] = {}
        self._verbose = verbose

    @property
    def verbose(self) -> bool:
        """Accessor to the verbosity flag.

        Returns:
            bool: Verbosity flag.
        """
        return self._verbose

    @verbose.setter
    def verbose(self, verbose: bool):
        """Setter for the verbosity.

        In this base class the flag is only stored; nothing else is updated.

        Args:
            verbose (bool): Option to turn on verbose output.
        """
        self._verbose = verbose

    @abstractmethod
    def read_dataframe(
        self,
        files: str | Sequence[str] = None,
        folders: str | Sequence[str] = None,
        runs: str | Sequence[str] = None,
        ftype: str = None,
        metadata: dict = None,
        collect_metadata: bool = False,
        **kwds,
    ) -> tuple[ddf.DataFrame, ddf.DataFrame, dict]:
        """Resolve the input files from given files, folders, or runs.

        The base implementation does not read any data. It determines the list
        of files, stores their real paths in ``self.files``, stores the run
        identifiers in ``self.runs`` (only if ``runs`` is given), and stores a
        deep copy of ``metadata`` in ``self.metadata``.

        Args:
            files (str | Sequence[str], optional): File path(s) to process.
                A single path-like object (``os.PathLike``) is treated like a
                single string path. Defaults to None.
            folders (str | Sequence[str], optional): Path to folder(s) where files
                are stored. Path has priority such that if it's specified, the
                specified files will be ignored. A single path-like object is
                treated like a single string path. Defaults to None.
            runs (str | Sequence[str], optional): Run identifier(s). Corresponding
                files will be located in the location provided by ``folders``. Takes
                precedence over ``files`` and ``folders``. Defaults to None.
            ftype (str, optional): File type to read ('parquet', 'json', 'csv', etc).
                If a folder path is given, all files with the specified extension are
                read into the dataframe in the reading order. Defaults to None.
            metadata (dict, optional): Manual metadata dictionary. Auto-generated
                metadata will be added to it. Defaults to None.
            collect_metadata (bool): Option to collect metadata from files. Requires
                a valid config dict. Not used by the base implementation.
                Defaults to False.
            **kwds: keyword arguments. Forwarded to ``get_files_from_run_id`` or
                ``gather_files``. See description in respective loader.

        Raises:
            ValueError: If none of ``files``, ``folders``, or ``runs`` is provided.
            FileNotFoundError: If the resolved list of files is empty.

        Returns:
            tuple[ddf.DataFrame, ddf.DataFrame, dict]: Dask dataframe, timed dataframe and
            metadata read from specified files. The base implementation returns
            ``(None, None, None)``.
        """

        if metadata is None:
            metadata = {}

        if runs is not None:
            # A single run identifier (str or int) is wrapped into a list.
            if isinstance(runs, (str, int)):
                runs = [runs]
            self.runs = list(runs)
            files = []
            # Iterate the stored list so a one-shot iterable is not consumed twice.
            for run in self.runs:
                files.extend(self.get_files_from_run_id(run, folders, **kwds))

        elif folders is not None:
            # A single folder may be a str or a path-like object (e.g. pathlib.Path).
            if isinstance(folders, (str, os.PathLike)):
                folders = [folders]
            files = []
            for folder in folders:
                folder = os.path.realpath(folder)
                files.extend(
                    gather_files(
                        folder=folder,
                        extension=ftype,
                        file_sorting=True,
                        **kwds,
                    ),
                )

        elif files is None:
            raise ValueError(
                "Either folders, files, or runs have to be provided!",
            )

        # ``files`` cannot be None here: every branch above either assigned it,
        # received it from the caller, or raised.
        if isinstance(files, (str, os.PathLike)):
            files = [files]
        files = [os.path.realpath(file) for file in files]
        self.files = files

        # Copy so later modifications do not leak into the caller's dictionary.
        self.metadata = deepcopy(metadata)

        if not files:
            raise FileNotFoundError("No valid files or runs found!")

        return None, None, None

    @abstractmethod
    def get_files_from_run_id(
        self,
        run_id: str,
        folders: str | Sequence[str] = None,
        extension: str = None,
        **kwds,
    ) -> list[str]:
        """Locate the files for a given run identifier.

        Args:
            run_id (str): The run identifier to locate.
            folders (str | Sequence[str], optional): The directory(ies) where the raw
                data is located. Defaults to None.
            extension (str, optional): The file extension. Defaults to None.
            kwds: Keyword arguments

        Raises:
            NotImplementedError: Always, in this base implementation.

        Return:
            list[str]: List of files for the given run.
        """
        raise NotImplementedError

    @abstractmethod
    def get_count_rate(
        self,
        fids: Sequence[int] = None,
        **kwds,
    ) -> tuple[np.ndarray, np.ndarray]:
        """Create count rate data for the files specified in ``fids``.

        Args:
            fids (Sequence[int], optional): The file ids to include.
                Defaults to list of all file ids.
            kwds: Keyword arguments

        Return:
            tuple[np.ndarray, np.ndarray]: Arrays containing countrate and seconds
            into the scan. The base implementation returns ``(None, None)``.
        """
        return None, None

    @abstractmethod
    def get_elapsed_time(self, fids: Sequence[int] = None, **kwds) -> float:
        """Return the elapsed time in the files specified in ``fids``.

        Args:
            fids (Sequence[int], optional): The file ids to include.
                Defaults to list of all file ids.
            kwds: Keyword arguments

        Return:
            float: The elapsed time in the files in seconds. The base
            implementation returns ``None``.
        """
        return None


# Module-level alias for the loader class defined in this file.
# NOTE(review): presumably the fixed name under which loader-discovery code
# looks up each loader module's class — confirm against the caller.
LOADER = BaseLoader

1	"""The abstract class off of which to implement loaders.
2	"""
3	from __future__ import annotations	1✔
4
5	import os	1✔
6	from abc import ABC	1✔
7	from abc import abstractmethod	1✔
8	from collections.abc import Sequence	1✔
9	from copy import deepcopy	1✔
10	from typing import Any	1✔
11
12	import dask.dataframe as ddf	1✔
13	import numpy as np	1✔
14
15	from sed.loader.utils import gather_files	1✔
16
17
18	class BaseLoader(ABC):	1✔
19	"""
20	The abstract class off of which to implement loaders.
21
22	The reader's folder name is the identifier.
23	For this BaseLoader with filename base/loader.py the ID becomes 'base'
24
25	Args:
26	config (dict, optional): Config dictionary. Defaults to None.
27	verbose (bool, optional): Option to print out diagnostic information.
28	Defaults to True.
29	"""
30
31	__name__ = "BaseLoader"	1✔
32
33	supported_file_types: list[str] = []	1✔
34
35	def __init__(	1✔
36	self,
37	config: dict = None,
38	verbose: bool = True,
39	):
40	self._config = config if config is not None else {}	1✔
41
42	self.files: list[str] = []	1✔
43	self.runs: list[str] = []	1✔
44	self.metadata: dict[Any, Any] = {}	1✔
45	self._verbose = verbose	1✔
46
47	@property	1✔
48	def verbose(self) -> bool:	1✔
49	"""Accessor to the verbosity flag.
50
51	Returns:
52	bool: Verbosity flag.
53	"""
NEW 54	return self._verbose	×
55
56	@verbose.setter	1✔
57	def verbose(self, verbose: bool):	1✔
58	"""Setter for the verbosity.
59
60	Args:
61	verbose (bool): Option to turn on verbose output. Sets loglevel to INFO.
62	"""
NEW 63	self._verbose = verbose	×
64
65	@abstractmethod	1✔
66	def read_dataframe(	1✔
67	self,
68	files: str \| Sequence[str] = None,
69	folders: str \| Sequence[str] = None,
70	runs: str \| Sequence[str] = None,
71	ftype: str = None,
72	metadata: dict = None,
73	collect_metadata: bool = False,
74	**kwds,
75	) -> tuple[ddf.DataFrame, ddf.DataFrame, dict]:
76	"""Reads data from given files, folder, or runs and returns a dask dataframe
77	and corresponding metadata.
78
79	Args:
80	files (str \| Sequence[str], optional): File path(s) to process.
81	Defaults to None.
82	folders (str \| Sequence[str], optional): Path to folder(s) where files
83	are stored. Path has priority such that if it's specified, the specified
84	files will be ignored. Defaults to None.
85	runs (str \| Sequence[str], optional): Run identifier(s). Corresponding
86	files will be located in the location provided by ``folders``. Takes
87	precedence over ``files`` and ``folders``. Defaults to None.
88	ftype (str, optional): File type to read ('parquet', 'json', 'csv', etc).
89	If a folder path is given, all files with the specified extension are
90	read into the dataframe in the reading order. Defaults to None.
91	metadata (dict, optional): Manual metadata dictionary. Auto-generated
92	metadata will be added to it. Defaults to None.
93	collect_metadata (bool): Option to collect metadata from files. Requires
94	a valid config dict. Defaults to False.
95	**kwds: keyword arguments. See description in respective loader.
96
97	Returns:
98	tuple[ddf.DataFrame, ddf.DataFrame, dict]: Dask dataframe, timed dataframe and metadata
99	read from specified files.
100	"""
101
102	if metadata is None:	1✔
103	metadata = {}	1✔
104
105	if runs is not None:	1✔
106	if isinstance(runs, (str, int)):	×
107	runs = [runs]	×
108	self.runs = list(runs)	×
109	files = []	×
110	for run in runs:	×
111	files.extend(self.get_files_from_run_id(run, folders, **kwds))	×
112
113	elif folders is not None:	1✔
114	if isinstance(folders, str):	1✔
115	folders = [folders]	1✔
116	files = []	1✔
117	for folder in folders:	1✔
118	folder = os.path.realpath(folder)	1✔
119	files.extend(	1✔
120	gather_files(
121	folder=folder,
122	extension=ftype,
123	file_sorting=True,
124	**kwds,
125	),
126	)
127
128	elif files is None:	1✔
129	raise ValueError(	1✔
130	"Either folders, files, or runs have to be provided!",
131	)
132
133	if files is not None:	1✔
134	if isinstance(files, str):	1✔
135	files = [files]	1✔
136	files = [os.path.realpath(file) for file in files]	1✔
137	self.files = files	1✔
138
139	self.metadata = deepcopy(metadata)	1✔
140
141	if not files:	1✔
142	raise FileNotFoundError("No valid files or runs found!")	×
143
144	return None, None, None	1✔
145
146	@abstractmethod	1✔
147	def get_files_from_run_id(	1✔
148	self,
149	run_id: str,
150	folders: str \| Sequence[str] = None,
151	extension: str = None,
152	**kwds,
153	) -> list[str]:
154	"""Locate the files for a given run identifier.
155
156	Args:
157	run_id (str): The run identifier to locate.
158	folders (str \| Sequence[str], optional): The directory(ies) where the raw
159	data is located. Defaults to None.
160	extension (str, optional): The file extension. Defaults to None.
161	kwds: Keyword arguments
162
163	Return:
164	list[str]: List of files for the given run.
165	"""
166	raise NotImplementedError	×
167
168	@abstractmethod	1✔
169	def get_count_rate(	1✔
170	self,
171	fids: Sequence[int] = None,
172	**kwds,
173	) -> tuple[np.ndarray, np.ndarray]:
174	"""Create count rate data for the files specified in ``fids``.
175
176	Args:
177	fids (Sequence[int], optional): fids (Sequence[int]): the file ids to
178	include. Defaults to list of all file ids.
179	kwds: Keyword arguments
180
181	Return:
182	tuple[np.ndarray, np.ndarray]: Arrays containing countrate and seconds
183	into the scan.
184	"""
185	return None, None	×
186
187	@abstractmethod	1✔
188	def get_elapsed_time(self, fids: Sequence[int] = None, **kwds) -> float:	1✔
189	"""Return the elapsed time in the specified in ``fids``.
190
191	Args:
192	fids (Sequence[int], optional): fids (Sequence[int]): the file ids to
193	include. Defaults to list of all file ids.
194	kwds: Keyword arguments
195
196	Return:
197	float: The elapsed time in the files in seconds.
198	"""
199	return None	×
200
201
202	LOADER = BaseLoader	1✔

OpenCOMPES / sed / 12737093410

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous