6498082478

Committed 12 Oct 2023 04:01PM UTC coverage: 99.663% (+9.1%) from 90.587%

Build # 6498082478

Build Type

Pull #151

github

Committed by

web-flow

Commit Message

Merge 103c9a6f3 into ec5bccda4

Pull Request Pull Request #151: Documentation PR

Run Details

4734 of 4750 relevant lines covered (99.66%)

1.0 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

88.52

/sed/loader/base/loader.py

"""The abstract class off of which to implement loaders."""
import os
from abc import ABC
from abc import abstractmethod
from copy import deepcopy
from typing import Any
from typing import Dict
from typing import List
from typing import Sequence
from typing import Tuple
from typing import Union

import dask.dataframe as ddf
import numpy as np

from sed.loader.utils import gather_files


class BaseLoader(ABC):
    """Abstract base class from which concrete loaders are implemented.

    The reader's folder name is the identifier.
    For this BaseLoader with filename base/loader.py the ID becomes 'base'.

    Args:
        config (dict, optional): Config dictionary. Defaults to None, in which
            case an empty dictionary is stored.

    Attributes:
        files (List[str]): Resolved file paths of the most recent
            ``read_dataframe`` call.
        runs (List[str]): Run identifiers of the most recent ``read_dataframe``
            call that specified ``runs``.
        metadata (Dict[Any, Any]): Deep copy of the metadata passed to the most
            recent ``read_dataframe`` call.
    """

    # pylint: disable=too-few-public-methods

    __name__ = "BaseLoader"

    supported_file_types: List[str] = []

    def __init__(
        self,
        config: dict = None,
    ):
        """Store the configuration and initialize empty loader state.

        Args:
            config (dict, optional): Config dictionary. Defaults to None, in which
                case an empty dictionary is stored.
        """
        self._config = config if config is not None else {}

        self.files: List[str] = []
        self.runs: List[str] = []
        self.metadata: Dict[Any, Any] = {}

    @abstractmethod
    def read_dataframe(
        self,
        files: Union[str, Sequence[str]] = None,
        folders: Union[str, Sequence[str]] = None,
        runs: Union[str, Sequence[str]] = None,
        ftype: str = None,
        metadata: dict = None,
        collect_metadata: bool = False,
        **kwds,
    ) -> Tuple[ddf.DataFrame, dict]:
        """Reads data from given files, folder, or runs and returns a dask dataframe
        and corresponding metadata.

        This base implementation only resolves the list of files and stores
        ``self.files``, ``self.runs`` and ``self.metadata``; it reads no data and
        returns ``(None, None)``. Concrete loaders call it via ``super()`` and
        then build the actual dataframe.

        Args:
            files (Union[str, Sequence[str]], optional): File path(s) to process.
                Defaults to None.
            folders (Union[str, Sequence[str]], optional): Path to folder(s) where files
                are stored. Path has priority such that if it's specified, the specified
                files will be ignored. Defaults to None.
            runs (Union[str, Sequence[str]], optional): Run identifier(s). A single
                string or integer is treated as one run. Corresponding files will be
                located in the location provided by ``folders``. Takes precedence
                over ``files`` and ``folders``. Defaults to None.
            ftype (str, optional): File type to read ('parquet', 'json', 'csv', etc).
                If a folder path is given, all files with the specified extension are
                read into the dataframe in the reading order. It is not forwarded
                to ``get_files_from_run_id``. Defaults to None.
            metadata (dict, optional): Manual metadata dictionary. Auto-generated
                metadata will be added to it. Defaults to None.
            collect_metadata (bool): Option to collect metadata from files. Requires
                a valid config dict. Not used by this base implementation.
                Defaults to False.
            **kwds: keyword arguments, forwarded to ``get_files_from_run_id`` or
                ``gather_files``. See description in respective loader.

        Raises:
            ValueError: If none of ``files``, ``folders`` or ``runs`` is provided.
            FileNotFoundError: If the resolved list of files is empty.

        Returns:
            Tuple[ddf.DataFrame, dict]: Dask dataframe and metadata read from
            specified files.
        """

        if metadata is None:
            metadata = {}

        if runs is not None:
            if isinstance(runs, (str, int)):
                runs = [runs]
            # Materialize once and iterate over the stored list, so that a
            # one-shot iterable is not already exhausted when files are looked up.
            self.runs = list(runs)
            files = []
            for run in self.runs:
                files.extend(self.get_files_from_run_id(run, folders, **kwds))

        elif folders is not None:
            if isinstance(folders, str):
                folders = [folders]
            files = []
            for folder in folders:
                folder = os.path.realpath(folder)
                files.extend(
                    gather_files(
                        folder=folder,
                        extension=ftype,
                        file_sorting=True,
                        **kwds,
                    ),
                )

        elif files is None:
            raise ValueError(
                "Either folder, file paths, or runs should be provided!",
            )

        if files is not None:
            if isinstance(files, str):
                files = [files]
            # Store canonical paths regardless of how the files were specified.
            files = [os.path.realpath(file) for file in files]
            self.files = files

        # Deep copy so later changes to the stored metadata do not leak back
        # into the dictionary supplied by the caller.
        self.metadata = deepcopy(metadata)

        if not files:
            raise FileNotFoundError("No valid files or runs found!")

        return None, None

    @abstractmethod
    def get_files_from_run_id(
        self,
        run_id: str,
        folders: Union[str, Sequence[str]] = None,
        extension: str = None,
        **kwds,
    ) -> List[str]:
        """Locate the files for a given run identifier.

        Args:
            run_id (str): The run identifier to locate.
            folders (Union[str, Sequence[str]], optional): The directory(ies) where the raw
                data is located. Defaults to None.
            extension (str, optional): The file extension. Defaults to None.
            kwds: Keyword arguments

        Raises:
            NotImplementedError: Always, in this base implementation.

        Return:
            List[str]: List of files for the given run.
        """
        raise NotImplementedError

    @abstractmethod
    def get_count_rate(
        self,
        fids: Sequence[int] = None,
        **kwds,
    ) -> Tuple[np.ndarray, np.ndarray]:
        """Create count rate data for the files specified in ``fids``.

        This base implementation is a placeholder returning ``(None, None)``.

        Args:
            fids (Sequence[int], optional): The file ids to include. Defaults to
                list of all file ids.
            kwds: Keyword arguments

        Return:
            Tuple[np.ndarray, np.ndarray]: Arrays containing countrate and seconds
            into the scan.
        """
        return None, None

    @abstractmethod
    def get_elapsed_time(self, fids: Sequence[int] = None, **kwds) -> float:
        """Return the elapsed time in the files specified in ``fids``.

        This base implementation is a placeholder returning ``None``.

        Args:
            fids (Sequence[int], optional): The file ids to include. Defaults to
                list of all file ids.
            kwds: Keyword arguments

        Return:
            float: The elapsed time in the files in seconds.
        """
        return None


# Module-level alias exposing this module's loader class under the fixed name
# ``LOADER``. NOTE(review): presumably looked up by loader-discovery code
# elsewhere in the package -- confirm against the caller.
LOADER = BaseLoader

1	"""The abstract class off of which to implement loaders."""
2	import os
3	from abc import ABC
4	from abc import abstractmethod
5	from copy import deepcopy
6	from typing import Any
7	from typing import Dict
8	from typing import List
9	from typing import Sequence
10	from typing import Tuple
11	from typing import Union
12
13	import dask.dataframe as ddf
14	import numpy as np
15
16	from sed.loader.utils import gather_files
17
18
19	class BaseLoader(ABC):
20	"""
21	The abstract class off of which to implement loaders.	×
22
23	The reader's folder name is the identifier.	×
24	For this BaseLoader with filename base/loader.py the ID becomes 'base'	×
25
26	Args:	×
27	config (dict, optional): Config dictionary. Defaults to None.	×
28	meta_handler (MetaHandler, optional): MetaHandler object. Defaults to None.	×
29	"""	×
30
31	# pylint: disable=too-few-public-methods
32
33	__name__ = "BaseLoader"	3✔
34
35	supported_file_types: List[str] = []	3✔
36
37	def __init__(	3✔
38	self,
39	config: dict = None,	3✔
40	):
41	self._config = config if config is not None else {}	3✔
42
43	self.files: List[str] = []	3✔
44	self.runs: List[str] = []	3✔
45	self.metadata: Dict[Any, Any] = {}	3✔
46
47	@abstractmethod	3✔
48	def read_dataframe(	3✔
49	self,
50	files: Union[str, Sequence[str]] = None,	3✔
51	folders: Union[str, Sequence[str]] = None,	3✔
52	runs: Union[str, Sequence[str]] = None,	3✔
53	ftype: str = None,	3✔
54	metadata: dict = None,	3✔
55	collect_metadata: bool = False,	3✔
56	**kwds,
57	) -> Tuple[ddf.DataFrame, dict]:	3✔
58	"""Reads data from given files, folder, or runs and returns a dask dataframe
59	and corresponding metadata.
60
61	Args:
62	files (Union[str, Sequence[str]], optional): File path(s) to process.
63	Defaults to None.
64	folders (Union[str, Sequence[str]], optional): Path to folder(s) where files
65	are stored. Path has priority such that if it's specified, the specified
66	files will be ignored. Defaults to None.
67	runs (Union[str, Sequence[str]], optional): Run identifier(s). Corresponding
68	files will be located in the location provided by ``folders``. Takes
69	precedence over ``files`` and ``folders``. Defaults to None.
70	ftype (str, optional): File type to read ('parquet', 'json', 'csv', etc).
71	If a folder path is given, all files with the specified extension are
72	read into the dataframe in the reading order. Defaults to None.
73	metadata (dict, optional): Manual metadata dictionary. Auto-generated
74	metadata will be added to it. Defaults to None.
75	collect_metadata (bool): Option to collect metadata from files. Requires
76	a valid config dict. Defaults to False.
77	**kwds: keyword arguments. See description in respective loader.
78
79	Returns:
80	Tuple[ddf.DataFrame, dict]: Dask dataframe and metadata read from
81	specified files.
82	"""
83
84	if metadata is None:	3✔
85	metadata = {}	3✔
86
87	if runs is not None:	3✔
88	if isinstance(runs, (str, int)):
89	runs = [runs]
90	self.runs = list(runs)
91	files = []
92	for run in runs:
93	files.extend(self.get_files_from_run_id(run, folders, **kwds))
94
95	elif folders is not None:	3✔
96	if isinstance(folders, str):	3✔
97	folders = [folders]	3✔
98	files = []	3✔
99	for folder in folders:	3✔
100	folder = os.path.realpath(folder)	3✔
101	files.extend(	3✔
102	gather_files(	3✔
103	folder=folder,	3✔
104	extension=ftype,	3✔
105	file_sorting=True,	3✔
106	**kwds,	3✔
107	),
108	)
109
110	elif files is None:	3✔
111	raise ValueError(
112	"Either folder, file paths, or runs should be provided!",
113	)
114
115	if files is not None:	3✔
116	if isinstance(files, str):	3✔
117	files = [files]	3✔
118	files = [os.path.realpath(file) for file in files]	3✔
119	self.files = files	3✔
120
121	self.metadata = deepcopy(metadata)	3✔
122
123	if not files:	3✔
124	raise FileNotFoundError("No valid files or runs found!")
125
126	return None, None	3✔
127
128	@abstractmethod	3✔
129	def get_files_from_run_id(	3✔
130	self,
131	run_id: str,	3✔
132	folders: Union[str, Sequence[str]] = None,	3✔
133	extension: str = None,	3✔
134	**kwds,
135	) -> List[str]:	3✔
136	"""Locate the files for a given run identifier.
137
138	Args:
139	run_id (str): The run identifier to locate.
140	folders (Union[str, Sequence[str]], optional): The directory(ies) where the raw
141	data is located. Defaults to None.
142	extension (str, optional): The file extension. Defaults to None.
143	kwds: Keyword arguments
144
145	Return:
146	List[str]: List of files for the given run.
147	"""
148	raise NotImplementedError
149
150	@abstractmethod	3✔
151	def get_count_rate(	3✔
152	self,
153	fids: Sequence[int] = None,	3✔
154	**kwds,
155	) -> Tuple[np.ndarray, np.ndarray]:	3✔
156	"""Create count rate data for the files specified in ``fids``.
157
158	Args:
159	fids (Sequence[int], optional): fids (Sequence[int]): the file ids to
160	include. Defaults to list of all file ids.
161	kwds: Keyword arguments
162
163	Return:
164	Tuple[np.ndarray, np.ndarray]: Arrays containing countrate and seconds
165	into the scan.
166	"""
167	return None, None
168
169	@abstractmethod	3✔
170	def get_elapsed_time(self, fids: Sequence[int] = None, **kwds) -> float:	3✔
171	"""Return the elapsed time in the files specified in ``fids``.
172
173	Args:
174	fids (Sequence[int], optional): fids (Sequence[int]): the file ids to
175	include. Defaults to list of all file ids.
176	kwds: Keyword arguments
177
178	Return:
179	float: The elapsed time in the files in seconds.
180	"""
181	return None
182
183
184	LOADER = BaseLoader	3✔

OpenCOMPES / sed / 6498082478

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous