• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

OpenCOMPES / sed / 6498082478

12 Oct 2023 04:01PM UTC coverage: 99.663% (+9.1%) from 90.587%
6498082478

Pull #151

github

web-flow
Merge 103c9a6f3 into ec5bccda4
Pull Request #151: Documentation PR

4734 of 4750 relevant lines covered (99.66%)

1.0 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

88.52
/sed/loader/base/loader.py
1
"""The abstract class off of which to implement loaders."""
2
import os
3
from abc import ABC
4
from abc import abstractmethod
5
from copy import deepcopy
6
from typing import Any
7
from typing import Dict
8
from typing import List
9
from typing import Sequence
10
from typing import Tuple
11
from typing import Union
12

13
import dask.dataframe as ddf
14
import numpy as np
15

16
from sed.loader.utils import gather_files
17

18

19
class BaseLoader(ABC):
    """Abstract base class from which concrete loaders are derived.

    The reader's folder name is the identifier.
    For this BaseLoader with filename base/loader.py the ID becomes 'base'.

    Args:
        config (dict, optional): Config dictionary. Defaults to None, in which
            case an empty dictionary is stored.

    Attributes:
        files (List[str]): File paths resolved by the last ``read_dataframe`` call.
        runs (List[str]): Run identifiers passed to the last ``read_dataframe`` call.
        metadata (Dict[Any, Any]): Deep copy of the metadata passed to the last
            ``read_dataframe`` call.
    """

    # pylint: disable=too-few-public-methods

    __name__ = "BaseLoader"

    supported_file_types: List[str] = []

    def __init__(
        self,
        config: dict = None,
    ):
        # Fall back to an empty config so the attribute is always a dict.
        self._config = {} if config is None else config

        self.files: List[str] = []
        self.runs: List[str] = []
        self.metadata: Dict[Any, Any] = {}

    @abstractmethod
    def read_dataframe(
        self,
        files: Union[str, Sequence[str]] = None,
        folders: Union[str, Sequence[str]] = None,
        runs: Union[str, Sequence[str]] = None,
        ftype: str = None,
        metadata: dict = None,
        collect_metadata: bool = False,
        **kwds,
    ) -> Tuple[ddf.DataFrame, dict]:
        """Resolve the input files from runs, folders, or explicit file paths.

        This base implementation only performs the bookkeeping shared by all
        loaders: it determines the list of files, stores it (together with the
        run identifiers and a copy of the metadata) on the instance, and returns
        ``(None, None)``. Concrete loaders are expected to produce the dask
        dataframe and metadata announced by the return annotation.

        Args:
            files (Union[str, Sequence[str]], optional): File path(s) to process.
                Defaults to None.
            folders (Union[str, Sequence[str]], optional): Path to folder(s) where
                files are stored. Takes precedence over ``files``: if folders are
                given, the specified files are ignored. Defaults to None.
            runs (Union[str, Sequence[str]], optional): Run identifier(s). The
                corresponding files are located via ``get_files_from_run_id``,
                which receives ``folders``. Takes precedence over ``files`` and
                ``folders``. Defaults to None.
            ftype (str, optional): File type to read ('parquet', 'json', 'csv', etc).
                Passed as ``extension`` to ``gather_files`` when folders are
                given. Defaults to None.
            metadata (dict, optional): Manual metadata dictionary. A deep copy is
                stored in ``self.metadata``. Defaults to None.
            collect_metadata (bool): Option to collect metadata from files. Not
                used by this base implementation. Defaults to False.
            **kwds: Keyword arguments forwarded to ``get_files_from_run_id`` or
                ``gather_files``. See description in respective loader.

        Raises:
            ValueError: If none of ``files``, ``folders`` and ``runs`` is provided.
            FileNotFoundError: If the resolved list of files is empty.

        Returns:
            Tuple[ddf.DataFrame, dict]: ``(None, None)`` in this base
            implementation.
        """
        # Nothing to resolve at all: fail before touching any instance state.
        if runs is None and folders is None and files is None:
            raise ValueError(
                "Either folder, file paths, or runs should be provided!",
            )

        manual_metadata = {} if metadata is None else metadata

        if runs is not None:
            # Runs win over folders and files; a single identifier is wrapped.
            run_ids = [runs] if isinstance(runs, (str, int)) else runs
            self.runs = list(run_ids)
            files = []
            for run_id in run_ids:
                files += self.get_files_from_run_id(run_id, folders, **kwds)

        elif folders is not None:
            # Folders win over explicitly listed files.
            folder_list = [folders] if isinstance(folders, str) else folders
            files = []
            for folder in folder_list:
                files += gather_files(
                    folder=os.path.realpath(folder),
                    extension=ftype,
                    file_sorting=True,
                    **kwds,
                )

        # At this point ``files`` is never None: either a branch above built
        # it, or the caller supplied it.
        if isinstance(files, str):
            files = [files]
        resolved_files = [os.path.realpath(path) for path in files]
        self.files = resolved_files

        self.metadata = deepcopy(manual_metadata)

        if len(resolved_files) == 0:
            raise FileNotFoundError("No valid files or runs found!")

        return None, None

    @abstractmethod
    def get_files_from_run_id(
        self,
        run_id: str,
        folders: Union[str, Sequence[str]] = None,
        extension: str = None,
        **kwds,
    ) -> List[str]:
        """Locate the files belonging to a given run identifier.

        Must be implemented by concrete loaders; the base version always raises.

        Args:
            run_id (str): The run identifier to locate.
            folders (Union[str, Sequence[str]], optional): The directory(ies) where
                the raw data is located. Defaults to None.
            extension (str, optional): The file extension. Defaults to None.
            **kwds: Keyword arguments.

        Raises:
            NotImplementedError: Always, in this base implementation.

        Returns:
            List[str]: List of files for the given run.
        """
        raise NotImplementedError

    @abstractmethod
    def get_count_rate(
        self,
        fids: Sequence[int] = None,
        **kwds,
    ) -> Tuple[np.ndarray, np.ndarray]:
        """Create count rate data for the files specified in ``fids``.

        Args:
            fids (Sequence[int], optional): The file ids to include. Defaults to
                list of all file ids.
            **kwds: Keyword arguments.

        Returns:
            Tuple[np.ndarray, np.ndarray]: Arrays containing countrate and seconds
            into the scan. This base implementation returns ``(None, None)``.
        """
        return None, None

    @abstractmethod
    def get_elapsed_time(self, fids: Sequence[int] = None, **kwds) -> float:
        """Return the elapsed time in the files specified in ``fids``.

        Args:
            fids (Sequence[int], optional): The file ids to include. Defaults to
                list of all file ids.
            **kwds: Keyword arguments.

        Returns:
            float: The elapsed time in the files in seconds. This base
            implementation returns ``None``.
        """
        return None
182

183

184
# Module-level alias for the loader class defined in this file.
# NOTE(review): presumably the name under which loader discovery looks up the
# class of this module — confirm against the code that imports loaders.
LOADER = BaseLoader
3✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc