• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

OpenCOMPES / sed / 12737093410

12 Jan 2025 09:08PM UTC coverage: 92.047% (+0.2%) from 91.801%
12737093410

Pull #437

github

web-flow
Merge pull request #542 from OpenCOMPES/more-broken-file-fixes

add further exceptions for completely empty files, and exceptions
Pull Request #437: Upgrade to V1

2103 of 2238 new or added lines in 53 files covered. (93.97%)

4 existing lines in 1 file now uncovered.

7581 of 8236 relevant lines covered (92.05%)

0.92 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

81.25
/src/sed/loader/base/loader.py
1
"""The abstract class off of which to implement loaders.
2
"""
3
from __future__ import annotations
1✔
4

5
import os
1✔
6
from abc import ABC
1✔
7
from abc import abstractmethod
1✔
8
from collections.abc import Sequence
1✔
9
from copy import deepcopy
1✔
10
from typing import Any
1✔
11

12
import dask.dataframe as ddf
1✔
13
import numpy as np
1✔
14

15
from sed.loader.utils import gather_files
1✔
16

17

18
class BaseLoader(ABC):
    """The abstract class off of which to implement loaders.

    The reader's folder name is the identifier.
    For this BaseLoader with filename base/loader.py the ID becomes 'base'.

    Args:
        config (dict, optional): Config dictionary. Defaults to None, in which
            case an empty dictionary is stored.
        verbose (bool, optional): Option to print out diagnostic information.
            Defaults to True.
    """

    __name__ = "BaseLoader"

    supported_file_types: list[str] = []

    def __init__(
        self,
        config: dict | None = None,
        verbose: bool = True,
    ):
        self._config = config if config is not None else {}

        # State filled in by ``read_dataframe``.
        self.files: list[str] = []
        self.runs: list[str] = []
        self.metadata: dict[Any, Any] = {}
        self._verbose = verbose

    @property
    def verbose(self) -> bool:
        """Accessor to the verbosity flag.

        Returns:
            bool: Verbosity flag.
        """
        return self._verbose

    @verbose.setter
    def verbose(self, verbose: bool):
        """Setter for the verbosity.

        Args:
            verbose (bool): Option to turn on verbose output. This base
                implementation only stores the flag.
        """
        self._verbose = verbose

    @abstractmethod
    def read_dataframe(
        self,
        files: str | Sequence[str] | None = None,
        folders: str | Sequence[str] | None = None,
        runs: str | Sequence[str] | None = None,
        ftype: str | None = None,
        metadata: dict | None = None,
        collect_metadata: bool = False,
        **kwds,
    ) -> tuple[ddf.DataFrame, ddf.DataFrame, dict]:
        """Resolves the list of files to read from given files, folders, or runs.

        This base implementation only resolves and stores the input files and
        the metadata on the instance (``self.files``, ``self.runs``,
        ``self.metadata``) and returns a tuple of ``None`` placeholders.

        Args:
            files (str | Sequence[str], optional): File path(s) to process. A single
                path may be a string or a path-like object. Defaults to None.
            folders (str | Sequence[str], optional): Path to folder(s) where files
                are stored. A single path may be a string or a path-like object.
                Folders have priority such that if they are specified, the
                specified files will be ignored. Defaults to None.
            runs (str | Sequence[str], optional): Run identifier(s). Corresponding
                files will be located in the location provided by ``folders``. Takes
                precedence over ``files`` and ``folders``. Defaults to None.
            ftype (str, optional): File type to read ('parquet', 'json', 'csv', etc).
                If a folder path is given, all files with the specified extension are
                read in the reading order. It is not forwarded to
                ``get_files_from_run_id``. Defaults to None.
            metadata (dict, optional): Manual metadata dictionary. A deep copy of it
                is stored in ``self.metadata``. Defaults to None.
            collect_metadata (bool): Option to collect metadata from files. Not used
                by this base implementation. Defaults to False.
            **kwds: Keyword arguments, forwarded to ``get_files_from_run_id`` or
                ``gather_files``. See description in respective loader.

        Raises:
            ValueError: If none of ``files``, ``folders`` and ``runs`` is provided.
            FileNotFoundError: If the resolved list of files is empty.

        Returns:
            tuple[ddf.DataFrame, ddf.DataFrame, dict]: Dask dataframe, timed dataframe
            and metadata read from specified files. This base implementation returns
            ``(None, None, None)``.
        """
        if metadata is None:
            metadata = {}

        if runs is not None:
            # Materialise once so a one-shot iterable is not exhausted before the
            # file lookup below.
            run_ids = [runs] if isinstance(runs, (str, int)) else list(runs)
            self.runs = run_ids
            files = []
            for run_id in run_ids:
                files.extend(self.get_files_from_run_id(run_id, folders, **kwds))

        elif folders is not None:
            # A single folder may be given as a string or as a path-like object.
            if isinstance(folders, (str, os.PathLike)):
                folders = [folders]
            files = []
            for folder in folders:
                files.extend(
                    gather_files(
                        folder=os.path.realpath(folder),
                        extension=ftype,
                        file_sorting=True,
                        **kwds,
                    ),
                )

        elif files is None:
            raise ValueError(
                "Either folders, files, or runs have to be provided!",
            )

        # ``files`` is never None here: every branch above either assigned a list,
        # raised, or was skipped because ``files`` had been passed in.
        if isinstance(files, (str, os.PathLike)):
            files = [files]
        files = [os.path.realpath(file) for file in files]
        self.files = files

        self.metadata = deepcopy(metadata)

        if not files:
            raise FileNotFoundError("No valid files or runs found!")

        return None, None, None

    @abstractmethod
    def get_files_from_run_id(
        self,
        run_id: str,
        folders: str | Sequence[str] | None = None,
        extension: str | None = None,
        **kwds,
    ) -> list[str]:
        """Locate the files for a given run identifier.

        Args:
            run_id (str): The run identifier to locate.
            folders (str | Sequence[str], optional): The directory(ies) where the raw
                data is located. Defaults to None.
            extension (str, optional): The file extension. Defaults to None.
            **kwds: Keyword arguments.

        Raises:
            NotImplementedError: Always, in this base implementation.

        Returns:
            list[str]: List of files for the given run.
        """
        raise NotImplementedError

    @abstractmethod
    def get_count_rate(
        self,
        fids: Sequence[int] | None = None,
        **kwds,
    ) -> tuple[np.ndarray, np.ndarray]:
        """Create count rate data for the files specified in ``fids``.

        Args:
            fids (Sequence[int], optional): The file ids to include. Defaults to
                list of all file ids.
            **kwds: Keyword arguments.

        Returns:
            tuple[np.ndarray, np.ndarray]: Arrays containing countrate and seconds
            into the scan. This base implementation returns ``(None, None)``.
        """
        return None, None

    @abstractmethod
    def get_elapsed_time(self, fids: Sequence[int] | None = None, **kwds) -> float:
        """Return the elapsed time in the files specified in ``fids``.

        Args:
            fids (Sequence[int], optional): The file ids to include. Defaults to
                list of all file ids.
            **kwds: Keyword arguments.

        Returns:
            float: The elapsed time in the files in seconds. This base
            implementation returns ``None``.
        """
        return None
200

201

202
# Module-level alias for the loader class defined in this file.
# NOTE(review): presumably looked up by name when loaders are discovered - confirm against the caller.
LOADER = BaseLoader
1✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc