• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

p2p-ld / numpydantic / 9232062240

25 May 2024 01:57AM UTC coverage: 99.718%. Remained the same
9232062240

push

github

sneakers-the-rat
v1.1.0 bump version, add changelog

708 of 710 relevant lines covered (99.72%)

2.97 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

100.0
/src/numpydantic/interface/hdf5.py
1
"""
2
Interfaces for HDF5 Datasets
3
"""
4

5
import sys
3✔
6
from pathlib import Path
3✔
7
from typing import Any, NamedTuple, Optional, Tuple, Union
3✔
8

9
import numpy as np
3✔
10
from pydantic import SerializationInfo
3✔
11

12
from numpydantic.interface.interface import Interface
3✔
13
from numpydantic.types import NDArrayType
3✔
14

15
# h5py is an optional dependency: when it is missing the name is bound to None
# so the rest of the module can test for availability instead of failing on import.
try:
    import h5py
except ImportError:  # pragma: no cover
    h5py = None

# Compare the whole version tuple rather than only ``.minor`` so the check
# stays correct regardless of the major version.
if sys.version_info >= (3, 10):
    from typing import TypeAlias
else:
    from typing_extensions import TypeAlias

# A ``(file, path)`` pair: location of the HDF5 file, then the path inside it.
H5Arraylike: TypeAlias = Tuple[Union[Path, str], str]
26

27

28
class H5ArrayPath(NamedTuple):
    """
    Location specifier for arrays within an HDF5 file

    Attributes:
        file: Location of HDF5 file
        path: Path within the HDF5 file
    """

    file: Union[Path, str]
    path: str
35

36

37
class H5Proxy:
    """
    Proxy class to mimic numpy-like array behavior with an HDF5 array

    The attribute and item access methods only open the file for the duration of the
    method, making it less perilous to share this object between threads and processes.

    This class attempts to be a passthrough class to a :class:`h5py.Dataset` object,
    including its attributes and item getters/setters.

    When using read-only methods, no locking is attempted (beyond the HDF5 defaults),
    but when using the write methods (setting an array value), try and use the
    ``locking`` methods of :class:`h5py.File` .

    Args:
        file (pathlib.Path | str): Location of hdf5 file on filesystem
        path (str): Path to array within hdf5 file
    """

    # Attributes that live on the proxy itself. If one of them reaches
    # ``__getattr__`` the instance has not been initialised yet (e.g. it was just
    # created by ``copy``/``pickle``), and forwarding would recurse forever.
    _PROXY_STATE = frozenset({"_h5f", "file", "path"})

    # Copy/pickle protocol hooks that may be probed on instances. They describe the
    # proxy, not the dataset, so they are answered locally without opening the file.
    _PROTOCOL_HOOKS = frozenset(
        {
            "__copy__",
            "__deepcopy__",
            "__getstate__",
            "__setstate__",
            "__getnewargs__",
            "__getnewargs_ex__",
        }
    )

    def __init__(self, file: Union[Path, str], path: str):
        self._h5f = None
        self.file = Path(file)
        self.path = path

    def array_exists(self) -> bool:
        """Check that there is in fact an array at :attr:`.path` within :attr:`.file`"""
        with h5py.File(self.file, "r") as h5f:
            obj = h5f.get(self.path)
            return obj is not None

    @classmethod
    def from_h5array(cls, h5array: "H5ArrayPath") -> "H5Proxy":
        """
        Instantiate using :class:`.H5ArrayPath`

        Uses ``cls`` so that calling this on a subclass returns an instance of that
        subclass.
        """
        return cls(file=h5array.file, path=h5array.path)

    def __getattr__(self, item: str):
        """
        Forward attribute lookups that the proxy cannot answer to the dataset.

        Only called when normal attribute lookup fails. The file is opened read-only
        for the duration of the lookup.

        Raises:
            AttributeError: if ``item`` is part of the proxy's own state or a
                copy/pickle protocol hook, or if the dataset lookup itself fails.
        """
        if item in self._PROXY_STATE or item in self._PROTOCOL_HOOKS:
            raise AttributeError(
                f"{type(self).__name__!r} object has no attribute {item!r}"
            )

        # Resolve our own state before touching the file so that an uninitialised
        # instance fails with AttributeError rather than recursing.
        file, path = self.file, self.path
        with h5py.File(file, "r") as h5f:
            obj = h5f.get(path)
            return getattr(obj, item)

    def __getitem__(self, item: Union[int, slice]) -> np.ndarray:
        """Index the dataset, opening the file read-only for the duration of the call"""
        with h5py.File(self.file, "r") as h5f:
            obj = h5f.get(self.path)
            return obj[item]

    def __setitem__(self, key: Union[int, slice], value: Union[int, float, np.ndarray]):
        """Assign into the dataset, opening the file in ``r+`` mode with locking"""
        with h5py.File(self.file, "r+", locking=True) as h5f:
            obj = h5f.get(self.path)
            obj[key] = value

    def open(self, mode: str = "r") -> "h5py.Dataset":
        """
        Return the opened :class:`h5py.Dataset` object

        You must remember to close the associated file with :meth:`.close`

        If a file handle is already held from an earlier call it is reused, and
        ``mode`` is not applied.
        """
        if self._h5f is None:
            self._h5f = h5py.File(self.file, mode)
        return self._h5f.get(self.path)

    def close(self) -> None:
        """
        Close the :class:`h5py.File` object left open when returning the dataset with
        :meth:`.open`

        Does nothing if no file is currently held open.
        """
        if self._h5f is not None:
            self._h5f.close()
        self._h5f = None
105

106

107
class H5Interface(Interface):
    """
    Interface for arrays stored as datasets within an HDF5 file.

    Accepts a :class:`.H5ArrayPath` (or an equivalent two-item ``(file, path)``
    sequence) identifying a :class:`h5py.Dataset` inside a :class:`h5py.File`, and
    hands back a :class:`.H5Proxy` that behaves like a passthrough numpy-like view
    of that dataset.
    """

    input_types = (H5ArrayPath, H5Arraylike)
    return_type = H5Proxy

    @classmethod
    def enabled(cls) -> bool:
        """Report whether h5py was importable"""
        return h5py is not None

    @classmethod
    def check(cls, array: Union[H5ArrayPath, Tuple[Union[Path, str], str]]) -> bool:
        """
        Decide whether this interface applies to ``array``.

        True for a :class:`.H5ArrayPath`, or for a two-item tuple/list whose first
        item is an existing file that h5py can open. Whether the dataset itself
        exists is deliberately not checked here.
        """
        if isinstance(array, H5ArrayPath):
            return True

        is_pair = isinstance(array, (tuple, list)) and len(array) == 2
        if not is_pair:
            return False

        # the first element has to be usable as a filesystem path
        try:
            candidate = Path(array[0])
        except TypeError:
            # not a path, we don't apply.
            return False

        if not candidate.exists():
            return False

        # hdf5 files are commonly given odd suffixes,
        # so we just try and open it and see what happens
        try:
            with h5py.File(candidate, "r"):
                # don't check that the array exists and raise here,
                # this check is just for whether the validator applies or not.
                pass
        except (FileNotFoundError, OSError):
            return False
        return True

    def before_validation(self, array: Any) -> NDArrayType:
        """
        Build the :class:`.H5Proxy` that the rest of validation operates on.

        Raises:
            ValueError: if ``array`` is neither a :class:`.H5ArrayPath` nor a
                two-item tuple/list, or if no array exists at the given path.
        """
        if isinstance(array, H5ArrayPath):
            proxy = H5Proxy.from_h5array(h5array=array)
        elif isinstance(array, (tuple, list)) and len(array) == 2:  # pragma: no cover
            proxy = H5Proxy(file=array[0], path=array[1])
        else:  # pragma: no cover
            # this should never happen really since `check` confirms this before
            # we'd reach here, but just to complete the if else...
            raise ValueError(
                "Need to specify a file and a path within an HDF5 file to use the HDF5 "
                "Interface"
            )

        if proxy.array_exists():
            return proxy

        raise ValueError(
            f"HDF5 file located at {proxy.file}, "
            f"but no array found at {proxy.path}"
        )

    @classmethod
    def to_json(cls, array: H5Proxy, info: Optional[SerializationInfo] = None) -> dict:
        """
        Dump to a dictionary containing

        * ``file``: :attr:`.file`
        * ``path``: :attr:`.path`
        * ``attrs``: Any HDF5 attributes on the dataset
        * ``array``: The array as a list of lists

        The proxy's file is closed again before returning, whether or not
        building the dictionary succeeded.
        """
        try:
            dataset = array.open()
            return {
                "file": array.file,
                "path": array.path,
                "attrs": dict(dataset.attrs),
                "array": dataset[:].tolist(),
            }
        finally:
            array.close()
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc