• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

geo-engine / geoengine-python / 21037270339

15 Jan 2026 03:47PM UTC coverage: 78.884% (-0.7%) from 79.615%
21037270339

Pull #221

github

web-flow
Merge d5dad0f2b into eedc483f5
Pull Request #221: Pixel_based_queries_rewrite

3097 of 3926 relevant lines covered (78.88%)

0.79 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

67.95
geoengine/raster.py
1
"""Raster data types"""
2

3
from __future__ import annotations
1✔
4

5
from collections.abc import AsyncIterator
1✔
6
from typing import cast
1✔
7

8
import geoengine_openapi_client
1✔
9
import numpy as np
1✔
10
import pyarrow as pa
1✔
11
import xarray as xr
1✔
12

13
import geoengine.types as gety
1✔
14
from geoengine.util import clamp_datetime_ms_ns
1✔
15

16

17
class RasterTile2D:
1✔
18
    """A 2D raster tile as produced by the Geo Engine"""
19

20
    size_x: int
1✔
21
    size_y: int
1✔
22
    data: pa.Array
1✔
23
    geo_transform: gety.GeoTransform
1✔
24
    crs: str
1✔
25
    time: gety.TimeInterval
1✔
26
    band: int
1✔
27

28
    # pylint: disable=too-many-arguments,too-many-positional-arguments
29
    def __init__(
1✔
30
        self,
31
        shape: tuple[int, int],
32
        data: pa.Array,
33
        geo_transform: gety.GeoTransform,
34
        crs: str,
35
        time: gety.TimeInterval,
36
        band: int,
37
    ):
38
        """Create a RasterTile2D object"""
39
        self.size_y, self.size_x = shape
1✔
40
        self.data = data
1✔
41
        self.geo_transform = geo_transform
1✔
42
        self.crs = crs
1✔
43
        self.time = time
1✔
44
        self.band = band
1✔
45

46
    @property
1✔
47
    def shape(self) -> tuple[int, int]:
1✔
48
        """Return the shape of the raster tile in numpy order (y_size, x_size)"""
49
        return (self.size_y, self.size_x)
1✔
50

51
    @property
1✔
52
    def data_type(self) -> pa.DataType:
1✔
53
        """Return the arrow data type of the raster tile"""
54
        return self.data.type
1✔
55

56
    @property
1✔
57
    def numpy_data_type(self) -> np.dtype:
1✔
58
        """Return the numpy dtype of the raster tile"""
59
        return self.data_type.to_pandas_dtype()
1✔
60

61
    @property
1✔
62
    def has_null_values(self) -> bool:
1✔
63
        """Return whether the raster tile has null values"""
64
        return self.data.null_count > 0
1✔
65

66
    @property
1✔
67
    def time_start_ms(self) -> np.datetime64:
1✔
68
        return self.time.start.astype("datetime64[ms]")
1✔
69

70
    @property
1✔
71
    def time_end_ms(self) -> np.datetime64 | None:
1✔
72
        return None if self.time.end is None else self.time.end.astype("datetime64[ms]")
×
73

74
    @property
1✔
75
    def pixel_size(self) -> tuple[float, float]:
1✔
76
        return (self.geo_transform.x_pixel_size, self.geo_transform.y_pixel_size)
1✔
77

78
    def to_numpy_data_array(self, fill_null_value=0) -> np.ndarray:
1✔
79
        """
80
        Return the raster tile as a numpy array.
81
        Caution: this will not mask nodata values but replace them with the provided value !
82
        """
83
        nulled_array = self.data.fill_null(fill_null_value)
1✔
84
        return nulled_array.to_numpy(
1✔
85
            zero_copy_only=True,  # data was already copied when creating the "null filled" array
86
        ).reshape(self.shape)
87

88
    def to_numpy_mask_array(self, nan_is_null=False) -> np.ndarray | None:
1✔
89
        """
90
        Return the raster tiles mask as a numpy array.
91
        True means no data, False means data.
92
        If the raster tile has no null values, None is returned.
93
        It is possible to specify whether NaN values should be considered as no data when creating the mask.
94
        """
95
        numpy_mask = None
1✔
96
        if self.has_null_values:
1✔
97
            numpy_mask = (
1✔
98
                self.data.is_null(
99
                    nan_is_null=nan_is_null  # nan is not no data
100
                )
101
                .to_numpy(
102
                    zero_copy_only=False  # cannot zero-copy with bools
103
                )
104
                .reshape(self.shape)
105
            )
106
        return numpy_mask
1✔
107

108
    def to_numpy_masked_array(self, nan_is_null=False) -> np.ma.MaskedArray:
1✔
109
        """Return the raster tile as a masked numpy array"""
110
        numpy_data = self.to_numpy_data_array()
1✔
111
        maybe_numpy_mask = self.to_numpy_mask_array(nan_is_null=nan_is_null)
1✔
112

113
        assert maybe_numpy_mask is None or maybe_numpy_mask.shape == numpy_data.shape
1✔
114

115
        numpy_mask: np.ndarray | np.ma.MaskType = np.ma.nomask if maybe_numpy_mask is None else maybe_numpy_mask
1✔
116

117
        numpy_masked_data: np.ma.MaskedArray = np.ma.masked_array(numpy_data, mask=numpy_mask)
1✔
118

119
        return numpy_masked_data
1✔
120

121
    def coords_x(self, pixel_center=False) -> np.ndarray:
1✔
122
        """
123
        Return the x coordinates of the raster tile
124
        If pixel_center is True, the coordinates will be the center of the pixels.
125
        Otherwise they will be the upper left edges.
126
        """
127
        start = self.geo_transform.x_min
1✔
128

129
        if pixel_center:
1✔
130
            start += self.geo_transform.x_half_pixel_size
1✔
131

132
        return np.arange(
1✔
133
            start,
134
            stop=self.geo_transform.pixel_x_to_coord_x(self.size_x),
135
            step=self.geo_transform.x_pixel_size,
136
        )
137

138
    def coords_y(self, pixel_center=False) -> np.ndarray:
1✔
139
        """
140
        Return the y coordinates of the raster tile
141
        If pixel_center is True, the coordinates will be the center of the pixels.
142
        Otherwise they will be the upper left edges.
143
        """
144
        start = self.geo_transform.y_max
1✔
145

146
        if pixel_center:
1✔
147
            start += self.geo_transform.y_half_pixel_size
1✔
148

149
        return np.arange(
1✔
150
            start,
151
            stop=self.geo_transform.pixel_y_to_coord_y(self.size_y),
152
            step=self.geo_transform.y_pixel_size,
153
        )
154

155
    def to_xarray(self, clip_with_bounds: gety.SpatialBounds | None = None) -> xr.DataArray:
1✔
156
        """
157
        Return the raster tile as an xarray.DataArray.
158

159
        Note:
160
            - Xarray does not support masked arrays.
161
                - Masked pixels are converted to NaNs and the nodata value is set to NaN as well.
162
            - Xarray uses numpy's datetime64[ns] which only covers the years from 1678 to 2262.
163
                - Date times that are outside of the defined range are clipped to the limits of the range.
164
        """
165

166
        # clamp the dates to the min and max range
167
        clamped_date = clamp_datetime_ms_ns(self.time_start_ms)
1✔
168

169
        array = xr.DataArray(
1✔
170
            self.to_numpy_masked_array(),
171
            dims=["y", "x"],
172
            coords={
173
                "x": self.coords_x(pixel_center=True),
174
                "y": self.coords_y(pixel_center=True),
175
                "time": clamped_date,  # TODO: incorporate time end?
176
                "band": self.band,
177
            },
178
        )
179
        array.rio.write_crs(self.crs, inplace=True)
1✔
180

181
        if clip_with_bounds is not None:
1✔
182
            array = array.rio.clip_box(*clip_with_bounds.as_bbox_tuple(), auto_expand=True)
×
183
            array = cast(xr.DataArray, array)
×
184

185
        return array
1✔
186

187
    def spatial_partition(self) -> gety.SpatialPartition2D:
1✔
188
        """Return the spatial partition of the raster tile"""
189
        return gety.SpatialPartition2D(
×
190
            self.geo_transform.x_min,
191
            self.geo_transform.pixel_y_to_coord_y(self.size_y),
192
            self.geo_transform.pixel_x_to_coord_x(self.size_x),
193
            self.geo_transform.y_max,
194
        )
195

196
    def spatial_resolution(self) -> gety.SpatialResolution:
1✔
197
        return self.geo_transform.spatial_resolution()
×
198

199
    def is_empty(self) -> bool:
1✔
200
        """Returns true if the tile is empty"""
201
        num_pixels = self.size_x * self.size_y
×
202
        num_nulls = self.data.null_count
×
203
        return num_pixels == num_nulls
×
204

205
    @staticmethod
1✔
206
    def from_ge_record_batch(record_batch: pa.RecordBatch) -> RasterTile2D:
1✔
207
        """Create a RasterTile2D from an Arrow record batch recieved from the Geo Engine"""
208
        metadata = record_batch.schema.metadata
1✔
209
        inner = geoengine_openapi_client.GeoTransform.from_json(metadata[b"geoTransform"])
1✔
210
        assert inner is not None, "Failed to parse geoTransform"
1✔
211
        geo_transform = gety.GeoTransform.from_response(inner)
1✔
212
        x_size = int(metadata[b"xSize"])
1✔
213
        y_size = int(metadata[b"ySize"])
1✔
214
        spatial_reference = metadata[b"spatialReference"].decode("utf-8")
1✔
215
        # We know from the backend that there is only one array a.k.a. one column
216
        arrow_array = record_batch.column(0)
1✔
217

218
        inner_time = geoengine_openapi_client.TimeInterval.from_json(metadata[b"time"])
1✔
219
        assert inner_time is not None, "Failed to parse time"
1✔
220
        time = gety.TimeInterval.from_response(inner_time)
1✔
221

222
        band = int(metadata[b"band"])
1✔
223

224
        return RasterTile2D(
1✔
225
            (y_size, x_size),
226
            arrow_array,
227
            geo_transform,
228
            spatial_reference,
229
            time,
230
            band,
231
        )
232

233

234
class RasterTileStack2D:
1✔
235
    """A stack of all the bands of a raster tile as produced by the Geo Engine"""
236

237
    size_y: int
1✔
238
    size_x: int
1✔
239
    geo_transform: gety.GeoTransform
1✔
240
    crs: str
1✔
241
    time: gety.TimeInterval
1✔
242
    data: list[pa.Array]
1✔
243
    bands: list[int]
1✔
244

245
    # pylint: disable=too-many-arguments,too-many-positional-arguments
246
    def __init__(
1✔
247
        self,
248
        tile_shape: tuple[int, int],
249
        data: list[pa.Array],
250
        geo_transform: gety.GeoTransform,
251
        crs: str,
252
        time: gety.TimeInterval,
253
        bands: list[int],
254
    ):
255
        """Create a RasterTileStack2D object"""
256
        (self.size_y, self.size_x) = tile_shape
×
257
        self.data = data
×
258
        self.geo_transform = geo_transform
×
259
        self.crs = crs
×
260
        self.time = time
×
261
        self.bands = bands
×
262

263
    def single_band(self, index: int) -> RasterTile2D:
1✔
264
        """Return a single band from the stack"""
265
        return RasterTile2D(
×
266
            (self.size_y, self.size_x),
267
            self.data[index],
268
            self.geo_transform,
269
            self.crs,
270
            self.time,
271
            self.bands[index],
272
        )
273

274
    def to_numpy_masked_array_stack(self) -> np.ma.MaskedArray:
1✔
275
        """Return the raster stack as a 3D masked numpy array"""
276
        arrays = [self.single_band(i).to_numpy_masked_array() for i in range(0, len(self.data))]
×
277
        stack = np.ma.stack(arrays, axis=0)
×
278
        return stack
×
279

280
    def to_xarray(self, clip_with_bounds: gety.SpatialBounds | None = None) -> xr.DataArray:
1✔
281
        """Return the raster stack as an xarray.DataArray"""
282
        arrays = [self.single_band(i).to_xarray(clip_with_bounds) for i in range(0, len(self.data))]
×
283
        stack = xr.concat(arrays, dim="band")
×
284
        return stack
×
285

286

287
async def tile_stream_to_stack_stream(raster_stream: AsyncIterator[RasterTile2D]) -> AsyncIterator[RasterTileStack2D]:
1✔
288
    """Convert a stream of raster tiles to stream of stacked tiles"""
289
    store: list[RasterTile2D] = []
×
290
    first_band: int = -1
×
291

292
    async for tile in raster_stream:
×
293
        if len(store) == 0:
×
294
            first_band = tile.band
×
295
            store.append(tile)
×
296

297
        else:
298
            # check things that should be the same for all tiles
299
            assert tile.shape == store[0].shape, "Tile shapes do not match"
×
300
            # TODO: geo transform should be the same for all tiles
301
            #       tiles should have a tile position or global pixel position
302

303
            # assert tile.geo_transform == store[0].geo_transform, 'Tile geo_transforms do not match'
304
            assert tile.crs == store[0].crs, "Tile crs do not match"
×
305

306
            if tile.band == first_band:
×
307
                assert tile.time.start >= store[0].time.start, "Tile time intervals must be equal or increasing"
×
308

309
                stack = [tile.data for tile in store]
×
310
                tile_shape = store[0].shape
×
311
                bands = [tile.band for tile in store]
×
312
                geo_transforms = store[0].geo_transform
×
313
                crs = store[0].crs
×
314
                time = store[0].time
×
315

316
                store = [tile]
×
317
                yield RasterTileStack2D(tile_shape, stack, geo_transforms, crs, time, bands)
×
318

319
            else:
320
                assert tile.time == store[0].time, "Time missmatch. " + str(store[0].time) + " != " + str(tile.time)
×
321
                store.append(tile)
×
322

323
    if len(store) > 0:
×
324
        tile_shape = store[0].shape
×
325
        stack = [tile.data for tile in store]
×
326
        bands = [tile.band for tile in store]
×
327
        geo_transforms = store[0].geo_transform
×
328
        crs = store[0].crs
×
329
        time = store[0].time
×
330

331
        store = []
×
332

333
        yield RasterTileStack2D(tile_shape, stack, geo_transforms, crs, time, bands)
×
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc