• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

ltelab / disdrodb / 15485160797

06 Jun 2025 07:24AM UTC coverage: 85.51%. First build
15485160797

push

github

web-flow
Add readers for JMA, DELFT, CSWR FARM and NASA PIERS  (#226)

61 of 81 new or added lines in 3 files covered. (75.31%)

5919 of 6922 relevant lines covered (85.51%)

0.86 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

80.0
/disdrodb/utils/xarray.py
1
#!/usr/bin/env python3
2

3
# -----------------------------------------------------------------------------.
4
# Copyright (c) 2021-2023 DISDRODB developers
5
#
6
# This program is free software: you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation, either version 3 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful,
12
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14
# GNU General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
18
# -----------------------------------------------------------------------------.
19
"""Xarray utilities."""
20
import numpy as np
1✔
21
import xarray as xr
1✔
22
from xarray.core import dtypes
1✔
23

24

25
def xr_get_last_valid_idx(da_condition, dim, fill_value=None):
    """
    Get the index of the last True value along a specified dimension in an xarray DataArray.

    This function finds the last index along the given dimension where the condition is True.
    If no value is True along that dimension, the function returns ``fill_value``.

    Parameters
    ----------
    da_condition : xarray.DataArray
        A boolean DataArray where True indicates valid or desired values.
        Should have the dimension specified in `dim`.
    dim : str
        The name of the dimension along which to find the last True index.
    fill_value : int or float, optional
        The value returned where no element is True along the specified dimension.
        If ``None`` (the default), ``dim_size - 1`` is used, i.e. the last
        position along `dim`.

    Returns
    -------
    last_idx : xarray.DataArray
        An array (without the `dim` dimension) containing the index of the last
        True value along the specified dimension.
        Where no value is True, the corresponding entry is ``fill_value``.

    Raises
    ------
    ValueError
        If `da_condition` does not have a boolean dtype.

    Notes
    -----
    The function works by reversing the DataArray along the specified dimension and using
    `argmax` to find the first True value in the reversed array. It then calculates the
    corresponding index in the original array. When all values are False, `argmax`
    returns 0, which would be indistinguishable from "the last element is True".
    The function therefore checks whether there is any True value along the
    dimension and assigns ``fill_value`` where there is none.

    Examples
    --------
    >>> import xarray as xr
    >>> da = xr.DataArray([[False, False, True], [False, False, False]], dims=["time", "my_dimension"])
    >>> xr_get_last_valid_idx(da, "my_dimension").values
    array([2, 2])
    >>> xr_get_last_valid_idx(da, "my_dimension", fill_value=-1).values
    array([ 2, -1])

    For the first time step, the last True index is 2.
    For the second time step, all values are False, so the function returns the
    fill value: ``dim_size - 1 = 2`` by default, or ``-1`` when explicitly requested.

    """
    # Check input is a boolean array
    if not np.issubdtype(da_condition.dtype, np.bool_):
        raise ValueError("`da_condition` must be a boolean DataArray.")

    # Get the size of the 'dim' dimension
    dim_size = da_condition.sizes[dim]

    # Define default fill value (last position along 'dim')
    if fill_value is None:
        fill_value = dim_size - 1

    # Reverse the mask along 'dim'
    da_condition_reversed = da_condition.isel({dim: slice(None, None, -1)})

    # Check if there is any True value along the dimension for each slice
    has_true = da_condition.any(dim=dim)

    # Find the first True position in the reversed array
    # (argmax returns 0 when everything is False, hence the 'has_true' mask)
    last_idx_from_end = da_condition_reversed.argmax(dim=dim)

    # Convert the position from the end into an index of the original array
    last_idx = xr.where(
        has_true,
        dim_size - last_idx_from_end - 1,
        fill_value,
    )
    return last_idx
1✔
98

99

100
####-------------------------------------------------------------------
101
#### Unstacking dimension
102

103

104
def _check_coord_handling(coord_handling):
1✔
NEW
105
    if coord_handling not in {"keep", "drop", "unstack"}:
×
NEW
106
        raise ValueError("coord_handling must be one of 'keep', 'drop', or 'unstack'.")
×
107

108

109
def _unstack_coordinates(xr_obj, dim, prefix, suffix):
    """Unstack the non-dimensional coordinates of ``xr_obj`` that lie along ``dim``.

    Each coordinate returned by ``_get_non_dimensional_coordinates`` is split
    along ``dim`` with ``unstack_datarray_dimension`` (using
    ``coord_handling="drop"``) and the resulting variables are collected
    into a single, initially empty, ``xarray.Dataset`` which is returned.
    """
    unstacked = xr.Dataset()
    for name in _get_non_dimensional_coordinates(xr_obj, dim=dim):
        # Split this coordinate along 'dim' and merge its pieces into the output
        pieces = unstack_datarray_dimension(
            xr_obj[name],
            coord_handling="drop",
            dim=dim,
            prefix=prefix,
            suffix=suffix,
        )
        unstacked.update(pieces)
    return unstacked
×
119

120

121
def _handle_unstack_non_dim_coords(ds, source_xr_obj, coord_handling, dim, prefix, suffix):
    """Apply the ``coord_handling`` policy to coordinates sharing ``dim``.

    - ``"keep"``: ``ds`` is returned untouched.
    - ``"unstack"``: the coordinates of ``source_xr_obj`` lying along ``dim``
      are unstacked and added to ``ds`` before the stacked ones are dropped.
    - any other value (e.g. ``"drop"``): the non-dimensional coordinates
      of ``ds`` lying along ``dim`` are dropped.
    """
    if coord_handling != "keep":
        if coord_handling == "unstack":
            unstacked_coords = _unstack_coordinates(source_xr_obj, dim=dim, prefix=prefix, suffix=suffix)
            ds.update(unstacked_coords)
        # Both 'unstack' and 'drop' end by removing the stacked coordinates
        stacked_coords = _get_non_dimensional_coordinates(ds, dim=dim)
        ds = ds.drop_vars(stacked_coords)
    return ds
×
131

132

133
def _get_non_dimensional_coordinates(xr_obj, dim):
1✔
NEW
134
    return [coord_name for coord_name, coord_da in xr_obj.coords.items() if dim in coord_da.dims and coord_name != dim]
×
135

136

137
def unstack_datarray_dimension(da, dim, coord_handling="keep", prefix="", suffix=""):
    """
    Split a DataArray along a specified dimension into a Dataset with separate prefixed and suffixed variables.

    Parameters
    ----------
    da : xarray.DataArray
        The DataArray to split.
    dim : str
        The dimension along which to split the DataArray.
    coord_handling : str, optional
        Option to handle coordinates sharing the target dimension.
        Choices are 'keep', 'drop', or 'unstack'. Defaults to 'keep'.
    prefix : str, optional
        String to prepend to each new variable name.
    suffix : str, optional
        String to append to each new variable name.

    Returns
    -------
    xarray.Dataset
        A Dataset with each variable split along the specified dimension.
        The Dataset variables are named  "{prefix}{name}{suffix}{dim_value}".
        Coordinates sharing the target dimension are handled based on `coord_handling`.

    Raises
    ------
    ValueError
        If `coord_handling` is not one of 'keep', 'drop', or 'unstack'.
    """
    # Fail early on an unsupported option: without this check any unknown
    # value would silently be treated like 'drop' further down.
    _check_coord_handling(coord_handling)
    # Retrieve DataArray name
    name = da.name
    # Unstack variables
    ds = da.to_dataset(dim=dim)
    rename_dict = {dim_value: f"{prefix}{name}{suffix}{dim_value}" for dim_value in list(ds.data_vars)}
    ds = ds.rename_vars(rename_dict)
    # Deal with coordinates sharing the target dimension
    return _handle_unstack_non_dim_coords(
        ds=ds,
        source_xr_obj=da,
        coord_handling=coord_handling,
        dim=dim,
        prefix=prefix,
        suffix=suffix,
    )
177

178

179
####--------------------------------------------------------------------------
180
#### Fill Values Utilities
181

182

183
def define_dataarray_fill_value(da):
    """Define the fill value for a numerical xarray.DataArray.

    - floating dtype: ``dtypes.NA``
    - integer dtype: ``da.attrs["_FillValue"]`` if present, else
      ``da.encoding["_FillValue"]`` if present, else the maximum value
      representable by the dtype
    - any other dtype: ``None``
    """
    dtype = da.dtype
    if np.issubdtype(dtype, np.floating):
        return dtypes.NA
    if not np.issubdtype(dtype, np.integer):
        return None
    # Integer arrays: look for an explicit '_FillValue', attrs first, then encoding
    for source in ("attrs", "encoding"):
        metadata = getattr(da, source)
        if "_FillValue" in metadata:
            return metadata["_FillValue"]
    return np.iinfo(dtype).max
1✔
194

195

196
def define_dataarray_fill_value_dictionary(da):
    """Define fill values for a xarray.DataArray and its coordinates.

    Return a dict mapping names to the values given by
    ``define_dataarray_fill_value``. The DataArray itself is stored under
    ``da.name``, followed by the entries computed for ``da.coords``.
    Entries for which no fill value is defined (``None``) are omitted.
    """
    # Fill value of the DataArray itself (skipped when undefined)
    array_fill_value = define_dataarray_fill_value(da)
    fill_values = {} if array_fill_value is None else {da.name: array_fill_value}
    # Fill values of the coordinates
    fill_values.update(define_dataset_fill_value_dictionary(da.coords))
    return fill_values
1✔
212

213

214
def define_dataset_fill_value_dictionary(ds):
    """Define fill values for the variables and coordinates of a xarray.Dataset.

    Return a dict mapping each name in ``ds.variables`` to the value given by
    ``define_dataarray_fill_value(ds[name])``.
    Names for which that value is ``None`` are left out.
    """
    # Compute the candidate fill value of every variable and coordinate
    candidates = {name: define_dataarray_fill_value(ds[name]) for name in list(ds.variables)}
    # Keep only the entries with a defined fill value
    return {name: value for name, value in candidates.items() if value is not None}
1✔
229

230

231
def define_fill_value_dictionary(xr_obj):
    """Define fill values for the variables and coordinates of a xarray object.

    Dispatch to ``define_dataset_fill_value_dictionary`` when ``xr_obj`` is a
    ``xarray.Dataset`` and to ``define_dataarray_fill_value_dictionary``
    for anything else, returning the resulting dictionary.
    """
    if isinstance(xr_obj, xr.Dataset):
        builder = define_dataset_fill_value_dictionary
    else:
        builder = define_dataarray_fill_value_dictionary
    return builder(xr_obj)
1✔
241

242

243
####-----------------------------------------------------------------------------------
244
#### Diameter and Velocity Coordinates
245

246

247
def remove_diameter_coordinates(xr_obj):
    """Return ``xr_obj`` without the variables listed in ``disdrodb.DIAMETER_COORDS``.

    ``drop_vars`` is called with ``errors="ignore"``.
    """
    # NOTE(review): function-scope import, presumably to avoid a circular import - confirm
    from disdrodb import DIAMETER_COORDS

    coords_to_drop = DIAMETER_COORDS
    return xr_obj.drop_vars(coords_to_drop, errors="ignore")
1✔
252

253

254
def remove_velocity_coordinates(xr_obj):
    """Return ``xr_obj`` without the variables listed in ``disdrodb.VELOCITY_COORDS``.

    ``drop_vars`` is called with ``errors="ignore"``.
    """
    # NOTE(review): function-scope import, presumably to avoid a circular import - confirm
    from disdrodb import VELOCITY_COORDS

    coords_to_drop = VELOCITY_COORDS
    return xr_obj.drop_vars(coords_to_drop, errors="ignore")
1✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc