• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

Ouranosinc / xclim / 12954486692

24 Jan 2025 05:26PM UTC coverage: 89.967% (+0.03%) from 89.934%
12954486692

Pull #2054

github

web-flow
Merge 96d93d316 into 210ad9567
Pull Request #2054: Support Python3.13

35 of 44 new or added lines in 9 files covered. (79.55%)

55 existing lines in 3 files now uncovered.

9523 of 10585 relevant lines covered (89.97%)

7.62 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

97.57
/src/xclim/core/dataflags.py
1
"""
2
Data Flags
3
==========
4

5
Pseudo-indicators designed to analyse supplied variables for suspicious/erroneous indicator values.
6
"""
7

8
from __future__ import annotations
9✔
9

10
from collections.abc import Callable, Sequence
9✔
11
from functools import reduce
9✔
12
from inspect import signature
9✔
13

14
import numpy as np
9✔
15
import xarray
9✔
16

17
from xclim.core._exceptions import MissingVariableError, raise_warn_or_log
9✔
18
from xclim.core._types import VARIABLES, Quantified
9✔
19
from xclim.core.calendar import climatological_mean_doy, within_bnds_doy
9✔
20
from xclim.core.formatting import update_xclim_history
9✔
21
from xclim.core.units import convert_units_to, declare_units, infer_context, str2pint
9✔
22
from xclim.core.utils import InputKind, infer_kind_from_parameter
9✔
23
from xclim.indices.generic import binary_ops
9✔
24
from xclim.indices.run_length import suspicious_run
9✔
25

26
# Maps the ``__name__`` of each registered data-flag function to the function object.
# Entries are added by the decorator returned from `register_methods`.
_REGISTRY: dict[str, Callable] = {}
9✔
27

28

29
class DataQualityException(Exception):
    """
    Raised when any data evaluation checks are flagged as `True`.

    Parameters
    ----------
    flag_array : xarray.Dataset
        Xarray.Dataset of Data Flags.
    message : str
        Message prepended to the error messages.

    Attributes
    ----------
    flag_array : xarray.Dataset or None
        The dataset of flags that was passed to the exception.
    message : str
        The message prefix.
    flags : list of str
        The ``description`` attribute of every flag variable that has at least one truthy value.
    """

    flag_array: xarray.Dataset | None = None

    def __init__(
        self,
        flag_array: xarray.Dataset,
        message="Data quality flags indicate suspicious values. Flags raised are:\n  - ",
    ):
        # Keep the flags on the instance so that handlers can inspect what was raised.
        self.flag_array = flag_array
        self.message = message
        # Only flags that are raised *and* carry a description are reported.
        self.flags = [
            value.attrs["description"]
            for value in flag_array.data_vars.values()
            if value.any() and "description" in value.attrs
        ]
        super().__init__(self.message)

    def __str__(self):
        """Format the errors for history."""
        nl = "\n  - "
        return f"{self.message}{nl.join(self.flags)}"
9✔
61

62

63
# Names exported by ``from xclim.core.dataflags import *``.
__all__ = [
    "DataQualityException",
    "data_flags",
    "ecad_compliant",
    "negative_accumulation_values",
    "outside_n_standard_deviations_of_climatology",
    "percentage_values_outside_of_bounds",
    "register_methods",
    "tas_below_tasmin",
    "tas_exceeds_tasmax",
    "tasmax_below_tasmin",
    "temperature_extremely_high",
    "temperature_extremely_low",
    "values_op_thresh_repeating_for_n_or_more_days",
    "values_repeating_for_n_or_more_days",
    "very_large_precipitation_events",
    "wind_values_outside_of_bounds",
]
81

82

83
def register_methods(variable_name: str = None) -> Callable:
    """
    Build a decorator that records a data flag function in the module registry.

    The optional argument is a template for the name of the output variable. The template may use any of the
    string-like input arguments of the decorated function. When omitted, the function name is used as-is,
    which may create variable conflicts.

    Parameters
    ----------
    variable_name : str, optional
        The output variable name template. Default is `None`.

    Returns
    -------
    callable
        The function being registered.
    """

    def _register_methods(flag_func):
        """Attach the name template to the function and store it in the registry."""
        template = variable_name or flag_func.__name__
        flag_func.variable_name = template
        _REGISTRY[flag_func.__name__] = flag_func
        return flag_func

    return _register_methods
9✔
108

109

110
def _sanitize_attrs(da: xarray.DataArray) -> xarray.DataArray:
9✔
111
    to_remove = []
9✔
112
    for attr in da.attrs.keys():
9✔
113
        if str(attr) != "history":
9✔
114
            to_remove.append(attr)
9✔
115
    for attr in to_remove:
9✔
116
        del da.attrs[attr]
9✔
117
    return da
9✔
118

119

120
@register_methods()
@update_xclim_history
@declare_units(tasmax="[temperature]", tasmin="[temperature]")
def tasmax_below_tasmin(
    tasmax: xarray.DataArray,
    tasmin: xarray.DataArray,
) -> xarray.DataArray:
    """
    Flag the days on which the maximum temperature is lower than the minimum temperature.

    Parameters
    ----------
    tasmax : xarray.DataArray
        Maximum temperature.
    tasmin : xarray.DataArray
        Minimum temperature.

    Returns
    -------
    xarray.DataArray, [bool]
        Boolean array of True where tasmax is below tasmin.

    Examples
    --------
    To gain access to the flag_array:

    >>> from xclim.core.dataflags import tasmax_below_tasmin
    >>> ds = xr.open_dataset(path_to_tas_file)
    >>> flagged = tasmax_below_tasmin(ds.tasmax, ds.tasmin)
    """
    flagged = _sanitize_attrs(tasmax < tasmin)
    flagged.attrs.update(
        description="Maximum temperature values found below minimum temperatures.",
        units="",
    )
    return flagged
9✔
155

156

157
@register_methods()
@update_xclim_history
@declare_units(tas="[temperature]", tasmax="[temperature]")
def tas_exceeds_tasmax(
    tas: xarray.DataArray,
    tasmax: xarray.DataArray,
) -> xarray.DataArray:
    """
    Flag the days on which the mean temperature exceeds the maximum temperature.

    Parameters
    ----------
    tas : xarray.DataArray
        Mean temperature.
    tasmax : xarray.DataArray
        Maximum temperature.

    Returns
    -------
    xarray.DataArray, [bool]
        Boolean array of True where tas is above tasmax.

    Examples
    --------
    To gain access to the flag_array:

    >>> from xclim.core.dataflags import tas_exceeds_tasmax
    >>> ds = xr.open_dataset(path_to_tas_file)
    >>> flagged = tas_exceeds_tasmax(ds.tas, ds.tasmax)
    """
    flagged = _sanitize_attrs(tas > tasmax)
    flagged.attrs.update(
        description="Mean temperature values found above maximum temperatures.",
        units="",
    )
    return flagged
9✔
192

193

194
@register_methods()
@update_xclim_history
@declare_units(tas="[temperature]", tasmin="[temperature]")
def tas_below_tasmin(
    tas: xarray.DataArray, tasmin: xarray.DataArray
) -> xarray.DataArray:
    """
    Flag the days on which the mean temperature is lower than the minimum temperature.

    Parameters
    ----------
    tas : xarray.DataArray
        Mean temperature.
    tasmin : xarray.DataArray
        Minimum temperature.

    Returns
    -------
    xarray.DataArray, [bool]
        Boolean array of True where tas is below tasmin.

    Examples
    --------
    To gain access to the flag_array:

    >>> from xclim.core.dataflags import tas_below_tasmin
    >>> ds = xr.open_dataset(path_to_tas_file)
    >>> flagged = tas_below_tasmin(ds.tas, ds.tasmin)
    """
    flagged = _sanitize_attrs(tas < tasmin)
    flagged.attrs.update(
        description="Mean temperature values found below minimum temperatures.",
        units="",
    )
    return flagged
9✔
228

229

230
@register_methods()
@update_xclim_history
@declare_units(da="[temperature]", thresh="[temperature]")
def temperature_extremely_low(
    da: xarray.DataArray, *, thresh: Quantified = "-90 degC"
) -> xarray.DataArray:
    """
    Flag the days on which the temperature falls below a threshold (-90 degrees Celsius by default).

    Parameters
    ----------
    da : xarray.DataArray
        Temperature.
    thresh : str
        Threshold below which temperatures are considered problematic and a flag is raised.
        Default is -90 degrees Celsius.

    Returns
    -------
    xarray.DataArray, [bool]
        Boolean array of True where temperatures are below the threshold.

    Examples
    --------
    To gain access to the flag_array:

    >>> from xclim.core.dataflags import temperature_extremely_low
    >>> ds = xr.open_dataset(path_to_tas_file)
    >>> temperature = "-90 degC"
    >>> flagged = temperature_extremely_low(ds.tas, thresh=temperature)
    """
    # The comparison is done in the units of `da`; the message keeps the threshold as given.
    flagged = _sanitize_attrs(da < convert_units_to(thresh, da))
    flagged.attrs.update(
        description=f"Temperatures found below {thresh} in {da.name}.",
        units="",
    )
    return flagged
9✔
267

268

269
@register_methods()
@update_xclim_history
@declare_units(da="[temperature]", thresh="[temperature]")
def temperature_extremely_high(
    da: xarray.DataArray, *, thresh: Quantified = "60 degC"
) -> xarray.DataArray:
    """
    Flag the days on which the temperature exceeds a threshold (60 degrees Celsius by default).

    Parameters
    ----------
    da : xarray.DataArray
        Temperature.
    thresh : str
        Threshold above which temperatures are considered problematic and a flag is raised.
        Default is 60 degrees Celsius.

    Returns
    -------
    xarray.DataArray, [bool]
        Boolean array of True where temperatures are above the threshold.

    Examples
    --------
    To gain access to the flag_array:

    >>> from xclim.core.dataflags import temperature_extremely_high
    >>> ds = xr.open_dataset(path_to_tas_file)
    >>> temperature = "60 degC"
    >>> flagged = temperature_extremely_high(ds.tas, thresh=temperature)
    """
    # The comparison is done in the units of `da`; the message keeps the threshold as given.
    flagged = _sanitize_attrs(da > convert_units_to(thresh, da))
    flagged.attrs.update(
        description=f"Temperatures found in excess of {thresh} in {da.name}.",
        units="",
    )
    return flagged
9✔
305

306

307
@register_methods()
@update_xclim_history
def negative_accumulation_values(
    da: xarray.DataArray,
) -> xarray.DataArray:
    """
    Flag the days on which the variable takes a negative value.

    Parameters
    ----------
    da : xarray.DataArray
        Variable array.

    Returns
    -------
    xarray.DataArray, [bool]
        Boolean array of True where values are negative.

    Examples
    --------
    To gain access to the flag_array:

    >>> from xclim.core.dataflags import negative_accumulation_values
    >>> ds = xr.open_dataset(path_to_pr_file)
    >>> flagged = negative_accumulation_values(ds.pr)
    """
    flagged = _sanitize_attrs(da < 0)
    flagged.attrs.update(
        description=f"Negative values found for {da.name}.",
        units="",
    )
    return flagged
9✔
338

339

340
@register_methods()
@update_xclim_history
@declare_units(da="[precipitation]", thresh="[precipitation]")
def very_large_precipitation_events(
    da: xarray.DataArray, *, thresh: Quantified = "300 mm d-1"
) -> xarray.DataArray:
    """
    Flag the days on which precipitation exceeds a threshold (300 mm/day by default).

    Parameters
    ----------
    da : xarray.DataArray
        Precipitation.
    thresh : str
        Threshold to search array for that will trigger flag if any day exceeds value.

    Returns
    -------
    xarray.DataArray, [bool]
        Boolean array of True where precipitation values exceed the threshold.

    Examples
    --------
    To gain access to the flag_array:

    >>> from xclim.core.dataflags import very_large_precipitation_events
    >>> ds = xr.open_dataset(path_to_pr_file)
    >>> rate = "300 mm d-1"
    >>> flagged = very_large_precipitation_events(ds.pr, thresh=rate)
    """
    # The threshold is converted to the units of `da` using the "hydro" context.
    limit = convert_units_to(thresh, da, context="hydro")
    flagged = _sanitize_attrs(da > limit)
    flagged.attrs.update(
        description=f"Precipitation events in excess of {thresh} for {da.name}.",
        units="",
    )
    return flagged
9✔
376

377

378
@register_methods("values_{op}_{thresh}_repeating_for_{n}_or_more_days")
@update_xclim_history
def values_op_thresh_repeating_for_n_or_more_days(
    da: xarray.DataArray, *, n: int, thresh: Quantified, op: str = "=="
) -> xarray.DataArray:
    """
    Flag runs of `N` or more days during which values satisfy a comparison with a threshold.

    Parameters
    ----------
    da : xarray.DataArray
        Variable array.
    n : int
        Number of repeating days needed to trigger flag.
    thresh : str
        Repeating values to search for that will trigger flag.
    op : {">", "gt", "<", "lt", ">=", "ge", "<=", "le", "==", "eq", "!=", "ne"}
        Operator used for comparison with thresh.

    Returns
    -------
    xarray.DataArray, [bool]
        Boolean array of True where values repeat at threshold for `N` or more days.

    Examples
    --------
    To gain access to the flag_array:

    >>> from xclim.core.dataflags import values_op_thresh_repeating_for_n_or_more_days
    >>> ds = xr.open_dataset(path_to_pr_file)
    >>> units = "5 mm d-1"
    >>> days = 5
    >>> comparison = "eq"
    >>> flagged = values_op_thresh_repeating_for_n_or_more_days(
    ...     ds.pr, n=days, thresh=units, op=comparison
    ... )
    """
    # The unit-conversion context is inferred from the array's standard name, when it has one.
    context = infer_context(standard_name=da.attrs.get("standard_name"))
    # From here on `thresh` is the converted threshold; it is also what the description reports.
    thresh = convert_units_to(thresh, da, context=context)

    runs = suspicious_run(da, window=n, op=op, thresh=thresh)
    flagged = _sanitize_attrs(runs)
    flagged.attrs.update(
        description=f"Repetitive values at {thresh} for at least {n} days found for {da.name}.",
        units="",
    )
    return flagged
9✔
426

427

428
@register_methods()
@update_xclim_history
@declare_units(da="[speed]", lower="[speed]", upper="[speed]")
def wind_values_outside_of_bounds(
    da: xarray.DataArray,
    *,
    lower: Quantified = "0 m s-1",
    upper: Quantified = "46 m s-1",
) -> xarray.DataArray:
    """
    Check if wind speed values exceed reasonable bounds for any given day.

    Parameters
    ----------
    da : xarray.DataArray
        Wind speed.
    lower : str
        The lower limit for wind speed. Default is 0 m s-1.
    upper : str
        The upper limit for wind speed. Default is 46 m s-1.

    Returns
    -------
    xarray.DataArray, [bool]
        The boolean array of True where values exceed the bounds.

    Examples
    --------
    To gain access to the flag_array:

    >>> from xclim.core.dataflags import wind_values_outside_of_bounds
    >>> ceiling, floor = "46 m s-1", "0 m s-1"
    >>> flagged = wind_values_outside_of_bounds(
    ...     sfcWind_dataset, upper=ceiling, lower=floor
    ... )
    """
    # Both bounds are converted to the units of `da`; the description reports the converted values.
    lower, upper = convert_units_to(lower, da), convert_units_to(upper, da)
    out_of_bounds = _sanitize_attrs((da < lower) | (da > upper))
    # This check is about wind speed: the message must not describe the values as percentages.
    description = f"Wind speed values exceeding bounds of {lower} and {upper} found for {da.name}."
    out_of_bounds.attrs["description"] = description
    out_of_bounds.attrs["units"] = ""
    return out_of_bounds
9✔
470

471

472
# TODO: 'Many excessive dry days' = the amount of dry days lies outside a 14·bivariate standard deviation
473

474

475
@register_methods("outside_{n}_standard_deviations_of_climatology")
@update_xclim_history
def outside_n_standard_deviations_of_climatology(
    da: xarray.DataArray,
    *,
    n: int,
    window: int = 5,
) -> xarray.DataArray:
    """
    Flag daily values lying more than `n` standard deviations away from the day-of-year mean.

    Parameters
    ----------
    da : xarray.DataArray
        Variable array.
    n : int
        Number of standard deviations.
    window : int
        Moving window used in determining the climatological mean. Default: `5`.

    Returns
    -------
    xarray.DataArray, [bool]
        The boolean array of True where values exceed the bounds.

    Notes
    -----
    A moving window of five (5) days is suggested for `tas` data flag calculations according to ICCLIM data quality standards.

    References
    ----------
    :cite:cts:`project_team_eca&d_algorithm_2013`

    Examples
    --------
    To gain access to the flag_array:

    >>> from xclim.core.dataflags import outside_n_standard_deviations_of_climatology
    >>> ds = xr.open_dataset(path_to_tas_file)
    >>> std_devs = 5
    >>> average_over = 5
    >>> flagged = outside_n_standard_deviations_of_climatology(
    ...     ds.tas, n=std_devs, window=average_over
    ... )
    """
    mu, sig = climatological_mean_doy(da, window=window)
    upper = mu + n * sig
    lower = mu - n * sig
    inside = _sanitize_attrs(within_bnds_doy(da, high=upper, low=lower))
    inside.attrs.update(
        description=(
            f"Values outside of {n} standard deviations from climatology found for {da.name} "
            f"with moving window of {window} days."
        ),
        units="",
    )
    # The attributes are set on the "within bounds" mask; the returned flag is its negation.
    return ~inside
9✔
531

532

533
@register_methods("values_repeating_for_{n}_or_more_days")
@update_xclim_history
def values_repeating_for_n_or_more_days(
    da: xarray.DataArray, *, n: int
) -> xarray.DataArray:
    """
    Flag runs of identical values lasting `n` or more days.

    Parameters
    ----------
    da : xarray.DataArray
        Variable array.
    n : int
        Number of days to trigger flag.

    Returns
    -------
    xarray.DataArray, [bool]
        The boolean array of True where values repeat for `n` or more days.

    Examples
    --------
    To gain access to the flag_array:

    >>> from xclim.core.dataflags import values_repeating_for_n_or_more_days
    >>> ds = xr.open_dataset(path_to_pr_file)
    >>> flagged = values_repeating_for_n_or_more_days(ds.pr, n=5)
    """
    flagged = _sanitize_attrs(suspicious_run(da, window=n))
    flagged.attrs.update(
        description=f"Runs of repetitive values for {n} or more days found for {da.name}.",
        units="",
    )
    return flagged
9✔
566

567

568
@register_methods()
@update_xclim_history
def percentage_values_outside_of_bounds(da: xarray.DataArray) -> xarray.DataArray:
    """
    Check if variable values fall below 0% or exceed 100% for any given day.

    Parameters
    ----------
    da : xarray.DataArray
        Variable array.

    Returns
    -------
    xarray.DataArray, [bool]
        The boolean array of True where values exceed the bounds.

    Examples
    --------
    To gain access to the flag_array:

    >>> from xclim.core.dataflags import percentage_values_outside_of_bounds
    >>> flagged = percentage_values_outside_of_bounds(huss_dataset)
    """
    # NOTE(review): the bounds are hard-coded, which presumes `da` is expressed in percent - confirm.
    unbounded_percentages = _sanitize_attrs((da < 0) | (da > 100))
    description = f"Percentage values beyond bounds found for {da.name}."
    unbounded_percentages.attrs["description"] = description
    # Like every other flag in this module, the boolean output is marked as unitless.
    unbounded_percentages.attrs["units"] = ""
    return unbounded_percentages
9✔
595

596

597
def data_flags(  # noqa: C901
    da: xarray.DataArray,
    ds: xarray.Dataset | None = None,
    flags: dict | None = None,
    dims: None | str | Sequence[str] = "all",
    freq: str | None = None,
    raise_flags: bool = False,
) -> xarray.Dataset:
    """
    Evaluate the supplied DataArray for a set of data flag checks.

    Test triggers depend on variable name and availability of extra variables within Dataset for comparison.
    If called with `raise_flags=True`, will raise a DataQualityException with comments for each failed quality check.

    Parameters
    ----------
    da : xarray.DataArray
        The variable to check.
        Must have a name that is a valid CMIP6 variable name and appears in :py:obj:`xclim.core.utils.VARIABLES`.
    ds : xarray.Dataset, optional
        An optional dataset with extra variables needed by some checks.
    flags : dict, optional
        A dictionary where the keys are the name of the flags to check and the values are parameter dictionaries.
        The value can be None if there are no parameters to pass (i.e. default will be used).
        The default, None, means that the data flags list will be taken from :py:obj:`xclim.core.utils.VARIABLES`.
    dims : {"all", None} or str or a sequence of strings
        Dimensions upon which the aggregation should be performed. Default: "all".
    freq : str, optional
        Resampling frequency to have data_flags aggregated over periods.
        Defaults to None, which means the "time" axis is treated as any other dimension (see `dims`).
    raise_flags : bool
        Raise exception if any of the quality assessment flags are raised. Default: False.

    Returns
    -------
    xarray.Dataset
        The Dataset of boolean flag arrays.

    Raises
    ------
    DataQualityException
        If `raise_flags` is True and at least one flag holds a truthy value.

    Examples
    --------
    To evaluate all applicable data flags for a given variable:

    >>> from xclim.core.dataflags import data_flags
    >>> ds = xr.open_dataset(path_to_pr_file)
    >>> flagged_multi = data_flags(ds.pr, ds)
    >>> # The next example evaluates only one data flag, passing specific parameters. It also aggregates the flags
    >>> # yearly over the "time" dimension only, such that a True means there is a bad data point for that year
    >>> # at that location.
    >>> flagged_single = data_flags(
    ...     ds.pr,
    ...     ds,
    ...     flags={"very_large_precipitation_events": {"thresh": "250 mm d-1"}},
    ...     dims=None,
    ...     freq="YS",
    ... )
    """

    def _get_variable_name(function, _kwargs):
        """Fill the output-name template of a flag function from its default and supplied arguments."""
        format_args = {}
        _kwargs = _kwargs or {}
        for arg, param in signature(function).parameters.items():
            # A user-supplied value takes precedence over the signature default.
            val = _kwargs.get(arg, param.default)
            kind = infer_kind_from_parameter(param)
            if arg == "op":
                # Translate the operator through `binary_ops` when it is one of its keys.
                format_args[arg] = val if val not in binary_ops else binary_ops[val]
            elif kind in [
                InputKind.FREQ_STR,
                InputKind.NUMBER,
                InputKind.STRING,
                InputKind.DAY_OF_YEAR,
                InputKind.DATE,
                InputKind.BOOL,
            ]:
                format_args[arg] = val
            elif kind == InputKind.QUANTIFIED:
                if isinstance(val, xarray.DataArray):
                    format_args[arg] = "array"
                else:
                    # Only the magnitude is kept; "." and "-" are spelled out in the resulting name.
                    val = str2pint(val).magnitude
                    if val == int(val):
                        val = str(int(val))
                    else:
                        val = str(val).replace(".", "point")
                    val = val.replace("-", "minus")
                    format_args[arg] = str(val)
        return function.variable_name.format(**format_args)

    def _missing_vars(function, dataset: xarray.Dataset, var_provided: str):
        """Handle missing variables in passed datasets."""
        sig = signature(function)
        sig_params = sig.parameters
        extra_vars = {}
        for arg, val in sig_params.items():
            # The checked variable itself is passed separately by the caller.
            if arg in ["da", var_provided]:
                continue
            kind = infer_kind_from_parameter(val)
            if kind in [InputKind.VARIABLE]:
                if arg in dataset:
                    extra_vars[arg] = dataset[arg]
                else:
                    raise MissingVariableError()
        return extra_vars

    var = str(da.name)
    if dims == "all":
        dims = da.dims
    elif isinstance(dims, str):
        # Thus, a single dimension name, we allow this option to mirror xarray.
        dims = {dims}
    if freq is not None and dims is not None:
        # "time" is aggregated by the resampling below, not by the reduction over `dims`.
        dims = (
            set(dims) - {"time"}
        ) or None  # Will return None if the only dimension was "time".

    if flags is None:
        try:
            flag_funcs = VARIABLES.get(var)["data_flags"]
        except (KeyError, TypeError) as err:
            raise_warn_or_log(
                err,
                mode="raise" if raise_flags else "log",
                msg=f"Data quality checks do not exist for '{var}' variable.",
                err_type=NotImplementedError,
            )
            return xarray.Dataset()
    else:
        flag_funcs = [flags]

    ds = ds or xarray.Dataset()

    flags = {}
    for flag_func in flag_funcs:
        for name, kwargs in flag_func.items():
            func = _REGISTRY[name]
            variable_name = _get_variable_name(func, kwargs)
            named_da_variable = None

            try:
                extras = _missing_vars(func, ds, var)
                # Entries in extras implies that there are two variables being compared
                # Both variables will be sent in as dict entries
                if extras:
                    named_da_variable = {da.name: da}

            except MissingVariableError:
                # The check cannot run: keep a placeholder entry instead of a flag array.
                flags[variable_name] = None
            else:
                with xarray.set_options(keep_attrs=True):
                    if named_da_variable:
                        out = func(**named_da_variable, **extras, **(kwargs or {}))
                    else:
                        out = func(da, **extras, **(kwargs or {}))

                    # Aggregation
                    if freq is not None:
                        out = out.resample(time=freq).any()
                    if dims is not None:
                        out = out.any(dims)

                flags[variable_name] = out

    ds_flags = xarray.Dataset(data_vars=flags)

    # Each flag is reduced on its own: stacking the flags into a single array (as `np.any` on a
    # list would) fails when they do not share a shape, e.g. un-aggregated flags next to a
    # scalar placeholder.
    if raise_flags and any(bool(flag.any()) for flag in ds_flags.data_vars.values()):
        raise DataQualityException(ds_flags)

    return ds_flags
9✔
765

766

767
def ecad_compliant(
    ds: xarray.Dataset,
    dims: None | str | Sequence[str] = "all",
    raise_flags: bool = False,
    append: bool = True,
) -> xarray.DataArray | xarray.Dataset | None:
    """
    Run ECAD compliance tests.

    Assert file adheres to ECAD-based quality assurance checks.

    Parameters
    ----------
    ds : xarray.Dataset
        Variable-containing dataset.
    dims : {"all"} or str or a sequence of strings, optional
        Dimensions upon which aggregation should be performed. Default: ``"all"``.
    raise_flags : bool
        Raise exception if any of the quality assessment flags are raised, otherwise returns None. Default: ``False``.
    append : bool
        If `True`, returns the Dataset with the `ecad_qc_flag` array appended to data_vars.
        If `False`, returns the DataArray of the `ecad_qc_flag` variable.

    Returns
    -------
    xarray.DataArray or xarray.Dataset or None
        Flag array or Dataset with flag array(s) appended.

    Raises
    ------
    DataQualityException
        If `raise_flags` is True and at least one flag holds a truthy value.
    """
    flags = xarray.Dataset()
    history: list[str] = []
    for var in ds.data_vars:
        df = data_flags(ds[var], ds, dims=dims)
        for flag_name, flag_data in df.data_vars.items():
            # Prefix with the variable name so that flags of different variables cannot collide.
            flags = flags.assign({f"{var}_{flag_name}": flag_data})

            if (
                "history" in flag_data.attrs
                and np.all(flag_data.values) is not None
            ):
                # The extra `split("\n") should be removed when merge_attributes(missing_str=None)
                history_elems = flag_data.attrs["history"].split("\n")[-1].split(" ")
                if not history:
                    # The header line is built once, from the first and last words of the first entry.
                    history.append(
                        " ".join(
                            [
                                " ".join(history_elems[0:2]),
                                " ".join(history_elems[-4:]),
                                "- Performed the following checks:",
                            ]
                        )
                    )
                history.append(" ".join(history_elems[3:-4]))

    if raise_flags:
        # Each flag is reduced on its own: stacking them into a single array (as `np.any` on a
        # list would) fails when flags of different variables do not share a shape.
        if any(bool(flag.any()) for flag in flags.data_vars.values()):
            raise DataQualityException(flags)
        return None

    ecad_flag = xarray.DataArray(
        # TODO: Test for this change concerning data of type None in dataflag variables
        # NOTE(review): `reduce` raises TypeError when no boolean flag exists (nothing to combine).
        ~reduce(
            np.logical_or,
            filter(lambda x: x.dtype == bool, flags.data_vars.values()),  # noqa
        ),
        name="ecad_qc_flag",
        attrs={
            "comment": "Adheres to ECAD quality control checks.",
            "history": "\n".join(history),
        },
    )

    if append:
        return xarray.merge([ds, ecad_flag])
    return ecad_flag
×
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc