• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

ghiggi / gpm_api / 8501239008

31 Mar 2024 10:12PM UTC coverage: 87.857% (+0.2%) from 87.669%
8501239008

Pull #53

github

ghiggi
Add flake8-comprehensions
Pull Request #53: Refactor code style

701 of 796 new or added lines in 86 files covered. (88.07%)

4 existing lines in 4 files now uncovered.

9001 of 10245 relevant lines covered (87.86%)

0.88 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

94.34
/gpm/dataset/decoding/dataarray_attrs.py
1
# -----------------------------------------------------------------------------.
2
# MIT License
3

4
# Copyright (c) 2024 GPM-API developers
5
#
6
# This file is part of GPM-API.
7

8
# Permission is hereby granted, free of charge, to any person obtaining a copy
9
# of this software and associated documentation files (the "Software"), to deal
10
# in the Software without restriction, including without limitation the rights
11
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12
# copies of the Software, and to permit persons to whom the Software is
13
# furnished to do so, subject to the following conditions:
14
#
15
# The above copyright notice and this permission notice shall be included in all
16
# copies or substantial portions of the Software.
17
#
18
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24
# SOFTWARE.
25

26
# -----------------------------------------------------------------------------.
27
"""This module contains functions to standardize GPM-API Dataset attributes."""
1✔
28
import re
1✔
29

30
import numpy as np
1✔
31

32

33
def convert_string_to_number(string):
    """Convert a numeric string to an ``int`` or a ``float``.

    Integer literals, including signed ones such as ``"-9999"``, are
    returned as ``int``. Any other numeric literal (``"-9999.9"``, ``"1e3"``,
    ``"nan"``) is returned as ``float``.

    Parameters
    ----------
    string : str
        Textual representation of a number.

    Returns
    -------
    int or float
        The parsed number.

    Raises
    ------
    ValueError
        If ``string`` cannot be parsed as a number.
    """
    # str.isdigit() is False for signed values, which would turn an integer
    # fill value like "-9999" into a float. Let int() decide what an integer is.
    try:
        return int(string)
    except ValueError:
        return float(string)
1✔
37

38

39
def ensure_dtype_name(dtype):
    """Return the string name of a dtype.

    A ``numpy.dtype`` instance is replaced by its ``name`` attribute.
    Any other value is returned unchanged.
    """
    return dtype.name if isinstance(dtype, np.dtype) else dtype
1✔
47

48

49
def _check_fillvalue_format(attrs):
    """Normalize the fill value attributes of an attributes dictionary.

    String-valued ``CodeMissingValue`` and ``_FillValue`` entries are converted
    to numbers. If ``CodeMissingValue`` is present, it replaces ``_FillValue``
    and is then removed.

    The dictionary is modified in place and also returned.
    """
    # Ensure fill values are numbers
    for key in ("CodeMissingValue", "_FillValue"):
        value = attrs.get(key)
        if isinstance(value, str):
            attrs[key] = convert_string_to_number(value)

    # _FillValue and CodeMissingValue are deliberately not checked for
    # agreement, because _FillValue is often badly defined.
    # - TODO: report issues to NASA team

    # CodeMissingValue, when available, takes over as _FillValue and is dropped
    if "CodeMissingValue" in attrs:
        attrs["_FillValue"] = attrs.pop("CodeMissingValue")

    return attrs
1✔
74

75

76
def _sanitize_attributes(attrs):
    """Clean up an attributes dictionary.

    - ``Units`` is copied to ``units`` when ``units`` is missing or empty,
      and ``Units`` is then removed.
    - ``DimensionNames`` is removed.
    - ``LongName`` is replaced by ``description``, with newlines and tabs
      turned into spaces, repeated spaces collapsed and outer whitespace
      stripped.

    The dictionary is modified in place and also returned.
    """
    # Promote 'Units' to 'units' unless a non-empty 'units' already exists
    lowercase_units = attrs.get("units", False)
    capitalized_units = attrs.get("Units", False)
    if not lowercase_units and capitalized_units:
        attrs["units"] = capitalized_units

    # Drop the attributes that are never kept
    for key in ("Units", "DimensionNames"):
        attrs.pop(key, None)

    # Turn 'LongName' into a single-line 'description'
    if "LongName" in attrs:
        single_line = attrs["LongName"].replace("\n", " ").replace("\t", " ")
        attrs["description"] = re.sub(" +", " ", single_line).strip()
        del attrs["LongName"]

    return attrs
1✔
96

97

98
def _format_dataarray_attrs(da, product=None):
    """Standardize the attributes and dtype encoding of a single variable.

    Parameters
    ----------
    da
        Object exposing ``attrs`` and ``encoding`` dictionaries.
    product : optional
        If not ``None``, stored in the ``gpm_api_product`` attribute.

    Returns
    -------
    The input ``da`` with its ``attrs`` reassigned.
    """
    # Ensure fill values are numbers, then remove Units, DimensionNames
    # and sanitize LongName
    # - If CodeMissingValue is present, it is used as _FillValue
    # - _FillValue are moved to encoding by xr.decode_cf !
    attrs = _sanitize_attributes(_check_fillvalue_format(da.attrs))

    # The encoding dtype must be a dtype string name
    encoding = da.encoding
    has_encoding_dtype = "dtype" in encoding
    if has_encoding_dtype:
        encoding["dtype"] = ensure_dtype_name(encoding["dtype"])

    # source_dtype must be a dtype string name as well. When it is absent,
    # it falls back to the encoding dtype
    if "source_dtype" in attrs:
        attrs["source_dtype"] = ensure_dtype_name(attrs["source_dtype"])
    elif has_encoding_dtype:
        attrs["source_dtype"] = encoding["dtype"]

    # Add gpm_api product name
    if product is not None:
        attrs["gpm_api_product"] = product

    # Attach attributes
    da.attrs = attrs
    return da
1✔
128

129

130
def standardize_dataarrays_attrs(ds, product):
    """Standardize the attributes of every variable and coordinate of ``ds``.

    Each variable is passed through ``_format_dataarray_attrs``, the attributes
    of the bounds coordinates are emptied, and the attributes of every
    coordinate are passed through ``_sanitize_attributes``.

    Returns the input ``ds``.
    """
    # Sanitize variable attributes
    for name, data_array in ds.items():
        ds[name] = _format_dataarray_attrs(data_array, product)

    # Drop attributes from bounds coordinates
    # - https://github.com/pydata/xarray/issues/8368
    # - Attribute is lost when writing to netcdf
    for bounds_name in ("time_bnds", "lon_bnds", "lat_bnds"):
        if bounds_name in ds:
            ds[bounds_name].attrs = {}

    # Sanitize coordinates attributes
    coord_names = list(ds.coords)
    for coord_name in coord_names:
        ds[coord_name].attrs = _sanitize_attributes(ds[coord_name].attrs)

    return ds
1✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc