• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

ghiggi / gpm_api / 8501078316

31 Mar 2024 09:37PM UTC coverage: 87.854% (+0.2%) from 87.669%
8501078316

Pull #53

github

ghiggi
Add pandas-vet rules
Pull Request #53: Refactor code style

649 of 737 new or added lines in 86 files covered. (88.06%)

4 existing lines in 4 files now uncovered.

9005 of 10250 relevant lines covered (87.85%)

0.88 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

94.34
/gpm/dataset/decoding/dataarray_attrs.py
1
# -----------------------------------------------------------------------------.
2
# MIT License
3

4
# Copyright (c) 2024 GPM-API developers
5
#
6
# This file is part of GPM-API.
7

8
# Permission is hereby granted, free of charge, to any person obtaining a copy
9
# of this software and associated documentation files (the "Software"), to deal
10
# in the Software without restriction, including without limitation the rights
11
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12
# copies of the Software, and to permit persons to whom the Software is
13
# furnished to do so, subject to the following conditions:
14
#
15
# The above copyright notice and this permission notice shall be included in all
16
# copies or substantial portions of the Software.
17
#
18
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24
# SOFTWARE.
25

26
# -----------------------------------------------------------------------------.
27
"""This module contains functions to standardize GPM-API Dataset attributes."""
1✔
28
import re
1✔
29

30
import numpy as np
1✔
31

32

33
def convert_string_to_number(string):
    """Convert a numeric string to an ``int`` when possible, otherwise to a ``float``.

    Integer parsing is attempted first so that signed integers such as
    ``"-9999"`` are returned as ``int`` instead of being promoted to ``float``
    (``str.isdigit`` rejects the sign character).

    Parameters
    ----------
    string : str
        Text representation of a number.

    Returns
    -------
    int or float
        ``int`` if ``string`` is a valid integer literal, ``float`` otherwise.

    Raises
    ------
    ValueError
        If ``string`` can be parsed neither as an integer nor as a float.
    """
    try:
        return int(string)
    except ValueError:
        # Not an integer literal (e.g. "1.5", "1e3", "nan"): parse as float.
        return float(string)
1✔
37

38

39
def ensure_dtype_name(dtype):
    """Return the string name of a dtype.

    A ``numpy.dtype`` instance is replaced by its ``name`` attribute.
    Any other object is returned unchanged.
    """
    return dtype.name if isinstance(dtype, np.dtype) else dtype
1✔
47

48

49
def _check_fillvalue_format(attrs):
    """Normalize the fill value attributes of ``attrs`` in place.

    - String values of 'CodeMissingValue' and '_FillValue' are converted to numbers.
    - If 'CodeMissingValue' is present, it replaces '_FillValue' and is removed.

    The (modified) ``attrs`` mapping is returned.
    """
    # Fill values stored as strings are turned into numbers
    for key in ("CodeMissingValue", "_FillValue"):
        value = attrs.get(key)
        if isinstance(value, str):
            attrs[key] = convert_string_to_number(value)

    # NOTE: agreement between _FillValue and CodeMissingValue is deliberately
    # not enforced, since _FillValue is often badly defined.
    # - TODO: report issues to NASA team

    # 'CodeMissingValue' takes precedence over '_FillValue' and is then dropped
    if "CodeMissingValue" in attrs:
        attrs["_FillValue"] = attrs.pop("CodeMissingValue")

    return attrs
1✔
74

75

76
def _sanitize_attributes(attrs):
    """Clean up an attributes dictionary in place and return it.

    - 'Units' is renamed to 'units' when 'units' is missing or empty.
    - 'Units' and 'DimensionNames' are discarded.
    - 'LongName' is replaced by a 'description' with newlines and tabs turned
      into spaces, repeated spaces collapsed and surrounding whitespace stripped.
    """
    # Promote 'Units' to 'units' only if no usable 'units' exists
    has_units = bool(attrs.get("units", False))
    if not has_units and attrs.get("Units", False):
        attrs["units"] = attrs.pop("Units")

    # Discard attributes that must not be kept
    for key in ("Units", "DimensionNames"):
        attrs.pop(key, None)

    # Turn 'LongName' into a single-line 'description'
    if "LongName" in attrs:
        flattened = attrs["LongName"].replace("\n", " ").replace("\t", " ")
        attrs["description"] = re.sub(" +", " ", flattened).strip()
        del attrs["LongName"]
    return attrs
1✔
94

95

96
def _format_dataarray_attrs(da, product=None):
    """Standardize the attributes and dtype encoding of ``da``.

    The attributes are passed through ``_check_fillvalue_format`` and
    ``_sanitize_attributes``, dtype entries are converted to string names,
    'source_dtype' is filled from the encoding when missing, and
    'gpm_api_product' is set when ``product`` is not None.

    The same ``da`` object is returned with the attributes re-attached.
    """
    # Ensure fill values are numbers, then drop Units / DimensionNames
    # and sanitize LongName
    # - If CodeMissingValue is present, it is used as _FillValue
    # - _FillValue are moved to encoding by xr.decode_cf !
    attrs = _sanitize_attributes(_check_fillvalue_format(da.attrs))

    # The encoding dtype must be a dtype string name
    if "dtype" in da.encoding:
        da.encoding["dtype"] = ensure_dtype_name(da.encoding["dtype"])

    # source_dtype: normalize if present, otherwise take it from the encoding
    if "source_dtype" in attrs:
        attrs["source_dtype"] = ensure_dtype_name(attrs["source_dtype"])
    elif "dtype" in da.encoding:
        attrs["source_dtype"] = da.encoding["dtype"]

    # Record the gpm_api product name
    if product is not None:
        attrs["gpm_api_product"] = product

    # Re-attach the attributes
    da.attrs = attrs
    return da
1✔
126

127

128
def standardize_dataarrays_attrs(ds, product):
    """Standardize the attributes of the variables and coordinates of ``ds``.

    Every variable is processed by ``_format_dataarray_attrs``, the attributes
    of the bounds coordinates are emptied, and the coordinates attributes are
    passed through ``_sanitize_attributes``. The same ``ds`` is returned.
    """
    # Variables
    for name, data_array in ds.items():
        ds[name] = _format_dataarray_attrs(data_array, product)

    # Bounds coordinates must not carry attributes
    # - https://github.com/pydata/xarray/issues/8368
    # - Attribute is lost when writing to netcdf
    for bounds_name in ("time_bnds", "lon_bnds", "lat_bnds"):
        if bounds_name in ds:
            ds[bounds_name].attrs = {}

    # Coordinates
    for coord_name in list(ds.coords):
        ds[coord_name].attrs = _sanitize_attributes(ds[coord_name].attrs)

    return ds
1✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc