
Ouranosinc / miranda — Coveralls build 2117113978 (pending completion)

GitHub Pull Request #24: Add CMIP file structure, use pyessv controlled vocabularies, and major refactoring
Merge 22a6d4cb6 into bf78f91b7

234 of 1077 new or added lines in 35 files covered (21.73%).
13 existing lines in 4 files are now uncovered.
728 of 3221 relevant lines covered (22.6%), 0.68 hits per line.

Source file: /miranda/convert/hq.py — 34.57% of lines covered
import csv
import datetime as dt
import json
import logging.config
import re
from pathlib import Path
from typing import Any, Optional, Tuple, Union

import numpy as np
import pandas as pd
import xarray as xr

from miranda.scripting import LOGGING_CONFIG
from miranda.units import u, units2pint

logging.config.dictConfig(LOGGING_CONFIG)

__all__ = ["open_csv"]

# CMOR-like attributes
cmor = json.load(open(Path(__file__).parent / "data" / "hq_cf_attrs.json"))[
    "variable_entry"
]

# Regex matching French-formatted numbers (comma as decimal separator) or plain integers.
fp = r"[-+]?\d*,\d+|\d+"

section_patterns = r"(\w+) :\n"

meta_patterns = {
    "Installation": {
        "nom": "Nom;(.*)",
        "type": "Type;(.*)",
        "ouverture": "Ouverture;(.+)",
        "fermeture": "Fermeture;(.+)",
        "x": rf"XCOORD \(degrés\.décimales\);({fp})",
        "y": rf"YCOORD \(degrés\.décimales\);({fp})",
        "z": rf"ZCOORD \(mètres\);({fp})",
    },
    "Données": {
        "variable": "Type de donnée;(.*)",
        "fuseau": "Fuseau horaire;(.*)",
        "pas": "Pas de temps;(.*)",
        "mesure": "Type de mesure;(.*)",
        "unité": "Unité;(.*)",
    },
}

csv_patterns = ("Propriétaires", "Correspondances")

data_header_pattern = "Dateheure;Valeur;Qualite;Statut"

# Converters for metadata fields and data columns.
# Note: cf_units and cf_frequency are defined further below; the lambdas only
# look them up at call time.
converters = {
    "ouverture": np.datetime64,
    "fermeture": np.datetime64,
    "x": lambda x: float(x.replace(",", ".")),
    "y": lambda x: float(x.replace(",", ".")),
    "z": lambda x: float(x.replace(",", ".")),
    "Dateheure": lambda x: dt.datetime.strptime(x, "%Y-%m-%d %H:%M"),
    "Valeur": lambda x: float(x.replace(",", ".")) if x != "" else np.nan,
    "Qualite": lambda x: int(x.split("-")[0]),
    "Statut": lambda x: int(x.split("-")[0]),
    "unité": lambda x: cf_units.get(x, x),
    "pas": lambda x: cf_frequency.get(x, x),
}

def guess_variable(meta, cf_table: Optional[dict]) -> Tuple[str, Optional[str]]:
    """Return the corresponding CMOR variable."""
    if cf_table is None:
        cf_table = cmor

    v = meta["variable"]

    # Direct correspondence between HQ variable labels and CMOR-like names.
    corr = {
        "Pluie (mm)": "prlp",
        "Neige (mm)": "prsn",
        "Épaisseur de neige": "snd",
        "Vitesse du vent 10 mètres": "sfcWind",
        "Direction du vent 10 mètres": "sfcWindAz",
        "Humidité relative 2 mètres": "hurs",
    }

    name = ""
    table_name = None
    if v in corr:
        name = corr[v]
    else:
        if v == "Température":
            if meta["mesure"] == "Maximum":
                name = "tasmax"
            elif meta["mesure"] == "Minimum":
                name = "tasmin"

            table_name = f"{name}_{meta['pas']}"

    if meta["pas"] != cf_table[table_name or name]["frequency"]:
        raise ValueError("Unexpected frequency.")

    return name, table_name or name

cf_units = {"°C": "celsius", "mm": "mm/day"}
cf_frequency = {
    "Fin du pas journalier": "day",
    "Instantanée du pas horaire": "1h",
    "Fin du pas horaire": "1h",
}
cf_attrs_names = {"x": "lon", "y": "lat", "z": "elevation", "nom": "site"}

def extract_daily(path) -> Tuple[dict, pd.DataFrame]:
    """Extract data and metadata from HQ meteo file."""
    with open(path, encoding="latin1") as fh:
        txt = fh.read()
        meta, data = re.split(data_header_pattern, txt, maxsplit=2)

    # The metadata block alternates between section names and their content.
    sections = iter(re.split(section_patterns, meta)[1:])

    m = dict()
    for sec in sections:
        if sec in meta_patterns:
            content = next(sections)
            for key, pat in meta_patterns[sec].items():
                match = re.search(pat, content)
                if match:
                    m[key] = match.groups()[0]

        elif sec in csv_patterns:
            content = next(sections).strip()
            m[sec] = list(
                csv.reader(content.splitlines(), dialect="unix", delimiter=";")
            )

    d = pd.read_csv(
        path,
        delimiter=";",
        skiprows=len(meta.splitlines()),
        encoding="latin1",
        index_col=0,
        parse_dates=True,
        infer_datetime_format=True,
        decimal=",",
    )

    return m, d

def to_cf(
    meta: dict, data: pd.DataFrame, cf_table: Optional[dict] = None
) -> xr.DataArray:
    """Return CF-compliant metadata."""
    if cf_table is None:
        cf_table = cmor

    # Convert meta values
    m = dict()
    for key, val in meta.items():
        m[key] = converters.get(key, lambda q: q)(val)

    # Get default variable attributes
    name, table_name = guess_variable(m, cf_table)
    attrs = cf_table.get(table_name)

    # Add custom HQ attributes
    for key, val in cf_attrs_names.items():
        if key in m:
            attrs[val] = m[key]

    x = data["Valeur"].values.astype(float)

    # Convert units
    if attrs["units"] != m["unité"]:
        x = u.convert(x, m["unité"], units2pint(attrs["units"]))

    coords = {k: attrs.pop(k, np.nan) for k in ["lon", "lat", "elevation", "site"]}
    coords["time"] = data.index.values
    cf_corrected = xr.DataArray(
        data=x, dims="time", coords=coords, name=name, attrs=attrs
    )
    return cf_corrected

def open_csv(path: Union[str, Path], cf_table: Optional[dict] = cmor) -> xr.DataArray:
    """Extract daily HQ meteo data and convert to xr.DataArray with CF-Convention attributes."""
    meta, data = extract_daily(path)
    return to_cf(meta, data, cf_table)