Ouranosinc / miranda / 2197825867 (pending completion)

GitHub Pull Request #33: Support CORDEX and CMIP5/6
Merge d0dcd4e6c into d55f76503

2 of 25 new or added lines in 5 files covered. (8.0%)

567 existing lines in 15 files now uncovered.

737 of 3285 relevant lines covered (22.44%)

0.67 hits per line
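As a quick check (derived only from the counts above, not additional report data), the headline percentages follow directly from the line totals:

print(2 / 25)      # 0.08    -> the 8.0% figure for new or added lines
print(737 / 3285)  # ~0.2244 -> the 22.44% overall coverage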

Source File: /miranda/convert/deh.py (32.08% covered)

In the listing below, every module-level statement (imports, constants, and the def lines) is hit 3 times, while all lines inside the bodies of extract_daily, to_cf, and open_txt are uncovered.
import json
import logging.config
import re
from pathlib import Path
from typing import Optional, Tuple, Union

import pandas as pd
import xarray as xr

from miranda.scripting import LOGGING_CONFIG
from miranda.units import u

logging.config.dictConfig(LOGGING_CONFIG)

__all__ = ["open_txt"]

# CMOR-like attributes
cmor = json.load(open(Path(__file__).parent / "data" / "deh_cf_attrs.json"))[  # noqa
    "variable_entry"
]

# TODO: Some potentially useful attributes were skipped, because they would be complicated to include in a dataset since they vary per station
meta_patterns = {
    "Station: ": "name",
    "Bassin versant: ": "bv",
    "Coordonnées: (NAD83) ": "coords",
}

data_header_pattern = "Station Date Débit (m³/s) Remarque\n"

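The patterns above describe what the parser expects from a DEH export: a free-form metadata block followed by the literal data header. The sketch below is a hypothetical illustration assembled from meta_patterns and data_header_pattern, not an excerpt from a real file; actual exports may carry extra fields (such as the " Régime" note stripped in extract_daily below):

Station: 030101 Nom de la station
Bassin versant: 1234 km²
Coordonnées: (NAD83) 45°30'30" // -73°33'00"
Station Date Débit (m³/s) Remarque
...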

def extract_daily(path) -> Tuple[dict, pd.DataFrame]:
    """Extract data and metadata from DEH (MELCC) stream flow file."""
    with open(path, encoding="latin1") as fh:
        txt = fh.read()
        txt = re.sub(" +", " ", txt)
        meta, data = txt.split(data_header_pattern)

    m = dict()
    for key in meta_patterns:
        # Various possible separators to take into account
        m[meta_patterns[key]] = (
            meta.split(key)[1].split(" \n")[0].split("\n")[0].split(" Régime")[0]
        )

    d = pd.read_csv(
        path,
        delimiter=r"\s+",
        skiprows=len(meta.splitlines()),
        encoding="latin1",
        # keep the station identifier column as strings
        converters={0: lambda x: str(x)},  # noqa
        index_col=1,
        parse_dates=True,
        infer_datetime_format=True,  # deprecated as of pandas 2.0
    )
    if len(d["Station"].unique()) == 1:
        m["station"] = d["Station"].unique()[0]
        d = d.drop("Station", axis=1)
    else:
        raise ValueError("Multiple stations detected in the same file.")
    # The whitespace delimiter splits the "Débit (m³/s)" header into two column
    # labels, so the flag text lands under "(m³/s)" and the trailing "Remarque"
    # column is empty; rename and drop accordingly.
    d = d.rename(columns={"Remarque": "Nan", "(m³/s)": "Remarque"})
    d.index.names = ["time"]
    d = d.drop("Nan", axis=1)

    return m, d


def to_cf(meta: dict, data: pd.DataFrame, cf_table: Optional[dict] = {}) -> xr.Dataset:
    """Return CF-compliant metadata."""
    ds = xr.Dataset()

    ds["q"] = xr.DataArray(data["Débit"], attrs=cf_table["q"])
    ds["flag"] = xr.DataArray(data["Remarque"], attrs=cf_table["flag"])

    ds["name"] = xr.DataArray(meta["name"])
    ds["station_id"] = xr.DataArray(meta["station"])

    ds["area"] = xr.DataArray(
        u.convert(float(meta["bv"].split(" ")[0]), meta["bv"].split(" ")[1], "km²"),
        attrs={"long_name": "drainage area", "units": "km2"},
    )

    def parse_dms(coord):
        deg, minutes, seconds, _ = re.split("[°'\"]", coord)
        if float(deg) > 0:
            return round(
                float(deg) + float(minutes) / 60 + float(seconds) / (60 * 60), 6
            )
        return round(float(deg) - (float(minutes) / 60 + float(seconds) / (60 * 60)), 6)

    coords = meta["coords"].split(" // ")
    ds["lat"] = xr.DataArray(
        parse_dms(coords[0]),
        attrs={
            "standard_name": "latitude",
            "long_name": "latitude",
            "units": "decimal_degrees",
        },
    )
    ds["lon"] = xr.DataArray(
        parse_dms(coords[1]),
        attrs={
            "standard_name": "longitude",
            "long_name": "longitude",
            "units": "decimal_degrees",
        },
    )

    ds.attrs[
        "institution"
    ] = "Ministère de l'Environnement et de la Lutte contre les changements climatiques"
    ds.attrs[
        "source"
    ] = "Hydrometric data <https://www.cehq.gouv.qc.ca/hydrometrie/historique_donnees/index.asp>"
    ds.attrs["redistribution"] = "Redistribution policy unknown. For internal use only."

    return ds

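For reference, the nested parse_dms helper above converts a degrees-minutes-seconds string into decimal degrees. A worked example with hypothetical coordinates (not taken from any station record):

# parse_dms('45°30\'30"')  returns  45 + 30/60 + 30/3600  = 45.508333
# parse_dms('-73°33\'00"') returns -73 - (33/60 + 0/3600) = -73.55
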
def open_txt(path: Union[str, Path], cf_table: Optional[dict] = cmor) -> xr.Dataset:
    """Extract daily DEH (MELCC) streamflow data and convert to an xr.Dataset with CF-Convention attributes."""
    meta, data = extract_daily(path)
    return to_cf(meta, data, cf_table)
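
A minimal usage sketch (the import path follows the file location shown above; the input and output file names are hypothetical):

from miranda.convert.deh import open_txt

ds = open_txt("030101_debits.txt")  # hypothetical DEH export
print(ds["q"])                      # daily streamflow with CMOR-like attributes
ds.to_netcdf("030101_debits.nc")    # write the CF-annotated dataset to netCDF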