Ouranosinc / miranda / 2197825867 (pending completion)

GitHub Pull Request #33: Support CORDEX and CMIP5/6
Merge d0dcd4e6c into d55f76503

2 of 25 new or added lines in 5 files covered. (8.0%)

567 existing lines in 15 files now uncovered.

737 of 3285 relevant lines covered (22.44%)

0.67 hits per line
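As a quick check (derived only from the counts above, not additional report data), the headline percentages follow directly from the line totals:

print(2 / 25)      # 0.08    -> the 8.0% figure for new or added lines
print(737 / 3285)  # ~0.2244 -> the 22.44% overall coverage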

Source File: /miranda/convert/deh.py (32.08% covered)

In the listing below, every module-level statement (imports, constants, and the def lines) is hit 3 times, while all lines inside the bodies of extract_daily, to_cf, and open_txt are uncovered.
import json
import logging.config
import re
from pathlib import Path
from typing import Optional, Tuple, Union

import pandas as pd
import xarray as xr

from miranda.scripting import LOGGING_CONFIG
from miranda.units import u

logging.config.dictConfig(LOGGING_CONFIG)

__all__ = ["open_txt"]

# CMOR-like attributes
cmor = json.load(open(Path(__file__).parent / "data" / "deh_cf_attrs.json"))[  # noqa
    "variable_entry"
]

# TODO: Some potentially useful attributes were skipped, because they would be complicated to include in a dataset since they vary per station
meta_patterns = {
    "Station: ": "name",
    "Bassin versant: ": "bv",
    "Coordonnées: (NAD83) ": "coords",
}

data_header_pattern = "Station Date Débit (m³/s) Remarque\n"

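The patterns above describe what the parser expects from a DEH export: a free-form metadata block followed by the literal data header. The sketch below is a hypothetical illustration assembled from meta_patterns and data_header_pattern, not an excerpt from a real file; actual exports may carry extra fields (such as the " Régime" note stripped in extract_daily below):

Station: 030101 Nom de la station
Bassin versant: 1234 km²
Coordonnées: (NAD83) 45°30'30" // -73°33'00"
Station Date Débit (m³/s) Remarque
...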

def extract_daily(path) -> Tuple[dict, pd.DataFrame]:
    """Extract data and metadata from DEH (MELCC) stream flow file."""
    with open(path, encoding="latin1") as fh:
        txt = fh.read()
        txt = re.sub(" +", " ", txt)
        meta, data = txt.split(data_header_pattern)

    m = dict()
    for key in meta_patterns:
        # Various possible separators to take into account
        m[meta_patterns[key]] = (
            meta.split(key)[1].split(" \n")[0].split("\n")[0].split(" Régime")[0]
        )

    d = pd.read_csv(
        path,
        delimiter=r"\s+",
        skiprows=len(meta.splitlines()),
        encoding="latin1",
        # keep the station identifier column as strings
        converters={0: lambda x: str(x)},  # noqa
        index_col=1,
        parse_dates=True,
        infer_datetime_format=True,  # deprecated as of pandas 2.0
    )
    if len(d["Station"].unique()) == 1:
        m["station"] = d["Station"].unique()[0]
        d = d.drop("Station", axis=1)
    else:
        raise ValueError("Multiple stations detected in the same file.")
    # The whitespace delimiter splits the "Débit (m³/s)" header into two column
    # labels, so the flag text lands under "(m³/s)" and the trailing "Remarque"
    # column is empty; rename and drop accordingly.
    d = d.rename(columns={"Remarque": "Nan", "(m³/s)": "Remarque"})
    d.index.names = ["time"]
    d = d.drop("Nan", axis=1)

    return m, d


def to_cf(meta: dict, data: pd.DataFrame, cf_table: Optional[dict] = {}) -> xr.Dataset:
    """Return CF-compliant metadata."""
    ds = xr.Dataset()

    ds["q"] = xr.DataArray(data["Débit"], attrs=cf_table["q"])
    ds["flag"] = xr.DataArray(data["Remarque"], attrs=cf_table["flag"])

    ds["name"] = xr.DataArray(meta["name"])
    ds["station_id"] = xr.DataArray(meta["station"])

    ds["area"] = xr.DataArray(
        u.convert(float(meta["bv"].split(" ")[0]), meta["bv"].split(" ")[1], "km²"),
        attrs={"long_name": "drainage area", "units": "km2"},
    )

    def parse_dms(coord):
        deg, minutes, seconds, _ = re.split("[°'\"]", coord)
        if float(deg) > 0:
            return round(
                float(deg) + float(minutes) / 60 + float(seconds) / (60 * 60), 6
            )
        return round(float(deg) - (float(minutes) / 60 + float(seconds) / (60 * 60)), 6)

    coords = meta["coords"].split(" // ")
    ds["lat"] = xr.DataArray(
        parse_dms(coords[0]),
        attrs={
            "standard_name": "latitude",
            "long_name": "latitude",
            "units": "decimal_degrees",
        },
    )
    ds["lon"] = xr.DataArray(
        parse_dms(coords[1]),
        attrs={
            "standard_name": "longitude",
            "long_name": "longitude",
            "units": "decimal_degrees",
        },
    )

    ds.attrs[
        "institution"
    ] = "Ministère de l'Environnement et de la Lutte contre les changements climatiques"
    ds.attrs[
        "source"
    ] = "Hydrometric data <https://www.cehq.gouv.qc.ca/hydrometrie/historique_donnees/index.asp>"
    ds.attrs["redistribution"] = "Redistribution policy unknown. For internal use only."

    return ds

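For reference, the nested parse_dms helper above converts a degrees-minutes-seconds string into decimal degrees. A worked example with hypothetical coordinates (not taken from any station record):

# parse_dms('45°30\'30"')  returns  45 + 30/60 + 30/3600  = 45.508333
# parse_dms('-73°33\'00"') returns -73 - (33/60 + 0/3600) = -73.55
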
def open_txt(path: Union[str, Path], cf_table: Optional[dict] = cmor) -> xr.Dataset:
    """Extract daily DEH (MELCC) streamflow data and convert to an xr.Dataset with CF-Convention attributes."""
    meta, data = extract_daily(path)
    return to_cf(meta, data, cf_table)
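
A minimal usage sketch (the import path follows the file location shown above; the input and output file names are hypothetical):

from miranda.convert.deh import open_txt

ds = open_txt("030101_debits.txt")  # hypothetical DEH export
print(ds["q"])                      # daily streamflow with CMOR-like attributes
ds.to_netcdf("030101_debits.nc")    # write the CF-annotated dataset to netCDF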