9607256703

Committed 21 Jun 2024 02:17AM UTC coverage: 100.0%. First build

Build # 9607256703

Build Type

Pull #540

github

Committed by

web-flow

Commit Message

Merge 2cb621cd0 into ec49d5248

Pull Request Pull Request #540: (ready for review) 426 consistent scoring rules

Run Details

281 of 281 branches covered (100.0%)

Branch coverage included in aggregate %.

41 of 41 new or added lines in 2 files covered. (100.0%)

1702 of 1702 relevant lines covered (100.0%)

2.0 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

100.0

/src/scores/continuous/consistent_impl.py

"""
Implementation of scoring functions that are consistent for
single-valued forecasts targeting quantiles, expectiles or Huber functionals.
"""

from typing import Callable, Optional

import xarray as xr

from scores.functions import apply_weights
from scores.typing import FlexibleDimensionTypes
from scores.utils import gather_dimensions


def consistent_expectile_score(
    fcst: xr.DataArray,
    obs: xr.DataArray,
    *,  # Force keywords arguments to be keyword-only
    alpha: float,
    phi: Callable[[xr.DataArray], xr.DataArray],
    phi_prime: Callable[[xr.DataArray], xr.DataArray],
    reduce_dims: Optional[FlexibleDimensionTypes] = None,
    preserve_dims: Optional[FlexibleDimensionTypes] = None,
    weights: Optional[xr.DataArray] = None,
) -> xr.DataArray:
    """
    Returns the score using a scoring function that is consistent for the
    alpha-expectile functional, based on a supplied convex function phi.
    See Gneiting (2011), or Equation (10) from Taggart (2021).

    Args:
        fcst: array of forecast values.
        obs: array of corresponding observation values.
        alpha: expectile level. Must be strictly between 0 and 1.
        phi: a convex function on the real numbers, accepting a single array like argument.
        phi_prime: a subderivative of `phi`, accepting a single array like argument.
        reduce_dims: Optionally specify which dimensions to reduce when
            calculating the consistent expectile score. All other dimensions will be preserved. As a
            special case, 'all' will allow all dimensions to be reduced. Only one
            of `reduce_dims` and `preserve_dims` can be supplied. The default behaviour
            if neither are supplied is to reduce all dims.
        preserve_dims: Optionally specify which dimensions to preserve when calculating
            the consistent expectile score. All other dimensions will be reduced. As a special case, 'all'
            will allow all dimensions to be preserved. In this case, the result will be in
            the same shape/dimensionality as the forecast, and the errors will be the consistent expectile
            score at each point (i.e. single-value comparison against observed), and the
            forecast and observed dimensions must match precisely. Only one of `reduce_dims`
            and `preserve_dims` can be supplied. The default behaviour if neither are supplied
            is to reduce all dims.
        weights: Optionally provide an array for weighted averaging (e.g. by area, by latitude,
            by population, custom)

    Returns:
        array of (mean) scores that is consistent for alpha-expectile functional,
        averaged over the dimensions selected via `reduce_dims` or `preserve_dims`.
        If neither is supplied, the returned DataArray will have only one entry,
        the overall mean score.

    Raises:
        ValueError: if `alpha` is not strictly between 0 and 1.

    Note:
        .. math::

            S(x, y) =
            \\begin{cases}
            (1 - \\alpha)(\\phi(y) - \\phi(x) - \\phi'(x)(y-x)), & y < x \\\\
            \\alpha(\\phi(y) - \\phi(x) - \\phi'(x)(y-x)), & x \\leq y
            \\end{cases}

        where

            - :math:`x` is the forecast
            - :math:`y` is the observation
            - :math:`\\alpha` is the expectile level
            - :math:`\\phi` is a convex function of a single variable
            - :math:`\\phi'` is the subderivative of :math:`\\phi`
            - :math:`S(x,y)` is the score.

        Note that if :math:`\\phi` is differentiable then :math:`\\phi'` is its derivative.

    References:
        -   Gneiting, T. (2011). "Making and evaluating point forecasts",
            J. Amer. Statist. Assoc.,
            https://doi.org/10.1198/jasa.2011.r10138
        -   Taggart, R. (2021). "Evaluation of point forecasts for extreme events
            using consistent scoring functions", Q. J. Royal Meteorol. Soc.,
            https://doi.org/10.1002/qj.4206

    """
    check_alpha(alpha)

    # When neither argument is given, `reduce_dims` stays None and the final
    # mean is taken over every dimension.
    if preserve_dims or reduce_dims:
        reduce_dims = gather_dimensions(fcst.dims, obs.dims, reduce_dims=reduce_dims, preserve_dims=preserve_dims)

    # The bracketed term of the formula is identical on both branches, so the
    # user-supplied callables are evaluated once and only the multiplier varies.
    shared_term = phi(obs) - phi(fcst) - phi_prime(fcst) * (obs - fcst)
    score_overfcst = (1 - alpha) * shared_term
    score_underfcst = alpha * shared_term

    # Over-forecast branch where obs < fcst; everywhere else (including ties)
    # the under-forecast branch applies.
    result = score_overfcst.where(obs < fcst, score_underfcst)
    result = apply_weights(result, weights=weights)
    result = result.mean(dim=reduce_dims)

    return result


def consistent_huber_score(
    fcst: xr.DataArray,
    obs: xr.DataArray,
    *,  # Force keywords arguments to be keyword-only
    huber_param: float,
    phi: Callable[[xr.DataArray], xr.DataArray],
    phi_prime: Callable[[xr.DataArray], xr.DataArray],
    reduce_dims: Optional[FlexibleDimensionTypes] = None,
    preserve_dims: Optional[FlexibleDimensionTypes] = None,
    weights: Optional[xr.DataArray] = None,
) -> xr.DataArray:
    """
    Score that is consistent for the Huber mean functional with tuning parameter `huber_param`,
    based on convex function phi. See Taggart (2022), or Equation (11) from Taggart (2021).
    See Taggart (2021), end of Section 3.4, for the standard formula.

    Args:
        fcst: array of forecast values.
        obs: array of corresponding observation values.
        huber_param: Huber mean tuning parameter. This corresponds to the transition point between
            linear and quadratic loss for Huber loss. Must be positive.
        phi: a convex function on the real numbers, accepting a single array like argument.
        phi_prime: a subderivative of `phi`, accepting a single array like argument.
        reduce_dims: Optionally specify which dimensions to reduce when
            calculating the consistent Huber score. All other dimensions will be preserved. As a
            special case, 'all' will allow all dimensions to be reduced. Only one
            of `reduce_dims` and `preserve_dims` can be supplied. The default behaviour
            if neither are supplied is to reduce all dims.
        preserve_dims: Optionally specify which dimensions to preserve when calculating
            the consistent Huber score. All other dimensions will be reduced. As a special case, 'all'
            will allow all dimensions to be preserved. In this case, the result will be in
            the same shape/dimensionality as the forecast, and the errors will be the consistent Huber
            score at each point (i.e. single-value comparison against observed), and the
            forecast and observed dimensions must match precisely. Only one of `reduce_dims`
            and `preserve_dims` can be supplied. The default behaviour if neither are supplied
            is to reduce all dims.
        weights: Optionally provide an array for weighted averaging (e.g. by area, by latitude,
            by population, custom)

    Returns:
        array of (mean) scores that is consistent for Huber mean functional,
        averaged over the dimensions selected via `reduce_dims` or `preserve_dims`.
        If neither is supplied, the returned DataArray will have only one entry,
        the overall mean score.

    Raises:
        ValueError: if `huber_param <= 0`.

    References:
        -   Taggart, R. (2021). "Evaluation of point forecasts for extreme events
            using consistent scoring functions", Q. J. Royal Meteorol. Soc.,
            https://doi.org/10.1002/qj.4206
        -   Taggart, R. (2022). "Point forecasting and forecast evaluation with
            generalized Huber loss", Electron. J. Statist. 16(1): 201-231.
            https://doi.org/10.1214/21-ejs1957
    """
    check_huber_param(huber_param)

    # Dimensions handed to the final mean; None (neither argument supplied)
    # averages over everything.
    mean_dims = reduce_dims
    if preserve_dims or reduce_dims:
        mean_dims = gather_dimensions(
            fcst.dims,
            obs.dims,
            reduce_dims=reduce_dims,
            preserve_dims=preserve_dims,
        )

    # Forecast error limited to the interval [-huber_param, huber_param].
    clipped_error = (fcst - obs).clip(min=-huber_param, max=huber_param)

    phi_at_obs = phi(obs)
    phi_at_shifted_obs = phi(clipped_error + obs)
    slope_term = clipped_error * phi_prime(fcst)
    pointwise_score = 0.5 * (phi_at_obs - phi_at_shifted_obs + slope_term)

    weighted_score = apply_weights(pointwise_score, weights=weights)
    return weighted_score.mean(dim=mean_dims)


def consistent_quantile_score(
    fcst: xr.DataArray,
    obs: xr.DataArray,
    *,  # Force keywords arguments to be keyword-only
    alpha: float,
    g: Callable[[xr.DataArray], xr.DataArray],
    reduce_dims: Optional[FlexibleDimensionTypes] = None,
    preserve_dims: Optional[FlexibleDimensionTypes] = None,
    weights: Optional[xr.DataArray] = None,
) -> xr.DataArray:
    """
    Score that is consistent for the alpha-quantile functional, based on nondecreasing function g.
    See Gneiting (2011), or Equation (8) from Taggart (2021).

    Args:
        fcst: array of forecast values.
        obs: array of corresponding observation values.
        alpha: quantile level. Must be strictly between 0 and 1.
        g: nondecreasing function on the real numbers, accepting a single array like argument.
        reduce_dims: Optionally specify which dimensions to reduce when
            calculating the consistent quantile score. All other dimensions will be preserved. As a
            special case, 'all' will allow all dimensions to be reduced. Only one
            of `reduce_dims` and `preserve_dims` can be supplied. The default behaviour
            if neither are supplied is to reduce all dims.
        preserve_dims: Optionally specify which dimensions to preserve when calculating
            the consistent quantile score. All other dimensions will be reduced. As a special case, 'all'
            will allow all dimensions to be preserved. In this case, the result will be in
            the same shape/dimensionality as the forecast, and the errors will be the consistent quantile
            score at each point (i.e. single-value comparison against observed), and the
            forecast and observed dimensions must match precisely. Only one of `reduce_dims`
            and `preserve_dims` can be supplied. The default behaviour if neither are supplied
            is to reduce all dims.
        weights: Optionally provide an array for weighted averaging (e.g. by area, by latitude,
            by population, custom)

    Returns:
        array of (mean) scores that are consistent for alpha-quantile functional,
        averaged over the dimensions selected via `reduce_dims` or `preserve_dims`.
        If neither is supplied, the returned DataArray will have only one entry,
        the overall mean score.

    Raises:
        ValueError: if `alpha` is not strictly between 0 and 1.

    Note:
        .. math::

            S(x, y) =
            \\begin{cases}
            (1 - \\alpha)(g(x) - g(y)), & y < x \\\\
            \\alpha(g(y) - g(x)), & x \\leq y
            \\end{cases}

        where

            - :math:`x` is the forecast
            - :math:`y` is the observation
            - :math:`\\alpha` is the quantile level
            - :math:`g` is a nondecreasing function of a single variable
            - :math:`S(x,y)` is the score.


    References:
        -   Gneiting, T. (2011). "Making and evaluating point forecasts",
            J. Amer. Statist. Assoc.,
            https://doi.org/10.1198/jasa.2011.r10138
        -   Taggart, R. (2021). "Evaluation of point forecasts for extreme events
            using consistent scoring functions", Q. J. Royal Meteorol. Soc.,
            https://doi.org/10.1002/qj.4206
    """
    check_alpha(alpha)

    # When neither argument is given, `reduce_dims` stays None and the final
    # mean is taken over every dimension.
    if preserve_dims or reduce_dims:
        reduce_dims = gather_dimensions(fcst.dims, obs.dims, reduce_dims=reduce_dims, preserve_dims=preserve_dims)

    # g(fcst) - g(obs) appears in both branches; evaluate the user-supplied
    # callable once per input instead of twice.
    g_diff = g(fcst) - g(obs)
    score_overfcst = (1 - alpha) * g_diff
    # -alpha * (g(x) - g(y)) equals alpha * (g(y) - g(x)) from the formula.
    score_underfcst = -alpha * g_diff

    # Over-forecast branch where obs < fcst; everywhere else (including ties)
    # the under-forecast branch applies.
    result = score_overfcst.where(obs < fcst, score_underfcst)
    result = apply_weights(result, weights=weights)
    result = result.mean(dim=reduce_dims)

    return result


def check_alpha(alpha: float) -> None:
    """
    Raises if quantile or expectile level `alpha` not in the open interval (0,1).

    Args:
        alpha: the level to validate.

    Raises:
        ValueError: if `alpha` is not strictly between 0 and 1. NaN is rejected
            as well, since it is not a member of the interval.
    """
    # Phrased as a positive membership test: `alpha <= 0 or alpha >= 1` is
    # False for NaN (every ordering comparison with NaN is False), which would
    # let NaN through and silently produce all-NaN scores downstream.
    if not 0 < alpha < 1:
        raise ValueError("`alpha` must be strictly between 0 and 1")


def check_huber_param(huber_param: float) -> None:
    """
    Raises if `huber_param` is not positive.

    Args:
        huber_param: the Huber tuning parameter to validate.

    Raises:
        ValueError: if `huber_param` is not strictly greater than 0. NaN is
            rejected as well, since it is not a positive number.
    """
    # Phrased as a positive test: `huber_param <= 0` is False for NaN, which
    # would let NaN through validation.
    if not huber_param > 0:
        raise ValueError("`huber_param` must be positive")

1	"""
2	Implementation of scoring functions that are consistent for
3	single-valued forecasts targeting quantiles, expectiles or Huber functionals.
4	"""
5
6	from typing import Callable, Optional	2✔
7
8	import xarray as xr	2✔
9
10	from scores.functions import apply_weights	2✔
11	from scores.typing import FlexibleDimensionTypes	2✔
12	from scores.utils import gather_dimensions	2✔
13
14
15	def consistent_expectile_score(	2✔
16	fcst: xr.DataArray,
17	obs: xr.DataArray,
18	*, # Force keywords arguments to be keyword-only
19	alpha: float,
20	phi: Callable[[xr.DataArray], xr.DataArray],
21	phi_prime: Callable[[xr.DataArray], xr.DataArray],
22	reduce_dims: Optional[FlexibleDimensionTypes] = None,
23	preserve_dims: Optional[FlexibleDimensionTypes] = None,
24	weights: Optional[xr.DataArray] = None,
25	) -> xr.DataArray:
26	"""
27	Returns the score using a scoring function that is consistent for the
28	alpha-expectile functional, based on a supplied convex function phi.
29	See Geniting (2011), or Equation (10) from Taggart (2021).
30
31	Args:
32	fcst: array of forecast values.
33	obs: array of corresponding observation values.
34	alpha: expectile level. Must be strictly between 0 and 1.
35	phi: a convex function on the real numbers, accepting a single array like argument.
36	phi_prime: a subderivative of `phi`, accepting a single array like argument.
37	reduce_dims: Optionally specify which dimensions to reduce when
38	calculating the consistent expectile score. All other dimensions will be preserved. As a
39	special case, 'all' will allow all dimensions to be reduced. Only one
40	of `reduce_dims` and `preserve_dims` can be supplied. The default behaviour
41	if neither are supplied is to reduce all dims.
42	preserve_dims: Optionally specify which dimensions to preserve when calculating
43	the consistent quantile score. All other dimensions will be reduced. As a special case, 'all'
44	will allow all dimensions to be preserved. In this case, the result will be in
45	the same shape/dimensionality as the forecast, and the errors will be the consistent quantile
46	score at each point (i.e. single-value comparison against observed), and the
47	forecast and observed dimensions must match precisely. Only one of `reduce_dims`
48	and `preserve_dims` can be supplied. The default behaviour if neither are supplied
49	is to reduce all dims.
50	weights: Optionally provide an array for weighted averaging (e.g. by area, by latitude,
51	by population, custom)
52
53	Returns:
54	array of (mean) scores that is consistent for alpha-expectile functional,
55	with the dimensions specified by `dims`. If `dims` is `None`, the returned DataArray will have
56	only one entry, the overall mean score.
57
58	Raises:
59	ValueError: if `alpha` is not strictly between 0 and 1.
60
61	Note:
62	.. math::
63
64	S(x, y) =
65	\\begin{cases}
66	(1 - \\alpha)(\\phi(y) - \\phi(x) - \\phi'(x)(y-x)), & y < x \\\\
67	\\alpha(\\phi(y) - \\phi(x) - \\phi'(x)(y-x)), & x \\leq y
68	\\end{cases}
69
70	where
71
72	- :math:`x` is the forecast
73	- :math:`y` is the observation
74	- :math:`\\alpha` is the expectile level
75	- :math:`\\phi` is a convex function of a single variable
76	- :math:`\\phi'` is the subderivative of :math:`\\phi`
77	- :math:`S(x,y)` is the score.
78
79	Note that if :math:`\\phi` is differentiable then `\\phi'` is its derivative.
80
81	References:
82	- Gneiting, T. (2011). "Making and evaluating point forecasts",
83	J. Amer. Statist. Assoc.,
84	https://doi.org/10.1198/jasa.2011.r10138
85	- Taggart, R. (2021). "Evaluation of point forecasts for extreme events
86	using consistent scoring functions", Q. J. Royal Meteorol. Soc.,
87	https://doi.org/10.1002/qj.4206
88
89	"""
90	check_alpha(alpha)	2✔
91
92	if preserve_dims or reduce_dims:	2✔
93	reduce_dims = gather_dimensions(fcst.dims, obs.dims, reduce_dims=reduce_dims, preserve_dims=preserve_dims)	2✔
94
95	score_overfcst = (1 - alpha) * (phi(obs) - phi(fcst) - phi_prime(fcst) * (obs - fcst))	2✔
96	score_underfcst = alpha * (phi(obs) - phi(fcst) - phi_prime(fcst) * (obs - fcst))	2✔
97	result = score_overfcst.where(obs < fcst, score_underfcst)	2✔
98	result = apply_weights(result, weights=weights)	2✔
99	result = result.mean(dim=reduce_dims)	2✔
100
101	return result	2✔
102
103
104	def consistent_huber_score(	2✔
105	fcst: xr.DataArray,
106	obs: xr.DataArray,
107	*, # Force keywords arguments to be keyword-only
108	huber_param: float,
109	phi: Callable[[xr.DataArray], xr.DataArray],
110	phi_prime: Callable[[xr.DataArray], xr.DataArray],
111	reduce_dims: Optional[FlexibleDimensionTypes] = None,
112	preserve_dims: Optional[FlexibleDimensionTypes] = None,
113	weights: Optional[xr.DataArray] = None,
114	) -> xr.DataArray:
115	"""
116	Score that is consistent for the Huber mean functional with tuning parameter `tuning_param`,
117	based on convex function phi. See Taggart (2022), or Equation (11) from Taggart (2021).
118	See Taggart (2021), end of Section 3.4, for the standard formula.
119
120	Args:
121	fcst: array of forecast values.
122	obs: array of corresponding observation values.
123	huber_param: Huber mean tuning parameter. This corresponds to the transition point between
124	linear and quadratic loss for Huber loss. Must be positive.
125	phi: a convex function on the real numbers, accepting a single array like argument.
126	phi_prime: a subderivative of `phi`, accepting a single array like argument.
127	reduce_dims: Optionally specify which dimensions to reduce when
128	calculating the consistent Huber score. All other dimensions will be preserved. As a
129	special case, 'all' will allow all dimensions to be reduced. Only one
130	of `reduce_dims` and `preserve_dims` can be supplied. The default behaviour
131	if neither are supplied is to reduce all dims.
132	preserve_dims: Optionally specify which dimensions to preserve when calculating
133	the consistent Huber score. All other dimensions will be reduced. As a special case, 'all'
134	will allow all dimensions to be preserved. In this case, the result will be in
135	the same shape/dimensionality as the forecast, and the errors will be the consistent Huber
136	score at each point (i.e. single-value comparison against observed), and the
137	forecast and observed dimensions must match precisely. Only one of `reduce_dims`
138	and `preserve_dims` can be supplied. The default behaviour if neither are supplied
139	is to reduce all dims.
140	weights: Optionally provide an array for weighted averaging (e.g. by area, by latitude,
141	by population, custom)
142
143	Returns:
144	array of (mean) scores that is consistent for Huber mean functional,
145	with the dimensions specified by `dims`. If `dims` is `None`, the returned DataArray will have
146	only one entry, the overall mean score.
147
148	Raises:
149	ValueError: if `huber_param <= 0`.
150
151	References:
152	- Taggart, R. (2021). "Evaluation of point forecasts for extreme events
153	using consistent scoring functions", Q. J. Royal Meteorol. Soc.,
154	https://doi.org/10.1002/qj.4206
155	- Taggart, R. (2022). "Point forecasting and forecast evaluation with
156	generalized Huber loss", Electron. J. Statist. 16(1): 201-231.
157	https://doi.org/10.1214/21-ejs1957
158	"""
159	check_huber_param(huber_param)	2✔
160	if preserve_dims or reduce_dims:	2✔
161	reduce_dims = gather_dimensions(fcst.dims, obs.dims, reduce_dims=reduce_dims, preserve_dims=preserve_dims)	2✔
162
163	kappa = (fcst - obs).clip(min=-huber_param, max=huber_param)	2✔
164	result = 0.5 * (phi(obs) - phi(kappa + obs) + kappa * phi_prime(fcst))	2✔
165	result = apply_weights(result, weights=weights)	2✔
166	result = result.mean(dim=reduce_dims)	2✔
167
168	return result	2✔
169
170
171	def consistent_quantile_score(	2✔
172	fcst: xr.DataArray,
173	obs: xr.DataArray,
174	*, # Force keywords arguments to be keyword-only
175	alpha: float,
176	g: Callable[[xr.DataArray], xr.DataArray],
177	reduce_dims: Optional[FlexibleDimensionTypes] = None,
178	preserve_dims: Optional[FlexibleDimensionTypes] = None,
179	weights: Optional[xr.DataArray] = None,
180	) -> xr.DataArray:
181	"""
182	Score that is consistent for the alpha-quantile functional, based on nondecreasing function g.
183	See Gneiting (2011), or Equation (8) from Taggart (2022).
184
185	Args:
186	fcst: array of forecast values.
187	obs: array of corresponding observation values.
188	alpha: quantile level. Must be strictly between 0 and 1.
189	g: nondecreasing function on the real numbers, accepting a single array like argument.
190	reduce_dims: Optionally specify which dimensions to reduce when
191	calculating the consistent quantile score. All other dimensions will be preserved. As a
192	special case, 'all' will allow all dimensions to be reduced. Only one
193	of `reduce_dims` and `preserve_dims` can be supplied. The default behaviour
194	if neither are supplied is to reduce all dims.
195	preserve_dims: Optionally specify which dimensions to preserve when calculating
196	the consistent quantile score. All other dimensions will be reduced. As a special case, 'all'
197	will allow all dimensions to be preserved. In this case, the result will be in
198	the same shape/dimensionality as the forecast, and the errors will be the consistent quantile
199	score at each point (i.e. single-value comparison against observed), and the
200	forecast and observed dimensions must match precisely. Only one of `reduce_dims`
201	and `preserve_dims` can be supplied. The default behaviour if neither are supplied
202	is to reduce all dims.
203	weights: Optionally provide an array for weighted averaging (e.g. by area, by latitude,
204	by population, custom)
205
206	Returns:
207	array of (mean) scores that are consistent for alpha-quantile functional,
208	with the dimensions specified by `dims`. If `dims` is `None`, the returned DataArray will have
209	only one entry, the overall mean score.
210
211	Raises:
212	ValueError: if `alpha` is not strictly between 0 and 1.
213
214	Note:
215	.. math::
216
217	S(x, y) =
218	\\begin{cases}
219	(1 - \\alpha)(g(x) - g(y)), & y < x \\\\
220	\\alpha(g(y) - g(x)), & x \\leq y
221	\\end{cases}
222
223	where
224
225	- :math:`x` is the forecast
226	- :math:`y` is the observation
227	- :math:`\\alpha` is the quantile level
228	- :math:`g` is a nondecreasing function of a single variable
229	- :math:`S(x,y)` is the score.
230
231
232	References:
233	- Gneiting, T. (2011). "Making and evaluating point forecasts",
234	J. Amer. Statist. Assoc.,
235	https://doi.org/10.1198/jasa.2011.r10138
236	- Taggart, R. (2021). "Evaluation of point forecasts for extreme events
237	using consistent scoring functions", Q. J. Royal Meteorol. Soc.,
238	https://doi.org/10.1002/qj.4206
239	"""
240	check_alpha(alpha)	2✔
241	if preserve_dims or reduce_dims:	2✔
242	reduce_dims = gather_dimensions(fcst.dims, obs.dims, reduce_dims=reduce_dims, preserve_dims=preserve_dims)	2✔
243
244	score_overfcst = (1 - alpha) * (g(fcst) - g(obs))	2✔
245	score_underfcst = -alpha * (g(fcst) - g(obs))	2✔
246	result = score_overfcst.where(obs < fcst, score_underfcst)	2✔
247	result = apply_weights(result, weights=weights)	2✔
248	result = result.mean(dim=reduce_dims)	2✔
249
250	return result	2✔
251
252
253	def check_alpha(alpha: float) -> None:	2✔
254	"""Raises if quantile or expectile level `alpha` not in the open interval (0,1)."""
255	if alpha <= 0 or alpha >= 1:	2✔
256	raise ValueError("`alpha` must be strictly between 0 and 1")	2✔
257
258
259	def check_huber_param(huber_param: float) -> None:	2✔
260	"""Raises if `huber_param` is not positive."""
261	if huber_param <= 0:	2✔
262	raise ValueError("`huber_param` must be positive")	2✔

nci / scores / 9607256703

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous