alan-turing-institute / deepsensor / build 11455483170

22 Oct 2024 07:38AM UTC. Coverage: 81.626% (remained the same).
Triggered by: push (GitHub).
Committer: davidwilby, "update pre-commit ruff version".

2048 of 2509 relevant lines covered (81.63%), 1.63 hits per line.

Source file: /deepsensor/model/defaults.py (90.63% of lines covered). The only uncovered lines in this build are the two raise ValueError branches and the aux_at_contexts encoder-scale append.
from deepsensor.data.loader import TaskLoader

import numpy as np
import pandas as pd
import xarray as xr

from deepsensor.data.utils import (
    compute_xarray_data_resolution,
    compute_pandas_data_resolution,
)

from typing import List


def compute_greatest_data_density(task_loader: TaskLoader) -> int:
    """Computes data-informed settings for the model's internal grid density (ppu,
    points per unit).

    Loops over all context and target variables in the ``TaskLoader`` and
    computes the data resolution for each. The model ppu is then set to the
    maximum data ppu.

    Args:
        task_loader (:class:`~.data.loader.TaskLoader`):
            TaskLoader object containing context and target sets.

    Returns:
        max_density (int):
            The maximum data density (ppu) across all context and target
            variables, where 'density' is the number of points per unit of
            input space (in both spatial dimensions).
    """
    # Data density (points per unit) of each context/target variable
    data_densities = []
    for var in [*task_loader.context, *task_loader.target]:
        if isinstance(var, (xr.DataArray, xr.Dataset)):
            # Gridded variable: use data resolution
            data_resolution = compute_xarray_data_resolution(var)
        elif isinstance(var, (pd.DataFrame, pd.Series)):
            # Point-based variable: calculate density based on pairwise distances between observations
            data_resolution = compute_pandas_data_resolution(
                var, n_times=1000, percentile=5
            )
        else:
            raise ValueError(f"Unknown context input type: {type(var)}")
        data_density = int(1 / data_resolution)
        data_densities.append(data_density)
    max_density = int(max(data_densities))
    return max_density


def gen_decoder_scale(model_ppu: int) -> float:
    """Computes informed setting for the decoder SetConv scale.

    This sets the length scale of the Gaussian basis functions used to
    interpolate from the model's internal grid to the target locations.

    The decoder scale should be as small as possible given the model's
    internal grid. The value chosen is 1 / model_ppu (i.e. the length scale is
    equal to the model's internal grid spacing).

    Args:
        model_ppu (int):
            Model ppu (points per unit), i.e. the number of points per unit of
            input space.

    Returns:
        float: Decoder scale.
    """
    return 1 / model_ppu


def gen_encoder_scales(model_ppu: int, task_loader: TaskLoader) -> List[float]:
    """Computes data-informed settings for the encoder SetConv scale for each
    context set.

    This sets the length scale of the Gaussian basis functions used to encode
    the context sets.

    For off-grid station data, the scale should be as small as possible given
    the model's internal grid density (ppu, points per unit). The value chosen
    is 0.5 / model_ppu (i.e. half the model's internal resolution).

    For gridded data, the scale should be such that the functional
    representation smoothly interpolates the data. This is determined by
    computing the *data resolution* (the distance between the nearest two data
    points) for each context variable. The encoder scale is then set to 0.5 *
    data_resolution.

    Args:
        model_ppu (int):
            Model ppu (points per unit), i.e. the number of points per unit of
            input space.
        task_loader (:class:`~.data.loader.TaskLoader`):
            TaskLoader object containing context and target sets.

    Returns:
        list[float]: List of encoder scales for each context set.
    """
    encoder_scales = []
    for var in task_loader.context:
        if isinstance(var, (xr.DataArray, xr.Dataset)):
            encoder_scale = 0.5 * compute_xarray_data_resolution(var)
        elif isinstance(var, (pd.DataFrame, pd.Series)):
            encoder_scale = 0.5 / model_ppu
        else:
            raise ValueError(f"Unknown context input type: {type(var)}")
        encoder_scales.append(encoder_scale)

    if task_loader.aux_at_contexts:
        # Add encoder scale for the final auxiliary-at-contexts context set: use smallest possible
        # scale within model discretisation
        encoder_scales.append(0.5 / model_ppu)

    return encoder_scales
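
Taken together, these three helpers are computed in sequence from a single TaskLoader: the greatest data density fixes the model's internal grid, and the decoder and encoder scales follow from it. The sketch below is a minimal, hypothetical usage example rather than deepsensor's documented workflow: the random temperature grid and station observations are invented for illustration (in practice they would come from a DataProcessor, already normalised to the unit coordinate system), and the "time"/"x1"/"x2" naming plus the TaskLoader(context=..., target=...) call reflect the library's usual conventions but may differ between versions.

import numpy as np
import pandas as pd
import xarray as xr

from deepsensor.data.loader import TaskLoader
from deepsensor.model.defaults import (
    compute_greatest_data_density,
    gen_decoder_scale,
    gen_encoder_scales,
)

# Hypothetical gridded context: one time slice on a 0.02-unit grid over [0, 1] x [0, 1]
x1 = np.arange(0.0, 1.0, 0.02)
x2 = np.arange(0.0, 1.0, 0.02)
gridded_var = xr.DataArray(
    np.random.rand(1, x1.size, x2.size),
    dims=("time", "x1", "x2"),
    coords={"time": pd.to_datetime(["2020-01-01"]), "x1": x1, "x2": x2},
    name="temperature",
)

# Hypothetical off-grid "station" observations at 200 random locations
n_obs = 200
station_df = pd.DataFrame(
    {
        "time": pd.to_datetime(["2020-01-01"] * n_obs),
        "x1": np.random.rand(n_obs),
        "x2": np.random.rand(n_obs),
        "obs": np.random.rand(n_obs),
    }
).set_index(["time", "x1", "x2"])

task_loader = TaskLoader(context=[gridded_var, station_df], target=station_df)

model_ppu = compute_greatest_data_density(task_loader)       # >= int(1 / 0.02) = 50
decoder_scale = gen_decoder_scale(model_ppu)                 # 1 / model_ppu
encoder_scales = gen_encoder_scales(model_ppu, task_loader)  # one scale per context set

Working through the numbers for the gridded context alone: a 0.02-unit grid spacing gives int(1 / 0.02) = 50 points per unit, an encoder scale of 0.5 * 0.02 = 0.01, and a decoder scale of 1 / 50 = 0.02. The station context instead gets the model-grid-limited encoder scale 0.5 / model_ppu, and it can push the overall ppu above 50 if the 5th percentile of its pairwise observation distances is smaller than the grid spacing.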