alan-turing-institute / deepsensor / build 11455483170

22 Oct 2024 07:38AM UTC. Coverage: 81.626% (remained the same).
Triggered by: push (GitHub).
Committer: davidwilby, "update pre-commit ruff version".

2048 of 2509 relevant lines covered (81.63%), 1.63 hits per line.

Source file: /deepsensor/model/defaults.py (90.63% of lines covered). The only uncovered lines in this build are the two raise ValueError branches and the aux_at_contexts encoder-scale append.
from deepsensor.data.loader import TaskLoader

import numpy as np
import pandas as pd
import xarray as xr

from deepsensor.data.utils import (
    compute_xarray_data_resolution,
    compute_pandas_data_resolution,
)

from typing import List


def compute_greatest_data_density(task_loader: TaskLoader) -> int:
    """Computes data-informed settings for the model's internal grid density (ppu,
    points per unit).

    Loops over all context and target variables in the ``TaskLoader`` and
    computes the data resolution for each. The model ppu is then set to the
    maximum data ppu.

    Args:
        task_loader (:class:`~.data.loader.TaskLoader`):
            TaskLoader object containing context and target sets.

    Returns:
        max_density (int):
            The maximum data density (ppu) across all context and target
            variables, where 'density' is the number of points per unit of
            input space (in both spatial dimensions).
    """
    # Data density (points per unit) of each context/target variable
    data_densities = []
    for var in [*task_loader.context, *task_loader.target]:
        if isinstance(var, (xr.DataArray, xr.Dataset)):
            # Gridded variable: use data resolution
            data_resolution = compute_xarray_data_resolution(var)
        elif isinstance(var, (pd.DataFrame, pd.Series)):
            # Point-based variable: calculate density based on pairwise distances between observations
            data_resolution = compute_pandas_data_resolution(
                var, n_times=1000, percentile=5
            )
        else:
            raise ValueError(f"Unknown context input type: {type(var)}")
        data_density = int(1 / data_resolution)
        data_densities.append(data_density)
    max_density = int(max(data_densities))
    return max_density


def gen_decoder_scale(model_ppu: int) -> float:
    """Computes informed setting for the decoder SetConv scale.

    This sets the length scale of the Gaussian basis functions used to
    interpolate from the model's internal grid to the target locations.

    The decoder scale should be as small as possible given the model's
    internal grid. The value chosen is 1 / model_ppu (i.e. the length scale is
    equal to the model's internal grid spacing).

    Args:
        model_ppu (int):
            Model ppu (points per unit), i.e. the number of points per unit of
            input space.

    Returns:
        float: Decoder scale.
    """
    return 1 / model_ppu


def gen_encoder_scales(model_ppu: int, task_loader: TaskLoader) -> List[float]:
    """Computes data-informed settings for the encoder SetConv scale for each
    context set.

    This sets the length scale of the Gaussian basis functions used to encode
    the context sets.

    For off-grid station data, the scale should be as small as possible given
    the model's internal grid density (ppu, points per unit). The value chosen
    is 0.5 / model_ppu (i.e. half the model's internal resolution).

    For gridded data, the scale should be such that the functional
    representation smoothly interpolates the data. This is determined by
    computing the *data resolution* (the distance between the nearest two data
    points) for each context variable. The encoder scale is then set to 0.5 *
    data_resolution.

    Args:
        model_ppu (int):
            Model ppu (points per unit), i.e. the number of points per unit of
            input space.
        task_loader (:class:`~.data.loader.TaskLoader`):
            TaskLoader object containing context and target sets.

    Returns:
        list[float]: List of encoder scales for each context set.
    """
    encoder_scales = []
    for var in task_loader.context:
        if isinstance(var, (xr.DataArray, xr.Dataset)):
            encoder_scale = 0.5 * compute_xarray_data_resolution(var)
        elif isinstance(var, (pd.DataFrame, pd.Series)):
            encoder_scale = 0.5 / model_ppu
        else:
            raise ValueError(f"Unknown context input type: {type(var)}")
        encoder_scales.append(encoder_scale)

    if task_loader.aux_at_contexts:
        # Add encoder scale for the final auxiliary-at-contexts context set: use smallest possible
        # scale within model discretisation
        encoder_scales.append(0.5 / model_ppu)

    return encoder_scales
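
Taken together, these three helpers are computed in sequence from a single TaskLoader: the greatest data density fixes the model's internal grid, and the decoder and encoder scales follow from it. The sketch below is a minimal, hypothetical usage example rather than deepsensor's documented workflow: the random temperature grid and station observations are invented for illustration (in practice they would come from a DataProcessor, already normalised to the unit coordinate system), and the "time"/"x1"/"x2" naming plus the TaskLoader(context=..., target=...) call reflect the library's usual conventions but may differ between versions.

import numpy as np
import pandas as pd
import xarray as xr

from deepsensor.data.loader import TaskLoader
from deepsensor.model.defaults import (
    compute_greatest_data_density,
    gen_decoder_scale,
    gen_encoder_scales,
)

# Hypothetical gridded context: one time slice on a 0.02-unit grid over [0, 1] x [0, 1]
x1 = np.arange(0.0, 1.0, 0.02)
x2 = np.arange(0.0, 1.0, 0.02)
gridded_var = xr.DataArray(
    np.random.rand(1, x1.size, x2.size),
    dims=("time", "x1", "x2"),
    coords={"time": pd.to_datetime(["2020-01-01"]), "x1": x1, "x2": x2},
    name="temperature",
)

# Hypothetical off-grid "station" observations at 200 random locations
n_obs = 200
station_df = pd.DataFrame(
    {
        "time": pd.to_datetime(["2020-01-01"] * n_obs),
        "x1": np.random.rand(n_obs),
        "x2": np.random.rand(n_obs),
        "obs": np.random.rand(n_obs),
    }
).set_index(["time", "x1", "x2"])

task_loader = TaskLoader(context=[gridded_var, station_df], target=station_df)

model_ppu = compute_greatest_data_density(task_loader)       # >= int(1 / 0.02) = 50
decoder_scale = gen_decoder_scale(model_ppu)                 # 1 / model_ppu
encoder_scales = gen_encoder_scales(model_ppu, task_loader)  # one scale per context set

Working through the numbers for the gridded context alone: a 0.02-unit grid spacing gives int(1 / 0.02) = 50 points per unit, an encoder scale of 0.5 * 0.02 = 0.01, and a decoder scale of 1 / 50 = 0.02. The station context instead gets the model-grid-limited encoder scale 0.5 / model_ppu, and it can push the overall ppu above 50 if the 5th percentile of its pairwise observation distances is smaller than the grid spacing.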