• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

rafaelpadilla / 3W / 24912462866

24 Apr 2026 09:21PM UTC coverage: 76.362% (-3.1%) from 79.464%
24912462866

push

github

web-flow
Merge pull request #73 from rafaelpadilla/eduardo/refactor_data_operations

Refactor of data operations, trainers and models.

244 of 339 branches covered (71.98%)

Branch coverage included in aggregate %.

1317 of 1706 new or added lines in 50 files covered. (77.2%)

28 existing lines in 5 files now uncovered.

2124 of 2762 relevant lines covered (76.9%)

0.77 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

96.3
/toolkit/ThreeWToolkit/preprocessing/normalize.py
1
import numpy as np
1✔
2
import pandas as pd
1✔
3
from typing import Literal
1✔
4
from pydantic import Field, field_validator, PrivateAttr
1✔
5
from ..core.base_dataset import BaseDataset
1✔
6
from ..core.base_preprocessing import BasePreprocessing, BasePreprocessingConfig
1✔
7
from ..core.dataset_outputs import DatasetOutputs
1✔
8
from .clean_signals import _3W_CATEGORICAL_FEATURES
1✔
9

10

11
class NormalizeConfig(BasePreprocessingConfig):
    """Configuration for the Normalize preprocessing step.

    Fields:
        norm: Either one of the named norms ``"l1"``, ``"l2"``, ``"max"`` or a
            strictly positive number used as a custom norm order.
        exclude_features: Column names that ``Normalize.transform`` leaves
            untouched.
        eps: Constant added to the scale so the division in
            ``Normalize.transform`` never hits an exact zero.
    """

    norm: Literal["l1", "l2", "max"] | float = Field(
        default="l2",
        description="Normalization method: 'l1', 'l2', 'max' for standard methods, or a custom numeric norm value.",
    )
    exclude_features: list[str] = Field(
        default=_3W_CATEGORICAL_FEATURES,
        description="Feature names to exclude from normalization. Categorical features left unchanged.",
    )
    eps: float = Field(
        default=1e-6,
        description="Small constant added to denominator to prevent division by zero.",
    )
    # The lambda defers the lookup of ``Normalize``, which is defined further
    # down in this module and does not exist yet while this class body runs.
    _target: type = PrivateAttr(default_factory=lambda: Normalize)

    @field_validator("norm")
    @classmethod
    def validate_norm(
        cls, value: Literal["l1", "l2", "max"] | float
    ) -> Literal["l1", "l2", "max"] | float:
        """Validate the ``norm`` field.

        Args:
            value: Candidate value for ``norm``.

        Returns:
            The value unchanged when it is acceptable.

        Raises:
            ValueError: If ``value`` is a string other than ``'l1'``, ``'l2'``
                or ``'max'``, or a number that is not strictly positive
                (NaN included).
        """
        if isinstance(value, str):
            if value not in {"l1", "l2", "max"}:
                raise ValueError("norm must be 'l1', 'l2', 'max' or a float value.")
            return value
        # ``not value > 0`` (rather than ``value <= 0``) also rejects NaN,
        # because every ordering comparison involving NaN is False.
        if isinstance(value, (int, float)) and not value > 0:
            raise ValueError("If norm is a numeric value, it must be greater than 0.")
        return value
1✔
37

38

39
class Normalize(BasePreprocessing):
    """
    A preprocessing step that centers and rescales signal columns.

    ``fit`` iterates over a dataset twice: first to compute a per-column global
    mean, then to compute a per-column scale around that mean. The scale is

    * the p-th root of the mean absolute p-th power deviation for a finite
      norm order ``p`` (``p=1`` for ``"l1"``, ``p=2`` for ``"l2"``), or
    * the largest absolute deviation for the ``"max"`` (infinite) norm,

    plus ``config.eps`` in both cases. ``transform`` then maps each
    non-excluded column ``x`` to ``(x - mean) / scale``.

    Attributes:
        config (NormalizeConfig): Configuration object containing normalization parameters
        norm (float): Numeric norm order resolved from ``config.norm``
        global_average (pd.Series | None): Per-column mean, ``None`` until ``fit`` is called
        global_moment (pd.Series | None): Per-column scale, ``None`` until ``fit`` is called
    """

    # Numeric norm order for each named norm accepted by NormalizeConfig.
    _NAMED_NORMS = {"l1": 1.0, "l2": 2.0, "max": np.inf}

    def __init__(
        self,
        config: NormalizeConfig,
    ):
        """
        Initialize the Normalize step with the provided configuration.

        Args:
            config (NormalizeConfig): Configuration containing the norm type,
                the features to exclude and the epsilon added to the scale
        """
        self.config: NormalizeConfig = config

        # Named norms are translated to their order; a numeric norm is used as given.
        if isinstance(self.config.norm, str):
            self.norm = self._NAMED_NORMS[self.config.norm]
        else:
            self.norm = self.config.norm

        self.global_average: pd.Series | None = None
        self.global_moment: pd.Series | None = None

    @staticmethod
    def _stack_event_stats(per_event: list) -> pd.DataFrame:
        """Stack per-event column statistics into a frame with one row per event.

        Raises:
            ValueError: If no event contributed any statistics.
        """
        if not per_event:
            # pd.concat would raise an opaque "No objects to concatenate" here.
            raise ValueError(
                "Normalize: cannot fit on an empty dataset (no events found)."
            )
        return pd.concat(per_event, axis=1).transpose()

    def _compute_global_average(self, data: BaseDataset) -> None:
        """Compute the global average (mean) for each signal column across all events."""
        event_sums = []
        event_counts = []
        for event in data:
            event_sums.append(event.signal.sum())
            event_counts.append(event.signal.count())

        sums = self._stack_event_stats(event_sums)
        counts = self._stack_event_stats(event_counts)

        # mean-of-sums / mean-of-counts: both means share the same divisor, so
        # this is the pooled mean over every counted sample.
        self.global_average = sums.mean() / counts.mean()

    def _compute_global_moments(self, data: BaseDataset) -> None:
        """Compute the per-column scale for a finite norm order (e.g. std for L2)."""
        event_moments = []
        event_counts = []
        for event in data:
            deviation = (event.signal - self.global_average).abs()
            event_moments.append(deviation.pow(self.norm).sum())
            event_counts.append(event.signal.count())

        moments = self._stack_event_stats(event_moments)
        counts = self._stack_event_stats(event_counts)

        pooled_moment = moments.mean() / counts.mean()
        # Take the root to get back to the original scale; eps keeps the
        # divisor used by transform away from zero.
        self.global_moment = pooled_moment.pow(1 / self.norm) + self.config.eps

    def _compute_global_max(self, data: BaseDataset) -> None:
        """Compute the per-column largest absolute deviation from the global mean."""
        event_maxes = [
            (event.signal - self.global_average).abs().max() for event in data
        ]
        maxes = self._stack_event_stats(event_maxes)
        # eps keeps the divisor used by transform away from zero.
        self.global_moment = maxes.max() + self.config.eps

    def fit(self, data: BaseDataset) -> None:
        """
        Compute the per-column mean and scale from every event in a dataset.

        The dataset is iterated twice: once for the mean and once for the
        scale around that mean.

        Args:
            data: Iterable dataset whose events expose a ``signal`` DataFrame

        Raises:
            ValueError: If the dataset yields no events.
        """
        self._compute_global_average(data)
        # Dispatch on the resolved order so a numeric infinite norm takes the
        # same path as "max"; the p-th-moment formula is undefined for p = inf.
        if np.isinf(self.norm):
            self._compute_global_max(data)
        else:
            self._compute_global_moments(data)

    def transform(self, data: DatasetOutputs) -> DatasetOutputs:
        """
        Apply normalization to the 'signal' data using computed statistics.

        Performs ``(x - mean) / scale`` on every signal column that is not
        listed in ``config.exclude_features``; excluded columns are returned
        unchanged. The input signal is copied, not modified in place.

        Args:
            data: DatasetOutputs object containing signal DataFrame

        Returns:
            DatasetOutputs: Transformed data with normalized signal DataFrame

        Raises:
            ValueError: If ``fit`` has not been called yet.
        """
        if self.global_average is None or self.global_moment is None:
            raise ValueError(
                "Normalize: fit must be called before transform to compute statistics."
            )

        signal = data.signal.copy()
        columns_to_normalize = [
            col for col in signal.columns if col not in self.config.exclude_features
        ]

        if columns_to_normalize:
            centered = (
                signal.loc[:, columns_to_normalize]
                - self.global_average.loc[columns_to_normalize]
            )
            signal.loc[:, columns_to_normalize] = (
                centered / self.global_moment.loc[columns_to_normalize]
            )

        return DatasetOutputs(signal=signal, label=data.label, metadata=data.metadata)
1✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc