• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

paulmthompson / WhiskerToolbox / 15664585996

15 Jun 2025 03:13PM UTC coverage: 66.58% (+1.4%) from 65.21%
15664585996

push

github

paulmthompson
Refactor min/max statistical functions with NaN error handling and TimeFrameIndex support

- Replace 0.0f return values with std::numeric_limits<float>::quiet_NaN() for invalid operations
  * Consistent with IEEE 754 standards and mathematical library conventions
  * Matches existing DataAggregation.cpp pattern of using NaN for missing data
  * Enables proper error detection with std::isnan()

- Create layered architecture for min/max functions:
  * Core template implementations: calculate_min_impl/calculate_max_impl(Iterator, Iterator)
  * Supporting vector implementations: calculate_min_impl/calculate_max_impl(vector, start, end)
  * Span-based operations: calculate_min/calculate_max(std::span<const float>)
  * High-level interfaces:
    - calculate_min/calculate_max(AnalogTimeSeries) - entire series
    - calculate_min/calculate_max(AnalogTimeSeries, start, end) - index-based range
    - calculate_min_in_time_range/calculate_max_in_time_range(AnalogTimeSeries, start_time, end_time) - NEW TimeFrameIndex-based

- Add comprehensive test coverage:
  * Span-based operations with empty/partial spans
  * TimeFrameIndex range operations for sparse and dense storage
  * Boundary approximation when exact times don't exist
  * Consistency verification between all calculation methods
  * Edge cases: negative values, identical values, invalid ranges
  * Vector implementation testing with various range conditions

- Maintain zero-copy efficiency using std::span for range operations
- Ensure all methods return identical results for equivalent data ranges

205 of 207 new or added lines in 4 files covered. (99.03%)

13 existing lines in 2 files now uncovered.

8405 of 12624 relevant lines covered (66.58%)

1215.17 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

94.26
/src/WhiskerToolbox/DataManager/AnalogTimeSeries/utils/statistics.cpp
1
#include "statistics.hpp"
2

3
#include "AnalogTimeSeries/Analog_Time_Series.hpp"
4

5
#include <algorithm>
6
#include <cmath>
7
#include <limits>
8
#include <numeric>
9

10
// ========== Mean ==========
11

12
float calculate_mean_impl(std::vector<float> const & data, size_t start, size_t end) {
57✔
13
    if (data.empty() || start >= end || start >= data.size() || end > data.size()) {
57✔
14
        return std::numeric_limits<float>::quiet_NaN();
3✔
15
    }
16
    return calculate_mean_impl(data.begin() + start, data.begin() + end);
54✔
17
}
18

19
/**
 * @brief Mean of every element viewed by a span.
 *
 * @param data_span Non-owning view of the samples.
 * @return Whatever the iterator-based calculate_mean_impl overload yields for
 *         the span's [begin, end) range.
 */
float calculate_mean(std::span<const float> data_span) {
    auto const first = data_span.begin();
    auto const last = data_span.end();
    return calculate_mean_impl(first, last);
}
22

23
/**
 * @brief Mean over all samples of a series.
 *
 * @param series Series whose getAnalogTimeSeries() data is averaged.
 * @return Mean of the full index range [0, size); quiet NaN when the vector
 *         overload rejects the range (e.g. no samples).
 */
float calculate_mean(AnalogTimeSeries const & series) {
    auto const & samples = series.getAnalogTimeSeries();
    size_t const sample_count = samples.size();
    return calculate_mean_impl(samples, 0, sample_count);
}
27

28
/**
 * @brief Mean over the half-open index range [start, end) of a series.
 *
 * @param series Series whose getAnalogTimeSeries() data is averaged.
 * @param start  Index of the first sample included.
 * @param end    Index one past the last sample included.
 * @return Mean of the range, or quiet NaN when an index is negative, the range
 *         is empty, or the vector overload rejects it as out of bounds.
 */
float calculate_mean(AnalogTimeSeries const & series, int64_t start, int64_t end) {
    auto const & samples = series.getAnalogTimeSeries();

    // A non-negative start combined with start < end also guarantees end > 0,
    // so both casts to size_t below are value-preserving.
    bool const range_is_valid = start >= 0 && start < end;
    if (range_is_valid) {
        return calculate_mean_impl(samples, static_cast<size_t>(start), static_cast<size_t>(end));
    }
    return std::numeric_limits<float>::quiet_NaN();
}
35

36
/**
 * @brief Mean of the samples selected by a TimeFrameIndex range.
 *
 * @param series     Series to query.
 * @param start_time First bound passed to getDataInTimeFrameIndexRange.
 * @param end_time   Second bound passed to getDataInTimeFrameIndexRange.
 * @return calculate_mean applied to the data returned by
 *         series.getDataInTimeFrameIndexRange(start_time, end_time).
 */
float calculate_mean_in_time_range(AnalogTimeSeries const & series, TimeFrameIndex start_time, TimeFrameIndex end_time) {
    return calculate_mean(series.getDataInTimeFrameIndexRange(start_time, end_time));
}
41

42
// ========== Standard Deviation ==========
43

44
/**
 * @brief Population standard deviation (divides by N) of all samples.
 *
 * @param series Series whose getAnalogTimeSeries() data is analysed.
 * @return sqrt(sum((x - mean)^2) / N), or quiet NaN when there are no samples.
 */
float calculate_std_dev(AnalogTimeSeries const & series) {
    auto const & samples = series.getAnalogTimeSeries();
    if (samples.empty()) {
        return std::numeric_limits<float>::quiet_NaN();
    }

    float const mean = calculate_mean(series);

    // Accumulate squared deviations front to back in single precision.
    float squared_deviation_sum = 0.0f;
    for (float const value : samples) {
        float const deviation = value - mean;
        squared_deviation_sum = squared_deviation_sum + deviation * deviation;
    }

    float const sample_count = static_cast<float>(samples.size());
    return std::sqrt(squared_deviation_sum / sample_count);
}
55

56
/**
 * @brief Population standard deviation over the index range [start, end).
 *
 * @param series Series whose getAnalogTimeSeries() data is analysed.
 * @param start  Index of the first sample included.
 * @param end    Index one past the last sample included.
 * @return sqrt(sum((x - mean)^2) / (end - start)), or quiet NaN when the range
 *         is empty, starts below zero, or extends past the data.
 */
float calculate_std_dev(AnalogTimeSeries const & series, int64_t start, int64_t end) {
    auto const & samples = series.getAnalogTimeSeries();

    // 0 <= start < end <= size() implies the data is non-empty.
    bool const range_is_valid = start >= 0 && start < end &&
                                end <= static_cast<int64_t>(samples.size());
    if (!range_is_valid) {
        return std::numeric_limits<float>::quiet_NaN();
    }

    float const mean = calculate_mean(series, start, end);

    float squared_deviation_sum = 0.0f;
    auto const last = samples.begin() + end;
    for (auto it = samples.begin() + start; it != last; ++it) {
        float const deviation = *it - mean;
        squared_deviation_sum = squared_deviation_sum + deviation * deviation;
    }

    float const sample_count = static_cast<float>((end - start));
    return std::sqrt(squared_deviation_sum / sample_count);
}
67

68
float calculate_std_dev_approximate(AnalogTimeSeries const & series,
19✔
69
                                    float sample_percentage,
70
                                    size_t min_sample_threshold) {
71
    auto const & data = series.getAnalogTimeSeries();
19✔
72
    if (data.empty()) {
19✔
73
        return std::numeric_limits<float>::quiet_NaN();
1✔
74
    }
75

76
    size_t const data_size = data.size();
18✔
77
    size_t const target_sample_size = static_cast<size_t>(data_size * sample_percentage / 100.0f);
18✔
78

79
    // Fall back to exact calculation if sample would be too small
80
    if (target_sample_size < min_sample_threshold) {
18✔
81
        return calculate_std_dev(series);
16✔
82
    }
83

84
    // Use systematic sampling for better cache performance
85
    size_t const step_size = data_size / target_sample_size;
2✔
86
    if (step_size == 0) {
2✔
UNCOV
87
        return calculate_std_dev(series);
×
88
    }
89

90
    // Calculate mean of sampled data
91
    float sum = 0.0f;
2✔
92
    size_t sample_count = 0;
2✔
93
    for (size_t i = 0; i < data_size; i += step_size) {
1,102✔
94
        sum += data[i];
1,100✔
95
        ++sample_count;
1,100✔
96
    }
97
    float const mean = sum / static_cast<float>(sample_count);
2✔
98

99
    // Calculate variance of sampled data
100
    float variance_sum = 0.0f;
2✔
101
    for (size_t i = 0; i < data_size; i += step_size) {
1,102✔
102
        float const diff = data[i] - mean;
1,100✔
103
        variance_sum += diff * diff;
1,100✔
104
    }
105

106
    return std::sqrt(variance_sum / static_cast<float>(sample_count));
2✔
107
}
108

109
float calculate_std_dev_adaptive(AnalogTimeSeries const & series,
5✔
110
                                 size_t initial_sample_size,
111
                                 size_t max_sample_size,
112
                                 float convergence_tolerance) {
113
    auto const & data = series.getAnalogTimeSeries();
5✔
114
    if (data.empty()) {
5✔
115
        return std::numeric_limits<float>::quiet_NaN();
1✔
116
    }
117

118
    size_t const data_size = data.size();
4✔
119
    if (data_size <= max_sample_size) {
4✔
120
        return calculate_std_dev(series);
2✔
121
    }
122

123
    size_t current_sample_size = std::min(initial_sample_size, data_size);
2✔
124
    float previous_std_dev = 0.0f;
2✔
125
    bool first_iteration = true;
2✔
126

127
    while (current_sample_size <= max_sample_size) {
4✔
128
        // Use systematic sampling
129
        size_t const step_size = data_size / current_sample_size;
4✔
130
        if (step_size == 0) break;
4✔
131

132
        // Calculate mean of current sample
133
        float sum = 0.0f;
4✔
134
        size_t actual_sample_count = 0;
4✔
135
        for (size_t i = 0; i < data_size; i += step_size) {
1,804✔
136
            sum += data[i];
1,800✔
137
            ++actual_sample_count;
1,800✔
138
        }
139
        float const mean = sum / static_cast<float>(actual_sample_count);
4✔
140

141
        // Calculate standard deviation of current sample
142
        float variance_sum = 0.0f;
4✔
143
        for (size_t i = 0; i < data_size; i += step_size) {
1,804✔
144
            float const diff = data[i] - mean;
1,800✔
145
            variance_sum += diff * diff;
1,800✔
146
        }
147
        float const current_std_dev = std::sqrt(variance_sum / static_cast<float>(actual_sample_count));
4✔
148

149
        // Check for convergence (skip first iteration)
150
        if (!first_iteration) {
4✔
151
            float const relative_change = std::abs(current_std_dev - previous_std_dev) /
2✔
152
                                          std::max(current_std_dev, previous_std_dev);
2✔
153
            if (relative_change < convergence_tolerance) {
2✔
154
                return current_std_dev;
2✔
155
            }
156
        }
157

158
        previous_std_dev = current_std_dev;
2✔
159
        first_iteration = false;
2✔
160

161
        // Increase sample size for next iteration (double it)
162
        current_sample_size = std::min(current_sample_size * 2, max_sample_size);
2✔
163
    }
164

165
    return previous_std_dev;
×
166
}
167

168
// ========== Minimum ==========
169

170
float calculate_min_impl(std::vector<float> const & data, size_t start, size_t end) {
16✔
171
    if (data.empty() || start >= end || start >= data.size() || end > data.size()) {
16✔
172
        return std::numeric_limits<float>::quiet_NaN();
3✔
173
    }
174
    return calculate_min_impl(data.begin() + start, data.begin() + end);
13✔
175
}
176

177
/**
 * @brief Minimum of every element viewed by a span.
 *
 * @param data_span Non-owning view of the samples.
 * @return Whatever the iterator-based calculate_min_impl overload yields for
 *         the span's [begin, end) range.
 */
float calculate_min(std::span<const float> data_span) {
    auto const first = data_span.begin();
    auto const last = data_span.end();
    return calculate_min_impl(first, last);
}
180

181
/**
 * @brief Minimum over all samples of a series.
 *
 * @param series Series whose getAnalogTimeSeries() data is searched.
 * @return Minimum of the full index range [0, size); quiet NaN when the vector
 *         overload rejects the range (e.g. no samples).
 */
float calculate_min(AnalogTimeSeries const & series) {
    auto const & samples = series.getAnalogTimeSeries();
    size_t const sample_count = samples.size();
    return calculate_min_impl(samples, 0, sample_count);
}
185

186
/**
 * @brief Minimum over the half-open index range [start, end) of a series.
 *
 * @param series Series whose getAnalogTimeSeries() data is searched.
 * @param start  Index of the first sample included.
 * @param end    Index one past the last sample included.
 * @return Minimum of the range, or quiet NaN when an index is negative, the
 *         range is empty, or the vector overload rejects it as out of bounds.
 */
float calculate_min(AnalogTimeSeries const & series, int64_t start, int64_t end) {
    auto const & samples = series.getAnalogTimeSeries();

    // A non-negative start combined with start < end also guarantees end > 0,
    // so both casts to size_t below are value-preserving.
    bool const range_is_valid = start >= 0 && start < end;
    if (range_is_valid) {
        return calculate_min_impl(samples, static_cast<size_t>(start), static_cast<size_t>(end));
    }
    return std::numeric_limits<float>::quiet_NaN();
}
193

194
/**
 * @brief Minimum of the samples selected by a TimeFrameIndex range.
 *
 * @param series     Series to query.
 * @param start_time First bound passed to getDataInTimeFrameIndexRange.
 * @param end_time   Second bound passed to getDataInTimeFrameIndexRange.
 * @return calculate_min applied to the data returned by
 *         series.getDataInTimeFrameIndexRange(start_time, end_time).
 */
float calculate_min_in_time_range(AnalogTimeSeries const & series, TimeFrameIndex start_time, TimeFrameIndex end_time) {
    return calculate_min(series.getDataInTimeFrameIndexRange(start_time, end_time));
}
198

199
// ========== Maximum ==========
200

201
float calculate_max_impl(std::vector<float> const & data, size_t start, size_t end) {
16✔
202
    if (data.empty() || start >= end || start >= data.size() || end > data.size()) {
16✔
203
        return std::numeric_limits<float>::quiet_NaN();
3✔
204
    }
205
    return calculate_max_impl(data.begin() + start, data.begin() + end);
13✔
206
}
207

208
/**
 * @brief Maximum of every element viewed by a span.
 *
 * @param data_span Non-owning view of the samples.
 * @return Whatever the iterator-based calculate_max_impl overload yields for
 *         the span's [begin, end) range.
 */
float calculate_max(std::span<const float> data_span) {
    auto const first = data_span.begin();
    auto const last = data_span.end();
    return calculate_max_impl(first, last);
}
211

212
/**
 * @brief Maximum over all samples of a series.
 *
 * @param series Series whose getAnalogTimeSeries() data is searched.
 * @return Maximum of the full index range [0, size); quiet NaN when the vector
 *         overload rejects the range (e.g. no samples).
 */
float calculate_max(AnalogTimeSeries const & series) {
    auto const & samples = series.getAnalogTimeSeries();
    size_t const sample_count = samples.size();
    return calculate_max_impl(samples, 0, sample_count);
}
216

217
/**
 * @brief Maximum over the half-open index range [start, end) of a series.
 *
 * @param series Series whose getAnalogTimeSeries() data is searched.
 * @param start  Index of the first sample included.
 * @param end    Index one past the last sample included.
 * @return Maximum of the range, or quiet NaN when an index is negative, the
 *         range is empty, or the vector overload rejects it as out of bounds.
 */
float calculate_max(AnalogTimeSeries const & series, int64_t start, int64_t end) {
    auto const & samples = series.getAnalogTimeSeries();

    // A non-negative start combined with start < end also guarantees end > 0,
    // so both casts to size_t below are value-preserving.
    bool const range_is_valid = start >= 0 && start < end;
    if (range_is_valid) {
        return calculate_max_impl(samples, static_cast<size_t>(start), static_cast<size_t>(end));
    }
    return std::numeric_limits<float>::quiet_NaN();
}
224

225
/**
 * @brief Maximum of the samples selected by a TimeFrameIndex range.
 *
 * @param series     Series to query.
 * @param start_time First bound passed to getDataInTimeFrameIndexRange.
 * @param end_time   Second bound passed to getDataInTimeFrameIndexRange.
 * @return calculate_max applied to the data returned by
 *         series.getDataInTimeFrameIndexRange(start_time, end_time).
 */
float calculate_max_in_time_range(AnalogTimeSeries const & series, TimeFrameIndex start_time, TimeFrameIndex end_time) {
    return calculate_max(series.getDataInTimeFrameIndexRange(start_time, end_time));
}
229

230

231

STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc