• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

paulmthompson / WhiskerToolbox / 17920603410

22 Sep 2025 03:39PM UTC coverage: 71.97% (-0.05%) from 72.02%
17920603410

push

github

paulmthompson
all tests pass

277 of 288 new or added lines in 8 files covered. (96.18%)

520 existing lines in 35 files now uncovered.

40275 of 55961 relevant lines covered (71.97%)

1225.8 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

66.09
/src/DataManager/utils/TableView/transforms/PCATransform.cpp
1
#include "PCATransform.hpp"
2

3
#include "utils/TableView/columns/Column.h"
4
#include "utils/TableView/core/TableViewBuilder.h"
5
#include "utils/TableView/interfaces/IRowSelector.h"
6
#include "utils/TableView/interfaces/MultiComputerOutputView.hpp"
7

8
#include <algorithm>
9
#include <cmath>
10
#include <sstream>
11
#include <stdexcept>
12

13
using std::size_t;
14

15
// Utility to check numeric type by type_index against supported scalar types
16
// Returns true when `ti` identifies one of the scalar element types that PCA
// accepts as a feature column: double, float, int or int64_t.
static bool isNumericType(std::type_index ti) {
    std::type_index const supported[] = {
            std::type_index(typeid(double)),
            std::type_index(typeid(float)),
            std::type_index(typeid(int)),
            std::type_index(typeid(int64_t)),
    };
    for (auto const & candidate: supported) {
        if (candidate == ti) {
            return true;
        }
    }
    return false;
}
22

23
void PCAMultiColumnComputer::fitIfNeeded() const {
2✔
24
    if (m_cache.has_value()) return;
2✔
25

26
    arma::mat X = m_X;// copy local to allow centering/standardizing
2✔
27

28
    // Center / standardize
29
    arma::rowvec mean, stddev;
2✔
30
    if (m_center || m_standardize) {
2✔
31
        mean = arma::mean(X, 0);
4✔
32
        X.each_row() -= mean;
4✔
33
    }
34
    if (m_standardize) {
2✔
35
        stddev = arma::stddev(X, 0, 0);// normalize by N-1 (default unbiased=0)
×
36
        for (arma::uword j = 0; j < X.n_cols; ++j) {
×
37
            double s = stddev(j);
×
38
            if (s > 0) X.col(j) /= s;// avoid divide by zero; constant columns become zeros
×
39
        }
40
    }
41

42
    if (X.n_rows == 0 || X.n_cols == 0) {
2✔
43
        // No valid data
44
        FitCache empty;
×
45
        empty.scores.set_size(0, 0);
×
46
        empty.explained.set_size(0);
×
47
        empty.names.clear();
×
48
        m_cache = std::move(empty);
×
49
        return;
×
50
    }
×
51

52
    // PCA via SVD: X = U S V^T; columns of V are PCs; scores = X * V
53
    arma::mat U, V;
2✔
54
    arma::vec s;
2✔
55
    bool ok = arma::svd_econ(U, s, V, X, "both");
2✔
56
    if (!ok) {
2✔
57
        throw std::runtime_error("PCAMultiColumnComputer: SVD failed");
×
58
    }
59

60
    // Explained variance proportions
61
    arma::vec var = arma::square(s) / static_cast<double>(X.n_rows - (m_center ? 1 : 0));
4✔
62
    double const total_var = arma::accu(var);
2✔
63
    arma::vec explained;
2✔
64
    if (total_var > 0.0) {
2✔
65
        explained = var / total_var;
4✔
66
    } else {
67
        explained = arma::vec(var.n_elem, arma::fill::zeros);
×
68
    }
69

70
    // Scores = X * V (rows x comps)
71
    arma::mat scores = X * V;
4✔
72

73
    // Debug: Print scores matrix dimensions
74
    std::cerr << "DEBUG PCA fit: scores matrix dimensions: " << scores.n_rows << " x " << scores.n_cols << std::endl;
2✔
75
    std::cerr << "DEBUG PCA fit: V matrix dimensions: " << V.n_rows << " x " << V.n_cols << std::endl;
2✔
76
    std::cerr << "DEBUG PCA fit: s vector size: " << s.n_elem << std::endl;
2✔
77

78
    // Names with variance explained
79
    std::vector<std::string> names;
2✔
80
    names.reserve(V.n_cols);
2✔
81
    for (arma::uword j = 0; j < V.n_cols; ++j) {
12✔
82
        double const pct = explained(j) * 100.0;
10✔
83
        std::ostringstream os;
10✔
84
        os.setf(std::ios::fixed);
10✔
85
        os.precision(1);
10✔
86
        os << "PC" << (j + 1) << " (" << pct << "%)";
10✔
87
        names.emplace_back(os.str());
10✔
88
    }
10✔
89

90
    FitCache cache;
2✔
91
    cache.scores = std::move(scores);
2✔
92
    cache.explained = std::move(explained);
2✔
93
    cache.names = std::move(names);
2✔
94
    m_cache = std::move(cache);
2✔
95
}
2✔
96

97
// Returns the PCA scores as one std::vector<double> per principal component
// (outer index = component, inner index = row of the fitted matrix). The
// execution plan is ignored; the model is fitted on first use via
// fitIfNeeded(), whose exceptions propagate to the caller. The entity-id half
// of the pair is always std::monostate.
std::pair<std::vector<std::vector<double>>, ColumnEntityIds> PCAMultiColumnComputer::computeBatch(ExecutionPlan const & /*plan*/) const {
    fitIfNeeded();
    std::vector<std::vector<double>> outputs;
    if (!m_cache.has_value()) return {std::move(outputs), std::monostate{}};

    arma::mat const & S = m_cache->scores;

    // Copy each score column through its column iterators.
    outputs.reserve(S.n_cols);
    for (arma::uword j = 0; j < S.n_cols; ++j) {
        outputs.emplace_back(S.begin_col(j), S.end_col(j));
    }

    // Move rather than copy the result into the returned pair.
    return {std::move(outputs), std::monostate{}};
}
×
116

117
auto PCAMultiColumnComputer::getOutputNames() const -> std::vector<std::string> {
2✔
118
    fitIfNeeded();
2✔
119
    if (m_cache && !m_cache->names.empty()) return m_cache->names;
2✔
120
    // Fallback naming if unfitted/empty
121
    std::vector<std::string> names;
×
122
    names.reserve(m_X.n_cols);
×
123
    for (size_t i = 0; i < m_X.n_cols; ++i) {
×
124
        names.emplace_back("PC" + std::to_string(i + 1));
×
125
    }
126
    return names;
×
127
}
×
128

129
// Chooses the feature columns for PCA, in the order the source table reports
// them. A column is selected when it passes the include list (if one is
// configured), is not on the exclude list, and has a numeric element type.
//
// Throws std::runtime_error when a column named in the include list (and not
// also excluded) is missing from the table or is not numeric, and when no
// column is left after filtering.
auto PCATransform::selectNumericColumns(TableView const & source) const -> std::vector<std::string> {
    auto const & wanted = m_config.include;
    auto const & unwanted = m_config.exclude;
    bool const restrictToIncluded = !wanted.empty();
    bool const hasExclusions = !unwanted.empty();

    auto const allColumns = source.getColumnNames();
    std::vector<std::string> selected;
    selected.reserve(allColumns.size());

    std::set<std::string> const includeLookup(wanted.begin(), wanted.end());
    std::set<std::string> const excludeLookup(unwanted.begin(), unwanted.end());

    for (auto const & column: allColumns) {
        bool const passesInclude = !restrictToIncluded || includeLookup.count(column) > 0;
        if (!passesInclude) continue;
        bool const passesExclude = !hasExclusions || excludeLookup.count(column) == 0;
        if (!passesExclude) continue;
        if (isNumericType(source.getColumnTypeIndex(column))) {
            selected.push_back(column);
        }
    }

    // Every explicitly requested column must exist and be numeric, unless the
    // exclude list removes it again. (No-op when the include list is empty.)
    for (auto const & column: wanted) {
        if (excludeLookup.count(column) != 0) continue;
        if (!source.hasColumn(column)) {
            throw std::runtime_error("PCATransform: Included column does not exist: " + column);
        }
        if (!isNumericType(source.getColumnTypeIndex(column))) {
            throw std::runtime_error("PCATransform: Included column is not numeric: " + column);
        }
    }

    if (selected.empty()) {
        throw std::runtime_error("PCATransform: No numeric columns available for PCA");
    }
    return selected;
}
2✔
164

165
// Runs PCA on the numeric columns of `source` and returns a new table whose
// columns are the principal-component scores.
//
// Rows containing a non-finite feature value are dropped before fitting, so
// the result has one row per kept source row. Exceptions from column
// selection, matrix extraction and table building propagate to the caller.
auto PCATransform::apply(TableView const & source) -> TableView {
    // Select numeric feature columns
    auto features = selectNumericColumns(source);

    // Extract the feature matrix and the source-row index of every kept row
    // (rows with NaN/Inf are dropped).
    auto [X, kept] = extractMatrixAndKeptRows(source, features, true);

    // Prepare PCA multi-computer with X (kept rows)
    auto pcaComputer = std::make_unique<PCAMultiColumnComputer>(std::move(X), m_config.center, m_config.standardize);

    TableViewBuilder builder(source.getDataManagerExtension());
    // PCA outputs are derived (no expansion-capable sources). Build rows as a
    // simple 0..kept.size()-1 index space, one output row per kept source row.
    {
        std::vector<size_t> indices(kept.size());
        for (size_t i = 0; i < indices.size(); ++i) {
            indices[i] = i;
        }
        builder.setRowSelector(std::make_unique<IndexSelector>(std::move(indices)));
    }

    // Add PCA components as columns via multi-output view; base name empty because names include variance
    builder.addColumns<double>("", std::move(pcaComputer));

    auto transformed_table = builder.build();

    // Carry EntityIds over from the source. When rows were dropped, keep only
    // the ids of the surviving rows so output row i still pairs with the id of
    // source row kept[i]; passing the unfiltered list would misalign them.
    // NOTE(review): assumes getEntityIds() returns a std::vector with one
    // entry per source row (or an empty vector) - confirm against TableView.
    auto entity_ids = source.getEntityIds();
    if (kept.size() != source.getRowCount()) {
        decltype(entity_ids) filtered;
        filtered.reserve(kept.size());
        for (size_t const row: kept) {
            if (row < entity_ids.size()) {
                filtered.push_back(std::move(entity_ids[row]));
            }
        }
        entity_ids = std::move(filtered);
    }
    transformed_table.setDirectEntityIds(std::move(entity_ids));

    return transformed_table;
}
2✔
208

209
auto PCATransform::extractMatrixAndKeptRows(TableView const & source,
2✔
210
                                            std::vector<std::string> const & featureColumns,
211
                                            bool dropNaNInf)
212
        -> std::pair<arma::mat, std::vector<size_t>> {
213
    size_t const nrows = source.getRowCount();
2✔
214
    std::vector<std::vector<double>> cols;
2✔
215
    cols.reserve(featureColumns.size());
2✔
216
    // Need non-const access to source for materialization
217
    auto & nonConstSource = const_cast<TableView &>(source);
2✔
218
    for (auto const & name: featureColumns) {
16✔
219
        auto const ti = source.getColumnTypeIndex(name);
14✔
220
        if (ti == std::type_index(typeid(double))) {
14✔
221
            cols.push_back(nonConstSource.getColumnValues<double>(name));
14✔
UNCOV
222
        } else if (ti == std::type_index(typeid(float))) {
×
UNCOV
223
            auto const & v = nonConstSource.getColumnValues<float>(name);
×
UNCOV
224
            std::vector<double> d;
×
UNCOV
225
            d.reserve(v.size());
×
UNCOV
226
            for (float x: v) d.push_back(static_cast<double>(x));
×
UNCOV
227
            cols.emplace_back(std::move(d));
×
UNCOV
228
        } else if (ti == std::type_index(typeid(int))) {
×
UNCOV
229
            auto const & v = nonConstSource.getColumnValues<int>(name);
×
UNCOV
230
            std::vector<double> d;
×
UNCOV
231
            d.reserve(v.size());
×
UNCOV
232
            for (int x: v) d.push_back(static_cast<double>(x));
×
UNCOV
233
            cols.emplace_back(std::move(d));
×
UNCOV
234
        } else if (ti == std::type_index(typeid(int64_t))) {
×
UNCOV
235
            auto const & v = nonConstSource.getColumnValues<int64_t>(name);
×
UNCOV
236
            std::vector<double> d;
×
UNCOV
237
            d.reserve(v.size());
×
UNCOV
238
            for (int64_t x: v) d.push_back(static_cast<double>(x));
×
UNCOV
239
            cols.emplace_back(std::move(d));
×
UNCOV
240
        } else {
×
UNCOV
241
            throw std::runtime_error("PCATransform: Non-numeric column encountered: " + name);
×
242
        }
243
        if (cols.back().size() != nrows) {
14✔
UNCOV
244
            throw std::runtime_error("PCATransform: Column row count mismatch");
×
245
        }
246
    }
247

248
    std::vector<size_t> kept;
2✔
249
    kept.reserve(nrows);
2✔
250
    for (size_t r = 0; r < nrows; ++r) {
12✔
251
        bool ok = true;
10✔
252
        if (dropNaNInf) {
10✔
253
            for (auto const & c: cols) {
80✔
254
                double v = c[r];
70✔
255
                if (!std::isfinite(v)) {
70✔
256
                    ok = false;
×
257
                    break;
×
258
                }
259
            }
260
        }
261
        if (ok) kept.push_back(r);
10✔
262
    }
263

264
    arma::mat X(kept.size(), cols.size());
2✔
265
    for (size_t j = 0; j < cols.size(); ++j) {
16✔
266
        for (size_t i = 0; i < kept.size(); ++i) {
84✔
267
            X(i, j) = cols[j][kept[i]];
140✔
268
        }
269
    }
270
    return {std::move(X), std::move(kept)};
4✔
271
}
2✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc