• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

markrogoyski / math-php / 4908566749

pending completion
4908566749

push

github

Mark Rogoyski
Github actions changes for static analysis.

7538 of 7544 relevant lines covered (99.92%)

184.92 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

99.06
/src/Statistics/Multivariate/PCA.php
1
<?php
2

3
namespace MathPHP\Statistics\Multivariate;
4

5
use MathPHP\Exception;
6
use MathPHP\Functions\Map\Single;
7
use MathPHP\LinearAlgebra\Eigenvalue;
8
use MathPHP\LinearAlgebra\NumericMatrix;
9
use MathPHP\LinearAlgebra\MatrixFactory;
10
use MathPHP\LinearAlgebra\Vector;
11
use MathPHP\Probability\Distribution\Continuous\F;
12
use MathPHP\Probability\Distribution\Continuous\StandardNormal;
13
use MathPHP\Statistics\Descriptive;
14

15
/**
16
 * Principal component analysis
17
 *
18
 * PCA uses the correlation between data vectors to find a transformation that minimizes variability.
19
 *
20
 * https://en.wikipedia.org/wiki/Principal_component_analysis
21
 */
22
class PCA
23
{
24
    /** @var NumericMatrix Dataset */
25
    private $data;
26

27
    /** @var Vector Means */
28
    private $center;
29

30
    /** @var Vector Scale */
31
    private $scale;
32

33
    /** @var Vector $EVal Eigenvalues of the correlation Matrix - Also the Loading Matrix for the PCA */
34
    private $EVal = null;
35

36
    /** @var NumericMatrix $EVec Eigenvectors of the correlation matrix */
37
    private $EVec = null;
38

39
    /**
40
     * Constructor
41
     *
42
     * @param NumericMatrix $M      each row is a sample, each column is a variable
43
     * @param bool          $center - Sets if the columns are to be centered to μ = 0
44
     * @param bool          $scale  - Sets if the columns are to be scaled to σ  = 1
45
     *
46
     * @throws Exception\BadDataException if any rows have a different column count
47
     * @throws Exception\MathException
48
     */
49
    public function __construct(NumericMatrix $M, bool $center = true, bool $scale = true)
50
    {
51
        // Check that there is enough data: at least two columns and rows
52
        if (!($M->getM() > 1) || !($M->getN() > 1)) {
6✔
53
            throw new Exception\BadDataException('Data matrix must be at least 2x2.');
1✔
54
        }
55

56
        $this->center = $center === true
5✔
57
            ? $this->center = $M->columnMeans()
3✔
58
            : $this->center = new Vector(\array_fill(0, $M->getN(), 0));
2✔
59

60
        if ($scale === true) {
5✔
61
            $scaleArray = [];
2✔
62
            for ($i = 0; $i < $M->getN(); $i++) {
2✔
63
                $scaleArray[] = Descriptive::standardDeviation($M->getColumn($i));
2✔
64
            }
65
            $this->scale = new Vector($scaleArray);
2✔
66
        } else {
67
            $this->scale = new Vector(\array_fill(0, $M->getN(), 1));
3✔
68
        }
69

70
        // Save the source data to the class
71
        $this->data = $M;
5✔
72

73
        // Center and scale the data as needed
74
        $this->data = $this->standardizeData();
5✔
75

76
        // Create the correlation / variance-covarience Matrix
77
        $samples       = $M->getM();
5✔
78
        $corrCovMatrix = $this->data->transpose()->multiply($this->data)->scalarDivide($samples - 1);
5✔
79

80
        // Eigenvalues and vectors
81
        $this->EVal = new Vector($corrCovMatrix->eigenvalues(Eigenvalue::JACOBI_METHOD));
5✔
82
        $this->EVec = $corrCovMatrix->eigenvectors(Eigenvalue::JACOBI_METHOD);
5✔
83
    }
5✔
84

85
    /**
86
     * Verify that the matrix has the same number of columns as the original data
87
     *
88
     * @param NumericMatrix $newData
89
     *
90
     * @throws Exception\BadDataException if the matrix is not square
91
     */
92
    private function checkNewData(NumericMatrix $newData): void
93
    {
94
        if ($newData->getN() !== $this->data->getN()) {
13✔
95
            throw new Exception\BadDataException('Data does not have the same number of columns');
1✔
96
        }
97
    }
12✔
98

99
    /**
100
     * Standardize the data
101
     * Use the object $center and $scale Vectors to transform the provided data
102
     *
103
     * @param NumericMatrix $new_data - An optional Matrix of new data which is standardized against the original data
104
     *
105
     * @return NumericMatrix
106
     *
107
     * @throws Exception\MathException
108
     */
109
    public function standardizeData(NumericMatrix $new_data = null): NumericMatrix
110
    {
111
        if ($new_data === null) {
17✔
112
            $X = $this->data;
5✔
113
        } else {
114
            $this->checkNewData($new_data);
12✔
115
            $X = $new_data;
12✔
116
        }
117
        $ones_column = MatrixFactory::one($X->getM(), 1);
17✔
118

119
        // Create a matrix the same dimensions as $new_data, each element is the average of that column in the original data.
120
        /** @var NumericMatrix $mult */
121
        $mult = MatrixFactory::create([$this->center->getVector()]);
17✔
122
        $center_matrix = $ones_column->multiply($mult);
17✔
123
        $scale_matrix  = MatrixFactory::diagonal($this->scale->getVector())->inverse();
17✔
124

125
        // scaled data: ($X - μ) / σ
126
        return $X->subtract($center_matrix)->multiply($scale_matrix);
17✔
127
    }
128

129
    /**
130
     * The loadings are the unit eigenvectors of the correlation matrix
131
     *
132
     * @return NumericMatrix
133
     */
134
    public function getLoadings(): NumericMatrix
135
    {
136
        return $this->EVec;
5✔
137
    }
138

139
    /**
140
     * The eigenvalues of the correlation matrix
141
     *
142
     * @return Vector
143
     *
144
     * @throws Exception\MathException
145
     */
146
    public function getEigenvalues(): Vector
147
    {
148
        $EV = [];
8✔
149
        for ($i = 0; $i < $this->data->getN(); $i++) {
8✔
150
            $EV[] = Descriptive::standardDeviation($this->getScores()->getColumn($i)) ** 2;
8✔
151
        }
152

153
        return new Vector($EV);
8✔
154
    }
155

156
    /**
157
     * Get Scores
158
     *
159
     * Transform the standardized data with the loadings matrix
160
     *
161
     * @param NumericMatrix|null $new_data
162
     *
163
     * @return NumericMatrix
164
     *
165
     * @throws Exception\MathException
166
     */
167
    public function getScores(NumericMatrix $new_data = null): NumericMatrix
168
    {
169
        if ($new_data === null) {
13✔
170
            $scaled_data = $this->data;
12✔
171
        } else {
172
            $this->checkNewData($new_data);
5✔
173
            $scaled_data = $this->standardizeData($new_data);
4✔
174
        }
175

176
        return $scaled_data->multiply($this->EVec);
12✔
177
    }
178

179
    /**
180
     * Get R² Values
181
     *
182
     * R² for each component is eigenvalue divided by the sum of all eigenvalues
183
     *
184
     * @return float[]
185
     */
186
    public function getR2(): array
187
    {
188
        $total_variance = $this->EVal->sum();
8✔
189
        return $this->EVal->scalarDivide($total_variance)->getVector();
8✔
190
    }
191

192
    /**
193
     * Get the cumulative R²
194
     *
195
     * @return float[]
196
     */
197
    public function getCumR2(): array
198
    {
199
        $result = [];
4✔
200
        $sum    = 0;
4✔
201

202
        foreach ($this->getR2() as $R²value) {
4✔
203
            $sum += $R²value;
4✔
204
            $result[] = $sum;
4✔
205
        }
206

207
        return $result;
4✔
208
    }
209

210
    /**
211
     * Get the Q Residuals
212
     *
213
     * The Q residual is the error in the model at a given model complexity.
214
     * For each row (i) in the data Matrix x, and retained components (j):
215
     * Qᵢ = eᵢ'eᵢ = xᵢ(I-PⱼPⱼ')xᵢ'
216
     *
217
     * @param NumericMatrix $new_data - An optional Matrix of new data which is standardized against the original data
218
     *
219
     * @return NumericMatrix of Q residuals
220
     *
221
     * @throws Exception\MathException
222
     */
223
    public function getQResiduals(NumericMatrix $new_data = null): NumericMatrix
224
    {
225
        $vars = $this->data->getN();
8✔
226

227
        if ($new_data === null) {
8✔
228
            $X = $this->data;
4✔
229
        } else {
230
            $this->checkNewData($new_data);
4✔
231
            $X = $this->standardizeData($new_data);
4✔
232
        }
233

234
        $X′ = $X->transpose();
8✔
235
        $I  = MatrixFactory::identity($vars);
8✔
236

237
        // Initial element with initialization of result matrix
238
        $P  = $this->EVec->submatrix(0, 0, $vars - 1, 0);  // Get the first column of the loading matrix
8✔
239
        $P′ = $P->transpose();
8✔
240
        /** @var NumericMatrix $Q */
241
        $Q  = MatrixFactory::create([$X->multiply($I->subtract($P->multiply($P′)))->multiply($X′)->getDiagonalElements()])->transpose();
8✔
242

243
        for ($i = 1; $i < $vars; $i++) {
8✔
244
            // Get the first $i+1 columns of the loading matrix
245
            $P  = $this->EVec->submatrix(0, 0, $vars - 1, $i);
8✔
246
            $P′ = $P->transpose();
8✔
247
            /** @var NumericMatrix $Qᵢ */
248
            $Qᵢ = MatrixFactory::create([$X->multiply($I->subtract($P->multiply($P′)))->multiply($X′)->getDiagonalElements()])->transpose();
8✔
249
            $Q  = $Q->augment($Qᵢ);
8✔
250
        }
251

252
        return $Q;
8✔
253
    }
254

255
    /**
256
     * Get the T² Distance
257
     *
258
     * Get the distance from the score to the center of the model plane.
259
     * For each row (i) in the data matrix, and retained componenets (j)
260
     * Tᵢ² = XᵢPⱼΛⱼ⁻¹Pⱼ'Xᵢ'
261
     *
262
     * @param NumericMatrix $new_data - An optional Matrix of new data which is standardized against the original data
263
     *
264
     * @return NumericMatrix
265
     *
266
     * @throws Exception\MathException
267
     */
268
    public function getT2Distances(NumericMatrix $new_data = null): NumericMatrix
269
    {
270
        $vars = $this->data->getN();
8✔
271

272
        if ($new_data === null) {
8✔
273
            $X = $this->data;
4✔
274
        } else {
275
            $this->checkNewData($new_data);
4✔
276
            $X = $this->standardizeData($new_data);
4✔
277
        }
278

279
        $X′ = $X->transpose();
8✔
280

281
        // Initial element with initialization of result matrix
282
        $P    = $this->EVec->submatrix(0, 0, $vars - 1, 0); // // Get the first column of the loading matrix
8✔
283
        $P′   = $P->transpose();
8✔
284
        $Λⱼ⁻¹ = MatrixFactory::diagonal(\array_slice($this->EVal->getVector(), 0, 0 + 1))->inverse();
8✔
285
        /** @var NumericMatrix $T² */
286
        $T²   = MatrixFactory::create([$X->multiply($P)->multiply($Λⱼ⁻¹)->multiply($P′)->multiply($X′)->getDiagonalElements()])->transpose();
8✔
287

288
        for ($i = 1; $i < $this->data->getN(); $i++) {
8✔
289
            // Get the first $i+1 columns of the loading matrix
290
            $P    = $this->EVec->submatrix(0, 0, $vars - 1, $i);
8✔
291
            $P′   = $P->transpose();
8✔
292
            $Λⱼ⁻¹ = MatrixFactory::diagonal(\array_slice($this->EVal->getVector(), 0, $i + 1))->inverse();
8✔
293
            /** @var NumericMatrix $Tᵢ² */
294
            $Tᵢ²  = MatrixFactory::create([$X->multiply($P)->multiply($Λⱼ⁻¹)->multiply($P′)->multiply($X′)->getDiagonalElements()])->transpose();
8✔
295
            $T²   = $T²->augment($Tᵢ²);
8✔
296
        }
297

298
        return $T²;
8✔
299
    }
300

301
    /**
302
     * Calculate the critical limits of T²
303
     *
304
     * @param float $alpha the probability limit of the critical value
305
     *
306
     * @return float[] Critical values for each model complexity
307
     */
308
    public function getCriticalT2(float $alpha = .05): array
309
    {
310
        $samp = $this->data->getM();
4✔
311
        $vars = $this->data->getN();
4✔
312

313
        $T²Critical = [];
4✔
314
        for ($i = 1; $i <= $vars; $i++) {
4✔
315
            $F = new F($i, $samp - $i);
4✔
316
            $T = $i * ($samp - 1) * $F->inverse(1 - $alpha) / ($samp - $i);
4✔
317
            $T²Critical[] = $T;
4✔
318
        }
319

320
        return $T²Critical;
4✔
321
    }
322

323
    /**
324
     * Calculate the critical limits of Q
325
     *
326
     * @param float $alpha the probability limit of the critical value
327
     *
328
     * @return float[] Critical values for each model complexity
329
     *
330
     * @throws Exception\MathException
331
     */
332
    public function getCriticalQ(float $alpha = .05): array
333
    {
334
        $vars  = $this->data->getN();
4✔
335
        $QCritical = [];
4✔
336

337
        for ($i = 0; $i < $vars - 1; $i++) {
4✔
338
            $evals = \array_slice($this->getEigenvalues()->getVector(), $i + 1);
4✔
339

340
            $t1 = \array_sum($evals);
4✔
341
            $t2 = \array_sum(Single::square($evals));
4✔
342
            $t3 = \array_sum(Single::pow($evals, 3));
4✔
343

344
            $h0 = 1 - 2 * $t1 * $t3 / 3 / $t2 ** 2;
4✔
345
            if ($h0 < .001) {
4✔
346
                $h0 = .001;
×
347
            }
348

349
            $normal = new StandardNormal();
4✔
350
            $ca     = $normal->inverse(1 - $alpha);
4✔
351

352
            $h1 = $ca * \sqrt(2 * $t2 * $h0 ** 2) / $t1;
4✔
353
            $h2 = $t2 * $h0 * ($h0 - 1) / $t1 ** 2;
4✔
354

355
            $QCritical[] = $t1 * (1 + $h1 + $h2) ** (1 / $h0);
4✔
356
        }
357

358
        // The final value is always zero since the model is perfectly fit.
359
        $QCritical[] = 0;
4✔
360

361
        return $QCritical;
4✔
362
    }
363
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc