markrogoyski / math-php / build 23440034522
23 Mar 2026 01:35PM UTC. Coverage: 99.266% (-0.4%) from 99.66%

Pull Request #488: Add multilinear regression
Merge 7244aa3c4 into 1b53720fc (github / web-flow)

142 of 171 new or added lines in 14 files covered (83.04%)
7 existing lines in 3 files now uncovered
8657 of 8721 relevant lines covered (99.27%)
222.35 hits per line

Source File: /src/Statistics/Regression/Methods/LeastSquares.php (94.87% of lines covered)
<?php

namespace MathPHP\Statistics\Regression\Methods;

use LogicException;
use MathPHP\Exception;
use MathPHP\Exception\BadDataException;
use MathPHP\Exception\BadParameterException;
use MathPHP\Exception\IncorrectTypeException;
use MathPHP\Functions\Map\Multi;
use MathPHP\Functions\Map\Single;
use MathPHP\LinearAlgebra\MatrixFactory;
use MathPHP\LinearAlgebra\NumericMatrix;
use MathPHP\Polynomials\MonomialExponentGenerator;
use MathPHP\Probability\Distribution\Continuous\F;
use MathPHP\Probability\Distribution\Continuous\StudentT;
use MathPHP\Statistics\RandomVariable;
use MathPHP\Statistics\Regression\Regression;
use MathPHP\Util\Script;

/**
 * @phpstan-type SimpleLinearResultModel array{m: float, b: float}
 * @phpstan-type PolynomialResultModel array<string, float>
 *
 * @template-covariant TResultModel of array // SimpleLinearResultModel|PolynomialResultModel
 */
trait LeastSquares
{
    /**
     * Regression ys
     * Since the actual xs may be translated for regression, we need to keep these
     * handy for regression statistics.
     * @var array<float>
     */
    private $reg_ys;

    /**
     * Regression xs or xss
     * Since the actual xs may be translated for regression, we need to keep these
     * handy for regression statistics.
     * @var array<float>|array<array<float>>
     */
    private $reg_xs;

    /**
     * Regression Yhat
     * The Yhat for the regression xs.
     * @var array<float>
     */
    private $reg_Yhat;

    /**
     * Projection Matrix
     * https://en.wikipedia.org/wiki/Projection_matrix
     *
     * @var NumericMatrix|null
     */
    private $reg_P = null;

    /** @var int */
    private $fit_constant = 1;

    /**
     * Number of columns in xss. (Field definition must match Regression::$k.)
     * @see Regression::$k
     * @var int
     */
    protected $k;

    /** @var int */
    private $p;

    /** @var int Degrees of freedom */
    private $ν;

    /** @var int Number of terms */
    private $t;

    /** @var NumericMatrix */
    private $⟮XᵀX⟯⁻¹;

    /**
     * Linear least squares fitting using Matrix algebra (Polynomial).
     *
     * Generalizing from a straight line (first degree polynomial) to a kᵗʰ degree polynomial:
     *  y = a₀ + a₁x + ⋯ + akxᵏ
     *
     * Leads to equations in matrix form:
     *  [n    Σxᵢ   ⋯  Σxᵢᵏ  ] [a₀]   [Σyᵢ   ]
     *  [Σxᵢ  Σxᵢ²  ⋯  Σxᵢᵏ⁺¹] [a₁]   [Σxᵢyᵢ ]
     *  [ ⋮     ⋮    ⋱  ⋮    ] [ ⋮ ] = [ ⋮    ]
     *  [Σxᵢᵏ Σxᵢᵏ⁺¹ ⋯ Σxᵢ²ᵏ ] [ak]   [Σxᵢᵏyᵢ]
     *
     * This is a Vandermonde matrix:
     *  [1 x₁ ⋯ x₁ᵏ] [a₀]   [y₁]
     *  [1 x₂ ⋯ x₂ᵏ] [a₁]   [y₂]
     *  [⋮  ⋮  ⋱ ⋮ ] [ ⋮ ] = [ ⋮]
     *  [1 xn ⋯ xnᵏ] [ak]   [yn]
     *
     * Can write as equation:
     *  y = Xa
     *
     * Solve by premultiplying by transpose Xᵀ:
     *  Xᵀy = XᵀXa
     *
     * Invert to yield vector solution:
     *  a = (XᵀX)⁻¹Xᵀy
     *
     * (http://mathworld.wolfram.com/LeastSquaresFittingPolynomial.html)
     *
     * For reference, the traditional way to do least squares:
     *        _ _   __
     *        x y - xy        _    _
     *   m = _________    b = y - mx
     *        _     __
     *       (x)² - x²
     *
     * @param  array<float> $ys y values
     * @param  array<float>|array<array<float>> $xs x values
     * @param  int   $order The order of the polynomial. 1 = linear, 2 = x², etc
     * @param  int   $fit_constant '1' if we are fitting a constant to the regression.
     * @param  bool  $calculate_projection Whether to calculate the projection matrix.
     *
     * @return NumericMatrix [[m], [b]]
     *
     * @throws Exception\MathException
     */
    public function leastSquares(array $ys, array $xs, int $order = 1, int $fit_constant = 1, bool $calculate_projection = true): NumericMatrix
    {
        $this->reg_ys = $ys;
        $this->reg_xs = $xs;
        $this->fit_constant = $fit_constant;
        $this->p = $order;

        $number_of_terms = MonomialExponentGenerator::getNumberOfTerms($this->k, $order);
        if ($fit_constant === 0) {
            $number_of_terms--;
        }
        $this->ν = $this->n - $number_of_terms;
        $this->t = $number_of_terms;

        if ($this->ν <= 0) {
            throw new Exception\BadDataException('Degrees of freedom ν must be > 0. Computed to be ' . $this->ν);
        }

        // y = Xa
        $X = $this->createDesignMatrix($xs);
        /** @var NumericMatrix $y */
        $y = MatrixFactory::createFromColumnVector($ys);

        // a = (XᵀX)⁻¹Xᵀy
        $Xᵀ           = $X->transpose();
        $this->⟮XᵀX⟯⁻¹ = $Xᵀ->multiply($X)->inverse();
        $temp_matrix  = $this->⟮XᵀX⟯⁻¹->multiply($Xᵀ);
        $this->reg_P  = $calculate_projection ? $X->multiply($temp_matrix) : null;
        $β_hat        = $temp_matrix->multiply($y);

        $this->reg_Yhat = $X->multiply($β_hat)->getColumn(0);

        return $β_hat;
    }
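
    // Usage sketch: this trait is consumed by the concrete regression classes. A minimal
    // example, assuming MathPHP\Statistics\Regression\Linear composes this trait and invokes
    // leastSquares() from its constructor (the class wiring lives outside this file):
    //
    //   $points     = [[1, 2], [2, 3], [4, 5], [5, 7], [6, 8]];
    //   $regression = new \MathPHP\Statistics\Regression\Linear($points);
    //   $parameters = $regression->getParameters();   // e.g. ['m' => slope, 'b' => intercept]
    //   $ŷ          = $regression->evaluate(3);       // fitted value at x = 3
    //
    // The coefficients come from β̂ = (XᵀX)⁻¹Xᵀy as computed above with NumericMatrix,
    // rather than from the closed-form m/b formulas shown in the docblock.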

    /**
     * @param list<float> $array
     * @return TResultModel
     */
    abstract protected function createResultModel(array $array): array;

    /**
     * Creates a result model of shape [m, b] for simple linear regression.
     *
     * @param list<float> $array
     * @return SimpleLinearResultModel
     */
    protected function createSimpleLinearResultModel(array $array): array
    {
        return [
            'm' => $array[1],
            'b' => $array[0],
        ];
    }

    /**
     * Creates a result model of shape [β₀, β₁, ..., βn].
     *
     * @param list<float> $array
     * @return PolynomialResultModel
     */
    protected function createPolynomialResultModel(array $array): array
    {
        $keys = \array_map(static function (int $i) {
            return 'β' . Script::getSubscript($i);
        }, \range(0, \count($array) - 1));
        return \array_combine($keys, $array);
    }
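
    // Illustrative result shape (placeholder values, not real output): for three
    // coefficients the keys produced via Script::getSubscript() look like
    //   ['β₀' => 0.52, 'β₁' => 1.93, 'β₂' => -0.07]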

    /**
     * The Design Matrix contains all the independent variables needed for the least squares regression
     *
     * https://en.wikipedia.org/wiki/Design_matrix
     *
     * @param mixed $xs
     *
     * @return NumericMatrix (Vandermonde)
     *
     * @throws Exception\BadDataException
     * @throws Exception\IncorrectTypeException
     * @throws Exception\MathException
     * @throws Exception\MatrixException
     */
    public function createDesignMatrix($xs): NumericMatrix
    {
        if (\is_int($xs) || \is_float($xs)) {
            $xs = [$xs];
        }

        /** @var array<float> $xs */
        $X = MatrixFactory::vandermonde($xs, $this->p + 1);
        if ($this->fit_constant == 0) {
            $X = $X->columnExclude(0);
        }

        return $X;
    }
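
    // Worked example (assuming the usual Vandermonde column convention x⁰, x¹, ..., xᵖ):
    // for $xs = [1, 2, 3] and $this->p = 2 (quadratic), the design matrix returned above is
    //   [1  1  1]
    //   [1  2  4]
    //   [1  3  9]
    // and with $fit_constant = 0 the leading all-ones column is excluded.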

    /**
     * Projection matrix (influence matrix, hat matrix H)
     * Maps the vector of response values (dependent variable values) to the vector of fitted values (or predicted values).
     * The diagonal elements of the projection matrix are the leverages.
     * https://en.wikipedia.org/wiki/Projection_matrix
     *
     * H = X⟮XᵀX⟯⁻¹Xᵀ
     *   where X is the design matrix
     *
     * @return NumericMatrix
     */
    public function getProjectionMatrix(): NumericMatrix
    {
        $reg_P = $this->reg_P;
        if ($reg_P === null) {
            throw new LogicException('Projection matrix is not calculated. Call leastSquares() with calculate_projection=true first.');
        }
        return $reg_P;
    }

    /**
     * Regression Leverages
     * A measure of how far away the independent variable values of an observation are from those of the other observations.
     * https://en.wikipedia.org/wiki/Leverage_(statistics)
     *
     * Leverage score for the i-th data unit is defined as:
     * hᵢᵢ = [H]ᵢᵢ
     * which is the i-th diagonal element of the projection matrix H,
     * where H = X⟮XᵀX⟯⁻¹Xᵀ and X is the design matrix.
     *
     * @return array<int|float>
     */
    public function leverages(): array
    {
        return $this->getProjectionMatrix()->getDiagonalElements();
    }
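
    // Note: each leverage hᵢᵢ is a diagonal entry of the symmetric, idempotent matrix H,
    // so 0 ≤ hᵢᵢ ≤ 1, and the leverages sum to trace(H), which equals the number of
    // estimated parameters (the rank of the design matrix X).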

    /**************************************************************************
     * Sum Of Squares
     *************************************************************************/

    /**
     * SSreg - The Sum Squares of the regression (Explained sum of squares)
     *
     * The sum of the squares of the deviations of the predicted values from
     * the mean value of a response variable, in a standard regression model.
     * https://en.wikipedia.org/wiki/Explained_sum_of_squares
     *
     * SSreg = ∑(ŷᵢ - ȳ)²
     * When a constant is fit to the regression, the average of y = average of ŷ.
     *
     * In the case where the constant is not fit, we use the sum of squares of the predicted values
     * SSreg = ∑ŷᵢ²
     *
     * @return float
     *
     * @throws Exception\BadDataException
     */
    public function sumOfSquaresRegression(): float
    {
        if ($this->fit_constant == 1) {
            return RandomVariable::sumOfSquaresDeviations($this->yHat());
        }
        return \array_sum(Single::square($this->reg_Yhat));
    }

    /**
     * SSres - The Sum Squares of the residuals (RSS - Residual sum of squares)
     *
     * The sum of the squares of residuals (deviations predicted from actual
     * empirical values of data). It is a measure of the discrepancy between
     * the data and an estimation model.
     * https://en.wikipedia.org/wiki/Residual_sum_of_squares
     *
     * SSres = ∑(yᵢ - f(xᵢ))²
     *       = ∑(yᵢ - ŷᵢ)²
     *
     *  where yᵢ is an observed value
     *        ŷᵢ is a value predicted by the regression model
     *
     * @return float
     */
    public function sumOfSquaresResidual(): float
    {
        $Ŷ = $this->reg_Yhat;
        return \array_sum(\array_map(
            function ($yᵢ, $ŷᵢ) {
                return ($yᵢ - $ŷᵢ) ** 2;
            },
            $this->reg_ys,
            $Ŷ
        ));
    }

    /**
     * SStot - The total Sum Squares
     *
     * The sum, over all observations, of the squared differences of
     * each observation from the overall mean.
     * https://en.wikipedia.org/wiki/Total_sum_of_squares
     *
     * For Simple Linear Regression
     * SStot = ∑(yᵢ - ȳ)²
     *
     * For Regression through a point
     * SStot = ∑yᵢ²
     *
     * @return float
     *
     * @throws Exception\BadDataException
     */
    public function sumOfSquaresTotal(): float
    {
        return $this->sumOfSquaresResidual() + $this->sumOfSquaresRegression();
    }
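
    // Arithmetic sanity check (illustrative numbers only): if SSreg = 90 and SSres = 10,
    // then SStot = 90 + 10 = 100, and coefficientOfDetermination() below gives
    // R² = 90 / (90 + 10) = 0.9.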

    /***************************************************************************
     * Mean Square Errors
     *
     * The mean square errors are the sum of squares divided by their
     * individual degrees of freedom.
     *
     * Source    |     df
     * ----------|--------------
     * SSTO      |    n - 1
     * SSE       |    n - p - 1
     * SSR       |    p
     **************************************************************************/

    /**
     * Mean square regression
     * MSR = SSᵣ / k
     *
     * @return float
     *
     * @throws Exception\BadDataException
     */
    public function meanSquareRegression(): float
    {
        $k   = $this->k;
        $SSᵣ = $this->sumOfSquaresRegression();
        $MSR = $SSᵣ / $k;

        return $MSR;
    }

    /**
     * Mean of squares for error
     * MSE = SSₑ / ν
     *
     * @return float
     */
    public function meanSquareResidual(): float
    {
        $ν   = $this->ν;
        $SSₑ = $this->sumOfSquaresResidual();
        $MSE = $SSₑ / $ν;

        return $MSE;
    }

    /**
     * Mean of squares total
     * MSTO = SSTO / (n - 1)
     *
     * @return float
     *
     * @throws Exception\BadDataException
     */
    public function meanSquareTotal(): float
    {
        $MSTO = $this->sumOfSquaresTotal() / ($this->n - $this->fit_constant);

        return $MSTO;
    }

    /**
     * Error Standard Deviation
     *
     * Also called the standard error of the residuals
     *
     * @return float
     */
    public function errorSd(): float
    {
        return \sqrt($this->meanSquareResidual());
    }

    /**
     * The degrees of freedom of the regression
     *
     * @return float
     */
    public function degreesOfFreedom(): float
    {
        return $this->ν;
    }

    /**
     * Standard error of the regression parameters (coefficients)
     *
     *              _________
     *             /  ∑eᵢ²
     *            /  -----
     * se(m) =   /     ν
     *          /  ---------
     *         √   ∑⟮xᵢ - μ⟯²
     *
     *  where
     *    eᵢ = residual (difference between observed value and value predicted by the model)
     *    ν  = n - 2  degrees of freedom
     *
     *           ______
     *          / ∑xᵢ²
     * se(b) = /  ----
     *        √    n
     *
     * @return TResultModel E.g., [m => se(m), b => se(b)] for simple linear regression
     *
     * @throws Exception\BadParameterException
     * @throws Exception\IncorrectTypeException
     */
    public function standardErrors(): array
    {
        return $this->createResultModel($this->standardErrorsArray());
    }

    /**
     * @return list<float>
     * @throws BadParameterException
     * @throws IncorrectTypeException
     */
    public function standardErrorsArray(): array
    {
        $⟮XᵀX⟯⁻¹ = $this->⟮XᵀX⟯⁻¹;
        $σ²     = $this->meanSquareResidual();

        $standard_error_matrix = $⟮XᵀX⟯⁻¹->scalarMultiply($σ²);
        return Single::sqrt($standard_error_matrix->getDiagonalElements());
    }
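
    // In matrix form, the standard error of the j-th coefficient is
    //   se(βⱼ) = √(σ̂² · [⟮XᵀX⟯⁻¹]ⱼⱼ)
    // where σ̂² is meanSquareResidual(); the se(m)/se(b) formulas in the docblock above
    // are the simple-linear special case of this expression.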

    /**
     * Regression variance
     *
     * @param  float $x
     *
     * @return float
     *
     * @throws Exception\MatrixException
     * @throws Exception\IncorrectTypeException
     */
    public function regressionVariance(float $x): float
    {
        $X      = $this->createDesignMatrix($x);
        $⟮XᵀX⟯⁻¹ = $this->⟮XᵀX⟯⁻¹;
        $M      = $X->multiply($⟮XᵀX⟯⁻¹)->multiply($X->transpose());

        return $M[0][0];
    }

    /**
     * Get the regression residuals
     * eᵢ = yᵢ - ŷᵢ
     * or in matrix form
     * e = (I - H)y
     *
     * @return array<float>
     *
     * @throws BadDataException
     */
    public function residuals(): array
    {
        return Multi::subtract($this->reg_ys, $this->reg_Yhat);
    }

    /**
     * Cook's Distance
     * A measure of the influence of each data point on the regression.
     * Points with excessive influence may be outliers, or may warrant a closer look.
     *
     * https://en.wikipedia.org/wiki/Cook%27s_distance
     *
     *           _         _
     *      eᵢ² |     hᵢ    |
     * Dᵢ = --- | --------- |
     *      s²p |_(1 - hᵢ)²_|
     *
     *   where s² ≡ (n - p)⁻¹eᵀe  (mean squared error of the residual model)
     *         e  is the vector of residuals
     *
     * @return array<float>
     *
     * @throws BadDataException
     */
    public function cooksD(): array
    {
        $e   = $this->residuals();
        $h   = $this->leverages();
        $mse = $this->meanSquareResidual();
        $t   = $this->t;

        return \array_map(
            function ($eᵢ, $hᵢ) use ($mse, $t) {
                if ((1 - $hᵢ) == 0) {
                    return INF;
                }
                return ($eᵢ ** 2 / $mse / $t) * ($hᵢ / (1 - $hᵢ) ** 2);
            },
            $e,
            $h
        );
    }
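
    // Interpretation note: there is no universal cutoff for Cook's distance, but common
    // rules of thumb flag observations with Dᵢ > 4/n (or, more conservatively, Dᵢ > 1)
    // as candidates for closer inspection.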

    /**
     * DFFITS
     * Measures the effect on the regression if each data point is excluded.
     * https://en.wikipedia.org/wiki/DFFITS
     *
     *          ŷᵢ - ŷᵢ₍ᵢ₎
     * DFFITS = ----------
     *          s₍ᵢ₎ √hᵢᵢ
     *
     *   where ŷᵢ    is the prediction for point i with i included in the regression
     *         ŷᵢ₍ᵢ₎ is the prediction for point i without i included in the regression
     *         s₍ᵢ₎  is the standard error estimated without the point in question
     *         hᵢᵢ   is the leverage for the point
     *
     * Putting it another way:
     *
     * sᵢ is the studentized residual
     *
     *             eᵢ
     * sᵢ = --------------
     *      √(MSₑ(1 - hᵢ))
     *
     *   where eᵢ  is the residual
     *         MSₑ is the mean squares residual
     *
     * Then, s₍ᵢ₎ is the studentized residual with the i-th observation removed:
     *
     *               eᵢ
     * s₍ᵢ₎ = -----------------
     *        √(MSₑ₍ᵢ₎(1 - hᵢ))
     *
     * where
     *          _                _
     *         |           eᵢ²    |   ν
     * MSₑ₍ᵢ₎ =|  MSₑ - --------  | -----
     *         |_       (1 - h)ν _| ν - 1
     *
     * Then,
     *                  ______
     *                 /  hᵢ
     * DFFITS = s₍ᵢ₎  / ------
     *               √  1 - hᵢ
     *
     * @return array<float>
     *
     * @throws BadDataException
     */
    public function dffits(): array
    {
        $ν   = $this->ν;
        $h   = $this->leverages();
        $e   = $this->residuals();
        $MSₑ = $this->meanSquareResidual();

        // Mean square residuals with the i-th observation removed
        $MSₑ₍ᵢ₎ = \array_map(
            function ($eᵢ, $hᵢ) use ($MSₑ, $ν) {
                return ($MSₑ - ($eᵢ ** 2 / ((1 - $hᵢ) * $ν))) * ($ν / ($ν - 1));
            },
            $e,
            $h
        );

        // Studentized residual with the i-th observation removed
        $s = \array_map(
            function ($eᵢ, $mseᵢ, $hᵢ) {
                return $eᵢ / \sqrt($mseᵢ * (1 - $hᵢ));
            },
            $e,
            $MSₑ₍ᵢ₎,
            $h
        );

        $DFFITS = \array_map(
            function ($s₍ᵢ₎, $hᵢ) {
                return $s₍ᵢ₎ * \sqrt($hᵢ / (1 - $hᵢ));
            },
            $s,
            $h
        );

        return $DFFITS;
    }
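
    // Interpretation note: a commonly cited rule of thumb treats an observation as
    // influential when |DFFITS| exceeds 2√(p/n), with p the number of fitted parameters
    // and n the number of observations.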

    /**
     * R - correlation coefficient (Pearson's r)
     *
     * A measure of the strength and direction of the linear relationship
     * between two variables
     * that is defined as the (sample) covariance of the variables
     * divided by the product of their (sample) standard deviations.
     *
     *        n∑⟮xy⟯ − ∑⟮x⟯∑⟮y⟯
     * ------------------------------
     * √[(n∑x² − ⟮∑x⟯²)(n∑y² − ⟮∑y⟯²)]
     *
     * @return float
     *
     * @throws BadDataException
     */
    public function correlationCoefficient(): float
    {
        return \sqrt($this->coefficientOfDetermination());
    }

    /**
     * R - correlation coefficient
     * Convenience wrapper for correlationCoefficient
     *
     * @return float
     *
     * @throws BadDataException
     */
    public function r(): float
    {
        return $this->correlationCoefficient();
    }

    /**
     * R² - coefficient of determination
     *
     * Indicates the proportion of the variance in the dependent variable
     * that is predictable from the independent variable.
     * Range of 0 - 1. Close to 1 means the regression line is a good fit
     * https://en.wikipedia.org/wiki/Coefficient_of_determination
     *
     * @return float
     *
     * @throws BadDataException
     */
    public function coefficientOfDetermination(): float
    {
        return $this->sumOfSquaresRegression() / ($this->sumOfSquaresRegression() + $this->sumOfSquaresResidual());
    }

    /**
     * R² - coefficient of determination
     * Convenience wrapper for coefficientOfDetermination
     *
     * @return float
     *
     * @throws BadDataException
     */
    public function r2(): float
    {
        return $this->coefficientOfDetermination();
    }

    /**
     * The t values associated with each of the regression parameters (coefficients)
     *
     *       β
     * t = -----
     *     se(β)
     *
     *  where:
     *    β     = regression parameter (coefficient)
     *    se(β) = standard error of the regression parameter (coefficient)
     *
     * @return TResultModel E.g., [m => t, b => t] for simple linear regression
     *
     * @throws BadParameterException
     * @throws IncorrectTypeException
     */
    public function tValues(): array
    {
        return $this->createResultModel($this->tValuesArray());
    }

    /**
     * @return list<float>
     * @throws BadParameterException
     * @throws IncorrectTypeException
     */
    public function tValuesArray(): array
    {
        $se = $this->standardErrorsArray();
        $parameters = $this->parameters;

        return \array_map(
            static function (float $parameter, float $se) {
                return $parameter / $se;
            },
            $parameters,
            $se
        );
    }

    /**
     * The probability associated with each parameter's t value
     *
     * t probability = Student's T CDF(t,ν)
     *
     *  where:
     *    t = t value
     *    ν = n - p - alpha  degrees of freedom
     *
     *  alpha = 1 if the regression includes a constant term
     *
     * @return TResultModel E.g., [m => p, b => p] for simple linear regression
     *
     * @throws BadParameterException
     * @throws IncorrectTypeException
     */
    public function tProbability(): array
    {
        return $this->createResultModel($this->tProbabilityArray());
    }

    /**
     * @return list<float>
     * @throws BadParameterException
     * @throws IncorrectTypeException
     */
    public function tProbabilityArray(): array
    {
        $ν  = $this->ν;
        $t  = $this->tValuesArray();

        $studentT = new StudentT($ν);
        return \array_map(
            static function (float $value) use ($studentT) {
                return $studentT->cdf($value);
            },
            $t
        );
    }

    /**
     * The F statistic of the regression (F test)
     *
     *      MSm        SSᵣ/p
     * F₀ = --- = ---------------
     *      MSₑ   SSₑ/(n - p - α)
     *
     *  where:
     *    MSm = mean square model (regression mean square) = SSᵣ / df(SSᵣ) = SSᵣ/p
     *    MSₑ = mean square error (estimate of variance σ² of the random error)
     *        = SSₑ/(n - p - α)
     *    p   = the order of the fitted polynomial
     *    α   = 1 if the model includes a constant term, 0 otherwise. (p+α = total number of model parameters)
     *    SSᵣ = sum of squares of the regression
     *    SSₑ = sum of squares of residuals
     *
     * @return float
     *
     * @throws BadDataException
     */
    public function fStatistic(): float
    {
        $F = $this->meanSquareRegression() / $this->meanSquareResidual();
        return $F;
    }

    /**
     * The probability associated with the regression F Statistic
     *
     * F probability = F distribution CDF(F,d₁,d₂)
     *
     *  where:
     *    F  = F statistic
     *    d₁ = degrees of freedom 1
     *    d₂ = degrees of freedom 2
     *
     *    ν  = degrees of freedom
     *
     * @return float
     *
     * @throws BadDataException
     */
    public function fProbability(): float
    {
        $F = $this->fStatistic();
        $n = $this->n;

        // Degrees of freedom
        // Need to make sure the 1 in $d₁ should not be $this->fit_parameters;
        $ν  = $this->ν;
        $d₁ = $n - $ν - 1;
        $d₂ = $ν;

        $fDist = new F($d₁, $d₂);
        return ($fDist->cdf($F));
    }

    /**
     * The confidence interval of the regression for Simple Linear Regression
     *                      ______________
     *                     /1   (x - x̄)²
     * CI(x,p) = t * sy * / - + --------
     *                   √  n     SSx
     *
     * Where:
     *   t is the critical t for the p value
     *   sy is the estimated standard deviation of y
     *   n is the number of data points
     *   x̄ is the average of the x values
     *   SSx = ∑(x - x̄)²
     *
     * If $p = .05, then we can say we are 95% confident that the actual regression line
     * will be within an interval of evaluate($x) ± CI($x, .05).
     *
     * @param float $x
     * @param float $p  0 < p < 1 The P value to use
     *
     * @return float
     *
     * @throws Exception\MatrixException
     * @throws Exception\IncorrectTypeException
     */
    public function ci(float $x, float $p): float
    {
        $V  = $this->regressionVariance($x);
        $σ² = $this->meanSquareResidual();

        // The t-value
        $studentT = new StudentT($this->ν);
        $t = $studentT->inverse2Tails($p);

        return $t * \sqrt($σ² * $V);
    }

    /**
     * The prediction interval of the regression
     *                        _________________
     *                       /1    1   (x - x̄)²
     * PI(x,p,q) = t * sy * / - +  - + --------
     *                     √  q    n     SSx
     *
     * Where:
     *   t is the critical t for the p value
     *   sy is the estimated standard deviation of y
     *   q is the number of replications
     *   n is the number of data points
     *   x̄ is the average of the x values
     *   SSx = ∑(x - x̄)²
     *
     * If $p = .05, then we can say we are 95% confident that the future averages of $q trials at $x
     * will be within an interval of evaluate($x) ± PI($x, .05, $q).
     *
     * @param float $x
     * @param float $p  0 < p < 1 The P value to use
     * @param int   $q  Number of trials
     *
     * @return float
     *
     * @throws Exception\MatrixException
     * @throws Exception\IncorrectTypeException
     */
    public function pi(float $x, float $p, int $q = 1): float
    {
        $V  = $this->regressionVariance($x) + 1 / $q;
        $σ² = $this->meanSquareResidual();

        // The t-value
        $studentT = new StudentT($this->ν);
        $t = $studentT->inverse2Tails($p);

        return $t * \sqrt($σ² * $V);
    }
}
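
// Usage sketch for the interval methods (illustrative; assumes a concrete regression
// class such as MathPHP\Statistics\Regression\Linear composes this trait):
//
//   $regression = new \MathPHP\Statistics\Regression\Linear($points);
//   $ci = $regression->ci(4, 0.05);     // 95% confidence half-width at x = 4
//   $pi = $regression->pi(4, 0.05, 3);  // 95% prediction half-width for the mean of 3 trials at x = 4
//
// The fitted value evaluate(4) ± these half-widths gives the confidence and prediction bands.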