/CompStats/interface.py
# Copyright 2025 Sergio Nava Muñoz and Mario Graff Guerrero

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at

#     http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from dataclasses import dataclass
from sklearn.metrics import balanced_accuracy_score
from sklearn.base import clone
import pandas as pd
import numpy as np
from CompStats.bootstrap import StatisticSamples
from CompStats.utils import progress_bar
from CompStats import measurements
from CompStats.measurements import SE, CI
from CompStats.utils import dataframe


class Perf(object):
    """Perf is an entry point to CompStats

    :param y_true: True measurements; alternatively, a pandas.DataFrame whose column 'y' contains the true measurements.
    :type y_true: numpy.ndarray or pandas.DataFrame
    :param score_func: Function that measures the performance, where the best algorithm obtains the highest value.
    :type score_func: Function where the first argument is :math:`y` and the second is :math:`\\hat{y}.`
    :param error_func: Function that measures the performance, where the best algorithm obtains the lowest value.
    :type error_func: Function where the first argument is :math:`y` and the second is :math:`\\hat{y}.`
    :param y_pred: Predictions; the algorithms are identified as alg-k, where k follows the order of the arguments in :py:attr:`y_pred.`
    :type y_pred: numpy.ndarray
    :param kwargs: Predictions; the algorithms are identified by their keyword.
    :type kwargs: numpy.ndarray
    :param n_jobs: Number of jobs used to compute the statistic; default=-1 uses all the threads.
    :type n_jobs: int
    :param num_samples: Number of bootstrap samples, default=500.
    :type num_samples: int
    :param use_tqdm: Whether to use tqdm.tqdm to visualize the progress, default=True.
    :type use_tqdm: bool


    >>> from sklearn.svm import LinearSVC
    >>> from sklearn.linear_model import LogisticRegression
    >>> from sklearn.ensemble import RandomForestClassifier
    >>> from sklearn.datasets import load_iris
    >>> from sklearn.model_selection import train_test_split
    >>> from sklearn.base import clone
    >>> from CompStats.interface import Perf
    >>> X, y = load_iris(return_X_y=True)
    >>> _ = train_test_split(X, y, test_size=0.3)
    >>> X_train, X_val, y_train, y_val = _
    >>> m = LinearSVC().fit(X_train, y_train)
    >>> hy = m.predict(X_val)
    >>> ens = RandomForestClassifier().fit(X_train, y_train)
    >>> perf = Perf(y_val, hy, forest=ens.predict(X_val))
    >>> perf
    <Perf>
    Statistic with its standard error (se)
    statistic (se)
    0.9792 (0.0221) <= alg-1
    0.9744 (0.0246) <= forest

    If an algorithm's predictions are missing, they can be added by calling the instance, as shown in the following instruction. Note that the algorithm's name can also be given with the keyword :py:attr:`name.`

    >>> lr = LogisticRegression().fit(X_train, y_train)
    >>> perf(lr.predict(X_val), name='Log. Reg.')
    <Perf>
    Statistic with its standard error (se)
    statistic (se)
    1.0000 (0.0000) <= Log. Reg.
    0.9792 (0.0221) <= alg-1
    0.9744 (0.0246) <= forest

    The performance function used to compare the algorithms can be changed; cloning the instance keeps the same bootstrap samples, so the new values are computed on identical samples, as can be seen in the following example.

    >>> perf_error = clone(perf)
    >>> perf_error.error_func = lambda y, hy: (y != hy).mean()
    >>> perf_error
    <Perf>
    Statistic with its standard error (se)
    statistic (se)
    0.0000 (0.0000) <= Log. Reg.
    0.0222 (0.0237) <= alg-1
    0.0222 (0.0215) <= forest

    """
    def __init__(self, y_true, *y_pred,
                 name:str=None,
                 score_func=balanced_accuracy_score,
                 error_func=None,
                 num_samples: int=500,
                 n_jobs: int=-1,
                 use_tqdm=True,
                 **kwargs):
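        # exactly one of score_func and error_func must be provided; the
        # property setters keep the two mutually exclusive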
        assert (score_func is None) ^ (error_func is None)
        self.score_func = score_func
        self.error_func = error_func
        algs = {}
        if name is not None:
            if isinstance(name, str):
                name = [name]
        else:
            name = [f'alg-{k+1}' for k, _ in enumerate(y_pred)]
        for key, v in zip(name, y_pred):
            algs[key] = np.asanyarray(v)
        algs.update(**kwargs)
        self.predictions = algs
        self.y_true = y_true
        self.num_samples = num_samples
        self.n_jobs = n_jobs
        self.use_tqdm = use_tqdm
        self.sorting_func = np.linalg.norm
        self._init()

    def _init(self):
        """Compute the bootstrap statistic"""

        bib = self.score_func is not None
        if hasattr(self, '_statistic_samples'):
            _ = self.statistic_samples
            _.BiB = bib
        else:
            _ = StatisticSamples(statistic=self.statistic_func,
                                 n_jobs=self.n_jobs,
                                 num_samples=self.num_samples,
                                 BiB=bib)
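            # the bootstrap sample indices are drawn once and stored; every
            # algorithm is evaluated on the same samples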
            _.samples(N=self.y_true.shape[0])
        self.statistic_samples = _

    def get_params(self):
        """Parameters"""

        return dict(y_true=self.y_true,
                    score_func=self.score_func,
                    error_func=self.error_func,
                    num_samples=self.num_samples,
                    n_jobs=self.n_jobs)

    def __sklearn_clone__(self):
        klass = self.__class__
        params = self.get_params()
        ins = klass(**params)
        ins.predictions = dict(self.predictions)
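        # share the bootstrap sample indices with the clone so that any new
        # statistic is computed on identical samples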
        ins._statistic_samples._samples = self.statistic_samples._samples
        ins.sorting_func = self.sorting_func
        return ins

    def __repr__(self):
        """Prediction statistics with the standard error in parentheses"""
        arg = 'score_func' if self.error_func is None else 'error_func'
        func_name = self.statistic_func.__name__
        statistic = self.statistic
        if isinstance(statistic, dict):
            return f"<{self.__class__.__name__}({arg}={func_name})>\n{self}"
        elif isinstance(statistic, float):
            return f"<{self.__class__.__name__}({arg}={func_name}, statistic={statistic:0.4f}, se={self.se:0.4f})>"
        desc = [f'{k:0.4f}' for k in statistic]
        desc = ', '.join(desc)
        desc_se = [f'{k:0.4f}' for k in self.se]
        desc_se = ', '.join(desc_se)
        return f"<{self.__class__.__name__}({arg}={func_name}, statistic=[{desc}], se=[{desc_se}])>"

    def __str__(self):
        """Prediction statistics with the standard error in parentheses"""
        if not isinstance(self.statistic, dict):
            return self.__repr__()

        se = self.se
        output = ["Statistic with its standard error (se)"]
        output.append("statistic (se)")
        for key, value in self.statistic.items():
            if isinstance(value, float):
                desc = f'{value:0.4f} ({se[key]:0.4f}) <= {key}'
            else:
                desc = [f'{v:0.4f} ({k:0.4f})'
                        for v, k in zip(value, se[key])]
                desc = ', '.join(desc)
                desc = f'{desc} <= {key}'
            output.append(desc)
        return "\n".join(output)

    def __call__(self, y_pred, name=None):
        """Add predictions"""
        if name is None:
            k = len(self.predictions) + 1
            name = f'alg-{k}'
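        # invalidate the cached best system and statistic; they are recomputed
        # lazily, and any previous bootstrap result under this name is
        # discarded below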
        self.best = None
        self.statistic = None
        self.predictions[name] = np.asanyarray(y_pred)
        samples = self._statistic_samples
        calls = samples.calls
        if name in calls:
            del calls[name]
        return self

    def difference(self, wrt: str=None):
        """Compute the difference w.r.t. an algorithm; by default, the one with the best performance

        >>> from sklearn.svm import LinearSVC
        >>> from sklearn.ensemble import RandomForestClassifier
        >>> from sklearn.datasets import load_iris
        >>> from sklearn.model_selection import train_test_split
        >>> from sklearn.base import clone
        >>> from CompStats.interface import Perf
        >>> X, y = load_iris(return_X_y=True)
        >>> _ = train_test_split(X, y, test_size=0.3)
        >>> X_train, X_val, y_train, y_val = _
        >>> m = LinearSVC().fit(X_train, y_train)
        >>> hy = m.predict(X_val)
        >>> ens = RandomForestClassifier().fit(X_train, y_train)
        >>> perf = Perf(y_val, hy, forest=ens.predict(X_val))
        >>> perf.difference()
        <Difference>
        difference p-values w.r.t alg-1
        0.0600 <= forest
        """
        if wrt is None:
            wrt = self.best
        if isinstance(wrt, str):
            base = self.statistic_samples.calls[wrt]
        else:
            base = np.array([self.statistic_samples.calls[key][:, col]
                             for col, key in enumerate(wrt)]).T
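        # sign convention: with a score (bigger is better, BiB) the difference
        # is base - v, so positive values favor the reference algorithm; with
        # an error function the sign is flipped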
        sign = 1 if self.statistic_samples.BiB else -1
        diff = dict()
        for k, v in self.statistic_samples.calls.items():
            if base.ndim == 1 and k == wrt:
                continue
            diff[k] = sign * (base - v)
        diff_ins = Difference(statistic_samples=clone(self.statistic_samples),
                              statistic=self.statistic)
        diff_ins.sorting_func = self.sorting_func
        diff_ins.statistic_samples.calls = diff
        diff_ins.statistic_samples.info['best'] = self.best
        diff_ins.best = self.best
        return diff_ins

    @property
    def best(self):
        """System with best performance"""
        if hasattr(self, '_best') and self._best is not None:
            return self._best
        if not isinstance(self.statistic, dict):
            key, value = list(self.statistic_samples.calls.items())[0]
            if value.ndim == 1:
                self._best = key
            else:
                self._best = np.array([key] * value.shape[1])
            return self._best
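        # with several performance values per system (one per column), the
        # best system is selected independently for each column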
        BiB = bool(self.statistic_samples.BiB)
        keys = np.array(list(self.statistic.keys()))
        data = np.asanyarray([self.statistic[k]
                              for k in keys])
        if isinstance(self.statistic[keys[0]], np.ndarray):
            if BiB:
                best = data.argmax(axis=0)
            else:
                best = data.argmin(axis=0)
        else:
            if BiB:
                best = data.argmax()
            else:
                best = data.argmin()
        self._best = keys[best]
        return self._best

    @best.setter
    def best(self, value):
        self._best = value

    @property
    def sorting_func(self):
        """Rank systems when multiple performances are used"""
        return self._sorting_func

    @sorting_func.setter
    def sorting_func(self, value):
        self._sorting_func = value

    @property
    def statistic(self):
        """Statistic

        >>> from sklearn.svm import LinearSVC
        >>> from sklearn.ensemble import RandomForestClassifier
        >>> from sklearn.datasets import load_iris
        >>> from sklearn.model_selection import train_test_split
        >>> from CompStats.interface import Perf
        >>> X, y = load_iris(return_X_y=True)
        >>> _ = train_test_split(X, y, test_size=0.3)
        >>> X_train, X_val, y_train, y_val = _
        >>> m = LinearSVC().fit(X_train, y_train)
        >>> hy = m.predict(X_val)
        >>> ens = RandomForestClassifier().fit(X_train, y_train)
        >>> perf = Perf(y_val, hy, forest=ens.predict(X_val))
        >>> perf.statistic
        {'alg-1': 1.0, 'forest': 0.9500891265597148}
        """
        if hasattr(self, '_statistic') and self._statistic is not None:
            return self._statistic
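        # systems are sorted by performance so the best one comes first:
        # descending for scores, ascending for errors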
        BiB = self.score_func is not None
        data = sorted([(k, self.statistic_func(self.y_true, v))
                       for k, v in self.predictions.items()],
                      key=lambda x: self.sorting_func(x[1]),
                      reverse=BiB)
        if len(data) == 1:
            self._statistic = data[0][1]
        else:
            self._statistic = dict(data)
        return self._statistic

    @statistic.setter
    def statistic(self, value):
        """statistic setter"""
        self._statistic = value

    @property
    def se(self):
        """Standard Error

        >>> from sklearn.svm import LinearSVC
        >>> from sklearn.ensemble import RandomForestClassifier
        >>> from sklearn.datasets import load_iris
        >>> from sklearn.model_selection import train_test_split
        >>> from CompStats.interface import Perf
        >>> X, y = load_iris(return_X_y=True)
        >>> _ = train_test_split(X, y, test_size=0.3)
        >>> X_train, X_val, y_train, y_val = _
        >>> m = LinearSVC().fit(X_train, y_train)
        >>> hy = m.predict(X_val)
        >>> ens = RandomForestClassifier().fit(X_train, y_train)
        >>> perf = Perf(y_val, hy, forest=ens.predict(X_val))
        >>> perf.se
        {'alg-1': 0.0, 'forest': 0.026945730782184187}
        """

        output = SE(self.statistic_samples)
        if len(output) == 1:
            return list(output.values())[0]
        return output

    @property
    def ci(self):
        """Confidence interval

        >>> from sklearn.svm import LinearSVC
        >>> from sklearn.datasets import load_iris
        >>> from sklearn.model_selection import train_test_split
        >>> from CompStats.interface import Perf
        >>> X, y = load_iris(return_X_y=True)
        >>> _ = train_test_split(X, y, test_size=0.3)
        >>> X_train, X_val, y_train, y_val = _
        >>> m = LinearSVC().fit(X_train, y_train)
        >>> hy = m.predict(X_val)
        >>> perf = Perf(y_val, hy, name='LinearSVC')
        >>> perf.ci
        (np.float64(0.9333333333333332), np.float64(1.0))
        """

        output = CI(self.statistic_samples)
        if len(output) == 1:
            return list(output.values())[0]
        return output

    def plot(self, value_name:str=None,
             var_name:str='Performance',
             alg_legend:str='Algorithm',
             perf_names:list=None,
             CI:float=0.05,
             kind:str='point', linestyle:str='none',
             col_wrap:int=3, capsize:float=0.2,
             comparison:bool=True,
             right:bool=True,
             comp_legend:str='Comparison',
             winner_legend:str='Best',
             tie_legend:str='Equivalent',
             loser_legend:str='Different',
             palette:object=None,
             **kwargs):
        """plot with seaborn

        >>> from sklearn.svm import LinearSVC
        >>> from sklearn.ensemble import RandomForestClassifier
        >>> from sklearn.datasets import load_iris
        >>> from sklearn.model_selection import train_test_split
        >>> from CompStats.interface import Perf
        >>> X, y = load_iris(return_X_y=True)
        >>> _ = train_test_split(X, y, test_size=0.3)
        >>> X_train, X_val, y_train, y_val = _
        >>> m = LinearSVC().fit(X_train, y_train)
        >>> hy = m.predict(X_val)
        >>> ens = RandomForestClassifier().fit(X_train, y_train)
        >>> perf = Perf(y_val, hy, score_func=None,
                        error_func=lambda y, hy: (y != hy).mean(),
                        forest=ens.predict(X_val))
        >>> perf.plot()
        """
        import seaborn as sns
        if value_name is None:
            if self.score_func is not None:
                value_name = 'Score'
            else:
                value_name = 'Error'
        if not isinstance(self.statistic, dict):
            comparison = False
        best = self.best
        if isinstance(best, np.ndarray):
            if best.shape[0] < col_wrap:
                col_wrap = best.shape[0]
        df = self.dataframe(value_name=value_name, var_name=var_name,
                            alg_legend=alg_legend, perf_names=perf_names,
                            comparison=comparison, alpha=CI, right=right,
                            comp_legend=comp_legend,
                            winner_legend=winner_legend,
                            tie_legend=tie_legend,
                            loser_legend=loser_legend)
        if var_name not in df.columns:
            var_name = None
            col_wrap = None
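        # seaborn applies this function to each system's bootstrap samples to
        # draw the error bars; CI is the alpha level of the interval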
        ci = lambda x: measurements.CI(x, alpha=CI)
        if comparison:
            kwargs.update(dict(hue=comp_legend))
            if palette is None:
                pal = sns.color_palette("Paired")
                palette = {winner_legend: pal[1],
                           tie_legend: pal[3],
                           loser_legend: pal[5]}
        f_grid = sns.catplot(df, x=value_name, errorbar=ci,
                             y=alg_legend, col=var_name,
                             kind=kind, linestyle=linestyle,
                             col_wrap=col_wrap, capsize=capsize,
                             palette=palette,
                             **kwargs)
        return f_grid

    def dataframe(self, comparison:bool=False,
                  right:bool=True,
                  alpha:float=0.05,
                  value_name:str='Score',
                  var_name:str='Performance',
                  alg_legend:str='Algorithm',
                  comp_legend:str='Comparison',
                  winner_legend:str='Best',
                  tie_legend:str='Equivalent',
                  loser_legend:str='Different',
                  perf_names:str=None):
        """Dataframe

        >>> from sklearn.svm import LinearSVC
        >>> from sklearn.ensemble import RandomForestClassifier
        >>> from sklearn.datasets import load_iris
        >>> from sklearn.model_selection import train_test_split
        >>> from CompStats.interface import Perf
        >>> X, y = load_iris(return_X_y=True)
        >>> _ = train_test_split(X, y, test_size=0.3)
        >>> X_train, X_val, y_train, y_val = _
        >>> m = LinearSVC().fit(X_train, y_train)
        >>> hy = m.predict(X_val)
        >>> ens = RandomForestClassifier().fit(X_train, y_train)
        >>> perf = Perf(y_val, hy, forest=ens.predict(X_val))
        >>> df = perf.dataframe()
        """
        if perf_names is None and isinstance(self.best, np.ndarray):
            func_name = self.statistic_func.__name__
            perf_names = [f'{func_name}({i})'
                          for i, k in enumerate(self.best)]
        df = dataframe(self, value_name=value_name,
                       var_name=var_name,
                       alg_legend=alg_legend,
                       perf_names=perf_names)
        if not comparison:
            return df
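        # all systems start labeled as equivalent; those whose difference
        # w.r.t. the best is significant (p < alpha) are relabeled, and the
        # best system receives the winner label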
        df[comp_legend] = tie_legend
        diff = self.difference()
        best = self.best
        if isinstance(best, str):
            for name, p in diff.p_value(right=right).items():
                if p >= alpha:
                    continue
                df.loc[df[alg_legend] == name, comp_legend] = loser_legend
            df.loc[df[alg_legend] == best, comp_legend] = winner_legend
        else:
            p_values = diff.p_value(right=right)
            systems = list(p_values.keys())
            p_values = np.array([p_values[k] for k in systems])
            for name, p_value, winner in zip(perf_names,
                                             p_values.T,
                                             best):
                mask = df[var_name] == name
                for alg, p in zip(systems, p_value):
                    if p >= alpha and winner != alg:
                        continue
                    _ = mask & (df[alg_legend] == alg)
                    if winner == alg:
                        df.loc[_, comp_legend] = winner_legend
                    else:
                        df.loc[_, comp_legend] = loser_legend
        return df

    @property
    def n_jobs(self):
        """Number of jobs to compute the statistics"""
        return self._n_jobs

    @n_jobs.setter
    def n_jobs(self, value):
        self._n_jobs = value

    @property
    def statistic_func(self):
        """Statistic function"""
        if self.score_func is not None:
            return self.score_func
        return self.error_func

    @property
    def statistic_samples(self):
        """Statistic Samples"""

        samples = self._statistic_samples
        algs = set(samples.calls.keys())
        algs = set(self.predictions.keys()) - algs
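        # compute the bootstrap statistic only for predictions added since
        # the last access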
        if len(algs):
            for key in progress_bar(algs, use_tqdm=self.use_tqdm):
                samples(self.y_true, self.predictions[key], name=key)
        return self._statistic_samples

    @statistic_samples.setter
    def statistic_samples(self, value):
        self._statistic_samples = value

    @property
    def num_samples(self):
        """Number of bootstrap samples"""
        return self._num_samples

    @num_samples.setter
    def num_samples(self, value):
        self._num_samples = value

    @property
    def predictions(self):
        """Predictions"""
        return self._predictions

    @predictions.setter
    def predictions(self, value):
        self._predictions = value

    @property
    def y_true(self):
        """True output, gold standard o :math:`y`"""
561

562
        return self._y_true
3✔
563

    @y_true.setter
    def y_true(self, value):
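        # a pandas.DataFrame bundles the gold standard (column 'y') with the
        # predictions; every other column is treated as an algorithm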
        if isinstance(value, pd.DataFrame):
            self._y_true = value['y'].to_numpy()
            algs = {}
            for c in value.columns:
                if c == 'y':
                    continue
                algs[c] = value[c].to_numpy()
            self.predictions.update(algs)
            return
        self._y_true = np.asanyarray(value)

    @property
    def score_func(self):
        """Score function"""
        return self._score_func

    @score_func.setter
    def score_func(self, value):
        self._score_func = value
        if value is not None:
            self.error_func = None
            if hasattr(self, '_statistic_samples'):
                self._statistic_samples.statistic = value
                self._statistic_samples.BiB = True

    @property
    def error_func(self):
        """Error function"""
        return self._error_func

    @error_func.setter
    def error_func(self, value):
        self._error_func = value
        if value is not None:
            self.score_func = None
            if hasattr(self, '_statistic_samples'):
                self._statistic_samples.statistic = value
                self._statistic_samples.BiB = False


@dataclass
class Difference:
    """Difference

    >>> from sklearn.svm import LinearSVC
    >>> from sklearn.ensemble import RandomForestClassifier
    >>> from sklearn.datasets import load_iris
    >>> from sklearn.model_selection import train_test_split
    >>> from sklearn.base import clone
    >>> from CompStats.interface import Perf
    >>> X, y = load_iris(return_X_y=True)
    >>> _ = train_test_split(X, y, test_size=0.3)
    >>> X_train, X_val, y_train, y_val = _
    >>> m = LinearSVC().fit(X_train, y_train)
    >>> hy = m.predict(X_val)
    >>> ens = RandomForestClassifier().fit(X_train, y_train)
    >>> perf = Perf(y_val, hy, forest=ens.predict(X_val))
    >>> diff = perf.difference()
    >>> diff
    <Difference>
    difference p-values w.r.t alg-1
    0.0780 <= forest
    """

    statistic_samples:StatisticSamples=None
    statistic:dict=None
    best:str=None

    @property
    def sorting_func(self):
        """Rank systems when multiple performances are used"""
        return self._sorting_func

    @sorting_func.setter
    def sorting_func(self, value):
        self._sorting_func = value

    def __repr__(self):
        """p-value"""
        return f"<{self.__class__.__name__}>\n{self}"

    def __str__(self):
        """p-value"""
        if isinstance(self.best, str):
            best = f' w.r.t {self.best}'
        else:
            best = ''
        output = [f"difference p-values{best}"]
        best = self.best
        if isinstance(best, np.ndarray):
            desc = ', '.join(best)
            output.append(f'{desc} <= Best')
        for key, value in self.p_value().items():
            if isinstance(value, float):
                output.append(f'{value:0.4f} <= {key}')
            else:
                desc = [f'{v:0.4f}' for v in value]
                desc = ', '.join(desc)
                desc = f'{desc} <= {key}'
                output.append(desc)
        return "\n".join(output)

    def _delta_best(self):
        """Statistic of the best system; one value per performance when several are used"""
        if isinstance(self.best, str):
            return self.statistic[self.best]
        keys = np.unique(self.best)
        statistic = np.array([self.statistic[k]
                              for k in keys])
        m = {v: k for k, v in enumerate(keys)}
        best = np.array([m[x] for x in self.best])
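        # pick, for every column, the statistic of that column's best system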
        return statistic[best, np.arange(best.shape[0])]

    def p_value(self, right:bool=True):
        """Compute the p-value of the differences

        :param right: Estimate the p-value using :math:`\\text{sample} \\geq 2\\delta`
        :type right: bool

        >>> from sklearn.svm import LinearSVC
        >>> from sklearn.ensemble import RandomForestClassifier
        >>> from sklearn.datasets import load_iris
        >>> from sklearn.model_selection import train_test_split
        >>> from sklearn.base import clone
        >>> from CompStats.interface import Perf
        >>> X, y = load_iris(return_X_y=True)
        >>> _ = train_test_split(X, y, test_size=0.3)
        >>> X_train, X_val, y_train, y_val = _
        >>> m = LinearSVC().fit(X_train, y_train)
        >>> hy = m.predict(X_val)
        >>> ens = RandomForestClassifier().fit(X_train, y_train)
        >>> perf = Perf(y_val, hy, forest=ens.predict(X_val))
        >>> diff = perf.difference()
        >>> diff.p_value()
        {'forest': np.float64(0.3)}
        """
        values = []
        sign = 1 if self.statistic_samples.BiB else -1
        delta_best = self._delta_best()
        for k, v in self.statistic_samples.calls.items():
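            # the bootstrap differences are centered at the observed
            # difference delta; shifting the null distribution to zero turns
            # P(sample >= delta | H0) into the proportion of samples >= 2*delta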
            delta = 2 * sign * (delta_best - self.statistic[k])
            if not isinstance(delta_best, np.ndarray):
                if right:
                    values.append((k, (v >= delta).mean()))
                else:
                    values.append((k, (v <= 0).mean()))
            else:
                if right:
                    values.append((k, (v >= delta).mean(axis=0)))
                else:
                    values.append((k, (v <= 0).mean(axis=0)))
        values.sort(key=lambda x: self.sorting_func(x[1]))
        return dict(values)

    def dataframe(self, value_name:str='Score',
                  var_name:str='Best',
                  alg_legend:str='Algorithm',
                  sig_legend:str='Significant',
                  perf_names:str=None,
                  right:bool=True,
                  alpha:float=0.05):
        """Dataframe"""
        if perf_names is None and isinstance(self.best, np.ndarray):
            perf_names = [f'{alg}({k})'
                          for k, alg in enumerate(self.best)]
        df = dataframe(self, value_name=value_name,
                       var_name=var_name,
                       alg_legend=alg_legend,
                       perf_names=perf_names)
        df[sig_legend] = False
        if isinstance(self.best, str):
            for name, p in self.p_value(right=right).items():
                if p >= alpha:
                    continue
                df.loc[df[alg_legend] == name, sig_legend] = True
        else:
            p_values = self.p_value(right=right)
            systems = list(p_values.keys())
            p_values = np.array([p_values[k] for k in systems])
            for name, p_value in zip(perf_names, p_values.T):
                mask = df[var_name] == name
                for alg, p in zip(systems, p_value):
                    if p >= alpha:
                        continue
                    _ = mask & (df[alg_legend] == alg)
                    df.loc[_, sig_legend] = True
        return df

    def plot(self, value_name:str='Difference',
             var_name:str='Best',
             alg_legend:str='Algorithm',
             sig_legend:str='Significant',
             perf_names:list=None,
             alpha:float=0.05,
             right:bool=True,
             kind:str='point', linestyle:str='none',
             col_wrap:int=3, capsize:float=0.2,
             set_refline:bool=True,
             **kwargs):
        """Plot

        >>> from sklearn.svm import LinearSVC
        >>> from sklearn.ensemble import RandomForestClassifier
        >>> from sklearn.datasets import load_iris
        >>> from sklearn.model_selection import train_test_split
        >>> from sklearn.base import clone
        >>> from CompStats.interface import Perf
        >>> X, y = load_iris(return_X_y=True)
        >>> _ = train_test_split(X, y, test_size=0.3)
        >>> X_train, X_val, y_train, y_val = _
        >>> m = LinearSVC().fit(X_train, y_train)
        >>> hy = m.predict(X_val)
        >>> ens = RandomForestClassifier().fit(X_train, y_train)
        >>> perf = Perf(y_val, hy, forest=ens.predict(X_val))
        >>> diff = perf.difference()
        >>> diff.plot()
        """
        import seaborn as sns
        df = self.dataframe(value_name=value_name,
                            var_name=var_name,
                            alg_legend=alg_legend,
                            sig_legend=sig_legend,
                            perf_names=perf_names,
                            alpha=alpha, right=right)
        title = var_name
        if var_name not in df.columns:
            var_name = None
            col_wrap = None
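        # the error bars span a (1 - 2 * alpha) bootstrap interval so that a
        # bar that excludes the zero reference line corresponds to a one-sided
        # test at level alpha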
        ci = lambda x: measurements.CI(x, alpha=2*alpha)
        f_grid = sns.catplot(df, x=value_name, errorbar=ci,
                             y=alg_legend, col=var_name,
                             kind=kind, linestyle=linestyle,
                             col_wrap=col_wrap, capsize=capsize,
                             hue=sig_legend,
                             **kwargs)
        if set_refline:
            f_grid.refline(x=0)
        if isinstance(self.best, str):
            f_grid.facet_axis(0, 0).set_title(f'{title} = {self.best}')
        return f_grid