• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

adaptive-machine-learning / CapyMOA / 24504997141

16 Apr 2026 10:20AM UTC coverage: 74.708% (+0.002%) from 74.706%
24504997141

push

github

web-flow
feat: add DEMS algorithm (#348)

36 of 48 new or added lines in 2 files covered. (75.0%)

6714 of 8987 relevant lines covered (74.71%)

0.75 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

74.47
/src/capymoa/classifier/_dems.py
1
from __future__ import annotations
1✔
2

3
from typing import Optional
1✔
4

5
import numpy as np
1✔
6

7
from capymoa.base import MOAClassifier
1✔
8
from capymoa.stream import Schema
1✔
9
from capymoa._utils import build_cli_str_from_mapping_and_locals
1✔
10
from capymoa.type_alias import LabelProbabilities
1✔
11

12
from moa.classifiers.meta import DynamicEnsembleMemberSelection as _MOA_DEMS
1✔
13

14

15
class DynamicEnsembleMemberSelection(MOAClassifier):
    """Dynamic Ensemble Member Selection (DEMS).

    Dynamic Ensemble Member Selection (DEMS) [#0]_ dynamically selects a subset
    of ensemble members based on their estimated performance and tree-level
    information. Only SRP and ARF are included here because of the performance
    significance.

    >>> from capymoa.classifier import DynamicEnsembleMemberSelection
    >>> from capymoa.datasets import ElectricityTiny
    >>> from capymoa.evaluation import prequential_evaluation
    >>>
    >>> stream = ElectricityTiny()
    >>> classifier = DynamicEnsembleMemberSelection(stream.get_schema())
    >>> results = prequential_evaluation(stream, classifier, max_instances=1000)
    >>> print(f"{results.accuracy():.1f}")
    90.6

    .. [#0] `Dynamic Ensemble Member Selection for Data Stream Classification.
             Yibin Sun, Bernhard Pfahringer, Heitor Murilo Gomes, Albert Bifet.
             ACM Conference on Information and Knowledge Management (CIKM), 2025.
             <https://doi.org/10.1145/3746252.3761072>`_
    """

    def __init__(
        self,
        schema: Schema | None = None,
        random_seed: int = 0,
        ensemble_class: str = "StreamingRandomPatches",  # StreamingRandomPatches / SRP or AdaptiveRandomForest / ARF
        base_learner: str = "trees.HoeffdingTree -g 50 -c 0.01",
        tree_learner: str = "ARFHoeffdingTree -e 2000000 -g 50 -c 0.01",
        ensemble_size: int = 100,
        max_features: float | int | str | None = 0.6,
        training_method: str = "RandomPatches",
        lambda_param: float = 6.0,
        number_of_jobs: int = 1,
        drift_detection_method: str = "ADWINChangeDetector -a 1.0E-5",
        warning_detection_method: str = "ADWINChangeDetector -a 1.0E-4",
        disable_weighted_vote: bool = False,
        disable_drift_detection: bool = False,
        disable_background_learner: bool = False,
        k_value: int = 5,
        disable_self_optimising: bool = False,
    ):
        """Dynamic Ensemble Member Selection (DEMS) Classifier.

        :param schema: schema of the stream, forwarded to :class:`MOAClassifier`.
        :param random_seed: random seed, forwarded to :class:`MOAClassifier`.
        :param ensemble_class: which ensemble to use ("StreamingRandomPatches" /
            "SRP" or "AdaptiveRandomForest" / "ARF").
        :param base_learner: base classifier (used by SRP).
        :param tree_learner: ARF tree learner (only used by ARF, cannot be changed).
        :param ensemble_size: number of ensemble members.
        :param max_features: subspace size configuration, similar to SRP:
            float in [0, 1]: percentage of features (e.g. 0.6 = 60%), rounded
            to the nearest whole percent.
            int: exact number of features (note that the ints ``0`` and ``1``
            are exact counts, not percentages).
            "sqrt": use sqrt(M)+1.
            None: default (60%).
        :param training_method: "RandomSubspaces", "Resampling", or "RandomPatches" (SRP).
        :param lambda_param: Poisson lambda for bagging.
        :param number_of_jobs: number of parallel jobs for ARF (-1 = as many as possible).
        :param drift_detection_method: MOA CLI string for drift detector.
        :param warning_detection_method: MOA CLI string for warning detector.
        :param disable_weighted_vote: if True, disables accuracy-weighted voting.
        :param disable_drift_detection: if True, turns off drift detectors (and bkg learners).
        :param disable_background_learner: if True, turns off background learners.
        :param k_value: fixed K for DEMS when self-optimising is disabled. Values
            larger than ``ensemble_size`` are clamped to ``ensemble_size``.
        :param disable_self_optimising: if True, use the fixed k_value instead of self-optimising.
        :raises AssertionError: if ``ensemble_class`` or ``training_method`` is
            not one of the supported names.
        :raises ValueError: if ``max_features`` is not a supported value or
            ``k_value`` is smaller than 1.
        """
        # NOTE: the local variable names below are looked up through ``locals()``
        # by the CLI builder, so they must match the keys of ``mapping``.

        # --- Ensemble class (-e): accept both long names and abbreviations ---
        ensemble_class_map = {
            "StreamingRandomPatches": "StreamingRandomPatches",
            "SRP": "StreamingRandomPatches",
            "AdaptiveRandomForest": "AdaptiveRandomForest",
            "ARF": "AdaptiveRandomForest",
        }
        assert ensemble_class in ensemble_class_map, (
            f"{ensemble_class} is not a valid ensemble_class. Choose from {list(ensemble_class_map.keys())}"
        )
        ensemble_class_str = ensemble_class_map[ensemble_class]

        # --- Training method (-t) ---
        training_method_map = {
            "RandomSubspaces": "RandomSubspaces",
            "Resampling": "Resampling (bagging)",
            "RandomPatches": "Random Patches",
        }
        assert training_method in training_method_map, (
            f"{training_method} is not a valid training method. "
            f"Choose from {list(training_method_map.keys())}"
        )
        training_method_str = training_method_map[training_method]

        # --- Subspace configuration (-o, -m) ---
        feature_mode, max_features_per_ensemble_item = self._resolve_max_features(
            max_features
        )

        # Simple sanity check for k_value relative to ensemble_size
        if k_value < 1:
            raise ValueError("k_value must be >= 1")
        # An oversized K is not a hard failure: it is clamped to the ensemble size.
        k_value = min(k_value, ensemble_size)

        # Mapping from local variable names to MOA CLI flags
        mapping = {
            "ensemble_class_str": "-e",
            "base_learner": "-l",
            "ensemble_size": "-s",
            "feature_mode": "-o",
            "max_features_per_ensemble_item": "-m",
            "training_method_str": "-t",
            "lambda_param": "-a",
            "number_of_jobs": "-j",
            "drift_detection_method": "-x",
            "warning_detection_method": "-p",
            "disable_weighted_vote": "-w",
            "disable_drift_detection": "-u",
            "disable_background_learner": "-q",
            "k_value": "-k",
            "disable_self_optimising": "-f",
            "tree_learner": "-1",
        }

        config_str = build_cli_str_from_mapping_and_locals(mapping, locals())

        moa_learner = _MOA_DEMS()

        super().__init__(
            moa_learner=moa_learner,
            schema=schema,
            CLI=config_str,
            random_seed=random_seed,
        )

    @staticmethod
    def _resolve_max_features(max_features) -> tuple[str, int]:
        """Translate ``max_features`` into the subspace mode (-o) and size (-m).

        We mimic SRP wrapper semantics: the first element is one of the textual
        choices expected by MOA's subspaceModeOption, the second is the integer
        "m" value.

        :param max_features: float in [0, 1], int, ``"sqrt"`` or ``None``.
        :return: ``(feature_mode, max_features_per_ensemble_item)``.
        :raises ValueError: if ``max_features`` is none of the supported forms.
        """
        invalid_message = (
            "Invalid value for max_features. Valid options:\n"
            "  * float between 0.0 and 1.0 representing a percentage,\n"
            "  * an integer specifying exact number, or\n"
            "  * 'sqrt' for square root of total features."
        )

        # bool is a subclass of int; reject it explicitly so that True/False is
        # not silently interpreted as an exact feature count.
        if isinstance(max_features, bool):
            raise ValueError(invalid_message)

        if max_features is None:
            # Default: 60% of features
            return "Percentage (M * (m / 100))", 60

        if isinstance(max_features, float) and 0.0 <= max_features <= 1.0:
            # Percentage mode. Round rather than truncate: binary floating point
            # makes e.g. 0.29 * 100 == 28.999999999999996, which int() alone
            # would turn into 28 instead of the intended 29.
            return "Percentage (M * (m / 100))", int(round(max_features * 100))

        if isinstance(max_features, int):
            # Exact integer
            return "Specified m (integer value)", max_features

        if isinstance(max_features, str) and max_features == "sqrt":
            # MOA interprets the -1 placeholder together with the mode.
            return "sqrt(M)+1", -1

        raise ValueError(invalid_message)

    def predict_proba(self, instance) -> Optional[LabelProbabilities]:
        """Return the normalised class votes of the MOA learner for ``instance``.

        :param instance: instance to predict; its ``java_instance`` attribute
            is handed to the MOA learner.
        :return: the votes divided by their sum, zero-padded to the number of
            classes in the schema when a schema is available, or ``None`` when
            the vote total is at most ``1e-2``, NaN or infinite.
        """
        votes = np.array(
            self.moa_learner.getVotesForInstance(instance.java_instance),
            dtype=np.float64,
        )

        if self.schema is not None:
            # The learner may return fewer votes than there are classes; pad
            # the tail with zeros so the result covers every class.
            missing = self.schema.get_num_classes() - votes.shape[0]
            if missing > 0:
                votes = np.pad(votes, (0, missing))

        total = votes.sum()
        # Without meaningful vote mass there is nothing to normalise.
        if not np.isfinite(total) or total <= 1e-2:
            return None
        return votes / total
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc