
freqtrade / freqtrade, build 4131164979 (push, via github-actions; pending completion)
Commit author: Matthias. Commit message: "filled-date shouldn't update again"

1 of 1 new or added line in 1 file covered (100.0%)
17024 of 17946 relevant lines covered (94.86%)
0.95 hits per line

Source File

/freqtrade/freqai/freqai_interface.py (67.58% of lines covered)

import inspect
import logging
import threading
import time
from abc import ABC, abstractmethod
from collections import deque
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, List, Literal, Optional, Tuple

import numpy as np
import pandas as pd
import psutil
from numpy.typing import NDArray
from pandas import DataFrame

from freqtrade.configuration import TimeRange
from freqtrade.constants import Config
from freqtrade.data.dataprovider import DataProvider
from freqtrade.enums import RunMode
from freqtrade.exceptions import OperationalException
from freqtrade.exchange import timeframe_to_seconds
from freqtrade.freqai.data_drawer import FreqaiDataDrawer
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
from freqtrade.freqai.utils import plot_feature_importance, record_params
from freqtrade.strategy.interface import IStrategy


pd.options.mode.chained_assignment = None
logger = logging.getLogger(__name__)


class IFreqaiModel(ABC):
    """
    Class containing all tools for training and prediction in the strategy.
    Base*PredictionModels inherit from this class.

    Record of contribution:
    FreqAI was developed by a group of individuals who all contributed specific skillsets to the
    project.

    Conception and software development:
    Robert Caulk @robcaulk

    Theoretical brainstorming:
    Elin Törnquist @th0rntwig

    Code review, software architecture brainstorming:
    @xmatthias

    Beta testing and bug reporting:
    @bloodhunter4rc, Salah Lamkadem @ikonx, @ken11o2, @longyu, @paranoidandy, @smidelis, @smarm
    Juha Nykänen @suikula, Wagner Costa @wagnercosta, Johan Vlugt @Jooopieeert
    """

    def __init__(self, config: Config) -> None:

        self.config = config
        self.assert_config(self.config)
        self.freqai_info: Dict[str, Any] = config["freqai"]
        self.data_split_parameters: Dict[str, Any] = config.get("freqai", {}).get(
            "data_split_parameters", {})
        self.model_training_parameters: Dict[str, Any] = config.get("freqai", {}).get(
            "model_training_parameters", {})
        self.identifier: str = self.freqai_info.get("identifier", "no_id_provided")
        self.retrain = False
        self.first = True
        self.set_full_path()
        self.follow_mode: bool = self.freqai_info.get("follow_mode", False)
        self.save_backtest_models: bool = self.freqai_info.get("save_backtest_models", True)
        if self.save_backtest_models:
            logger.info('Backtesting module configured to save all models.')

        self.dd = FreqaiDataDrawer(Path(self.full_path), self.config, self.follow_mode)
        # set current candle to arbitrary historical date
        self.current_candle: datetime = datetime.fromtimestamp(637887600, tz=timezone.utc)
        self.dd.current_candle = self.current_candle
        self.scanning = False
        self.ft_params = self.freqai_info["feature_parameters"]
        self.corr_pairlist: List[str] = self.ft_params.get("include_corr_pairlist", [])
        self.keras: bool = self.freqai_info.get("keras", False)
        if self.keras and self.ft_params.get("DI_threshold", 0):
            self.ft_params["DI_threshold"] = 0
            logger.warning("DI threshold is not configured for Keras models yet. Deactivating.")
        self.CONV_WIDTH = self.freqai_info.get('conv_width', 1)
        if self.ft_params.get("inlier_metric_window", 0):
            self.CONV_WIDTH = self.ft_params.get("inlier_metric_window", 0) * 2
        self.pair_it = 0
        self.pair_it_train = 0
        self.total_pairs = len(self.config.get("exchange", {}).get("pair_whitelist"))
        self.train_queue = self._set_train_queue()
        self.inference_time: float = 0
        self.train_time: float = 0
        self.begin_time: float = 0
        self.begin_time_train: float = 0
        self.base_tf_seconds = timeframe_to_seconds(self.config['timeframe'])
        self.continual_learning = self.freqai_info.get('continual_learning', False)
        self.plot_features = self.ft_params.get("plot_feature_importances", 0)
        self.corr_dataframes: Dict[str, DataFrame] = {}
        # get_corr_dataframes is controlling the caching of corr_dataframes
        # for improved performance. Careful with this boolean.
        self.get_corr_dataframes: bool = True
        self._threads: List[threading.Thread] = []
        self._stop_event = threading.Event()
        self.metadata: Dict[str, Any] = self.dd.load_global_metadata_from_disk()
        self.data_provider: Optional[DataProvider] = None
        self.max_system_threads = max(int(psutil.cpu_count() * 2 - 2), 1)
        self.can_short = True  # overridden in start() with strategy.can_short

        self.warned_deprecated_populate_any_indicators = False

        record_params(config, self.full_path)

    def __getstate__(self):
        """
        Return an empty state to be pickled in hyperopt
        """
        return ({})

    def assert_config(self, config: Config) -> None:

        if not config.get("freqai", {}):
            raise OperationalException("No freqai parameters found in configuration file.")

    def start(self, dataframe: DataFrame, metadata: dict, strategy: IStrategy) -> DataFrame:
        """
        Entry point to the FreqaiModel from a specific pair. It will train a new model if
        necessary before making the prediction.

        :param dataframe: Full dataframe coming from strategy - it contains the entire
                          backtesting timerange + additional historical data necessary to
                          train the model.
        :param metadata: pair metadata coming from strategy.
        :param strategy: Strategy to train on
        """

        self.live = strategy.dp.runmode in (RunMode.DRY_RUN, RunMode.LIVE)
        self.dd.set_pair_dict_info(metadata)
        self.data_provider = strategy.dp
        self.can_short = strategy.can_short

        # check if the strategy has deprecated populate_any_indicators function
        self.check_deprecated_populate_any_indicators(strategy)

        if self.live:
            self.inference_timer('start')
            self.dk = FreqaiDataKitchen(self.config, self.live, metadata["pair"])
            dk = self.start_live(dataframe, metadata, strategy, self.dk)
            dataframe = dk.remove_features_from_df(dk.return_dataframe)

        # For backtesting, each pair enters and then gets trained for each window along the
        # sliding window defined by "train_period_days" (training window) and
        # "backtest_period_days" (backtest window, i.e. window immediately following the
        # training window). FreqAI slides the window and sequentially builds the backtesting
        # results before returning the concatenated results for the full backtesting period
        # back to the strategy.
        elif not self.follow_mode:
            self.dk = FreqaiDataKitchen(self.config, self.live, metadata["pair"])
            if not self.config.get("freqai_backtest_live_models", False):
                logger.info(f"Training {len(self.dk.training_timeranges)} timeranges")
                dk = self.start_backtesting(dataframe, metadata, self.dk, strategy)
                dataframe = dk.remove_features_from_df(dk.return_dataframe)
            else:
                logger.info(
                    "Backtesting using historic predictions (live models)")
                dk = self.start_backtesting_from_historic_predictions(
                    dataframe, metadata, self.dk)
                dataframe = dk.return_dataframe

        self.clean_up()
        if self.live:
            self.inference_timer('stop', metadata["pair"])

        return dataframe

    def clean_up(self):
        """
        Objects that should be handled by GC already between coins, but
        are explicitly shown here to help demonstrate the non-persistence of these
        objects.
        """
        self.model = None
        self.dk = None

    def _on_stop(self):
        """
        Callback for subclasses to override to include logic for shutting down resources
        when SIGINT is sent.
        """
        return

    def shutdown(self):
        """
        Cleans up threads on shutdown and sets the stop event. Joins threads to wait
        for the current training iteration.
        """
        logger.info("Stopping FreqAI")
        self._stop_event.set()

        self.data_provider = None
        self._on_stop()

        logger.info("Waiting on Training iteration")
        for _thread in self._threads:
            _thread.join()

    def start_scanning(self, *args, **kwargs) -> None:
        """
        Start `self._start_scanning` in a separate thread
        """
        _thread = threading.Thread(target=self._start_scanning, args=args, kwargs=kwargs)
        self._threads.append(_thread)
        _thread.start()

    def _start_scanning(self, strategy: IStrategy) -> None:
        """
        Function designed to constantly scan pairs for retraining on a separate thread
        (intracandle) to improve model youth. This function is agnostic to data
        preparation/collection/storage; it simply trains on whatever data is available
        in self.dd.
        :param strategy: IStrategy = The user defined strategy class
        """
        while not self._stop_event.is_set():
            time.sleep(1)
            pair = self.train_queue[0]

            # ensure pair is available in dp
            if pair not in strategy.dp.current_whitelist():
                self.train_queue.popleft()
                logger.warning(f'{pair} not in current whitelist, removing from train queue.')
                continue

            (_, trained_timestamp, _) = self.dd.get_pair_dict_info(pair)

            dk = FreqaiDataKitchen(self.config, self.live, pair)
            (
                retrain,
                new_trained_timerange,
                data_load_timerange,
            ) = dk.check_if_new_training_required(trained_timestamp)

            if retrain:
                self.train_timer('start')
                dk.set_paths(pair, new_trained_timerange.stopts)
                try:
                    self.extract_data_and_train_model(
                        new_trained_timerange, pair, strategy, dk, data_load_timerange
                    )
                except Exception as msg:
                    logger.warning(f"Training {pair} raised exception {msg.__class__.__name__}. "
                                   f"Message: {msg}, skipping.")

                self.train_timer('stop', pair)

                # only rotate the queue after the first has been trained.
                self.train_queue.rotate(-1)

                self.dd.save_historic_predictions_to_disk()
                if self.freqai_info.get('write_metrics_to_disk', False):
                    self.dd.save_metric_tracker_to_disk()

    def start_backtesting(
        self, dataframe: DataFrame, metadata: dict, dk: FreqaiDataKitchen, strategy: IStrategy
    ) -> FreqaiDataKitchen:
        """
        The main broad execution for backtesting. For backtesting, each pair enters and then gets
        trained for each window along the sliding window defined by "train_period_days"
        (training window) and "backtest_period_days" (backtest window, i.e. window immediately
        following the training window). FreqAI slides the window and sequentially builds
        the backtesting results before returning the concatenated results for the full
        backtesting period back to the strategy.
        :param dataframe: DataFrame = strategy passed dataframe
        :param metadata: Dict = pair metadata
        :param dk: FreqaiDataKitchen = Data management/analysis tool associated to present pair only
        :param strategy: Strategy to train on
        :return:
            FreqaiDataKitchen = Data management/analysis tool associated to present pair only
        """

        self.pair_it += 1
        train_it = 0
        pair = metadata["pair"]
        populate_indicators = True
        check_features = True
        # Loop enforcing the sliding window training/backtesting paradigm
        # tr_train is the training time range e.g. 1 historical month
        # tr_backtest is the backtesting time range e.g. the week directly
        # following tr_train. Both of these windows slide through the
        # entire backtest
        for tr_train, tr_backtest in zip(dk.training_timeranges, dk.backtesting_timeranges):
            (_, _, _) = self.dd.get_pair_dict_info(pair)
            train_it += 1
            total_trains = len(dk.backtesting_timeranges)
            self.training_timerange = tr_train
            len_backtest_df = len(dataframe.loc[(dataframe["date"] >= tr_backtest.startdt) & (
                                  dataframe["date"] < tr_backtest.stopdt), :])

            if not self.ensure_data_exists(len_backtest_df, tr_backtest, pair):
                continue

            self.log_backtesting_progress(tr_train, pair, train_it, total_trains)

            timestamp_model_id = int(tr_train.stopts)
            if dk.backtest_live_models:
                timestamp_model_id = int(tr_backtest.startts)

            dk.set_paths(pair, timestamp_model_id)

            dk.set_new_model_names(pair, timestamp_model_id)

            if dk.check_if_backtest_prediction_is_valid(len_backtest_df):
                if check_features:
                    self.dd.load_metadata(dk)
                    dataframe_dummy_features = self.dk.use_strategy_to_populate_indicators(
                        strategy, prediction_dataframe=dataframe.tail(1), pair=metadata["pair"]
                    )
                    dk.find_features(dataframe_dummy_features)
                    self.check_if_feature_list_matches_strategy(dk)
                    check_features = False
                append_df = dk.get_backtesting_prediction()
                dk.append_predictions(append_df)
            else:
                if populate_indicators:
                    dataframe = self.dk.use_strategy_to_populate_indicators(
                        strategy, prediction_dataframe=dataframe, pair=metadata["pair"]
                    )
                    populate_indicators = False

                dataframe_base_train = dataframe.loc[dataframe["date"] < tr_train.stopdt, :]
                dataframe_base_train = strategy.set_freqai_targets(dataframe_base_train)
                dataframe_base_backtest = dataframe.loc[dataframe["date"] < tr_backtest.stopdt, :]
                dataframe_base_backtest = strategy.set_freqai_targets(dataframe_base_backtest)

                dataframe_train = dk.slice_dataframe(tr_train, dataframe_base_train)
                dataframe_backtest = dk.slice_dataframe(tr_backtest, dataframe_base_backtest)

                if not self.model_exists(dk):
                    dk.find_features(dataframe_train)
                    dk.find_labels(dataframe_train)

                    try:
                        self.model = self.train(dataframe_train, pair, dk)
                    except Exception as msg:
                        logger.warning(
                            f"Training {pair} raised exception {msg.__class__.__name__}. "
                            f"Message: {msg}, skipping.")

                    self.dd.pair_dict[pair]["trained_timestamp"] = int(
                        tr_train.stopts)
                    if self.plot_features:
                        plot_feature_importance(self.model, pair, dk, self.plot_features)
                    if self.save_backtest_models:
                        logger.info('Saving backtest model to disk.')
                        self.dd.save_data(self.model, pair, dk)
                    else:
                        logger.info('Saving metadata to disk.')
                        self.dd.save_metadata(dk)
                else:
                    self.model = self.dd.load_data(pair, dk)

                pred_df, do_preds = self.predict(dataframe_backtest, dk)
                append_df = dk.get_predictions_to_append(pred_df, do_preds, dataframe_backtest)
                dk.append_predictions(append_df)
                dk.save_backtesting_prediction(append_df)

        self.backtesting_fit_live_predictions(dk)
        dk.fill_predictions(dataframe)

        return dk
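
    # Worked example of the sliding-window arithmetic described in the docstring above
    # (config values assumed for illustration only): with train_period_days = 30 and
    # backtest_period_days = 7, a backtest starting on 2023-01-01 is split roughly into
    #   train 2022-12-02 .. 2023-01-01  ->  backtest 2023-01-01 .. 2023-01-08
    #   train 2022-12-09 .. 2023-01-08  ->  backtest 2023-01-08 .. 2023-01-15
    #   train 2022-12-16 .. 2023-01-15  ->  backtest 2023-01-15 .. 2023-01-22
    # and so on; the per-window predictions are appended and returned as one dataframe.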

    def start_live(
        self, dataframe: DataFrame, metadata: dict, strategy: IStrategy, dk: FreqaiDataKitchen
    ) -> FreqaiDataKitchen:
        """
        The main broad execution for dry/live. This function will check if a retraining should be
        performed, and if so, retrain and reset the model.
        :param dataframe: DataFrame = strategy passed dataframe
        :param metadata: Dict = pair metadata
        :param strategy: IStrategy = currently employed strategy
        :param dk: FreqaiDataKitchen = Data management/analysis tool associated to present pair only
        :returns:
            dk: FreqaiDataKitchen = Data management/analysis tool associated to present pair only
        """
        # update follower
        if self.follow_mode:
            self.dd.update_follower_metadata()

        # get the model metadata associated with the current pair
        (_, trained_timestamp, return_null_array) = self.dd.get_pair_dict_info(metadata["pair"])

        # if the metadata doesn't exist, the follower returns null arrays to strategy
        if self.follow_mode and return_null_array:
            logger.info("Returning null array from follower to strategy")
            self.dd.return_null_values_to_strategy(dataframe, dk)
            return dk

        # append the historic data once per round
        if self.dd.historic_data:
            self.dd.update_historic_data(strategy, dk)
            logger.debug(f'Updating historic data on pair {metadata["pair"]}')
            self.track_current_candle()

        if not self.follow_mode:

            (_, new_trained_timerange, data_load_timerange) = dk.check_if_new_training_required(
                trained_timestamp
            )
            dk.set_paths(metadata["pair"], new_trained_timerange.stopts)

            # load candle history into memory if it is not yet.
            if not self.dd.historic_data:
                self.dd.load_all_pair_histories(data_load_timerange, dk)

            if not self.scanning:
                self.scanning = True
                self.start_scanning(strategy)

        elif self.follow_mode:
            dk.set_paths(metadata["pair"], trained_timestamp)
            logger.info(
                "FreqAI instance set to follow_mode, finding existing pair "
                f"using { self.identifier }"
            )

        # load the model and associated data into the data kitchen
        self.model = self.dd.load_data(metadata["pair"], dk)

        dataframe = dk.use_strategy_to_populate_indicators(
            strategy, prediction_dataframe=dataframe, pair=metadata["pair"],
            do_corr_pairs=self.get_corr_dataframes
        )

        if not self.model:
            logger.warning(
                f"No model ready for {metadata['pair']}, returning null values to strategy."
            )
            self.dd.return_null_values_to_strategy(dataframe, dk)
            return dk

        if self.corr_pairlist:
            dataframe = self.cache_corr_pairlist_dfs(dataframe, dk)

        dk.find_labels(dataframe)

        self.build_strategy_return_arrays(dataframe, dk, metadata["pair"], trained_timestamp)

        return dk

    def build_strategy_return_arrays(
        self, dataframe: DataFrame, dk: FreqaiDataKitchen, pair: str, trained_timestamp: int
    ) -> None:

        # hold the historical predictions in memory so we are sending back
        # correct array to strategy

        if pair not in self.dd.model_return_values:
            # first predictions are made on entire historical candle set coming from strategy. This
            # allows FreqUI to show full return values.
            pred_df, do_preds = self.predict(dataframe, dk)
            if pair not in self.dd.historic_predictions:
                self.set_initial_historic_predictions(pred_df, dk, pair, dataframe)
            self.dd.set_initial_return_values(pair, pred_df)

            dk.return_dataframe = self.dd.attach_return_values_to_return_dataframe(pair, dataframe)
            return
        elif self.dk.check_if_model_expired(trained_timestamp):
            pred_df = DataFrame(np.zeros((2, len(dk.label_list))), columns=dk.label_list)
            do_preds = np.ones(2, dtype=np.int_) * 2
            dk.DI_values = np.zeros(2)
            logger.warning(
                f"Model expired for {pair}, returning null values to strategy. Strategy "
                "construction should take care to consider this event with "
                "prediction == 0 and do_predict == 2"
            )
        else:
            # remaining predictions are made only on the most recent candles for performance and
            # historical accuracy reasons.
            pred_df, do_preds = self.predict(dataframe.iloc[-self.CONV_WIDTH:], dk, first=False)

        if self.freqai_info.get('fit_live_predictions_candles', 0) and self.live:
            self.fit_live_predictions(dk, pair)
        self.dd.append_model_predictions(pair, pred_df, do_preds, dk, dataframe)
        dk.return_dataframe = self.dd.attach_return_values_to_return_dataframe(pair, dataframe)

        return

    def check_if_feature_list_matches_strategy(
        self, dk: FreqaiDataKitchen
    ) -> None:
        """
        Ensure user is passing the proper feature set if they are reusing an `identifier` pointing
        to a folder holding existing models.
        :param dk: FreqaiDataKitchen = non-persistent data container/analyzer for
                   current coin/bot loop
        """

        if "training_features_list_raw" in dk.data:
            feature_list = dk.data["training_features_list_raw"]
        else:
            feature_list = dk.data['training_features_list']

        if dk.training_features_list != feature_list:
            raise OperationalException(
                "Trying to access pretrained model with `identifier` "
                "but found different features furnished by current strategy. "
                "Change `identifier` to train from scratch, or ensure the "
                "strategy is furnishing the same features as the pretrained "
                "model. In case of --strategy-list, please be aware that FreqAI "
                "requires all strategies to maintain identical "
                "populate_any_indicators() functions"
            )

    def data_cleaning_train(self, dk: FreqaiDataKitchen) -> None:
        """
        Base data cleaning method for train.
        Functions here improve/modify the input data by identifying outliers,
        computing additional metrics, adding noise, reducing dimensionality etc.
        """

        ft_params = self.freqai_info["feature_parameters"]

        if ft_params.get('inlier_metric_window', 0):
            dk.compute_inlier_metric(set_='train')
            if self.freqai_info["data_split_parameters"]["test_size"] > 0:
                dk.compute_inlier_metric(set_='test')

        if ft_params.get(
            "principal_component_analysis", False
        ):
            dk.principal_component_analysis()

        if ft_params.get("use_SVM_to_remove_outliers", False):
            dk.use_SVM_to_remove_outliers(predict=False)

        if ft_params.get("DI_threshold", 0):
            dk.data["avg_mean_dist"] = dk.compute_distances()

        if ft_params.get("use_DBSCAN_to_remove_outliers", False):
            if dk.pair in self.dd.old_DBSCAN_eps:
                eps = self.dd.old_DBSCAN_eps[dk.pair]
            else:
                eps = None
            dk.use_DBSCAN_to_remove_outliers(predict=False, eps=eps)
            self.dd.old_DBSCAN_eps[dk.pair] = dk.data['DBSCAN_eps']

        if self.freqai_info["feature_parameters"].get('noise_standard_deviation', 0):
            dk.add_noise_to_training_features()

    def data_cleaning_predict(self, dk: FreqaiDataKitchen) -> None:
        """
        Base data cleaning method for predict.
        Functions here are complementary to the functions of data_cleaning_train.
        """
        ft_params = self.freqai_info["feature_parameters"]

        # ensure user is feeding the correct indicators to the model
        self.check_if_feature_list_matches_strategy(dk)

        if ft_params.get('inlier_metric_window', 0):
            dk.compute_inlier_metric(set_='predict')

        if ft_params.get(
            "principal_component_analysis", False
        ):
            dk.pca_transform(dk.data_dictionary['prediction_features'])

        if ft_params.get("use_SVM_to_remove_outliers", False):
            dk.use_SVM_to_remove_outliers(predict=True)

        if ft_params.get("DI_threshold", 0):
            dk.check_if_pred_in_training_spaces()

        if ft_params.get("use_DBSCAN_to_remove_outliers", False):
            dk.use_DBSCAN_to_remove_outliers(predict=True)

    def model_exists(self, dk: FreqaiDataKitchen) -> bool:
        """
        Given the current data kitchen's pair and path, check if a model already exists.
        :param dk: FreqaiDataKitchen = data container for the current pair, holding the
                   model path and filename
        :return: bool = whether the model file exists or not.
        """
        path_to_modelfile = Path(dk.data_path / f"{dk.model_filename}_model.joblib")
        file_exists = path_to_modelfile.is_file()
        if file_exists:
            logger.info("Found model at %s", dk.data_path / dk.model_filename)
        else:
            logger.info("Could not find model at %s", dk.data_path / dk.model_filename)
        return file_exists

    def set_full_path(self) -> None:
        """
        Creates and sets the full path for the identifier
        """
        self.full_path = Path(
            self.config["user_data_dir"] / "models" / f"{self.identifier}"
        )
        self.full_path.mkdir(parents=True, exist_ok=True)

    def extract_data_and_train_model(
        self,
        new_trained_timerange: TimeRange,
        pair: str,
        strategy: IStrategy,
        dk: FreqaiDataKitchen,
        data_load_timerange: TimeRange,
    ):
        """
        Retrieve data and train model.
        :param new_trained_timerange: TimeRange = the timerange to train the model on
        :param pair: str = current pair
        :param strategy: IStrategy = user defined strategy object
        :param dk: FreqaiDataKitchen = non-persistent data container for current coin/loop
        :param data_load_timerange: TimeRange = the amount of data to be loaded
                                    for populate_any_indicators
                                    (larger than new_trained_timerange so that
                                    new_trained_timerange does not contain any NaNs)
        """

        corr_dataframes, base_dataframes = self.dd.get_base_and_corr_dataframes(
            data_load_timerange, pair, dk
        )

        unfiltered_dataframe = dk.use_strategy_to_populate_indicators(
            strategy, corr_dataframes, base_dataframes, pair
        )

        unfiltered_dataframe = dk.slice_dataframe(new_trained_timerange, unfiltered_dataframe)

        # find the features indicated by strategy and store in datakitchen
        dk.find_features(unfiltered_dataframe)
        dk.find_labels(unfiltered_dataframe)

        model = self.train(unfiltered_dataframe, pair, dk)

        self.dd.pair_dict[pair]["trained_timestamp"] = new_trained_timerange.stopts
        dk.set_new_model_names(pair, new_trained_timerange.stopts)
        self.dd.save_data(model, pair, dk)

        if self.plot_features:
            plot_feature_importance(model, pair, dk, self.plot_features)

        if self.freqai_info.get("purge_old_models", False):
            self.dd.purge_old_models()

    def set_initial_historic_predictions(
        self, pred_df: DataFrame, dk: FreqaiDataKitchen, pair: str, strat_df: DataFrame
    ) -> None:
        """
        This function is called only if the datadrawer failed to load an
        existing set of historic predictions. In this case, it builds
        the structure and sets fake predictions off the first training
        data. After that, FreqAI will append new real predictions to the
        set of historic predictions.

        These values are used to generate live statistics which can be used
        in the strategy for adaptive values. E.g. &*_mean/std are quantities
        that can be computed based on live predictions from the set of historical
        predictions. Those values can be used in the user strategy to better
        assess prediction rarity, and thus wait for probabilistically favorable
        entries relative to the live historical predictions.

        If the user reuses an identifier on a subsequent instance,
        this function will not be called. In that case, "real" predictions
        will be appended to the loaded set of historic predictions.
        :param pred_df: DataFrame = predictions made on the first training data
        :param dk: FreqaiDataKitchen = object containing methods for data analysis
        :param pair: str = current pair
        :param strat_df: DataFrame = strategy provided dataframe
        """

        self.dd.historic_predictions[pair] = pred_df
        hist_preds_df = self.dd.historic_predictions[pair]

        self.set_start_dry_live_date(strat_df)

        for label in hist_preds_df.columns:
            if hist_preds_df[label].dtype == object:
                continue
            hist_preds_df[f'{label}_mean'] = 0
            hist_preds_df[f'{label}_std'] = 0

        hist_preds_df['do_predict'] = 0

        if self.freqai_info['feature_parameters'].get('DI_threshold', 0) > 0:
            hist_preds_df['DI_values'] = 0

        for return_str in dk.data['extra_returns_per_train']:
            hist_preds_df[return_str] = dk.data['extra_returns_per_train'][return_str]

        hist_preds_df['close_price'] = strat_df['close']
        hist_preds_df['date_pred'] = strat_df['date']

        # for keras type models, the conv_window needs to be prepended so
        # viewing is correct in frequi
        if self.freqai_info.get('keras', False) or self.ft_params.get('inlier_metric_window', 0):
            n_lost_points = self.freqai_info.get('conv_width', 2)
            zeros_df = DataFrame(np.zeros((n_lost_points, len(hist_preds_df.columns))),
                                 columns=hist_preds_df.columns)
            self.dd.historic_predictions[pair] = pd.concat(
                [zeros_df, hist_preds_df], axis=0, ignore_index=True)

    def fit_live_predictions(self, dk: FreqaiDataKitchen, pair: str) -> None:
        """
        Fit the labels with a gaussian distribution
        """
        import scipy as spy

        # add classes from classifier label types if used
        full_labels = dk.label_list + dk.unique_class_list

        num_candles = self.freqai_info.get("fit_live_predictions_candles", 100)
        dk.data["labels_mean"], dk.data["labels_std"] = {}, {}
        for label in full_labels:
            if self.dd.historic_predictions[dk.pair][label].dtype == object:
                continue
            f = spy.stats.norm.fit(
                self.dd.historic_predictions[dk.pair][label].tail(num_candles))
            dk.data["labels_mean"][label], dk.data["labels_std"][label] = f[0], f[1]

        return
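
    # Illustrative use of the statistics fitted above (the label name "&-s_close" is
    # hypothetical): the labels_mean/labels_std values surface in the strategy dataframe
    # as "&-s_close_mean" / "&-s_close_std" columns, so a strategy might gate entries on
    # prediction rarity, e.g.
    #   df["enter_long"] = (
    #       (df["do_predict"] == 1)
    #       & (df["&-s_close"] > df["&-s_close_mean"] + df["&-s_close_std"])
    #   ).astype(int)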

    def inference_timer(self, do: Literal['start', 'stop'] = 'start', pair: str = ''):
        """
        Timer designed to track the cumulative time spent in FreqAI for one pass through
        the whitelist. This will check if the time spent is more than 1/4 the time
        of a single candle, and if so, it will warn the user of degraded performance
        """
        if do == 'start':
            self.pair_it += 1
            self.begin_time = time.time()
        elif do == 'stop':
            end = time.time()
            time_spent = (end - self.begin_time)
            if self.freqai_info.get('write_metrics_to_disk', False):
                self.dd.update_metric_tracker('inference_time', time_spent, pair)
            self.inference_time += time_spent
            if self.pair_it == self.total_pairs:
                logger.info(
                    f'Total time spent inferencing pairlist {self.inference_time:.2f} seconds')
                if self.inference_time > 0.25 * self.base_tf_seconds:
                    logger.warning("Inference took over 25% of the candle time. Reduce pairlist to"
                                   " avoid blinding open trades and degrading performance.")
                self.pair_it = 0
                self.inference_time = 0
        return

    def train_timer(self, do: Literal['start', 'stop'] = 'start', pair: str = ''):
        """
        Timer designed to track the cumulative time spent training the full pairlist in
        FreqAI.
        """
        if do == 'start':
            self.pair_it_train += 1
            self.begin_time_train = time.time()
        elif do == 'stop':
            end = time.time()
            time_spent = (end - self.begin_time_train)
            if self.freqai_info.get('write_metrics_to_disk', False):
                self.dd.collect_metrics(time_spent, pair)

            self.train_time += time_spent
            if self.pair_it_train == self.total_pairs:
                logger.info(
                    f'Total time spent training pairlist {self.train_time:.2f} seconds')
                self.pair_it_train = 0
                self.train_time = 0
        return

    def get_init_model(self, pair: str) -> Any:
        if pair not in self.dd.model_dictionary or not self.continual_learning:
            init_model = None
        else:
            init_model = self.dd.model_dictionary[pair]

        return init_model

    def _set_train_queue(self):
        """
        Sets train queue from existing train timestamps if they exist,
        otherwise it sets the train queue based on the provided whitelist.
        """
        current_pairlist = self.config.get("exchange", {}).get("pair_whitelist")
        if not self.dd.pair_dict:
            logger.info('Set fresh train queue from whitelist. '
                        f'Queue: {current_pairlist}')
            return deque(current_pairlist)

        best_queue = deque()

        pair_dict_sorted = sorted(self.dd.pair_dict.items(),
                                  key=lambda k: k[1]['trained_timestamp'])
        for pair in pair_dict_sorted:
            if pair[0] in current_pairlist:
                best_queue.append(pair[0])
        for pair in current_pairlist:
            if pair not in best_queue:
                best_queue.appendleft(pair)

        logger.info('Set existing queue from trained timestamps. '
                    f'Best approximation queue: {best_queue}')
        return best_queue

    def cache_corr_pairlist_dfs(self, dataframe: DataFrame, dk: FreqaiDataKitchen) -> DataFrame:
        """
        Cache the corr_pairlist dfs to speed up performance for subsequent pairs during the
        current candle.
        :param dataframe: strategy fed dataframe
        :param dk: datakitchen object for current asset
        :return: dataframe to attach/extract cached corr_pair dfs to/from.
        """

        if self.get_corr_dataframes:
            self.corr_dataframes = dk.extract_corr_pair_columns_from_populated_indicators(dataframe)
            if not self.corr_dataframes:
                logger.warning("Couldn't cache corr_pair dataframes for improved performance. "
                               "Consider ensuring that the full coin/stake, e.g. XYZ/USD, "
                               "is included in the column names when you are creating features "
                               "in `populate_any_indicators()`.")
            self.get_corr_dataframes = not bool(self.corr_dataframes)
        elif self.corr_dataframes:
            dataframe = dk.attach_corr_pair_columns(
                dataframe, self.corr_dataframes, dk.pair)

        return dataframe

    def track_current_candle(self):
        """
        Checks if the latest candle appended by the datadrawer is
        equivalent to the latest candle seen by FreqAI. If not, it
        asks to refresh the cached corr_dfs, and resets the pair
        counter.
        """
        if self.dd.current_candle > self.current_candle:
            self.get_corr_dataframes = True
            self.pair_it = 1
            self.current_candle = self.dd.current_candle

    def ensure_data_exists(self, len_dataframe_backtest: int,
                           tr_backtest: TimeRange, pair: str) -> bool:
        """
        Check if the dataframe is empty; if not, report useful information to user.
        :param len_dataframe_backtest: the len of backtesting dataframe
        :param tr_backtest: current backtesting timerange.
        :param pair: current pair
        :return: if the data exists or not
        """
        if self.config.get("freqai_backtest_live_models", False) and len_dataframe_backtest == 0:
            logger.info(f"No data found for pair {pair} "
                        f"from {tr_backtest.start_fmt} to {tr_backtest.stop_fmt}. "
                        "Probably more than one training within the same candle period.")
            return False
        return True

    def log_backtesting_progress(self, tr_train: TimeRange, pair: str,
                                 train_it: int, total_trains: int):
        """
        Log the backtesting progress so user knows how many pairs have been trained and
        how many more pairs/trains remain.
        :param tr_train: the training timerange
        :param train_it: the train iteration for the current pair (the sliding window progress)
        :param pair: the current pair
        :param total_trains: total trains (total number of slides for the sliding window)
        """
        if not self.config.get("freqai_backtest_live_models", False):
            logger.info(
                f"Training {pair}, {self.pair_it}/{self.total_pairs} pairs"
                f" from {tr_train.start_fmt} "
                f"to {tr_train.stop_fmt}, {train_it}/{total_trains} "
                "trains"
            )

    def backtesting_fit_live_predictions(self, dk: FreqaiDataKitchen):
        """
        Apply the fit_live_predictions function in backtesting with a dummy historic_predictions.
        The loop is required to simulate dry/live operation, as it is not possible to predict
        the type of logic implemented by the user.
        :param dk: datakitchen object
        """
        fit_live_predictions_candles = self.freqai_info.get("fit_live_predictions_candles", 0)
        if fit_live_predictions_candles:
            logger.info("Applying fit_live_predictions in backtesting")
            label_columns = [col for col in dk.full_df.columns if (
                col.startswith("&") and
                not (col.startswith("&") and col.endswith("_mean")) and
                not (col.startswith("&") and col.endswith("_std")) and
                col not in self.dk.data["extra_returns_per_train"])
            ]

            for index in range(len(dk.full_df)):
                if index >= fit_live_predictions_candles:
                    self.dd.historic_predictions[self.dk.pair] = (
                        dk.full_df.iloc[index - fit_live_predictions_candles:index])
                    self.fit_live_predictions(self.dk, self.dk.pair)
                    for label in label_columns:
                        if dk.full_df[label].dtype == object:
                            continue
                        if "labels_mean" in self.dk.data:
                            dk.full_df.at[index, f"{label}_mean"] = (
                                self.dk.data["labels_mean"][label])
                        if "labels_std" in self.dk.data:
                            dk.full_df.at[index, f"{label}_std"] = self.dk.data["labels_std"][label]

                    for extra_col in self.dk.data["extra_returns_per_train"]:
                        dk.full_df.at[index, f"{extra_col}"] = (
                            self.dk.data["extra_returns_per_train"][extra_col])

        return

    def update_metadata(self, metadata: Dict[str, Any]):
        """
        Update global metadata and save the updated json file
        :param metadata: new global metadata dict
        """
        self.dd.save_global_metadata_to_disk(metadata)
        self.metadata = metadata

    def set_start_dry_live_date(self, live_dataframe: DataFrame):
        key_name = "start_dry_live_date"
        if key_name not in self.metadata:
            metadata = self.metadata
            metadata[key_name] = int(
                pd.to_datetime(live_dataframe.tail(1)["date"].values[0]).timestamp())
            self.update_metadata(metadata)

    def start_backtesting_from_historic_predictions(
        self, dataframe: DataFrame, metadata: dict, dk: FreqaiDataKitchen
    ) -> FreqaiDataKitchen:
        """
        :param dataframe: DataFrame = strategy passed dataframe
        :param metadata: Dict = pair metadata
        :param dk: FreqaiDataKitchen = Data management/analysis tool associated to present pair only
        :return:
            FreqaiDataKitchen = Data management/analysis tool associated to present pair only
        """
        pair = metadata["pair"]
        dk.return_dataframe = dataframe
        saved_dataframe = self.dd.historic_predictions[pair]
        columns_to_drop = list(set(saved_dataframe.columns).intersection(
            dk.return_dataframe.columns))
        dk.return_dataframe = dk.return_dataframe.drop(columns=list(columns_to_drop))
        dk.return_dataframe = pd.merge(
            dk.return_dataframe, saved_dataframe, how='left', left_on='date', right_on="date_pred")
        return dk

    def check_deprecated_populate_any_indicators(self, strategy: IStrategy):
        """
        Check and warn if the deprecated populate_any_indicators function is used.
        :param strategy: strategy object
        """

        if not self.warned_deprecated_populate_any_indicators:
            self.warned_deprecated_populate_any_indicators = True
            old_version = inspect.getsource(strategy.populate_any_indicators) != (
                inspect.getsource(IStrategy.populate_any_indicators))

            if old_version:
                logger.warning("DEPRECATION WARNING: "
                               "You are using the deprecated populate_any_indicators function. "
                               "This function will raise an error on March 1 2023. "
                               "Please update your strategy by using "
                               "the new feature_engineering functions. See \n"
                               "https://www.freqtrade.io/en/latest/freqai-feature-engineering/ "
                               "for details.")

    # The following methods are overridden by user-made prediction models.
    # See freqai/prediction_models/CatboostPredictionModel.py for an example;
    # a minimal subclass sketch and an illustrative config snippet follow this listing.

    @abstractmethod
    def train(self, unfiltered_df: DataFrame, pair: str,
              dk: FreqaiDataKitchen, **kwargs) -> Any:
        """
        Filter the training data and train a model to it. Train makes heavy use of the datakitchen
        for storing, saving, loading, and analyzing the data.
        :param unfiltered_df: Full dataframe for the current training period
        :param pair: pair metadata from strategy.
        :return: Trained model which can be used for inference (self.predict)
        """

    @abstractmethod
    def fit(self, data_dictionary: Dict[str, Any], dk: FreqaiDataKitchen, **kwargs) -> Any:
        """
        Most regressors use the same function names and arguments, e.g. the user
        can drop in LGBMRegressor in place of CatBoostRegressor and all data
        management will be properly handled by FreqAI.
        :param data_dictionary: Dict = the dictionary constructed by DataHandler to hold
                                all the training and test data/labels.
        """

        return

    @abstractmethod
    def predict(
        self, unfiltered_df: DataFrame, dk: FreqaiDataKitchen, **kwargs
    ) -> Tuple[DataFrame, NDArray[np.int_]]:
        """
        Filter the prediction features data and predict with it.
        :param unfiltered_df: Full dataframe for the current backtest period.
        :param dk: FreqaiDataKitchen = Data management/analysis tool associated to present pair only
        :param first: boolean = whether this is the first prediction or not.
        :return:
        :predictions: np.array of predictions
        :do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove
        data (NaNs) or felt uncertain about data (i.e. SVM and/or DI index)
        """
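
For orientation, a sketch of the "freqai" configuration section that IFreqaiModel.__init__ reads. Every key below is consulted somewhere in the listing above, but the values are illustrative assumptions, not recommendations:

# Illustrative values only; keys mirror the freqai_info lookups in IFreqaiModel.__init__
config = {
    "freqai": {
        "identifier": "example-id",
        "train_period_days": 30,        # training window (see start_backtesting docstring)
        "backtest_period_days": 7,      # backtest window following each training window
        "save_backtest_models": True,
        "purge_old_models": True,
        "continual_learning": False,
        "write_metrics_to_disk": False,
        "fit_live_predictions_candles": 300,
        "conv_width": 1,
        "data_split_parameters": {"test_size": 0.25},
        "model_training_parameters": {},
        "feature_parameters": {
            "include_corr_pairlist": ["BTC/USDT", "ETH/USDT"],
            "DI_threshold": 0,
            "use_SVM_to_remove_outliers": False,
            "use_DBSCAN_to_remove_outliers": False,
            "principal_component_analysis": False,
            "inlier_metric_window": 0,
            "plot_feature_importances": 0,
        },
    },
}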