/freqtrade/freqai/freqai_interface.py
import logging
import threading
import time
from abc import ABC, abstractmethod
from collections import deque
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, List, Literal, Optional, Tuple

import numpy as np
import pandas as pd
import psutil
from numpy.typing import NDArray
from pandas import DataFrame

from freqtrade.configuration import TimeRange
from freqtrade.constants import Config
from freqtrade.data.dataprovider import DataProvider
from freqtrade.enums import RunMode
from freqtrade.exceptions import OperationalException
from freqtrade.exchange import timeframe_to_seconds
from freqtrade.freqai.data_drawer import FreqaiDataDrawer
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
from freqtrade.freqai.utils import plot_feature_importance, record_params
from freqtrade.strategy.interface import IStrategy


pd.options.mode.chained_assignment = None
logger = logging.getLogger(__name__)


class IFreqaiModel(ABC):
    """
    Class containing all tools for training and prediction in the strategy.
    Base*PredictionModels inherit from this class.

    Record of contribution:
    FreqAI was developed by a group of individuals who all contributed specific skillsets to the
    project.

    Conception and software development:
    Robert Caulk @robcaulk

    Theoretical brainstorming:
    Elin Törnquist @th0rntwig

    Code review, software architecture brainstorming:
    @xmatthias

    Beta testing and bug reporting:
    @bloodhunter4rc, Salah Lamkadem @ikonx, @ken11o2, @longyu, @paranoidandy, @smidelis, @smarm
    Juha Nykänen @suikula, Wagner Costa @wagnercosta, Johan Vlugt @Jooopieeert
    """

    def __init__(self, config: Config) -> None:

        self.config = config
        self.assert_config(self.config)
        self.freqai_info: Dict[str, Any] = config["freqai"]
        self.data_split_parameters: Dict[str, Any] = config.get("freqai", {}).get(
            "data_split_parameters", {})
        self.model_training_parameters: Dict[str, Any] = config.get("freqai", {}).get(
            "model_training_parameters", {})
        self.identifier: str = self.freqai_info.get("identifier", "no_id_provided")
        self.retrain = False
        self.first = True
        self.set_full_path()
        self.follow_mode: bool = self.freqai_info.get("follow_mode", False)
        self.save_backtest_models: bool = self.freqai_info.get("save_backtest_models", True)
        if self.save_backtest_models:
            logger.info('Backtesting module configured to save all models.')

        self.dd = FreqaiDataDrawer(Path(self.full_path), self.config, self.follow_mode)
        # set current candle to arbitrary historical date
        self.current_candle: datetime = datetime.fromtimestamp(637887600, tz=timezone.utc)
        self.dd.current_candle = self.current_candle
        self.scanning = False
        self.ft_params = self.freqai_info["feature_parameters"]
        self.corr_pairlist: List[str] = self.ft_params.get("include_corr_pairlist", [])
        self.keras: bool = self.freqai_info.get("keras", False)
        if self.keras and self.ft_params.get("DI_threshold", 0):
            self.ft_params["DI_threshold"] = 0
            logger.warning("DI threshold is not configured for Keras models yet. Deactivating.")
        self.CONV_WIDTH = self.freqai_info.get('conv_width', 1)
        if self.ft_params.get("inlier_metric_window", 0):
            self.CONV_WIDTH = self.ft_params.get("inlier_metric_window", 0) * 2
        self.pair_it = 0
        self.pair_it_train = 0
        self.total_pairs = len(self.config.get("exchange", {}).get("pair_whitelist"))
        self.train_queue = self._set_train_queue()
        self.inference_time: float = 0
        self.train_time: float = 0
        self.begin_time: float = 0
        self.begin_time_train: float = 0
        self.base_tf_seconds = timeframe_to_seconds(self.config['timeframe'])
        self.continual_learning = self.freqai_info.get('continual_learning', False)
        self.plot_features = self.ft_params.get("plot_feature_importances", 0)
        self.corr_dataframes: Dict[str, DataFrame] = {}
        # get_corr_dataframes is controlling the caching of corr_dataframes
        # for improved performance. Careful with this boolean.
        self.get_corr_dataframes: bool = True
        self._threads: List[threading.Thread] = []
        self._stop_event = threading.Event()
        self.metadata: Dict[str, Any] = self.dd.load_global_metadata_from_disk()
        self.data_provider: Optional[DataProvider] = None
        self.max_system_threads = max(int(psutil.cpu_count() * 2 - 2), 1)
        self.can_short = True  # overridden in start() with strategy.can_short

        record_params(config, self.full_path)

    def __getstate__(self):
        """
        Return an empty state to be pickled in hyperopt
        """
        return ({})

    def assert_config(self, config: Config) -> None:

        if not config.get("freqai", {}):
            raise OperationalException("No freqai parameters found in configuration file.")

    def start(self, dataframe: DataFrame, metadata: dict, strategy: IStrategy) -> DataFrame:
        """
        Entry point to the FreqaiModel from a specific pair; it will train a new model if
        necessary before making the prediction.

        :param dataframe: Full dataframe coming from strategy - it contains entire
                          backtesting timerange + additional historical data necessary to train
                          the model.
        :param metadata: pair metadata coming from strategy.
        :param strategy: Strategy to train on
        """

        self.live = strategy.dp.runmode in (RunMode.DRY_RUN, RunMode.LIVE)
        self.dd.set_pair_dict_info(metadata)
        self.data_provider = strategy.dp
        self.can_short = strategy.can_short

        if self.live:
            self.inference_timer('start')
            self.dk = FreqaiDataKitchen(self.config, self.live, metadata["pair"])
            dk = self.start_live(dataframe, metadata, strategy, self.dk)
            dataframe = dk.remove_features_from_df(dk.return_dataframe)

        # For backtesting, each pair enters and then gets trained for each window along the
        # sliding window defined by "train_period_days" (training window) and "backtest_period_days"
        # (backtest window, i.e. window immediately following the training window).
        # FreqAI slides the window and sequentially builds the backtesting results before returning
        # the concatenated results for the full backtesting period back to the strategy.
        elif not self.follow_mode:
            self.dk = FreqaiDataKitchen(self.config, self.live, metadata["pair"])
            dataframe = self.dk.use_strategy_to_populate_indicators(
                strategy, prediction_dataframe=dataframe, pair=metadata["pair"]
            )
            if not self.config.get("freqai_backtest_live_models", False):
                logger.info(f"Training {len(self.dk.training_timeranges)} timeranges")
                dk = self.start_backtesting(dataframe, metadata, self.dk)
                dataframe = dk.remove_features_from_df(dk.return_dataframe)
            else:
                logger.info(
                    "Backtesting using historic predictions (live models)")
                dk = self.start_backtesting_from_historic_predictions(
                    dataframe, metadata, self.dk)
                dataframe = dk.return_dataframe

        self.clean_up()
        if self.live:
            self.inference_timer('stop', metadata["pair"])

        return dataframe

    def clean_up(self):
        """
        Objects that should be handled by GC already between coins, but
        are explicitly shown here to help demonstrate the non-persistence of these
        objects.
        """
        self.model = None
        self.dk = None

    def _on_stop(self):
        """
        Callback for Subclasses to override to include logic for shutting down resources
        when SIGINT is sent.
        """
        return

    def shutdown(self):
        """
        Cleans up threads on shutdown: sets the stop event and joins threads to wait
        for the current training iteration.
        """
        logger.info("Stopping FreqAI")
        self._stop_event.set()

        self.data_provider = None
        self._on_stop()

        logger.info("Waiting on Training iteration")
        for _thread in self._threads:
            _thread.join()

    def start_scanning(self, *args, **kwargs) -> None:
        """
        Start `self._start_scanning` in a separate thread
        """
        _thread = threading.Thread(target=self._start_scanning, args=args, kwargs=kwargs)
        self._threads.append(_thread)
        _thread.start()

    def _start_scanning(self, strategy: IStrategy) -> None:
        """
        Function designed to constantly scan pairs for retraining on a separate thread (intracandle)
        to improve model youth. This function is agnostic to data preparation/collection/storage,
        it simply trains on whatever data is available in self.dd.
        :param strategy: IStrategy = The user defined strategy class
        """
        while not self._stop_event.is_set():
            time.sleep(1)
            pair = self.train_queue[0]

            # ensure pair is available in dp
            if pair not in strategy.dp.current_whitelist():
                self.train_queue.popleft()
                logger.warning(f'{pair} not in current whitelist, removing from train queue.')
                continue

            (_, trained_timestamp, _) = self.dd.get_pair_dict_info(pair)

            dk = FreqaiDataKitchen(self.config, self.live, pair)
            (
                retrain,
                new_trained_timerange,
                data_load_timerange,
            ) = dk.check_if_new_training_required(trained_timestamp)

            if retrain:
                self.train_timer('start')
                dk.set_paths(pair, new_trained_timerange.stopts)
                try:
                    self.extract_data_and_train_model(
                        new_trained_timerange, pair, strategy, dk, data_load_timerange
                    )
                except Exception as msg:
                    logger.warning(f"Training {pair} raised exception {msg.__class__.__name__}. "
                                   f"Message: {msg}, skipping.")

                self.train_timer('stop', pair)

                # only rotate the queue after the first has been trained.
                self.train_queue.rotate(-1)

                self.dd.save_historic_predictions_to_disk()
                if self.freqai_info.get('write_metrics_to_disk', False):
                    self.dd.save_metric_tracker_to_disk()

    def start_backtesting(
        self, dataframe: DataFrame, metadata: dict, dk: FreqaiDataKitchen
    ) -> FreqaiDataKitchen:
        """
        The main broad execution for backtesting. For backtesting, each pair enters and then gets
        trained for each window along the sliding window defined by "train_period_days"
        (training window) and "backtest_period_days" (backtest window, i.e. window immediately
        following the training window). FreqAI slides the window and sequentially builds
        the backtesting results before returning the concatenated results for the full
        backtesting period back to the strategy.
        :param dataframe: DataFrame = strategy passed dataframe
        :param metadata: Dict = pair metadata
        :param dk: FreqaiDataKitchen = Data management/analysis tool associated to present pair only
        :return:
            FreqaiDataKitchen = Data management/analysis tool associated to present pair only
        """

        self.pair_it += 1
        train_it = 0
        # Loop enforcing the sliding window training/backtesting paradigm
        # tr_train is the training time range e.g. 1 historical month
        # tr_backtest is the backtesting time range e.g. the week directly
        # following tr_train. Both of these windows slide through the
        # entire backtest
        for tr_train, tr_backtest in zip(dk.training_timeranges, dk.backtesting_timeranges):
            pair = metadata["pair"]
            (_, _, _) = self.dd.get_pair_dict_info(pair)
            train_it += 1
            total_trains = len(dk.backtesting_timeranges)
            self.training_timerange = tr_train
            len_backtest_df = len(dataframe.loc[(dataframe["date"] >= tr_backtest.startdt) & (
                                  dataframe["date"] < tr_backtest.stopdt), :])

            if not self.ensure_data_exists(len_backtest_df, tr_backtest, pair):
                continue

            self.log_backtesting_progress(tr_train, pair, train_it, total_trains)

            timestamp_model_id = int(tr_train.stopts)
            if dk.backtest_live_models:
                timestamp_model_id = int(tr_backtest.startts)

            dk.set_paths(pair, timestamp_model_id)

            dk.set_new_model_names(pair, timestamp_model_id)

            if dk.check_if_backtest_prediction_is_valid(len_backtest_df):
                self.dd.load_metadata(dk)
                dk.find_features(dataframe)
                self.check_if_feature_list_matches_strategy(dk)
                append_df = dk.get_backtesting_prediction()
                dk.append_predictions(append_df)
            else:
                dataframe_train = dk.slice_dataframe(tr_train, dataframe)
                dataframe_backtest = dk.slice_dataframe(tr_backtest, dataframe)
                if not self.model_exists(dk):
                    dk.find_features(dataframe_train)
                    dk.find_labels(dataframe_train)
                    self.model = self.train(dataframe_train, pair, dk)
                    self.dd.pair_dict[pair]["trained_timestamp"] = int(
                        tr_train.stopts)
                    if self.plot_features:
                        plot_feature_importance(self.model, pair, dk, self.plot_features)
                    if self.save_backtest_models:
                        logger.info('Saving backtest model to disk.')
                        self.dd.save_data(self.model, pair, dk)
                    else:
                        logger.info('Saving metadata to disk.')
                        self.dd.save_metadata(dk)
                else:
                    self.model = self.dd.load_data(pair, dk)

                pred_df, do_preds = self.predict(dataframe_backtest, dk)
                append_df = dk.get_predictions_to_append(pred_df, do_preds, dataframe_backtest)
                dk.append_predictions(append_df)
                dk.save_backtesting_prediction(append_df)

        self.backtesting_fit_live_predictions(dk)
        dk.fill_predictions(dataframe)

        return dk

    def start_live(
        self, dataframe: DataFrame, metadata: dict, strategy: IStrategy, dk: FreqaiDataKitchen
    ) -> FreqaiDataKitchen:
        """
        The main broad execution for dry/live. This function will check if a retraining should be
        performed, and if so, retrain and reset the model.
        :param dataframe: DataFrame = strategy passed dataframe
        :param metadata: Dict = pair metadata
        :param strategy: IStrategy = currently employed strategy
        :param dk: FreqaiDataKitchen = Data management/analysis tool associated to present pair only
        :returns:
        dk: FreqaiDataKitchen = Data management/analysis tool associated to present pair only
        """

        # update follower
        if self.follow_mode:
            self.dd.update_follower_metadata()

        # get the model metadata associated with the current pair
        (_, trained_timestamp, return_null_array) = self.dd.get_pair_dict_info(metadata["pair"])

        # if the metadata doesn't exist, the follower returns null arrays to strategy
        if self.follow_mode and return_null_array:
            logger.info("Returning null array from follower to strategy")
            self.dd.return_null_values_to_strategy(dataframe, dk)
            return dk

        # append the historic data once per round
        if self.dd.historic_data:
            self.dd.update_historic_data(strategy, dk)
            logger.debug(f'Updating historic data on pair {metadata["pair"]}')
            self.track_current_candle()

        if not self.follow_mode:

            (_, new_trained_timerange, data_load_timerange) = dk.check_if_new_training_required(
                trained_timestamp
            )
            dk.set_paths(metadata["pair"], new_trained_timerange.stopts)

            # load candle history into memory if it is not yet.
            if not self.dd.historic_data:
                self.dd.load_all_pair_histories(data_load_timerange, dk)

            if not self.scanning:
                self.scanning = True
                self.start_scanning(strategy)

        elif self.follow_mode:
            dk.set_paths(metadata["pair"], trained_timestamp)
            logger.info(
                "FreqAI instance set to follow_mode, finding existing pair "
                f"using { self.identifier }"
            )

        # load the model and associated data into the data kitchen
        self.model = self.dd.load_data(metadata["pair"], dk)

        dataframe = dk.use_strategy_to_populate_indicators(
            strategy, prediction_dataframe=dataframe, pair=metadata["pair"],
            do_corr_pairs=self.get_corr_dataframes
        )

        if not self.model:
            logger.warning(
                f"No model ready for {metadata['pair']}, returning null values to strategy."
            )
            self.dd.return_null_values_to_strategy(dataframe, dk)
            return dk

        if self.corr_pairlist:
            dataframe = self.cache_corr_pairlist_dfs(dataframe, dk)

        dk.find_labels(dataframe)

        self.build_strategy_return_arrays(dataframe, dk, metadata["pair"], trained_timestamp)

        return dk

    def build_strategy_return_arrays(
        self, dataframe: DataFrame, dk: FreqaiDataKitchen, pair: str, trained_timestamp: int
    ) -> None:

        # hold the historical predictions in memory so we are sending back
        # correct array to strategy

        if pair not in self.dd.model_return_values:
            # first predictions are made on entire historical candle set coming from strategy. This
            # allows FreqUI to show full return values.
            pred_df, do_preds = self.predict(dataframe, dk)
            if pair not in self.dd.historic_predictions:
                self.set_initial_historic_predictions(pred_df, dk, pair, dataframe)
            self.dd.set_initial_return_values(pair, pred_df)

            dk.return_dataframe = self.dd.attach_return_values_to_return_dataframe(pair, dataframe)
            return
        elif self.dk.check_if_model_expired(trained_timestamp):
            pred_df = DataFrame(np.zeros((2, len(dk.label_list))), columns=dk.label_list)
            do_preds = np.ones(2, dtype=np.int_) * 2
            dk.DI_values = np.zeros(2)
            logger.warning(
                f"Model expired for {pair}, returning null values to strategy. Strategy "
                "construction should take care to consider this event with "
                "prediction == 0 and do_predict == 2"
            )
        else:
            # remaining predictions are made only on the most recent candles for performance and
            # historical accuracy reasons.
            pred_df, do_preds = self.predict(dataframe.iloc[-self.CONV_WIDTH:], dk, first=False)

        if self.freqai_info.get('fit_live_predictions_candles', 0) and self.live:
            self.fit_live_predictions(dk, pair)
        self.dd.append_model_predictions(pair, pred_df, do_preds, dk, dataframe)
        dk.return_dataframe = self.dd.attach_return_values_to_return_dataframe(pair, dataframe)

        return

    def check_if_feature_list_matches_strategy(
        self, dk: FreqaiDataKitchen
    ) -> None:
        """
        Ensure user is passing the proper feature set if they are reusing an `identifier` pointing
        to a folder holding existing models.
        :param dk: FreqaiDataKitchen = non-persistent data container/analyzer for
                   current coin/bot loop
        """

        if "training_features_list_raw" in dk.data:
            feature_list = dk.data["training_features_list_raw"]
        else:
            feature_list = dk.data['training_features_list']

        if dk.training_features_list != feature_list:
            raise OperationalException(
                "Trying to access pretrained model with `identifier` "
                "but found different features furnished by current strategy. "
                "Change `identifier` to train from scratch, or ensure the "
                "strategy is furnishing the same features as the pretrained "
                "model. In case of --strategy-list, please be aware that FreqAI "
                "requires all strategies to maintain identical "
                "populate_any_indicators() functions"
            )

    def data_cleaning_train(self, dk: FreqaiDataKitchen) -> None:
        """
        Base data cleaning method for train.
        Functions here improve/modify the input data by identifying outliers,
        computing additional metrics, adding noise, reducing dimensionality etc.
        """

        ft_params = self.freqai_info["feature_parameters"]

        if ft_params.get('inlier_metric_window', 0):
            dk.compute_inlier_metric(set_='train')
            if self.freqai_info["data_split_parameters"]["test_size"] > 0:
                dk.compute_inlier_metric(set_='test')

        if ft_params.get(
            "principal_component_analysis", False
        ):
            dk.principal_component_analysis()

        if ft_params.get("use_SVM_to_remove_outliers", False):
            dk.use_SVM_to_remove_outliers(predict=False)

        if ft_params.get("DI_threshold", 0):
            dk.data["avg_mean_dist"] = dk.compute_distances()

        if ft_params.get("use_DBSCAN_to_remove_outliers", False):
            if dk.pair in self.dd.old_DBSCAN_eps:
                eps = self.dd.old_DBSCAN_eps[dk.pair]
            else:
                eps = None
            dk.use_DBSCAN_to_remove_outliers(predict=False, eps=eps)
            self.dd.old_DBSCAN_eps[dk.pair] = dk.data['DBSCAN_eps']

        if self.freqai_info["feature_parameters"].get('noise_standard_deviation', 0):
            dk.add_noise_to_training_features()

    def data_cleaning_predict(self, dk: FreqaiDataKitchen) -> None:
        """
        Base data cleaning method for predict.
        Functions here are complementary to the functions of data_cleaning_train.
        """
        ft_params = self.freqai_info["feature_parameters"]

        # ensure user is feeding the correct indicators to the model
        self.check_if_feature_list_matches_strategy(dk)

        if ft_params.get('inlier_metric_window', 0):
            dk.compute_inlier_metric(set_='predict')

        if ft_params.get(
            "principal_component_analysis", False
        ):
            dk.pca_transform(dk.data_dictionary['prediction_features'])

        if ft_params.get("use_SVM_to_remove_outliers", False):
            dk.use_SVM_to_remove_outliers(predict=True)

        if ft_params.get("DI_threshold", 0):
            dk.check_if_pred_in_training_spaces()

        if ft_params.get("use_DBSCAN_to_remove_outliers", False):
            dk.use_DBSCAN_to_remove_outliers(predict=True)

    def model_exists(self, dk: FreqaiDataKitchen) -> bool:
        """
        Check whether a model already exists for the current pair and path.
        :param dk: FreqaiDataKitchen = non-persistent data container for the current pair
        :return:
        :boolean: whether the model file exists or not.
        """
        path_to_modelfile = Path(dk.data_path / f"{dk.model_filename}_model.joblib")
        file_exists = path_to_modelfile.is_file()
        if file_exists:
            logger.info("Found model at %s", dk.data_path / dk.model_filename)
        else:
            logger.info("Could not find model at %s", dk.data_path / dk.model_filename)
        return file_exists

    def set_full_path(self) -> None:
        """
        Creates and sets the full path for the identifier
        """
        self.full_path = Path(
            self.config["user_data_dir"] / "models" / f"{self.identifier}"
        )
        self.full_path.mkdir(parents=True, exist_ok=True)

    def extract_data_and_train_model(
        self,
        new_trained_timerange: TimeRange,
        pair: str,
        strategy: IStrategy,
        dk: FreqaiDataKitchen,
        data_load_timerange: TimeRange,
    ):
        """
        Retrieve data and train model.
        :param new_trained_timerange: TimeRange = the timerange to train the model on
        :param pair: str = current pair
        :param strategy: IStrategy = user defined strategy object
        :param dk: FreqaiDataKitchen = non-persistent data container for current coin/loop
        :param data_load_timerange: TimeRange = the amount of data to be loaded
                                    for populate_any_indicators
                                    (larger than new_trained_timerange so that
                                    new_trained_timerange does not contain any NaNs)
        """

        corr_dataframes, base_dataframes = self.dd.get_base_and_corr_dataframes(
            data_load_timerange, pair, dk
        )

        unfiltered_dataframe = dk.use_strategy_to_populate_indicators(
            strategy, corr_dataframes, base_dataframes, pair
        )

        unfiltered_dataframe = dk.slice_dataframe(new_trained_timerange, unfiltered_dataframe)

        # find the features indicated by strategy and store in datakitchen
        dk.find_features(unfiltered_dataframe)
        dk.find_labels(unfiltered_dataframe)

        model = self.train(unfiltered_dataframe, pair, dk)

        self.dd.pair_dict[pair]["trained_timestamp"] = new_trained_timerange.stopts
        dk.set_new_model_names(pair, new_trained_timerange.stopts)
        self.dd.save_data(model, pair, dk)

        if self.plot_features:
            plot_feature_importance(model, pair, dk, self.plot_features)

        if self.freqai_info.get("purge_old_models", False):
            self.dd.purge_old_models()

    def set_initial_historic_predictions(
        self, pred_df: DataFrame, dk: FreqaiDataKitchen, pair: str, strat_df: DataFrame
    ) -> None:
        """
        This function is called only if the datadrawer failed to load an
        existing set of historic predictions. In this case, it builds
        the structure and sets fake predictions off the first training
        data. After that, FreqAI will append new real predictions to the
        set of historic predictions.

        These values are used to generate live statistics which can be used
        in the strategy for adaptive values. E.g. &*_mean/std are quantities
        that can be computed based on live predictions from the set of historical
        predictions. Those values can be used in the user strategy to better
        assess prediction rarity, and thus wait for probabilistically favorable
        entries relative to the live historical predictions.

        If the user reuses an identifier on a subsequent instance,
        this function will not be called. In that case, "real" predictions
        will be appended to the loaded set of historic predictions.
        :param pred_df: DataFrame = the predictions made on the first training data
        :param dk: FreqaiDataKitchen = object containing methods for data analysis
        :param pair: str = current pair
        :param strat_df: DataFrame = the strategy provided dataframe
        """

        self.dd.historic_predictions[pair] = pred_df
        hist_preds_df = self.dd.historic_predictions[pair]

        self.set_start_dry_live_date(strat_df)

        for label in hist_preds_df.columns:
            if hist_preds_df[label].dtype == object:
                continue
            hist_preds_df[f'{label}_mean'] = 0
            hist_preds_df[f'{label}_std'] = 0

        hist_preds_df['do_predict'] = 0

        if self.freqai_info['feature_parameters'].get('DI_threshold', 0) > 0:
            hist_preds_df['DI_values'] = 0

        for return_str in dk.data['extra_returns_per_train']:
            hist_preds_df[return_str] = dk.data['extra_returns_per_train'][return_str]

        hist_preds_df['close_price'] = strat_df['close']
        hist_preds_df['date_pred'] = strat_df['date']

        # # for keras type models, the conv_window needs to be prepended so
        # # viewing is correct in frequi
        if self.freqai_info.get('keras', False) or self.ft_params.get('inlier_metric_window', 0):
            n_lost_points = self.freqai_info.get('conv_width', 2)
            zeros_df = DataFrame(np.zeros((n_lost_points, len(hist_preds_df.columns))),
                                 columns=hist_preds_df.columns)
            self.dd.historic_predictions[pair] = pd.concat(
                [zeros_df, hist_preds_df], axis=0, ignore_index=True)

    def fit_live_predictions(self, dk: FreqaiDataKitchen, pair: str) -> None:
        """
        Fit the labels with a gaussian distribution
        """
        import scipy as spy

        # add classes from classifier label types if used
        full_labels = dk.label_list + dk.unique_class_list

        num_candles = self.freqai_info.get("fit_live_predictions_candles", 100)
        dk.data["labels_mean"], dk.data["labels_std"] = {}, {}
        for label in full_labels:
            if self.dd.historic_predictions[dk.pair][label].dtype == object:
                continue
            f = spy.stats.norm.fit(
                self.dd.historic_predictions[dk.pair][label].tail(num_candles))
            dk.data["labels_mean"][label], dk.data["labels_std"][label] = f[0], f[1]

        return

    def inference_timer(self, do: Literal['start', 'stop'] = 'start', pair: str = ''):
        """
        Timer designed to track the cumulative time spent in FreqAI for one pass through
        the whitelist. This will check if the time spent is more than 1/4 the time
        of a single candle, and if so, it will warn the user of degraded performance
        """
        if do == 'start':
            self.pair_it += 1
            self.begin_time = time.time()
        elif do == 'stop':
            end = time.time()
            time_spent = (end - self.begin_time)
            if self.freqai_info.get('write_metrics_to_disk', False):
                self.dd.update_metric_tracker('inference_time', time_spent, pair)
            self.inference_time += time_spent
            if self.pair_it == self.total_pairs:
                logger.info(
                    f'Total time spent inferencing pairlist {self.inference_time:.2f} seconds')
                if self.inference_time > 0.25 * self.base_tf_seconds:
                    logger.warning("Inference took over 25% of the candle time. Reduce pairlist to"
                                   " avoid blinding open trades and degrading performance.")
                self.pair_it = 0
                self.inference_time = 0
        return

    def train_timer(self, do: Literal['start', 'stop'] = 'start', pair: str = ''):
        """
        Timer designed to track the cumulative time spent training the full pairlist in
        FreqAI.
        """
        if do == 'start':
            self.pair_it_train += 1
            self.begin_time_train = time.time()
        elif do == 'stop':
            end = time.time()
            time_spent = (end - self.begin_time_train)
            if self.freqai_info.get('write_metrics_to_disk', False):
                self.dd.collect_metrics(time_spent, pair)

            self.train_time += time_spent
            if self.pair_it_train == self.total_pairs:
                logger.info(
                    f'Total time spent training pairlist {self.train_time:.2f} seconds')
                self.pair_it_train = 0
                self.train_time = 0
        return

    def get_init_model(self, pair: str) -> Any:
        if pair not in self.dd.model_dictionary or not self.continual_learning:
            init_model = None
        else:
            init_model = self.dd.model_dictionary[pair]

        return init_model

    def _set_train_queue(self):
        """
        Sets train queue from existing train timestamps if they exist
        otherwise it sets the train queue based on the provided whitelist.
        """
        current_pairlist = self.config.get("exchange", {}).get("pair_whitelist")
        if not self.dd.pair_dict:
            logger.info('Set fresh train queue from whitelist. '
                        f'Queue: {current_pairlist}')
            return deque(current_pairlist)

        best_queue = deque()

        pair_dict_sorted = sorted(self.dd.pair_dict.items(),
                                  key=lambda k: k[1]['trained_timestamp'])
        for pair in pair_dict_sorted:
            if pair[0] in current_pairlist:
                best_queue.append(pair[0])
        for pair in current_pairlist:
            if pair not in best_queue:
                best_queue.appendleft(pair)

        logger.info('Set existing queue from trained timestamps. '
                    f'Best approximation queue: {best_queue}')
        return best_queue

    def cache_corr_pairlist_dfs(self, dataframe: DataFrame, dk: FreqaiDataKitchen) -> DataFrame:
        """
        Cache the corr_pairlist dfs to speed up performance for subsequent pairs during the
        current candle.
        :param dataframe: strategy fed dataframe
        :param dk: datakitchen object for current asset
        :return: dataframe to attach/extract cached corr_pair dfs to/from.
        """

        if self.get_corr_dataframes:
            self.corr_dataframes = dk.extract_corr_pair_columns_from_populated_indicators(dataframe)
            if not self.corr_dataframes:
                logger.warning("Couldn't cache corr_pair dataframes for improved performance. "
                               "Consider ensuring that the full coin/stake, e.g. XYZ/USD, "
                               "is included in the column names when you are creating features "
                               "in `populate_any_indicators()`.")
            self.get_corr_dataframes = not bool(self.corr_dataframes)
        elif self.corr_dataframes:
            dataframe = dk.attach_corr_pair_columns(
                dataframe, self.corr_dataframes, dk.pair)

        return dataframe

    def track_current_candle(self):
        """
        Checks if the latest candle appended by the datadrawer is
        equivalent to the latest candle seen by FreqAI. If not, it
        asks to refresh the cached corr_dfs, and resets the pair
        counter.
        """
        if self.dd.current_candle > self.current_candle:
            self.get_corr_dataframes = True
            self.pair_it = 1
            self.current_candle = self.dd.current_candle

    def ensure_data_exists(self, len_dataframe_backtest: int,
                           tr_backtest: TimeRange, pair: str) -> bool:
        """
        Check if the backtesting dataframe is empty; if it is, report useful information to user.
        :param len_dataframe_backtest: the length of the backtesting dataframe
        :param tr_backtest: current backtesting timerange.
        :param pair: current pair
        :return: if the data exists or not
        """
        if self.config.get("freqai_backtest_live_models", False) and len_dataframe_backtest == 0:
            logger.info(f"No data found for pair {pair} from "
                        f"{tr_backtest.start_fmt} to {tr_backtest.stop_fmt}. "
                        "Probably more than one training within the same candle period.")
            return False
        return True

    def log_backtesting_progress(self, tr_train: TimeRange, pair: str,
                                 train_it: int, total_trains: int):
        """
        Log the backtesting progress so user knows how many pairs have been trained and
        how many more pairs/trains remain.
        :param tr_train: the training timerange
        :param train_it: the train iteration for the current pair (the sliding window progress)
        :param pair: the current pair
        :param total_trains: total trains (total number of slides for the sliding window)
        """
        if not self.config.get("freqai_backtest_live_models", False):
            logger.info(
                f"Training {pair}, {self.pair_it}/{self.total_pairs} pairs"
                f" from {tr_train.start_fmt} "
                f"to {tr_train.stop_fmt}, {train_it}/{total_trains} "
                "trains"
            )

    def backtesting_fit_live_predictions(self, dk: FreqaiDataKitchen):
        """
        Apply fit_live_predictions function in backtesting with a dummy historic_predictions
        The loop is required to simulate dry/live operation, as it is not possible to predict
        the type of logic implemented by the user.
        :param dk: datakitchen object
        """
        fit_live_predictions_candles = self.freqai_info.get("fit_live_predictions_candles", 0)
        if fit_live_predictions_candles:
            logger.info("Applying fit_live_predictions in backtesting")
            label_columns = [col for col in dk.full_df.columns if (
                col.startswith("&") and
                not (col.startswith("&") and col.endswith("_mean")) and
                not (col.startswith("&") and col.endswith("_std")) and
                col not in self.dk.data["extra_returns_per_train"])
            ]

            for index in range(len(dk.full_df)):
                if index >= fit_live_predictions_candles:
                    self.dd.historic_predictions[self.dk.pair] = (
                        dk.full_df.iloc[index - fit_live_predictions_candles:index])
                    self.fit_live_predictions(self.dk, self.dk.pair)
                    for label in label_columns:
                        if dk.full_df[label].dtype == object:
                            continue
                        if "labels_mean" in self.dk.data:
                            dk.full_df.at[index, f"{label}_mean"] = (
                                self.dk.data["labels_mean"][label])
                        if "labels_std" in self.dk.data:
                            dk.full_df.at[index, f"{label}_std"] = self.dk.data["labels_std"][label]

                    for extra_col in self.dk.data["extra_returns_per_train"]:
                        dk.full_df.at[index, f"{extra_col}"] = (
                            self.dk.data["extra_returns_per_train"][extra_col])

        return

    def update_metadata(self, metadata: Dict[str, Any]):
        """
        Update global metadata and save the updated json file
        :param metadata: new global metadata dict
        """
        self.dd.save_global_metadata_to_disk(metadata)
        self.metadata = metadata

    def set_start_dry_live_date(self, live_dataframe: DataFrame):
        key_name = "start_dry_live_date"
        if key_name not in self.metadata:
            metadata = self.metadata
            metadata[key_name] = int(
                pd.to_datetime(live_dataframe.tail(1)["date"].values[0]).timestamp())
            self.update_metadata(metadata)

    def start_backtesting_from_historic_predictions(
        self, dataframe: DataFrame, metadata: dict, dk: FreqaiDataKitchen
    ) -> FreqaiDataKitchen:
        """
        :param dataframe: DataFrame = strategy passed dataframe
        :param metadata: Dict = pair metadata
        :param dk: FreqaiDataKitchen = Data management/analysis tool associated to present pair only
        :return:
            FreqaiDataKitchen = Data management/analysis tool associated to present pair only
        """
        pair = metadata["pair"]
        dk.return_dataframe = dataframe
        saved_dataframe = self.dd.historic_predictions[pair]
        columns_to_drop = list(set(saved_dataframe.columns).intersection(
            dk.return_dataframe.columns))
        dk.return_dataframe = dk.return_dataframe.drop(columns=list(columns_to_drop))
        dk.return_dataframe = pd.merge(
            dk.return_dataframe, saved_dataframe, how='left', left_on='date', right_on="date_pred")
        # dk.return_dataframe = dk.return_dataframe[saved_dataframe.columns].fillna(0)
        return dk

    # The following methods are overridden by user-made prediction models.
    # See freqai/prediction_models/CatboostPredictionModel.py for an example.

    @abstractmethod
    def train(self, unfiltered_df: DataFrame, pair: str,
              dk: FreqaiDataKitchen, **kwargs) -> Any:
        """
        Filter the training data and train a model to it. Train makes heavy use of the datahandler
        for storing, saving, loading, and analyzing the data.
        :param unfiltered_df: Full dataframe for the current training period
        :param pair: pair to train on
        :return: Trained model which can be used for inference (self.predict)
        """

    @abstractmethod
    def fit(self, data_dictionary: Dict[str, Any], dk: FreqaiDataKitchen, **kwargs) -> Any:
        """
        Most regressors use the same function names and arguments e.g. user
        can drop in LGBMRegressor in place of CatBoostRegressor and all data
        management will be properly handled by FreqAI.
        :param data_dictionary: Dict = the dictionary constructed by DataHandler to hold
                                all the training and test data/labels.
        """

        return

    @abstractmethod
    def predict(
        self, unfiltered_df: DataFrame, dk: FreqaiDataKitchen, **kwargs
    ) -> Tuple[DataFrame, NDArray[np.int_]]:
        """
        Filter the prediction features data and predict with it.
        :param unfiltered_df: Full dataframe for the current backtest period.
        :param dk: FreqaiDataKitchen = Data management/analysis tool associated to present pair only
        :param first: boolean = whether this is the first prediction or not.
        :return:
        :predictions: np.array of predictions
        :do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove
        data (NaNs) or felt uncertain about data (i.e. SVM and/or DI index)
        """