
freqtrade / freqtrade · build 9394559170
26 Apr 2024 06:36AM UTC · coverage: 94.656% (-0.02%) from 94.674%
push · github · xmatthias: "Loader should be passed as kwarg for clarity"
20280 of 21425 relevant lines covered (94.66%) · 0.95 hits per line

Source file: /freqtrade/data/converter/converter.py — 98.08% covered
"""
Functions to convert data from one format to another
"""
import logging
from typing import Dict

import numpy as np
import pandas as pd
from pandas import DataFrame, to_datetime

from freqtrade.constants import DEFAULT_DATAFRAME_COLUMNS, Config
from freqtrade.enums import CandleType, TradingMode


logger = logging.getLogger(__name__)


def ohlcv_to_dataframe(ohlcv: list, timeframe: str, pair: str, *,
                       fill_missing: bool = True, drop_incomplete: bool = True) -> DataFrame:
    """
    Converts a list with candle (OHLCV) data (in format returned by ccxt.fetch_ohlcv)
    to a Dataframe
    :param ohlcv: list with candle (OHLCV) data, as returned by exchange.async_get_candle_history
    :param timeframe: timeframe (e.g. 5m). Used to fill up eventual missing data
    :param pair: Pair this data is for (used to warn if fillup was necessary)
    :param fill_missing: fill up missing candles with 0 candles
                         (see ohlcv_fill_up_missing_data for details)
    :param drop_incomplete: Drop the last candle of the dataframe, assuming it's incomplete
    :return: DataFrame
    """
    logger.debug(f"Converting candle (OHLCV) data to dataframe for pair {pair}.")
    cols = DEFAULT_DATAFRAME_COLUMNS
    df = DataFrame(ohlcv, columns=cols)

    df['date'] = to_datetime(df['date'], unit='ms', utc=True)

    # Some exchanges return int values for Volume and even for OHLC.
    # Convert them since TA-LIB indicators used in the strategy assume floats
    # and fail with exception...
    df = df.astype(dtype={'open': 'float', 'high': 'float', 'low': 'float', 'close': 'float',
                          'volume': 'float'})
    return clean_ohlcv_dataframe(df, timeframe, pair,
                                 fill_missing=fill_missing,
                                 drop_incomplete=drop_incomplete)
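
# Illustrative usage sketch (not part of the original module; sample values are
# hypothetical, and DEFAULT_DATAFRAME_COLUMNS is assumed to be
# ['date', 'open', 'high', 'low', 'close', 'volume']):
#
#   raw = [
#       # [timestamp_ms, open, high, low, close, volume], as returned by ccxt
#       [1714089600000, 64000.0, 64100.0, 63900.0, 64050.0, 12.5],
#       [1714089900000, 64050.0, 64200.0, 64000.0, 64150.0, 8.2],
#   ]
#   df = ohlcv_to_dataframe(raw, '5m', 'BTC/USDT',
#                           fill_missing=True, drop_incomplete=False)
#   # -> 'date' becomes a tz-aware UTC datetime column; OHLCV columns become float.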


def clean_ohlcv_dataframe(data: DataFrame, timeframe: str, pair: str, *,
                          fill_missing: bool, drop_incomplete: bool) -> DataFrame:
    """
    Cleanse an OHLCV dataframe by
      * Grouping it by date (removes duplicate ticks)
      * Dropping the last candle if requested
      * Filling up missing data (if requested)
    :param data: DataFrame containing candle (OHLCV) data.
    :param timeframe: timeframe (e.g. 5m). Used to fill up eventual missing data
    :param pair: Pair this data is for (used to warn if fillup was necessary)
    :param fill_missing: fill up missing candles with 0 candles
                         (see ohlcv_fill_up_missing_data for details)
    :param drop_incomplete: Drop the last candle of the dataframe, assuming it's incomplete
    :return: DataFrame
    """
    # group by index and aggregate results to eliminate duplicate ticks
    data = data.groupby(by='date', as_index=False, sort=True).agg({
        'open': 'first',
        'high': 'max',
        'low': 'min',
        'close': 'last',
        'volume': 'max',
    })
    # eliminate partial candle
    if drop_incomplete:
        data.drop(data.tail(1).index, inplace=True)
        logger.debug('Dropping last candle')

    if fill_missing:
        return ohlcv_fill_up_missing_data(data, timeframe, pair)
    else:
        return data
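
# Illustrative sketch (hypothetical input): rows sharing the same 'date' collapse
# into one candle via the groupby above (first open, max high, min low, last close,
# max volume), and the final candle is dropped when drop_incomplete=True.
#
#   cleaned = clean_ohlcv_dataframe(df, '5m', 'BTC/USDT',
#                                   fill_missing=False, drop_incomplete=True)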


def ohlcv_fill_up_missing_data(dataframe: DataFrame, timeframe: str, pair: str) -> DataFrame:
    """
    Fills up missing data with 0-volume rows,
    using the previous close as price for "open", "high", "low" and "close"; volume is set to 0
    """
    from freqtrade.exchange import timeframe_to_resample_freq

    ohlcv_dict = {
        'open': 'first',
        'high': 'max',
        'low': 'min',
        'close': 'last',
        'volume': 'sum'
    }
    resample_interval = timeframe_to_resample_freq(timeframe)
    # Resample to create "NaN" values
    df = dataframe.resample(resample_interval, on='date').agg(ohlcv_dict)

    # Forward-fill close for missing rows
    df['close'] = df['close'].ffill()
    # Use close for "open, high, low"
    df.loc[:, ['open', 'high', 'low']] = df[['open', 'high', 'low']].fillna(
        value={'open': df['close'],
               'high': df['close'],
               'low': df['close'],
               })
    df.reset_index(inplace=True)
    len_before = len(dataframe)
    len_after = len(df)
    pct_missing = (len_after - len_before) / len_before if len_before > 0 else 0
    if len_before != len_after:
        message = (f"Missing data fillup for {pair}, {timeframe}: "
                   f"before: {len_before} - after: {len_after} - {pct_missing:.2%}")
        if pct_missing > 0.01:
            logger.info(message)
        else:
            # Don't be verbose if only a small amount is missing
            logger.debug(message)
    return df
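
# Illustrative sketch (hypothetical input): a missing 5m candle between two rows is
# recreated as a synthetic row whose open/high/low/close all equal the previous
# close and whose volume is 0.
#
#   filled = ohlcv_fill_up_missing_data(cleaned, '5m', 'BTC/USDT')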


def trim_dataframe(df: DataFrame, timerange, *, df_date_col: str = 'date',
                   startup_candles: int = 0) -> DataFrame:
    """
    Trim dataframe based on given timerange
    :param df: Dataframe to trim
    :param timerange: timerange (use start and end date if available)
    :param df_date_col: Column in the dataframe to use as Date column
    :param startup_candles: When not 0, is used instead of the timerange start date
    :return: trimmed dataframe
    """
    if startup_candles:
        # Trim candles instead of timeframe in case of given startup_candle count
        df = df.iloc[startup_candles:, :]
    else:
        if timerange.starttype == 'date':
            df = df.loc[df[df_date_col] >= timerange.startdt, :]
    if timerange.stoptype == 'date':
        df = df.loc[df[df_date_col] <= timerange.stopdt, :]
    return df
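
# Illustrative sketch ('timerange' is assumed to be a freqtrade TimeRange with
# starttype/stoptype set to 'date'): rows outside [startdt, stopdt] are dropped.
# With startup_candles=30, the first 30 rows are removed instead of applying the
# start date.
#
#   trimmed = trim_dataframe(filled, timerange, startup_candles=30)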


def trim_dataframes(preprocessed: Dict[str, DataFrame], timerange,
                    startup_candles: int) -> Dict[str, DataFrame]:
    """
    Trim startup period from analyzed dataframes
    :param preprocessed: Dict of pair: dataframe
    :param timerange: timerange (use start and end date if available)
    :param startup_candles: Startup-candles that should be removed
    :return: Dict of trimmed dataframes
    """
    processed: Dict[str, DataFrame] = {}

    for pair, df in preprocessed.items():
        trimmed_df = trim_dataframe(df, timerange, startup_candles=startup_candles)
        if not trimmed_df.empty:
            processed[pair] = trimmed_df
        else:
            logger.warning(f'{pair} has no data left after adjusting for startup candles, '
                           f'skipping.')
    return processed
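
# Illustrative sketch (hypothetical dict; df_btc and df_eth are placeholder
# dataframes): each pair is trimmed in one pass, and pairs whose data is exhausted
# by the startup period are dropped with a warning.
#
#   processed = trim_dataframes({'BTC/USDT': df_btc, 'ETH/USDT': df_eth},
#                               timerange, startup_candles=30)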


def order_book_to_dataframe(bids: list, asks: list) -> DataFrame:
    """
    TODO: This should get a dedicated test
    Gets order book list, returns dataframe with the below format, as suggested by creslin
    -------------------------------------------------------------------
     b_sum       b_size       bids       asks       a_size       a_sum
    -------------------------------------------------------------------
    """
    cols = ['bids', 'b_size']

    bids_frame = DataFrame(bids, columns=cols)
    # add cumulative sum column
    bids_frame['b_sum'] = bids_frame['b_size'].cumsum()
    cols2 = ['asks', 'a_size']
    asks_frame = DataFrame(asks, columns=cols2)
    # add cumulative sum column
    asks_frame['a_sum'] = asks_frame['a_size'].cumsum()

    frame = pd.concat([bids_frame['b_sum'], bids_frame['b_size'], bids_frame['bids'],
                       asks_frame['asks'], asks_frame['a_size'], asks_frame['a_sum']], axis=1,
                      keys=['b_sum', 'b_size', 'bids', 'asks', 'a_size', 'a_sum'])
    # logger.info('order book %s', frame )
    return frame
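
# Illustrative sketch (hypothetical book, [price, size] per level):
#
#   book = order_book_to_dataframe(
#       bids=[[100.0, 1.5], [99.5, 2.0]],
#       asks=[[100.5, 1.0], [101.0, 3.0]],
#   )
#   # -> columns b_sum, b_size, bids, asks, a_size, a_sum, where b_sum/a_sum are
#   #    cumulative sizes down each side of the book.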


def convert_ohlcv_format(
    config: Config,
    convert_from: str,
    convert_to: str,
    erase: bool,
):
    """
    Convert OHLCV from one format to another
    :param config: Config dictionary
    :param convert_from: Source format
    :param convert_to: Target format
    :param erase: Erase source data (does not apply if source and target format are identical)
    """
    from freqtrade.data.history import get_datahandler
    src = get_datahandler(config['datadir'], convert_from)
    trg = get_datahandler(config['datadir'], convert_to)
    timeframes = config.get('timeframes', [config.get('timeframe')])
    logger.info(f"Converting candle (OHLCV) for timeframe {timeframes}")

    candle_types = [CandleType.from_string(ct) for ct in config.get('candle_types', [
        c.value for c in CandleType])]
    logger.info(candle_types)
    paircombs = src.ohlcv_get_available_data(config['datadir'], TradingMode.SPOT)
    paircombs.extend(src.ohlcv_get_available_data(config['datadir'], TradingMode.FUTURES))

    if 'pairs' in config:
        # Filter pairs
        paircombs = [comb for comb in paircombs if comb[0] in config['pairs']]

    if 'timeframes' in config:
        paircombs = [comb for comb in paircombs if comb[1] in config['timeframes']]
    paircombs = [comb for comb in paircombs if comb[2] in candle_types]

    paircombs = sorted(paircombs, key=lambda x: (x[0], x[1], x[2].value))

    formatted_paircombs = '\n'.join([f"{pair}, {timeframe}, {candle_type}"
                                    for pair, timeframe, candle_type in paircombs])

    logger.info(f"Converting candle (OHLCV) data for the following pair combinations:\n"
                f"{formatted_paircombs}")
    for pair, timeframe, candle_type in paircombs:
        data = src.ohlcv_load(pair=pair, timeframe=timeframe,
                              timerange=None,
                              fill_missing=False,
                              drop_incomplete=False,
                              startup_candles=0,
                              candle_type=candle_type)
        logger.info(f"Converting {len(data)} {timeframe} {candle_type} candles for {pair}")
        if len(data) > 0:
            trg.ohlcv_store(
                pair=pair,
                timeframe=timeframe,
                data=data,
                candle_type=candle_type
            )
            if erase and convert_from != convert_to:
                logger.info(f"Deleting source data for {pair} / {timeframe}")
                src.ohlcv_purge(pair=pair, timeframe=timeframe, candle_type=candle_type)
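
# Illustrative sketch (assumed minimal config; in practice freqtrade drives this via
# its convert-data subcommand): converting downloaded json history to feather while
# keeping the source files.
#
#   from pathlib import Path
#   config = {'datadir': Path('user_data/data/binance'),
#             'timeframe': '5m', 'pairs': ['BTC/USDT']}
#   convert_ohlcv_format(config, convert_from='json', convert_to='feather', erase=False)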


def reduce_dataframe_footprint(df: DataFrame) -> DataFrame:
    """
    Downcast all float64/int64 columns (except the OHLCV columns) to 32-bit types.
    :param df: Dataframe to be converted to float/int 32s
    :return: Dataframe converted to float/int 32s
    """

    logger.debug(f"Memory usage of dataframe is "
                 f"{df.memory_usage().sum() / 1024**2:.2f} MB")

    df_dtypes = df.dtypes
    for column, dtype in df_dtypes.items():
        if column in ['open', 'high', 'low', 'close', 'volume']:
            continue
        if dtype == np.float64:
            df_dtypes[column] = np.float32
        elif dtype == np.int64:
            df_dtypes[column] = np.int32
    df = df.astype(df_dtypes)

    logger.debug(f"Memory usage after optimization is: "
                 f"{df.memory_usage().sum() / 1024**2:.2f} MB")

    return df
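
# Illustrative sketch (df_with_features is a placeholder): float64/int64 feature
# columns (anything outside OHLCV) are downcast to 32-bit, roughly halving their
# memory footprint, while the OHLCV columns keep full precision.
#
#   slim_df = reduce_dataframe_footprint(df_with_features)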