• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

freqtrade / freqtrade / 6181253459

08 Sep 2023 06:04AM UTC coverage: 94.614% (+0.06%) from 94.556%
6181253459

push

github-actions

web-flow
Merge pull request #9159 from stash86/fix-adjust

remove old codes when we only can do partial entries

2 of 2 new or added lines in 1 file covered. (100.0%)

19114 of 20202 relevant lines covered (94.61%)

0.95 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

97.99
/freqtrade/data/converter.py
1
"""
2
Functions to convert data from one format to another
3
"""
4
import logging
1✔
5
from typing import Dict, List
1✔
6

7
import numpy as np
1✔
8
import pandas as pd
1✔
9
from pandas import DataFrame, to_datetime
1✔
10

11
from freqtrade.constants import (DEFAULT_DATAFRAME_COLUMNS, DEFAULT_TRADES_COLUMNS, TRADES_DTYPES,
1✔
12
                                 Config, TradeList)
13
from freqtrade.enums import CandleType, TradingMode
1✔
14

15

16
logger = logging.getLogger(__name__)
1✔
17

18

19
def ohlcv_to_dataframe(ohlcv: list, timeframe: str, pair: str, *,
                       fill_missing: bool = True, drop_incomplete: bool = True) -> DataFrame:
    """
    Build a DataFrame from raw candle (OHLCV) rows in ccxt.fetch_ohlcv format.
    :param ohlcv: list with candle (OHLCV) data, as returned by exchange.async_get_candle_history
    :param timeframe: timeframe (e.g. 5m). Used to fill up eventual missing data
    :param pair: Pair this data is for (used to warn if fillup was necessary)
    :param fill_missing: fill up missing candles with 0 candles
                         (see ohlcv_fill_up_missing_data for details)
    :param drop_incomplete: Drop the last candle of the dataframe, assuming it's incomplete
    :return: DataFrame
    """
    logger.debug(f"Converting candle (OHLCV) data to dataframe for pair {pair}.")
    frame = DataFrame(ohlcv, columns=DEFAULT_DATAFRAME_COLUMNS)

    # Raw timestamps arrive as epoch milliseconds - convert to tz-aware datetimes.
    frame['date'] = to_datetime(frame['date'], unit='ms', utc=True)

    # Some exchanges deliver integers for volume (and occasionally for OHLC).
    # Cast everything to float, since TA-LIB indicators used in strategies
    # assume floats and raise otherwise.
    float_columns = {name: 'float' for name in ('open', 'high', 'low', 'close', 'volume')}
    frame = frame.astype(dtype=float_columns)
    return clean_ohlcv_dataframe(frame, timeframe, pair,
                                 fill_missing=fill_missing,
                                 drop_incomplete=drop_incomplete)
46

47

48
def clean_ohlcv_dataframe(data: DataFrame, timeframe: str, pair: str, *,
                          fill_missing: bool, drop_incomplete: bool) -> DataFrame:
    """
    Cleanse an OHLCV dataframe by
      * grouping it by date (removes duplicate ticks)
      * dropping the last candle if requested
      * filling up missing data (if requested)
    :param data: DataFrame containing candle (OHLCV) data.
    :param timeframe: timeframe (e.g. 5m). Used to fill up eventual missing data
    :param pair: Pair this data is for (used to warn if fillup was necessary)
    :param fill_missing: fill up missing candles with 0 candles
                         (see ohlcv_fill_up_missing_data for details)
    :param drop_incomplete: Drop the last candle of the dataframe, assuming it's incomplete
    :return: DataFrame
    """
    # Collapse duplicate timestamps into a single candle per date.
    aggregations = {
        'open': 'first',
        'high': 'max',
        'low': 'min',
        'close': 'last',
        'volume': 'max',
    }
    data = data.groupby(by='date', as_index=False, sort=True).agg(aggregations)

    if drop_incomplete:
        # The most recent candle is assumed to still be forming - discard it.
        data.drop(data.tail(1).index, inplace=True)
        logger.debug('Dropping last candle')

    if fill_missing:
        return ohlcv_fill_up_missing_data(data, timeframe, pair)
    return data
80

81

82
def ohlcv_fill_up_missing_data(dataframe: DataFrame, timeframe: str, pair: str) -> DataFrame:
    """
    Fill gaps in candle data with synthetic 0-volume rows.
    The previous close is reused as "open", "high", "low" and "close" for the
    inserted rows; their volume is 0.
    """
    from freqtrade.exchange import timeframe_to_minutes

    aggregations = {
        'open': 'first',
        'high': 'max',
        'low': 'min',
        'close': 'last',
        'volume': 'sum'
    }
    tf_minutes = timeframe_to_minutes(timeframe)
    rule = f'{tf_minutes}min'
    if 43200 <= tf_minutes < 525600:
        # Monthly candles need special treatment to stick to the 1st of the month
        rule = f'{timeframe}S'
    elif tf_minutes > 43200:
        rule = timeframe
    # Resampling creates NaN rows for every missing interval.
    resampled = dataframe.resample(rule, on='date').agg(aggregations)

    # Carry the last known close forward into the gaps ...
    resampled['close'] = resampled['close'].ffill()
    # ... and reuse it as open/high/low of the synthetic candles.
    for column in ('open', 'high', 'low'):
        resampled[column] = resampled[column].fillna(resampled['close'])
    resampled.reset_index(inplace=True)

    rows_before = len(dataframe)
    rows_after = len(resampled)
    pct_missing = (rows_after - rows_before) / rows_before if rows_before > 0 else 0
    if rows_before != rows_after:
        message = (f"Missing data fillup for {pair}: before: {rows_before} - after: {rows_after}"
                   f" - {pct_missing:.2%}")
        if pct_missing > 0.01:
            logger.info(message)
        else:
            # Don't be verbose if only a small amount is missing
            logger.debug(message)
    return resampled
128

129

130
def trim_dataframe(df: DataFrame, timerange, *, df_date_col: str = 'date',
                   startup_candles: int = 0) -> DataFrame:
    """
    Restrict a dataframe to the given timerange.
    :param df: Dataframe to trim
    :param timerange: timerange (use start and end date if available)
    :param df_date_col: Column in the dataframe to use as Date column
    :param startup_candles: When not 0, is used instead of the timerange start date
    :return: trimmed dataframe
    """
    if startup_candles:
        # A startup-candle count takes precedence over the timerange start date.
        df = df.iloc[startup_candles:, :]
    elif timerange.starttype == 'date':
        df = df.loc[df[df_date_col] >= timerange.startdt, :]
    if timerange.stoptype == 'date':
        df = df.loc[df[df_date_col] <= timerange.stopdt, :]
    return df
149

150

151
def trim_dataframes(preprocessed: Dict[str, DataFrame], timerange,
                    startup_candles: int) -> Dict[str, DataFrame]:
    """
    Trim the startup period from all analyzed dataframes.
    Pairs that end up empty after trimming are dropped (with a warning).
    :param preprocessed: Dict of pair: dataframe
    :param timerange: timerange (use start and end date if available)
    :param startup_candles: Startup-candles that should be removed
    :return: Dict of trimmed dataframes
    """
    processed: Dict[str, DataFrame] = {}

    for pair, frame in preprocessed.items():
        trimmed = trim_dataframe(frame, timerange, startup_candles=startup_candles)
        if trimmed.empty:
            logger.warning(f'{pair} has no data left after adjusting for startup candles, '
                           f'skipping.')
        else:
            processed[pair] = trimmed
    return processed
170

171

172
def order_book_to_dataframe(bids: list, asks: list) -> DataFrame:
    """
    TODO: This should get a dedicated test
    Convert order book bid/ask lists into a single dataframe with the layout
    suggested by creslin:
    -------------------------------------------------------------------
     b_sum       b_size       bids       asks       a_size       a_sum
    -------------------------------------------------------------------
    """
    bid_side = DataFrame(bids, columns=['bids', 'b_size'])
    # Running total of bid size down the book.
    bid_side['b_sum'] = bid_side['b_size'].cumsum()

    ask_side = DataFrame(asks, columns=['asks', 'a_size'])
    # Running total of ask size down the book.
    ask_side['a_sum'] = ask_side['a_size'].cumsum()

    ordered = [bid_side['b_sum'], bid_side['b_size'], bid_side['bids'],
               ask_side['asks'], ask_side['a_size'], ask_side['a_sum']]
    return pd.concat(ordered, axis=1,
                     keys=['b_sum', 'b_size', 'bids', 'asks', 'a_size', 'a_sum'])
195

196

197
def trades_df_remove_duplicates(trades: pd.DataFrame) -> pd.DataFrame:
    """
    Drop duplicate rows from a trades DataFrame.
    A row counts as a duplicate when both its 'timestamp' and 'id' match an
    earlier row; the first occurrence is kept.
    :param trades: DataFrame with the columns constants.DEFAULT_TRADES_COLUMNS
    :return: DataFrame with duplicates removed based on the 'timestamp' column
    """
    deduplicated = trades.drop_duplicates(subset=['timestamp', 'id'])
    return deduplicated
205

206

207
def trades_dict_to_list(trades: List[Dict]) -> TradeList:
    """
    Convert a fetch_trades result into a list of lists (more memory efficient
    than keeping the full dicts around).
    :param trades: List of trades, as returned by ccxt.fetch_trades.
    :return: List of Lists, with constants.DEFAULT_TRADES_COLUMNS as columns
    """
    return [[trade[column] for column in DEFAULT_TRADES_COLUMNS] for trade in trades]
214

215

216
def trades_convert_types(trades: DataFrame) -> DataFrame:
    """
    Cast trade columns to their canonical dtypes and derive a 'date' column
    from the millisecond 'timestamp' column.
    """
    converted = trades.astype(TRADES_DTYPES)
    converted['date'] = to_datetime(converted['timestamp'], unit='ms', utc=True)
    return converted
223

224

225
def trades_list_to_df(trades: TradeList, convert: bool = True):
    """
    Convert a trades list to a dataframe.
    :param trades: List of Lists with constants.DEFAULT_TRADES_COLUMNS as columns
    :param convert: When True, also cast columns to their canonical dtypes
                    and add the 'date' column (see trades_convert_types)
    """
    # An empty list still yields a frame with the expected columns.
    df = (DataFrame(trades, columns=DEFAULT_TRADES_COLUMNS) if trades
          else DataFrame(columns=DEFAULT_TRADES_COLUMNS))

    return trades_convert_types(df) if convert else df
239

240

241
def trades_to_ohlcv(trades: DataFrame, timeframe: str) -> DataFrame:
    """
    Convert a trades dataframe to OHLCV candles via resampling.
    :param trades: List of trades, as returned by ccxt.fetch_trades.
    :param timeframe: Timeframe to resample data to
    :return: OHLCV Dataframe.
    :raises: ValueError if no trades are provided
    """
    from freqtrade.exchange import timeframe_to_minutes
    tf_minutes = timeframe_to_minutes(timeframe)
    if trades.empty:
        raise ValueError('Trade-list empty.')
    indexed = trades.set_index('date', drop=True)

    rule = f'{tf_minutes}min'
    candles = indexed['price'].resample(rule).ohlc()
    candles['volume'] = indexed['amount'].resample(rule).sum()
    candles['date'] = candles.index
    # Resampling leaves NaN rows for intervals without trades - discard them.
    candles = candles.dropna()
    return candles.loc[:, DEFAULT_DATAFRAME_COLUMNS]
261

262

263
def convert_trades_format(config: Config, convert_from: str, convert_to: str, erase: bool):
    """
    Convert trades from one format to another format.
    :param config: Config dictionary
    :param convert_from: Source format
    :param convert_to: Target format
    :param erase: Erase source data (does not apply if source and target format are identical)
    """
    from freqtrade.data.history.idatahandler import get_datahandler
    source = get_datahandler(config['datadir'], convert_from)
    target = get_datahandler(config['datadir'], convert_to)

    if 'pairs' not in config:
        # No explicit pair list - convert everything the source handler has.
        config['pairs'] = source.trades_get_pairs(config['datadir'])
    logger.info(f"Converting trades for {config['pairs']}")

    # Only purge when the conversion actually leaves a distinct source behind.
    purge_source = erase and convert_from != convert_to
    for pair in config['pairs']:
        trades = source.trades_load(pair=pair)
        logger.info(f"Converting {len(trades)} trades for {pair}")
        target.trades_store(pair, trades)
        if purge_source:
            logger.info(f"Deleting source Trade data for {pair}.")
            source.trades_purge(pair=pair)
286

287

288
def convert_ohlcv_format(
    config: Config,
    convert_from: str,
    convert_to: str,
    erase: bool,
):
    """
    Convert OHLCV from one format to another
    :param config: Config dictionary
    :param convert_from: Source format
    :param convert_to: Target format
    :param erase: Erase source data (does not apply if source and target format are identical)
    """
    from freqtrade.data.history.idatahandler import get_datahandler
    source = get_datahandler(config['datadir'], convert_from)
    target = get_datahandler(config['datadir'], convert_to)
    timeframes = config.get('timeframes', [config.get('timeframe')])
    logger.info(f"Converting candle (OHLCV) for timeframe {timeframes}")

    # Default to every known candle type unless the config restricts it.
    requested = config.get('candle_types', [c.value for c in CandleType])
    candle_types = [CandleType.from_string(ct) for ct in requested]
    logger.info(candle_types)
    # Gather (pair, timeframe, candle_type) combinations for spot and futures.
    paircombs = source.ohlcv_get_available_data(config['datadir'], TradingMode.SPOT)
    paircombs.extend(source.ohlcv_get_available_data(config['datadir'], TradingMode.FUTURES))

    if 'pairs' in config:
        # Filter pairs
        paircombs = [comb for comb in paircombs if comb[0] in config['pairs']]
    if 'timeframes' in config:
        paircombs = [comb for comb in paircombs if comb[1] in config['timeframes']]
    paircombs = [comb for comb in paircombs if comb[2] in candle_types]

    paircombs = sorted(paircombs, key=lambda comb: (comb[0], comb[1], comb[2].value))

    formatted_paircombs = '\n'.join(f"{pair}, {timeframe}, {candle_type}"
                                    for pair, timeframe, candle_type in paircombs)

    logger.info(f"Converting candle (OHLCV) data for the following pair combinations:\n"
                f"{formatted_paircombs}")
    for pair, timeframe, candle_type in paircombs:
        data = source.ohlcv_load(pair=pair, timeframe=timeframe,
                                 timerange=None,
                                 fill_missing=False,
                                 drop_incomplete=False,
                                 startup_candles=0,
                                 candle_type=candle_type)
        logger.info(f"Converting {len(data)} {timeframe} {candle_type} candles for {pair}")
        if len(data) > 0:
            target.ohlcv_store(
                pair=pair,
                timeframe=timeframe,
                data=data,
                candle_type=candle_type
            )
            if erase and convert_from != convert_to:
                logger.info(f"Deleting source data for {pair} / {timeframe}")
                source.ohlcv_purge(pair=pair, timeframe=timeframe, candle_type=candle_type)
346

347

348
def reduce_dataframe_footprint(df: DataFrame) -> DataFrame:
    """
    Downcast 64-bit columns to 32-bit to reduce dataframe memory usage.
    The core price/volume columns ('open', 'high', 'low', 'close', 'volume')
    keep their original dtype; every other float64 column becomes float32 and
    every other int64 column becomes int32.
    :param df: Dataframe to be converted to float/int 32s
    :return: Dataframe converted to float/int 32s
    """

    logger.debug(f"Memory usage of dataframe is "
                 f"{df.memory_usage().sum() / 1024**2:.2f} MB")

    keep_unchanged = ('open', 'high', 'low', 'close', 'volume')
    target_dtypes = df.dtypes
    for column, dtype in target_dtypes.items():
        if column in keep_unchanged:
            continue
        if dtype == np.float64:
            target_dtypes[column] = np.float32
        elif dtype == np.int64:
            target_dtypes[column] = np.int32
    df = df.astype(target_dtypes)

    logger.debug(f"Memory usage after optimization is: "
                 f"{df.memory_usage().sum() / 1024**2:.2f} MB")

    return df
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc