Parameter Optimization

In [1]:
import numpy as np
import pandas as pd
import vectorbtpro as vbt
In [2]:
## Acquire BTCUSDT 1m crypto data from Binance

data = vbt.BinanceData.fetch(
    ["BTCUSDT"], 
    start="2019-01-01 UTC", 
    end="2023-02-02 UTC",
    timeframe="1m"
    )

## Save acquired data locally for persistence
data.to_hdf("/Users/dilip.rajkumar/Documents/vbtpro_tuts_private/data/Binance_BTCUSDT_OHLCV_3Y_m1.h5")
In [3]:
## Load m1 data - BTCUSD
m1_data = vbt.BinanceData.from_hdf('../data/Binance_BTCUSDT_OHLCV_3Y_m1.h5')
print(m1_data.columns)
m1_data.get()
Index(['BTCUSDT'], dtype='object', name='symbol')
Out[3]:
Open High Low Close Volume Quote volume Trade count Taker base volume Taker quote volume
Open time
2019-01-01 00:00:00+00:00 3701.23 3703.72 3701.09 3702.46 17.100110 6.329971e+04 180 5.746515 2.127570e+04
2019-01-01 00:01:00+00:00 3702.44 3702.63 3695.66 3697.04 23.700604 8.768108e+04 148 15.120491 5.593539e+04
2019-01-01 00:02:00+00:00 3699.42 3702.04 3696.08 3698.14 14.488615 5.360224e+04 80 12.700389 4.699097e+04
2019-01-01 00:03:00+00:00 3697.49 3698.19 3695.97 3696.51 8.499966 3.142328e+04 75 4.199726 1.552737e+04
2019-01-01 00:04:00+00:00 3697.20 3697.62 3695.00 3696.32 21.782886 8.051433e+04 93 15.080810 5.574050e+04
... ... ... ... ... ... ... ... ... ...
2023-02-01 23:55:00+00:00 23704.51 23711.98 23693.64 23707.48 123.787420 2.934020e+06 3405 65.091570 1.542877e+06
2023-02-01 23:56:00+00:00 23708.59 23719.69 23704.29 23714.49 157.832940 3.742687e+06 4230 90.425430 2.144293e+06
2023-02-01 23:57:00+00:00 23713.36 23726.83 23712.72 23722.29 170.786230 4.051256e+06 4601 90.996750 2.158566e+06
2023-02-01 23:58:00+00:00 23721.50 23736.12 23712.27 23716.12 254.517100 6.037764e+06 5560 132.888520 3.152625e+06
2023-02-01 23:59:00+00:00 23715.63 23733.10 23710.86 23732.66 137.082290 3.251743e+06 4520 75.053000 1.780439e+06

2145889 rows × 9 columns

Storing resampled price data in the mtf_data dictionary

In [4]:
m5_data  = m1_data.resample('5T')   # Resample 1-minute bars to 5 minutes
m15_data = m1_data.resample('15T')  # Resample 1-minute bars to 15 minutes
m30_data = m1_data.resample('30T')  # Resample 1-minute bars to 30 minutes
h1_data  = m1_data.resample("1H")   # Resample 1-minute bars to 1 hour
h2_data  = m1_data.resample("2H")   # Resample 1-minute bars to 2 hours
h4_data  = m1_data.resample('4H')   # Resample 1-minute bars to 4 hours
h12_data = m1_data.resample('12H')  # Resample 1-minute bars to 12 hours
d1_data  = m1_data.resample('1D')   # Resample 1-minute bars to daily data

mtf_data = { "1T" : m1_data, "5T" : m5_data, "15T" : m15_data, "30T" : m30_data,
             "1H" : h1_data, "2H" : h2_data, "4H" : h4_data, "12H" : h12_data, "1D" : d1_data }

freq_dict = { "1T" : 1, "5T" : 5, "15T" : 15, "30T" : 30,
              "1H" : 60, "2H" : 120, "4H" : 240, "8H" : 480, "12H" : 720,
              "1D": 1440 }            

Helper Functions

In [5]:
def remapped_tf(input_value : int) -> str:
    """Map an integer to a string timeframe format"""
    tf_freq = {1 : "1T", 5 : "5T", 15 : "15T", 30 : "30T", 60 :"1H", 
                  120 : "2H", 240 : "4H", 720 : "12H", 1440 : "1D"}
    new_value = tf_freq.get(input_value)
    return new_value            
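
A quick sanity check (illustrative): remapped_tf should invert the minute counts stored in freq_dict.

## remapped_tf maps minutes back to frequency labels
for minutes, tf in [(1, "1T"), (60, "1H"), (1440, "1D")]:
    assert remapped_tf(minutes) == tf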
In [6]:
def flatten_list(list_2D : list):
    """Flatten a list of list of strings"""
    flat_list = list_2D if len(list_2D) == 0 else [item for sublist in list_2D for item in sublist]
    return flat_list
In [7]:
def create_list_numbers(r1, r2, step):
    """Create a list of numbers from `r1` to `r2` (inclusive), spaced by `step`."""
    if isinstance(r1, float) and isinstance(r2, float):
        return list(np.round(np.arange(r1, r2 + step, step), 2))
    return list(np.arange(r1, r2 + step, step))
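
For example, the grids used in the parameter sweeps below look like this:

create_list_numbers(18, 22, 1)            # [18, 19, 20, 21, 22]
create_list_numbers(1.5, 2.5, step=0.25)  # [1.5, 1.75, 2.0, 2.25, 2.5]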

You might remember this create_resamplers function from our first tutorial, where we used it for upsampling.

In [8]:
def create_resamplers(result_dict_keys_list : list, source_indices : list,  
                      source_frequencies :list, target_index : pd.Series, target_freq : str):
    """
    Creates a dictionary of vbt pro Resampler objects.

    Parameters
    ==========
    result_dict_keys_list : list of str, keys of the output dictionary
    source_indices        : list of pd.DatetimeIndex objects of the higher timeframes
    source_frequencies    : list of str, short-form frequency labels of the source indices, e.g. ["1D", "4H"]
    target_index          : pd.DatetimeIndex, target index for the resampler objects
    target_freq           : str, target frequency for the resampler objects

    Returns
    ===========
    resamplers_dict       : dict, vbt pro Resampler objects keyed by result_dict_keys_list
    """
    
    
    resamplers = []
    for si, sf in zip(source_indices, source_frequencies):
        resamplers.append(vbt.Resampler(source_index = si,  target_index = target_index,
                                        source_freq = sf, target_freq = target_freq))
    return dict(zip(result_dict_keys_list, resamplers))
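
Illustrative usage, with the data objects defined above: build a resampler that aligns daily series onto the H1 index (the key naming mirrors the higher_tf + "_" + lower_tf convention used inside optimal_2BB below).

resamplers = create_resamplers(
    result_dict_keys_list = ["1D_1H"],
    source_indices        = [d1_data.index],
    source_frequencies    = ["1D"],
    target_index          = h1_data.index,
    target_freq           = "1H",
)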
In [9]:
@vbt.parameterized(merge_func = "concat", random_subset = 1000, show_progress=True)  
def optimal_2BB(lower_tf : int = 1, higher_tf: int = 5,
                ltf_rsi_timeperiod : int = 21, 
                bb_price_timeperiod : int = 14, bb_rsi_timeperiod : int = 14,
                bb_price_nbdevup : int = 2, bb_price_nbdevdn: int = 2,
                bb_rsi_nbdevup : int = 2, bb_rsi_nbdevdn : int = 2,
                output_metric : str | list = "total_return",
                index = None
                ):
    
    lower_tf  = remapped_tf(lower_tf)
    higher_tf = remapped_tf(higher_tf)
    # print("New Lower TF:", lower_tf, "New Higher TF:", higher_tf)
    
    if index is None:
        ltf_data = mtf_data[lower_tf]
        htf_data = mtf_data[higher_tf]
    else:
        # print(f"Start Index:{index[0]} || End Index: {index[-1]}")
        ltf_data = mtf_data[lower_tf].loc[index[0]:index[-1]]
        htf_data = mtf_data[higher_tf].loc[index[0]:index[-1]]

    ### Get OHLC prices for lower and higher timeframes
    ltf_open, ltf_high, ltf_low, ltf_close = ltf_data.get('Open'), ltf_data.get('High'), ltf_data.get('Low'), ltf_data.get('Close')
    htf_open, htf_high, htf_low, htf_close = htf_data.get('Open'), htf_data.get('High'), htf_data.get('Low'), htf_data.get('Close')

    ltf_rsi = vbt.talib("RSI", timeperiod = ltf_rsi_timeperiod).run(ltf_close, skipna=True).real.ffill()
    ltf_bbands_rsi = vbt.talib("BBANDS").run(ltf_rsi, timeperiod = bb_rsi_timeperiod, nbdevup = bb_rsi_nbdevup, nbdevdn = bb_rsi_nbdevdn, skipna=True)    
    htf_bbands_price = vbt.talib("BBANDS").run(htf_close, timeperiod = bb_price_timeperiod, nbdevup = bb_price_nbdevup, nbdevdn = bb_price_nbdevdn, skipna=True)

    ## Initialize the data dictionary
    data = {}

    col_values = [ ltf_close, ltf_rsi,ltf_bbands_rsi.upperband, ltf_bbands_rsi.middleband, ltf_bbands_rsi.lowerband ]

    col_keys = [ "ltf_close", "ltf_rsi", "ltf_bbands_rsi_upper",  "ltf_bbands_rsi_middle", "ltf_bbands_rsi_lower" ]

    # Forward-fill each series and store it in the data dict under its key
    for key, time_series in zip(col_keys, col_values):
        data[key] = time_series.ffill()

    resampler_dict_keys = [higher_tf + "_" + lower_tf]

    list_resamplers = create_resamplers(result_dict_keys_list = resampler_dict_keys,
                                        source_indices = [htf_close.index], 
                                        source_frequencies = [higher_tf], 
                                        target_index = ltf_close.index,
                                        target_freq = lower_tf)

    # print(list_resamplers)
    
    ## Series to resample, used with the manual MTF indicator-creation method
    series_to_resample = [
        [htf_open, htf_high, htf_low, htf_close, 
        htf_bbands_price.upperband, htf_bbands_price.middleband, htf_bbands_price.lowerband]
        ]


    resample_data_keys = [
        ["htf_open", "htf_high", "htf_low", "htf_close", 
        "htf_bbands_price_upper",  "htf_bbands_price_middle",  "htf_bbands_price_lower"]
            ]    

    df_cols_order = col_keys + flatten_list(resample_data_keys)
    ## Create resampled time series aligned to the lower (base) timeframe frequency
    # print("COLUMNS ORDER:", df_cols_order)
    
    for lst_series, lst_keys, resampler in zip(series_to_resample, resample_data_keys, resampler_dict_keys):
        for key, time_series in zip(lst_keys, lst_series):
            if key.lower().endswith('open'):
                # print(f'Resampling {key} differently using vbt.resample_opening using "{resampler}" resampler')
                resampled_time_series = time_series.vbt.resample_opening(list_resamplers[resampler])
            else:
                resampled_time_series = time_series.vbt.resample_closing(list_resamplers[resampler])
            data[key] = resampled_time_series
    

    ## construct a multi-timeframe dataframe
    mtf_df = pd.DataFrame(data)[df_cols_order]

    # print("DataFrame Output:\n", mtf_df.head())

    ## Long Entry Conditions
    c1_long_entry = (mtf_df['htf_low'] <= mtf_df['htf_bbands_price_lower'])
    c2_long_entry = (mtf_df['ltf_rsi'] <= mtf_df['ltf_bbands_rsi_lower'] )

    ## Long Exit Conditions
    c1_long_exit =  (mtf_df['htf_high'] >= mtf_df['htf_bbands_price_upper'])
    c2_long_exit =  (mtf_df['ltf_rsi']  >= mtf_df['ltf_bbands_rsi_upper'])             

    ## Create entries and exit columns using the above conditions
    mtf_df['entry'] = c1_long_entry & c2_long_entry
    mtf_df['exit']  = c1_long_exit & c2_long_exit

    mtf_df['signal'] = np.where(mtf_df['entry'], 1, 0)
    mtf_df['signal'] = np.where(mtf_df['exit'], -1, mtf_df['signal'])

    entries = mtf_df.signal == 1.0
    exits = mtf_df.signal == -1.0

    pf = vbt.Portfolio.from_signals(
        close = ltf_close, 
        entries = entries, 
        exits = exits, 
        direction = "both", ## This setting trades both long and short signals
        freq = pd.Timedelta(minutes = freq_dict[lower_tf]), 
        init_cash = 100000
    )

    if isinstance(output_metric, str):
        return pf.deep_getattr(output_metric) ## When tuning a single metric
    elif isinstance(output_metric, list):
        return pd.Series({k: getattr(pf, k) for k in output_metric}) ## When tuning a list of metrics

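Since optimal_2BB is wrapped with vbt.parameterized, it should also be callable with plain scalar arguments (no vbt.Param objects), in which case only a single parameter combination is backtested. A minimal sketch:

## Single-combination run (illustrative): no vbt.Param objects, so one backtest executes
single_result = optimal_2BB(lower_tf = 1, higher_tf = 5, output_metric = "total_return")
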
Applying line_profiler to inspect the per-line runtime of the optimal_2BB function

In [10]:
# %load_ext line_profiler
## Apply the line_profiler on the unwrapped raw optimal_2BB() function
# %lprun -f optimal_2BB optimal_2BB()
In [11]:
pf_results = optimal_2BB(
    lower_tf = vbt.Param([1, 5, 15, 30, 60, 120, 240, 720], condition = "x <= higher_tf"),
    higher_tf = vbt.Param([1, 5, 15, 30, 60, 120, 240, 720, 1440]),
    ltf_rsi_timeperiod = vbt.Param(create_list_numbers(18, 22, 1)),
    bb_price_timeperiod = vbt.Param(create_list_numbers(18, 22, 1)),
    bb_rsi_timeperiod = vbt.Param(create_list_numbers(18, 22, 1)),
    bb_price_nbdevup = vbt.Param(create_list_numbers(1.5, 2.5, step = 0.25)),
    bb_price_nbdevdn = vbt.Param(create_list_numbers(1.5, 2.5, step = 0.25)),
    bb_rsi_nbdevup = vbt.Param(create_list_numbers(1.5, 2.5, step = 0.25)),
    bb_rsi_nbdevdn = vbt.Param(create_list_numbers(1.5, 2.5, step = 0.25)),
    output_metric = "total_return"
 )
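
To put random_subset = 1000 into perspective, we can count the size of the full conditional grid it samples from (a quick illustrative calculation):

lower  = [1, 5, 15, 30, 60, 120, 240, 720]
higher = [1, 5, 15, 30, 60, 120, 240, 720, 1440]
tf_pairs = sum(l <= h for l in lower for h in higher)  # 44 valid timeframe pairs
grid_size = tf_pairs * 5**7  # seven remaining parameters, 5 values each
print(grid_size)             # 3437500 combinations, of which only 1000 are sampled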
In [12]:
## BTCUSDT - best and worst results with their parameter combinations
print(f"Best Total Return: {round(pf_results.max(), 2)}")
print(f"Parameter Combination with Best Total Return: {pf_results.idxmax()}")

print(f"Worst Total Return: {round(pf_results.min(), 2)}")
print(f"Parameter Combination with Worst Total Return: {pf_results.idxmin()}")
Best Total Return: 8917.26
Parameter Combination with Best Total Return: (1, 1, 19, 20, 22, 1.75, 2.25, 1.5, 2.0)
Worst Total Return: -4.41
Parameter Combination with Worst Total Return: (120, 1440, 18, 19, 22, 2.5, 2.0, 2.25, 2.5)
In [13]:
pf_results.sort_values(ascending=False)
Out[13]:
lower_tf  higher_tf  ltf_rsi_timeperiod  bb_price_timeperiod  bb_rsi_timeperiod  bb_price_nbdevup  bb_price_nbdevdn  bb_rsi_nbdevup  bb_rsi_nbdevdn
1         1          19                  20                   22                 1.75              2.25              1.50            2.00              8917.264415
                     22                  19                   20                 2.50              2.25              1.50            2.00              3166.917315
                     21                  22                   21                 2.25              2.00              2.00            1.75              1877.640187
                     19                  18                   22                 1.50              2.00              2.25            1.50              1685.686751
                     18                  20                   19                 1.75              2.00              2.25            1.50              1618.576841
                                                                                                                                                          ...     
240       720        19                  19                   19                 2.50              2.25              2.25            2.25                -3.272245
                     21                  18                   20                 1.50              2.25              2.00            2.50                -3.326493
120       720        21                  19                   22                 2.00              2.25              2.25            2.00                -3.330802
240       1440       21                  22                   18                 2.50              1.50              2.50            2.50                -3.853700
120       1440       18                  19                   22                 2.50              2.00              2.25            2.50                -4.410079
Length: 1000, dtype: float64
In [36]:
pf_results = optimal_2BB(
    lower_tf = vbt.Param([5, 30], condition = "x <= higher_tf"),
    higher_tf = vbt.Param([1, 5, 15]),
    ltf_rsi_timeperiod = vbt.Param(create_list_numbers(18, 22, 1)),
    bb_price_timeperiod = vbt.Param(create_list_numbers(18, 22, 1)),
    bb_rsi_timeperiod = vbt.Param(create_list_numbers(18, 22, 1)),
    bb_price_nbdevup = vbt.Param(create_list_numbers(1.5, 2.5, step = 0.25)),
    bb_price_nbdevdn = vbt.Param(create_list_numbers(1.5, 2.5, step = 0.25)),
    bb_rsi_nbdevup = vbt.Param(create_list_numbers(1.5, 2.5, step = 0.25)),
    bb_rsi_nbdevdn = vbt.Param(create_list_numbers(1.5, 2.5, step = 0.25)),
    output_metric = ["total_profit", "total_return", "max_drawdown", "sharpe_ratio"]
 )
In [15]:
## Use this block when returning data for more than one metric
pf_results_df = pf_results.unstack(level = -1)
pf_results_df = pf_results_df[['total_return','max_drawdown','sharpe_ratio']].sort_values(by=['total_return', 'max_drawdown'], ascending=False)
pf_results_df.reset_index(inplace=True)
pf_results_df
Out[15]:
lower_tf higher_tf ltf_rsi_timeperiod bb_price_timeperiod bb_rsi_timeperiod bb_price_nbdevup bb_price_nbdevdn bb_rsi_nbdevup bb_rsi_nbdevdn total_return max_drawdown sharpe_ratio
0 5 5 18 18 19 2.25 1.50 1.75 2.00 504.490043 -0.389288 2.328832
1 5 5 18 18 20 1.75 1.75 1.75 1.75 410.278460 -0.437595 2.261473
2 5 5 18 18 18 2.25 1.50 1.50 2.00 371.557575 -0.449186 2.232431
3 5 5 21 18 19 1.50 2.00 2.00 2.00 298.156929 -0.436903 2.162062
4 5 5 22 19 20 1.75 1.75 1.75 2.25 225.497056 -0.468920 2.075027
... ... ... ... ... ... ... ... ... ... ... ... ...
995 5 15 18 20 20 1.75 2.25 2.50 2.50 -0.967112 -0.976728 -0.634604
996 5 15 18 22 20 2.25 2.00 2.00 2.50 -0.968102 -0.989068 -0.627487
997 5 15 19 20 19 1.75 2.25 2.50 2.50 -0.969068 -0.980316 -0.652226
998 5 15 19 19 18 2.50 2.50 2.25 2.50 -0.975576 -0.989047 -0.728846
999 5 15 20 22 20 2.25 2.50 2.25 2.50 -0.977365 -0.991565 -0.704983

1000 rows × 12 columns

Check that the x <= higher_tf condition (i.e. lower_tf <= higher_tf) was respected

In [16]:
print("Length of DF:",len(pf_results_df[pf_results_df['lower_tf'] > pf_results_df['higher_tf']]))
Length of DF: 0
In [51]:
## Best result and its parameter combination
print(f"Best Total Return: {round(pf_results_df['total_return'].max(), 2)}")
print("Parameter Combination with Best Total Return:")
pd.DataFrame(pf_results_df.iloc[pf_results_df['total_return'].idxmax()]).T
Best Total Return: 504.49
Parameter Combination with Best Total Return:
Out[51]:
lower_tf higher_tf ltf_rsi_timeperiod bb_price_timeperiod bb_rsi_timeperiod bb_price_nbdevup bb_price_nbdevdn bb_rsi_nbdevup bb_rsi_nbdevdn total_return max_drawdown sharpe_ratio
0 5.0 5.0 18.0 18.0 19.0 2.25 1.5 1.75 2.0 504.490043 -0.389288 2.328832
In [52]:
print(f"Worst Total Returns: {round(pf_results_df['total_return'].min(), 2)} %")
print(f"Parameter Combinations with Worst Total Returns:")
pd.DataFrame(pf_results_df.iloc[pf_results_df['total_return'].idxmin()]).T
Worst Total Returns: -0.98 %
Parameter Combinations with Worst Total Returns:
Out[52]:
lower_tf higher_tf ltf_rsi_timeperiod bb_price_timeperiod bb_rsi_timeperiod bb_price_nbdevup bb_price_nbdevdn bb_rsi_nbdevup bb_rsi_nbdevdn total_return max_drawdown sharpe_ratio
999 5.0 15.0 20.0 22.0 20.0 2.25 2.5 2.25 2.5 -0.977365 -0.991565 -0.704983

Cross Validation

Cross-validation is an important part of any backtesting pipeline: by re-running the optimization on rolling train/test windows, we can check whether the chosen parameters are robust out of sample.
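
With length = 360 and split = 0.5, each window spans 360 days (180 train + 180 test). Assuming consecutive windows advance by the 180-day train length, the ~1493-day daily index yields 7 splits (a quick illustrative check):

## Expected number of rolling windows over the daily index
n_days, window, stride = len(d1_data.index), 360, 180
print((n_days - window) // stride + 1)  # 7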

In [18]:
## Global Plot Settings
vbt.settings.set_theme("dark")
vbt.settings['plotting']['layout']['width'] = 1600
In [19]:
splitter = vbt.Splitter.from_rolling(
    index = d1_data.index, 
    length = 360, 
    split = 0.5,
    set_labels = ["train", "test"]
    )
In [20]:
splitter.plot().show()
[Plot output: rolling splitter layout showing the 7 overlapping train/test windows]
In [21]:
splitter.splits
Out[21]:
set train test
split
0 slice(0, 180, None) slice(180, 360, None)
1 slice(180, 360, None) slice(360, 540, None)
2 slice(360, 540, None) slice(540, 720, None)
3 slice(540, 720, None) slice(720, 900, None)
4 slice(720, 900, None) slice(900, 1080, None)
5 slice(900, 1080, None) slice(1080, 1260, None)
6 slice(1080, 1260, None) slice(1260, 1440, None)
In [22]:
splitter.index
Out[22]:
DatetimeIndex(['2019-01-01 00:00:00+00:00', '2019-01-02 00:00:00+00:00',
               '2019-01-03 00:00:00+00:00', '2019-01-04 00:00:00+00:00',
               '2019-01-05 00:00:00+00:00', '2019-01-06 00:00:00+00:00',
               '2019-01-07 00:00:00+00:00', '2019-01-08 00:00:00+00:00',
               '2019-01-09 00:00:00+00:00', '2019-01-10 00:00:00+00:00',
               ...
               '2023-01-23 00:00:00+00:00', '2023-01-24 00:00:00+00:00',
               '2023-01-25 00:00:00+00:00', '2023-01-26 00:00:00+00:00',
               '2023-01-27 00:00:00+00:00', '2023-01-28 00:00:00+00:00',
               '2023-01-29 00:00:00+00:00', '2023-01-30 00:00:00+00:00',
               '2023-01-31 00:00:00+00:00', '2023-02-01 00:00:00+00:00'],
              dtype='datetime64[ns, UTC]', name='Open time', length=1493, freq='D')

Other splitter.take options

In [23]:
close_slices = splitter.take(d1_data.close, into="reset_stacked_by_set")
close_slices
Out[23]:
set
train    split         0         1        2         3  ...
test     split         0        1         2         3  ...
dtype: object
In [24]:
close_slices = splitter.take(d1_data.close, into="reset_stacked")
close_slices
Out[24]:
split 0 1 2 3 4 5 6
set train test train test train test train test train test train test train test
0 3797.14 10854.10 10854.10 7254.74 7254.74 9296.49 9296.49 22719.71 22719.71 35483.72 35483.72 47632.38 47632.38 22136.41
1 3858.56 10624.93 10624.93 7316.14 7316.14 9249.49 9249.49 23810.79 23810.79 35600.16 35600.16 46131.20 46131.20 22583.72
2 3766.78 10842.85 10842.85 7388.24 7388.24 9162.21 9162.21 23232.76 23232.76 31608.93 31608.93 46834.48 46834.48 20401.31
3 3792.01 11940.00 11940.00 7246.00 7246.00 9012.00 9012.00 23729.20 23729.20 32509.56 32509.56 46681.23 46681.23 20468.81
4 3770.96 11145.67 11145.67 7195.23 7195.23 9116.35 9116.35 24712.47 24712.47 33678.07 33678.07 46914.16 46914.16 18970.79
... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
175 11820.86 7501.44 7501.44 9310.23 9310.23 21335.52 21335.52 40516.29 40516.29 49389.99 49389.99 30109.93 30109.93 17088.96
176 13093.80 7317.09 7317.09 9358.95 9358.95 22797.16 22797.16 40144.04 40144.04 50053.90 50053.90 29091.88 29091.88 16836.64
177 11329.99 7255.77 7255.77 9294.69 9294.69 23107.39 23107.39 38349.01 38349.01 46702.75 46702.75 28424.70 28424.70 17224.10
178 12400.63 7204.63 7204.63 9685.69 9685.69 23821.61 23821.61 38092.97 38092.97 48343.28 48343.28 26574.53 26574.53 17128.56
179 11903.13 7202.00 7202.00 9624.89 9624.89 23455.52 23455.52 35819.84 35819.84 48864.98 48864.98 22487.41 22487.41 17127.49

180 rows × 14 columns

In [25]:
close_slices = splitter.take(d1_data.close)
close_slices
Out[25]:
split  set  
0      train    Open time
2019-01-01 00:00:00+00:00     3797.1...
       test     Open time
2019-06-30 00:00:00+00:00    10854.1...
1      train    Open time
2019-06-30 00:00:00+00:00    10854.1...
       test     Open time
2019-12-27 00:00:00+00:00    7254.74...
2      train    Open time
2019-12-27 00:00:00+00:00    7254.74...
       test     Open time
2020-06-24 00:00:00+00:00     9296.4...
3      train    Open time
2020-06-24 00:00:00+00:00     9296.4...
       test     Open time
2020-12-21 00:00:00+00:00    22719.7...
4      train    Open time
2020-12-21 00:00:00+00:00    22719.7...
       test     Open time
2021-06-19 00:00:00+00:00    35483.7...
5      train    Open time
2021-06-19 00:00:00+00:00    35483.7...
       test     Open time
2021-12-16 00:00:00+00:00    47632.3...
6      train    Open time
2021-12-16 00:00:00+00:00    47632.3...
       test     Open time
2022-06-14 00:00:00+00:00    22136.4...
dtype: object
In [26]:
print("Total Nr. of Splits:",len(close_slices.index))
df_splits = pd.DataFrame(close_slices.index.tolist(), columns=["split", "period"])
unique_splits = df_splits["split"].unique().tolist()
print("Unique Splits:", unique_splits)
df_splits
Total Nr. of (split, set) slices: 14
Unique Splits: [0, 1, 2, 3, 4, 5, 6]
Out[26]:
split period
0 0 train
1 0 test
2 1 train
3 1 test
4 2 train
5 2 test
6 3 train
7 3 test
8 4 train
9 4 test
10 5 train
11 5 test
12 6 train
13 6 test
In [27]:
def get_total_return(close_prices):
    return close_prices.vbt.to_returns().vbt.returns.total()

base_line_returns = close_slices.apply(get_total_return)
base_line_returns
Out[27]:
split  set  
0      train    2.134762
       test    -0.336472
1      train   -0.336472
       test     0.326704
2      train    0.326704
       test     1.523051
3      train    1.523051
       test     0.576598
4      train    0.576598
       test     0.377110
5      train    0.377110
       test    -0.527897
6      train   -0.527897
       test    -0.226275
dtype: float64
In [28]:
train_slices = [slice(close_slices[i, "train"].index[0], close_slices[i, "train"].index[-1]) for i in unique_splits]
train_slices
Out[28]:
[slice(Timestamp('2019-01-01 00:00:00+0000', tz='UTC', freq='D'), Timestamp('2019-06-29 00:00:00+0000', tz='UTC', freq='D'), None),
 slice(Timestamp('2019-06-30 00:00:00+0000', tz='UTC', freq='D'), Timestamp('2019-12-26 00:00:00+0000', tz='UTC', freq='D'), None),
 slice(Timestamp('2019-12-27 00:00:00+0000', tz='UTC', freq='D'), Timestamp('2020-06-23 00:00:00+0000', tz='UTC', freq='D'), None),
 slice(Timestamp('2020-06-24 00:00:00+0000', tz='UTC', freq='D'), Timestamp('2020-12-20 00:00:00+0000', tz='UTC', freq='D'), None),
 slice(Timestamp('2020-12-21 00:00:00+0000', tz='UTC', freq='D'), Timestamp('2021-06-18 00:00:00+0000', tz='UTC', freq='D'), None),
 slice(Timestamp('2021-06-19 00:00:00+0000', tz='UTC', freq='D'), Timestamp('2021-12-15 00:00:00+0000', tz='UTC', freq='D'), None),
 slice(Timestamp('2021-12-16 00:00:00+0000', tz='UTC', freq='D'), Timestamp('2022-06-13 00:00:00+0000', tz='UTC', freq='D'), None)]
In [29]:
test_slices = [slice(close_slices[i, "test"].index[0], close_slices[i, "test"].index[-1]) for i in unique_splits]
test_slices
Out[29]:
[slice(Timestamp('2019-06-30 00:00:00+0000', tz='UTC', freq='D'), Timestamp('2019-12-26 00:00:00+0000', tz='UTC', freq='D'), None),
 slice(Timestamp('2019-12-27 00:00:00+0000', tz='UTC', freq='D'), Timestamp('2020-06-23 00:00:00+0000', tz='UTC', freq='D'), None),
 slice(Timestamp('2020-06-24 00:00:00+0000', tz='UTC', freq='D'), Timestamp('2020-12-20 00:00:00+0000', tz='UTC', freq='D'), None),
 slice(Timestamp('2020-12-21 00:00:00+0000', tz='UTC', freq='D'), Timestamp('2021-06-18 00:00:00+0000', tz='UTC', freq='D'), None),
 slice(Timestamp('2021-06-19 00:00:00+0000', tz='UTC', freq='D'), Timestamp('2021-12-15 00:00:00+0000', tz='UTC', freq='D'), None),
 slice(Timestamp('2021-12-16 00:00:00+0000', tz='UTC', freq='D'), Timestamp('2022-06-13 00:00:00+0000', tz='UTC', freq='D'), None),
 slice(Timestamp('2022-06-14 00:00:00+0000', tz='UTC', freq='D'), Timestamp('2022-12-10 00:00:00+0000', tz='UTC', freq='D'), None)]

Performance on train splits

In [30]:
train_perf = splitter.apply(
    apply_func = optimal_2BB, ## apply your strategy function to the splitter object, followed by its arguments
    lower_tf = vbt.Param([1, 5, 15, 30, 60, 120, 240, 720], condition = "x <= higher_tf"),
    higher_tf = vbt.Param([1, 5, 15, 30, 60, 120, 240, 720, 1440]),    
    ltf_rsi_timeperiod = vbt.Param(create_list_numbers(18, 22, 1)),
    bb_price_timeperiod = vbt.Param(create_list_numbers(18, 22, 1)),
    bb_rsi_timeperiod = vbt.Param(create_list_numbers(18, 22, 1)),
    bb_price_nbdevup = vbt.Param(create_list_numbers(1.5, 2.5, step = 0.25)),
    bb_price_nbdevdn = vbt.Param(create_list_numbers(1.5, 2.5, step = 0.25)),
    bb_rsi_nbdevup = vbt.Param(create_list_numbers(1.5, 2.5, step = 0.25)),
    bb_rsi_nbdevdn = vbt.Param(create_list_numbers(1.5, 2.5, step = 0.25)),
    output_metric = "sharpe_ratio",
    #### Arguments of splitter.apply() not related to strategy
    index = vbt.Takeable(splitter.index), ## DatetimeIndex from the splitter object
    set_ = "train",  ## Specify the set to be used for this CV simulation - train or test
    _random_subset = 500, ## Nr. of random parameter combinations per train split (overrides the decorator's random_subset=1000)
    merge_func ="concat", ## concat the results
    execute_kwargs=dict(show_progress=True), ## execute_kwargs controls the execution of each split/set/range - show a progress bar over splits
    _execute_kwargs=dict(show_progress=False, clear_cache=50, collect_garbage=50) ## _execute_kwargs controls the execution of the parameter combinations within each split
    )

train_perf.sort_values(ascending=False)
Out[30]:
split  lower_tf  higher_tf  ltf_rsi_timeperiod  bb_price_timeperiod  bb_rsi_timeperiod  bb_price_nbdevup  bb_price_nbdevdn  bb_rsi_nbdevup  bb_rsi_nbdevdn
4      1         1          20                  18                   20                 1.75              1.50              1.75            2.25              8.036717
                            21                  21                   21                 1.75              2.25              1.50            2.00              7.804521
                            18                  19                   21                 2.00              2.25              1.50            2.25              7.350429
                            20                  19                   19                 2.00              2.25              1.50            2.25              7.342806
                            21                  20                   22                 2.00              2.25              1.75            2.50              7.026099
                                                                                                                                                                ...   
3      5         30         21                  22                   20                 2.00              2.50              1.75            2.50             -3.449168
0      1         1440       18                  18                   18                 1.50              2.25              2.00            2.00                   NaN
                            19                  19                   21                 2.50              2.50              1.75            1.75                   NaN
                            20                  19                   19                 2.00              2.25              1.75            2.50                   NaN
                            22                  22                   18                 2.25              2.50              1.75            1.75                   NaN
Length: 3500, dtype: float64

View train split performance statistics

In [31]:
train_split_describe = pd.concat([train_perf[train_perf.index.get_level_values('split') == i].describe()\
                                for i in unique_splits], axis = 1, 
                                keys = [f"Train_Split_{i}" for i in unique_splits])
train_split_describe                                
Out[31]:
Train_Split_0 Train_Split_1 Train_Split_2 Train_Split_3 Train_Split_4 Train_Split_5 Train_Split_6
count 496.000000 500.000000 500.000000 500.000000 500.000000 500.000000 500.000000
mean -0.430325 -0.256053 -0.012914 -0.297292 0.423548 0.284101 0.127918
std 1.213900 1.050923 0.951679 1.606742 1.404564 1.090453 0.989352
min -3.042384 -2.782880 -2.549706 -3.449168 -3.107068 -2.726325 -2.549975
25% -1.259347 -0.963931 -0.611205 -1.370408 -0.403063 -0.484848 -0.565334
50% -0.741358 -0.297287 -0.181718 -0.578251 0.254005 0.086147 0.068521
75% 0.254324 0.434706 0.399538 0.444610 0.905467 0.966222 0.840372
max 4.418397 2.999906 4.448941 6.546613 8.036717 3.734348 3.144399
In [32]:
## Compute baseline, best and worst returns for the overlaid line plots
train_split_best_returns = train_split_describe.loc['max'].reset_index(drop=True)
train_split_worst_returns = train_split_describe.loc['min'].reset_index(drop=True)
train_splits_baseline_returns = pd.Series([base_line_returns[i, "train"] for i in unique_splits])

## Create Box Plot for train_performance statistics
train_split_fig = train_perf.vbt.boxplot(
    by_level="split",
    trace_kwargs=dict(
        line=dict(color="lightskyblue"),
        opacity=0.4,
        showlegend=False
    ),
    xaxis_title="Train Splits",
    yaxis_title="Sharpe Ratio"
)

train_split_best_returns.vbt.plot(trace_kwargs=dict(name="Best Returns", line=dict(color="limegreen", dash="dash")), fig=train_split_fig)
train_split_worst_returns.vbt.plot(trace_kwargs=dict(name="Worst Returns", line=dict(color="tomato", dash="dash")), fig=train_split_fig)
train_splits_baseline_returns.vbt.plot(trace_kwargs=dict(name="Baseline", line=dict(color="yellow", dash="dash")), fig=train_split_fig)
train_split_fig.show()

Performance on test splits

In [33]:
test_perf = splitter.apply(
    apply_func = optimal_2BB, ## apply your strategy function to the splitter object, followed by its arguments
    lower_tf = vbt.Param([1, 5, 15, 30, 60, 120, 240, 720], condition = "x <= higher_tf"),
    higher_tf = vbt.Param([1, 5, 15, 30, 60, 120, 240, 720, 1440]),    
    ltf_rsi_timeperiod = vbt.Param(create_list_numbers(18, 22, 1)),
    bb_price_timeperiod = vbt.Param(create_list_numbers(18, 22, 1)),
    bb_rsi_timeperiod = vbt.Param(create_list_numbers(18, 22, 1)),
    bb_price_nbdevup = vbt.Param(create_list_numbers(1.5, 2.5, step = 0.25)),
    bb_price_nbdevdn = vbt.Param(create_list_numbers(1.5, 2.5, step = 0.25)),
    bb_rsi_nbdevup = vbt.Param(create_list_numbers(1.5, 2.5, step = 0.25)),
    bb_rsi_nbdevdn = vbt.Param(create_list_numbers(1.5, 2.5, step = 0.25)),
    output_metric = "sharpe_ratio",
    #### Arguments of splitter.apply() not related to strategy
    index = vbt.Takeable(splitter.index), ## DatetimeIndex from the splitter object
    _random_subset = 500, ## Nr. of random parameter combinations per test split (overrides the decorator's random_subset=1000)
    set_ = "test",  ## Specify the set to be used for this CV simulation - train or test
    merge_func ="concat", ## concat the results
    execute_kwargs=dict(show_progress=True), ## execute_kwargs controls the execution of each split/set/range - show a progress bar over splits
    _execute_kwargs=dict(show_progress=False, clear_cache=50, collect_garbage=50) ## _execute_kwargs controls the execution of the parameter combinations within each split
    )
    
test_perf.sort_values(ascending=False)
Out[33]:
split  lower_tf  higher_tf  ltf_rsi_timeperiod  bb_price_timeperiod  bb_rsi_timeperiod  bb_price_nbdevup  bb_price_nbdevdn  bb_rsi_nbdevup  bb_rsi_nbdevdn
4      1         1          22                  20                   21                 2.25              2.50              1.50            1.50              7.777249
                            18                  22                   20                 2.25              2.00              1.75            1.75              7.665653
                            19                  20                   20                 2.00              2.25              2.00            2.25              7.652166
3      1         1          18                  18                   20                 2.25              2.25              1.75            1.75              7.082363
4      1         1          22                  19                   22                 1.75              2.25              2.00            1.75              7.035542
                                                                                                                                                                ...   
0      1         1440       19                  18                   20                 2.50              2.00              2.00            2.00                   NaN
                                                22                   21                 1.75              2.50              2.00            2.25                   NaN
                            22                  18                   18                 1.75              1.75              1.50            2.00                   NaN
       15        720        20                  21                   22                 1.50              2.25              2.00            2.00                   NaN
3      120       1440       20                  18                   20                 2.50              1.50              2.00            2.25                   NaN
Length: 3500, dtype: float64

View test split performance statistics

In [34]:
test_split_describe = pd.concat([test_perf[test_perf.index.get_level_values('split') == i].describe()\
                                for i in unique_splits], axis = 1, 
                                keys = [f"Test_Split_{i}" for i in unique_splits])
test_split_describe                                
Out[34]:
Test_Split_0 Test_Split_1 Test_Split_2 Test_Split_3 Test_Split_4 Test_Split_5 Test_Split_6
count 496.000000 500.000000 500.000000 499.000000 500.000000 500.000000 500.000000
mean -0.407962 -0.239648 -0.037181 -0.348490 0.414004 0.343005 0.155692
std 1.192991 1.040834 0.977560 1.614325 1.407563 1.207516 1.061513
min -2.745665 -2.802081 -2.625053 -3.548850 -3.107068 -2.663871 -3.188824
25% -1.268158 -0.928053 -0.643853 -1.352547 -0.393175 -0.488930 -0.635615
50% -0.683554 -0.237556 -0.206665 -0.608184 0.267323 0.117486 0.109689
75% 0.170201 0.382643 0.398758 0.251647 0.904010 0.995893 0.870143
max 3.443967 2.653158 3.703606 7.082363 7.777249 4.995166 3.511205
In [35]:
## Compute baseline, best and worst returns for the overlaid line plots
test_split_best_returns = test_split_describe.loc['max'].reset_index(drop=True)
test_split_worst_returns = test_split_describe.loc['min'].reset_index(drop=True)
test_splits_baseline_returns = pd.Series([base_line_returns[i, "test"] for i in unique_splits])

## Create Box Plot for test_performance statistics
test_split_fig = test_perf.vbt.boxplot(
    by_level="split",
    trace_kwargs=dict(
        line=dict(color="lightskyblue"),
        opacity=0.4,
        showlegend=False
    ),
    xaxis_title="Test Splits",
    yaxis_title="Sharpe Ratio"
)

test_split_best_returns.vbt.plot(trace_kwargs=dict(name="Best Returns", line=dict(color="limegreen", dash="dash")), fig=test_split_fig)
test_split_worst_returns.vbt.plot(trace_kwargs=dict(name="Worst Returns", line=dict(color="tomato", dash="dash")), fig=test_split_fig)
test_splits_baseline_returns.vbt.plot(trace_kwargs=dict(name="Baseline", line=dict(color="yellow", dash="dash")), fig=test_split_fig)
test_split_fig.show()