Parameter Optimization

In [1]:
import numpy as np
import pandas as pd
import vectorbtpro as vbt
In [2]:
## Acquire BTCUSDT 1m crypto data from Binance

data = vbt.BinanceData.fetch(
    ["BTCUSDT"], 
    start="2019-01-01 UTC", 
    end="2023-02-02 UTC",
    timeframe="1m"
    )

## Save acquired data locally for persistence
data.to_hdf("/Users/dilip.rajkumar/Documents/vbtpro_tuts_private/data/Binance_BTCUSDT_OHLCV_3Y_m1.h5")
In [3]:
## Load m1 data - BTCUSD
m1_data = vbt.BinanceData.from_hdf('../data/Binance_BTCUSDT_OHLCV_3Y_m1.h5')
print(m1_data.columns)
m1_data.get()
Index(['BTCUSDT'], dtype='object', name='symbol')
Out[3]:
Open High Low Close Volume Quote volume Trade count Taker base volume Taker quote volume
Open time
2019-01-01 00:00:00+00:00 3701.23 3703.72 3701.09 3702.46 17.100110 6.329971e+04 180 5.746515 2.127570e+04
2019-01-01 00:01:00+00:00 3702.44 3702.63 3695.66 3697.04 23.700604 8.768108e+04 148 15.120491 5.593539e+04
2019-01-01 00:02:00+00:00 3699.42 3702.04 3696.08 3698.14 14.488615 5.360224e+04 80 12.700389 4.699097e+04
2019-01-01 00:03:00+00:00 3697.49 3698.19 3695.97 3696.51 8.499966 3.142328e+04 75 4.199726 1.552737e+04
2019-01-01 00:04:00+00:00 3697.20 3697.62 3695.00 3696.32 21.782886 8.051433e+04 93 15.080810 5.574050e+04
... ... ... ... ... ... ... ... ... ...
2023-02-01 23:55:00+00:00 23704.51 23711.98 23693.64 23707.48 123.787420 2.934020e+06 3405 65.091570 1.542877e+06
2023-02-01 23:56:00+00:00 23708.59 23719.69 23704.29 23714.49 157.832940 3.742687e+06 4230 90.425430 2.144293e+06
2023-02-01 23:57:00+00:00 23713.36 23726.83 23712.72 23722.29 170.786230 4.051256e+06 4601 90.996750 2.158566e+06
2023-02-01 23:58:00+00:00 23721.50 23736.12 23712.27 23716.12 254.517100 6.037764e+06 5560 132.888520 3.152625e+06
2023-02-01 23:59:00+00:00 23715.63 23733.10 23710.86 23732.66 137.082290 3.251743e+06 4520 75.053000 1.780439e+06

2145889 rows × 9 columns

Storing resampled price data in the mtf_data dictionary

In [4]:
m5_data  = m1_data.resample('5T')   # Resample 1-minute bars to 5 minutes
m15_data = m1_data.resample('15T')  # Resample 1-minute bars to 15 minutes
m30_data = m1_data.resample('30T')  # Resample 1-minute bars to 30 minutes
h1_data  = m1_data.resample("1H")   # Resample 1-minute bars to 1 hour
h2_data  = m1_data.resample("2H")   # Resample 1-minute bars to 2 hours
h4_data  = m1_data.resample('4H')   # Resample 1-minute bars to 4 hours
h12_data = m1_data.resample('12H')  # Resample 1-minute bars to 12 hours
d1_data  = m1_data.resample('1D')   # Resample 1-minute bars to daily data

mtf_data = { "1T" : m1_data, "5T" : m5_data, "15T" : m15_data, "30T" : m30_data,
             "1H" : h1_data, "2H" : h2_data, "4H" : h4_data, "12H" : h12_data, "1D" : d1_data }

freq_dict = { "1T" : 1, "5T" : 5, "15T" : 15, "30T" : 30,
              "1H" : 60, "2H" : 120, "4H" : 240, "8H" : 480, "12H" : 720,
              "1D": 1440 }            

Helper Functions

In [5]:
def remapped_tf(input_value : int) -> str:
    """Map an integer to a string timeframe format"""
    tf_freq = {1 : "1T", 5 : "5T", 15 : "15T", 30 : "30T", 60 :"1H", 
                  120 : "2H", 240 : "4H", 720 : "12H", 1440 : "1D"}
    new_value = tf_freq.get(input_value)
    return new_value            
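
A quick sanity check (illustrative): remapped_tf should invert the minute counts stored in freq_dict.

## remapped_tf maps minutes back to frequency labels
for minutes, tf in [(1, "1T"), (60, "1H"), (1440, "1D")]:
    assert remapped_tf(minutes) == tf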
In [6]:
def flatten_list(list_2D : list):
    """Flatten a list of list of strings"""
    flat_list = list_2D if len(list_2D) == 0 else [item for sublist in list_2D for item in sublist]
    return flat_list
In [7]:
def create_list_numbers(r1, r2, step):
    """Create a list of numbers from `r1` to `r2` (inclusive), spaced by `step`."""
    if isinstance(r1, float) and isinstance(r2, float):
        return list(np.round(np.arange(r1, r2 + step, step), 2))
    return list(np.arange(r1, r2 + step, step))
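
For example, the grids used in the parameter sweeps below look like this:

create_list_numbers(18, 22, 1)            # [18, 19, 20, 21, 22]
create_list_numbers(1.5, 2.5, step=0.25)  # [1.5, 1.75, 2.0, 2.25, 2.5]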

You might remember this create_resamplers function from our first tutorial, where we used it for upsampling.

In [8]:
def create_resamplers(result_dict_keys_list : list, source_indices : list,  
                      source_frequencies :list, target_index : pd.Series, target_freq : str):
    """
    Creates a dictionary of vbt pro Resampler objects.

    Parameters
    ==========
    result_dict_keys_list : list of str, keys of the output dictionary
    source_indices        : list of pd.DatetimeIndex objects of the higher timeframes
    source_frequencies    : list of str, short-form frequency labels of the source indices, e.g. ["1D", "4H"]
    target_index          : pd.DatetimeIndex, target index for the resampler objects
    target_freq           : str, target frequency for the resampler objects

    Returns
    ===========
    resamplers_dict       : dict, vbt pro Resampler objects keyed by result_dict_keys_list
    """
    
    
    resamplers = []
    for si, sf in zip(source_indices, source_frequencies):
        resamplers.append(vbt.Resampler(source_index = si,  target_index = target_index,
                                        source_freq = sf, target_freq = target_freq))
    return dict(zip(result_dict_keys_list, resamplers))
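
Illustrative usage, with the data objects defined above: build a resampler that aligns daily series onto the H1 index (the key naming mirrors the higher_tf + "_" + lower_tf convention used inside optimal_2BB below).

resamplers = create_resamplers(
    result_dict_keys_list = ["1D_1H"],
    source_indices        = [d1_data.index],
    source_frequencies    = ["1D"],
    target_index          = h1_data.index,
    target_freq           = "1H",
)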
In [9]:
@vbt.parameterized(merge_func = "concat", random_subset = 1000, show_progress=True)  
def optimal_2BB(lower_tf : int = 1, higher_tf: int = 5,
                ltf_rsi_timeperiod : int = 21, 
                bb_price_timeperiod : int = 14, bb_rsi_timeperiod : int = 14,
                bb_price_nbdevup : int = 2, bb_price_nbdevdn: int = 2,
                bb_rsi_nbdevup : int = 2, bb_rsi_nbdevdn : int = 2,
                output_metric : str | list = "total_return",
                index = None
                ):
    
    lower_tf  = remapped_tf(lower_tf)
    higher_tf = remapped_tf(higher_tf)
    # print("New Lower TF:", lower_tf, "New Higher TF:", higher_tf)
    
    if index is None:
        ltf_data = mtf_data[lower_tf]
        htf_data = mtf_data[higher_tf]
    else:
        # print(f"Start Index:{index[0]} || End Index: {index[-1]}")
        ltf_data = mtf_data[lower_tf].loc[index[0]:index[-1]]
        htf_data = mtf_data[higher_tf].loc[index[0]:index[-1]]

    ### Get OHLC prices for lower and higher timeframes
    ltf_open, ltf_high, ltf_low, ltf_close = ltf_data.get('Open'), ltf_data.get('High'), ltf_data.get('Low'), ltf_data.get('Close')
    htf_open, htf_high, htf_low, htf_close = htf_data.get('Open'), htf_data.get('High'), htf_data.get('Low'), htf_data.get('Close')

    ltf_rsi = vbt.talib("RSI", timeperiod = ltf_rsi_timeperiod).run(ltf_close, skipna=True).real.ffill()
    ltf_bbands_rsi = vbt.talib("BBANDS").run(ltf_rsi, timeperiod = bb_rsi_timeperiod, nbdevup = bb_rsi_nbdevup, nbdevdn = bb_rsi_nbdevdn, skipna=True)    
    htf_bbands_price = vbt.talib("BBANDS").run(htf_close, timeperiod = bb_price_timeperiod, nbdevup = bb_price_nbdevup, nbdevdn = bb_price_nbdevdn, skipna=True)

    ## Initialize the data dictionary
    data = {}

    col_values = [ ltf_close, ltf_rsi,ltf_bbands_rsi.upperband, ltf_bbands_rsi.middleband, ltf_bbands_rsi.lowerband ]

    col_keys = [ "ltf_close", "ltf_rsi", "ltf_bbands_rsi_upper",  "ltf_bbands_rsi_middle", "ltf_bbands_rsi_lower" ]

    # Forward-fill each series and store it in the data dict under its key
    for key, time_series in zip(col_keys, col_values):
        data[key] = time_series.ffill()

    resampler_dict_keys = [higher_tf + "_" + lower_tf]

    list_resamplers = create_resamplers(result_dict_keys_list = resampler_dict_keys,
                                        source_indices = [htf_close.index], 
                                        source_frequencies = [higher_tf], 
                                        target_index = ltf_close.index,
                                        target_freq = lower_tf)

    # print(list_resamplers)
    
    ## Series to resample, used with the manual MTF indicator-creation method
    series_to_resample = [
        [htf_open, htf_high, htf_low, htf_close, 
        htf_bbands_price.upperband, htf_bbands_price.middleband, htf_bbands_price.lowerband]
        ]


    resample_data_keys = [
        ["htf_open", "htf_high", "htf_low", "htf_close", 
        "htf_bbands_price_upper",  "htf_bbands_price_middle",  "htf_bbands_price_lower"]
            ]    

    df_cols_order = col_keys + flatten_list(resample_data_keys)
    ## Create resampled time series aligned to the lower (base) timeframe frequency
    # print("COLUMNS ORDER:", df_cols_order)
    
    for lst_series, lst_keys, resampler in zip(series_to_resample, resample_data_keys, resampler_dict_keys):
        for key, time_series in zip(lst_keys, lst_series):
            if key.lower().endswith('open'):
                # print(f'Resampling {key} differently using vbt.resample_opening using "{resampler}" resampler')
                resampled_time_series = time_series.vbt.resample_opening(list_resamplers[resampler])
            else:
                resampled_time_series = time_series.vbt.resample_closing(list_resamplers[resampler])
            data[key] = resampled_time_series
    

    ## construct a multi-timeframe dataframe
    mtf_df = pd.DataFrame(data)[df_cols_order]

    # print("DataFrame Output:\n", mtf_df.head())

    ## Long Entry Conditions
    c1_long_entry = (mtf_df['htf_low'] <= mtf_df['htf_bbands_price_lower'])
    c2_long_entry = (mtf_df['ltf_rsi'] <= mtf_df['ltf_bbands_rsi_lower'] )

    ## Long Exit Conditions
    c1_long_exit =  (mtf_df['htf_high'] >= mtf_df['htf_bbands_price_upper'])
    c2_long_exit =  (mtf_df['ltf_rsi']  >= mtf_df['ltf_bbands_rsi_upper'])             

    ## Create entries and exit columns using the above conditions
    mtf_df['entry'] = c1_long_entry & c2_long_entry
    mtf_df['exit']  = c1_long_exit & c2_long_exit

    mtf_df['signal'] = np.where(mtf_df['entry'], 1, 0)
    mtf_df['signal'] = np.where(mtf_df['exit'], -1, mtf_df['signal'])

    entries = mtf_df.signal == 1.0
    exits = mtf_df.signal == -1.0

    pf = vbt.Portfolio.from_signals(
        close = ltf_close, 
        entries = entries, 
        exits = exits, 
        direction = "both", ## This setting trades both long and short signals
        freq = pd.Timedelta(minutes = freq_dict[lower_tf]), 
        init_cash = 100000
    )

    if isinstance(output_metric, str):
        return pf.deep_getattr(output_metric) ## When tuning a single metric
    elif isinstance(output_metric, list):
        return pd.Series({k: getattr(pf, k) for k in output_metric}) ## When tuning a list of metrics

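Since optimal_2BB is wrapped with vbt.parameterized, it should also be callable with plain scalar arguments (no vbt.Param objects), in which case only a single parameter combination is backtested. A minimal sketch:

## Single-combination run (illustrative): no vbt.Param objects, so one backtest executes
single_result = optimal_2BB(lower_tf = 1, higher_tf = 5, output_metric = "total_return")
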
Applying line_profiler to inspect the per-line runtime of the optimal_2BB function

In [10]:
# %load_ext line_profiler
## Apply the line_profiler on the unwrapped raw optimal_2BB() function
# %lprun -f optimal_2BB optimal_2BB()
In [11]:
pf_results = optimal_2BB(
    lower_tf = vbt.Param([1, 5, 15, 30, 60, 120, 240, 720], condition = "x <= higher_tf"),
    higher_tf = vbt.Param([1, 5, 15, 30, 60, 120, 240, 720, 1440]),
    ltf_rsi_timeperiod = vbt.Param(create_list_numbers(18, 22, 1)),
    bb_price_timeperiod = vbt.Param(create_list_numbers(18, 22, 1)),
    bb_rsi_timeperiod = vbt.Param(create_list_numbers(18, 22, 1)),
    bb_price_nbdevup = vbt.Param(create_list_numbers(1.5, 2.5, step = 0.25)),
    bb_price_nbdevdn = vbt.Param(create_list_numbers(1.5, 2.5, step = 0.25)),
    bb_rsi_nbdevup = vbt.Param(create_list_numbers(1.5, 2.5, step = 0.25)),
    bb_rsi_nbdevdn = vbt.Param(create_list_numbers(1.5, 2.5, step = 0.25)),
    output_metric = "total_return"
 )
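
To put random_subset = 1000 into perspective, we can count the size of the full conditional grid it samples from (a quick illustrative calculation):

lower  = [1, 5, 15, 30, 60, 120, 240, 720]
higher = [1, 5, 15, 30, 60, 120, 240, 720, 1440]
tf_pairs = sum(l <= h for l in lower for h in higher)  # 44 valid timeframe pairs
grid_size = tf_pairs * 5**7  # seven remaining parameters, 5 values each
print(grid_size)             # 3437500 combinations, of which only 1000 are sampled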
In [12]:
## BTCUSDT - best and worst results with their parameter combinations
print(f"Best Total Return: {round(pf_results.max(), 2)}")
print(f"Parameter Combination with Best Total Return: {pf_results.idxmax()}")

print(f"Worst Total Return: {round(pf_results.min(), 2)}")
print(f"Parameter Combination with Worst Total Return: {pf_results.idxmin()}")
Best Total Return: 8917.26
Parameter Combination with Best Total Return: (1, 1, 19, 20, 22, 1.75, 2.25, 1.5, 2.0)
Worst Total Return: -4.41
Parameter Combination with Worst Total Return: (120, 1440, 18, 19, 22, 2.5, 2.0, 2.25, 2.5)
In [13]:
pf_results.sort_values(ascending=False)
Out[13]:
lower_tf  higher_tf  ltf_rsi_timeperiod  bb_price_timeperiod  bb_rsi_timeperiod  bb_price_nbdevup  bb_price_nbdevdn  bb_rsi_nbdevup  bb_rsi_nbdevdn
1         1          19                  20                   22                 1.75              2.25              1.50            2.00              8917.264415
                     22                  19                   20                 2.50              2.25              1.50            2.00              3166.917315
                     21                  22                   21                 2.25              2.00              2.00            1.75              1877.640187
                     19                  18                   22                 1.50              2.00              2.25            1.50              1685.686751
                     18                  20                   19                 1.75              2.00              2.25            1.50              1618.576841
                                                                                                                                                          ...     
240       720        19                  19                   19                 2.50              2.25              2.25            2.25                -3.272245
                     21                  18                   20                 1.50              2.25              2.00            2.50                -3.326493
120       720        21                  19                   22                 2.00              2.25              2.25            2.00                -3.330802
240       1440       21                  22                   18                 2.50              1.50              2.50            2.50                -3.853700
120       1440       18                  19                   22                 2.50              2.00              2.25            2.50                -4.410079
Length: 1000, dtype: float64
In [36]:
pf_results = optimal_2BB(
    lower_tf = vbt.Param([5, 30], condition = "x <= higher_tf"),
    higher_tf = vbt.Param([1, 5, 15]),
    ltf_rsi_timeperiod = vbt.Param(create_list_numbers(18, 22, 1)),
    bb_price_timeperiod = vbt.Param(create_list_numbers(18, 22, 1)),
    bb_rsi_timeperiod = vbt.Param(create_list_numbers(18, 22, 1)),
    bb_price_nbdevup = vbt.Param(create_list_numbers(1.5, 2.5, step = 0.25)),
    bb_price_nbdevdn = vbt.Param(create_list_numbers(1.5, 2.5, step = 0.25)),
    bb_rsi_nbdevup = vbt.Param(create_list_numbers(1.5, 2.5, step = 0.25)),
    bb_rsi_nbdevdn = vbt.Param(create_list_numbers(1.5, 2.5, step = 0.25)),
    output_metric = ["total_profit", "total_return", "max_drawdown", "sharpe_ratio"]
 )
In [15]:
## Use this block when returning data for more than one metric
pf_results_df = pf_results.unstack(level = -1)
pf_results_df = pf_results_df[['total_return','max_drawdown','sharpe_ratio']].sort_values(by=['total_return', 'max_drawdown'], ascending=False)
pf_results_df.reset_index(inplace=True)
pf_results_df
Out[15]:
lower_tf higher_tf ltf_rsi_timeperiod bb_price_timeperiod bb_rsi_timeperiod bb_price_nbdevup bb_price_nbdevdn bb_rsi_nbdevup bb_rsi_nbdevdn total_return max_drawdown sharpe_ratio
0 5 5 18 18 19 2.25 1.50 1.75 2.00 504.490043 -0.389288 2.328832
1 5 5 18 18 20 1.75 1.75 1.75 1.75 410.278460 -0.437595 2.261473
2 5 5 18 18 18 2.25 1.50 1.50 2.00 371.557575 -0.449186 2.232431
3 5 5 21 18 19 1.50 2.00 2.00 2.00 298.156929 -0.436903 2.162062
4 5 5 22 19 20 1.75 1.75 1.75 2.25 225.497056 -0.468920 2.075027
... ... ... ... ... ... ... ... ... ... ... ... ...
995 5 15 18 20 20 1.75 2.25 2.50 2.50 -0.967112 -0.976728 -0.634604
996 5 15 18 22 20 2.25 2.00 2.00 2.50 -0.968102 -0.989068 -0.627487
997 5 15 19 20 19 1.75 2.25 2.50 2.50 -0.969068 -0.980316 -0.652226
998 5 15 19 19 18 2.50 2.50 2.25 2.50 -0.975576 -0.989047 -0.728846
999 5 15 20 22 20 2.25 2.50 2.25 2.50 -0.977365 -0.991565 -0.704983

1000 rows × 12 columns

Check that the x <= higher_tf condition (i.e. lower_tf <= higher_tf) was respected

In [16]:
print("Length of DF:",len(pf_results_df[pf_results_df['lower_tf'] > pf_results_df['higher_tf']]))
Length of DF: 0
In [51]:
## Best result and its parameter combination
print(f"Best Total Return: {round(pf_results_df['total_return'].max(), 2)}")
print("Parameter Combination with Best Total Return:")
pd.DataFrame(pf_results_df.iloc[pf_results_df['total_return'].idxmax()]).T
Best Total Return: 504.49
Parameter Combination with Best Total Return:
Out[51]:
lower_tf higher_tf ltf_rsi_timeperiod bb_price_timeperiod bb_rsi_timeperiod bb_price_nbdevup bb_price_nbdevdn bb_rsi_nbdevup bb_rsi_nbdevdn total_return max_drawdown sharpe_ratio
0 5.0 5.0 18.0 18.0 19.0 2.25 1.5 1.75 2.0 504.490043 -0.389288 2.328832
In [52]:
print(f"Worst Total Returns: {round(pf_results_df['total_return'].min(), 2)} %")
print(f"Parameter Combinations with Worst Total Returns:")
pd.DataFrame(pf_results_df.iloc[pf_results_df['total_return'].idxmin()]).T
Worst Total Returns: -0.98 %
Parameter Combinations with Worst Total Returns:
Out[52]:
lower_tf higher_tf ltf_rsi_timeperiod bb_price_timeperiod bb_rsi_timeperiod bb_price_nbdevup bb_price_nbdevdn bb_rsi_nbdevup bb_rsi_nbdevdn total_return max_drawdown sharpe_ratio
999 5.0 15.0 20.0 22.0 20.0 2.25 2.5 2.25 2.5 -0.977365 -0.991565 -0.704983

Cross Validation

Cross-validation is an important part of any backtesting pipeline: by re-running the optimization on rolling train/test windows, we can check whether the chosen parameters are robust out of sample.
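
With length = 360 and split = 0.5, each window spans 360 days (180 train + 180 test). Assuming consecutive windows advance by the 180-day train length, the ~1493-day daily index yields 7 splits (a quick illustrative check):

## Expected number of rolling windows over the daily index
n_days, window, stride = len(d1_data.index), 360, 180
print((n_days - window) // stride + 1)  # 7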

In [18]:
## Global Plot Settings
vbt.settings.set_theme("dark")
vbt.settings['plotting']['layout']['width'] = 1600
In [19]:
splitter = vbt.Splitter.from_rolling(
    index = d1_data.index, 
    length = 360, 
    split = 0.5,
    set_labels = ["train", "test"]
    )
In [20]:
splitter.plot().show()
[Plot output: rolling splitter layout showing the 7 overlapping train/test windows]
In [21]:
splitter.splits
Out[21]:
set train test
split
0 slice(0, 180, None) slice(180, 360, None)
1 slice(180, 360, None) slice(360, 540, None)
2 slice(360, 540, None) slice(540, 720, None)
3 slice(540, 720, None) slice(720, 900, None)
4 slice(720, 900, None) slice(900, 1080, None)
5 slice(900, 1080, None) slice(1080, 1260, None)
6 slice(1080, 1260, None) slice(1260, 1440, None)
In [22]:
splitter.index
Out[22]:
DatetimeIndex(['2019-01-01 00:00:00+00:00', '2019-01-02 00:00:00+00:00',
               '2019-01-03 00:00:00+00:00', '2019-01-04 00:00:00+00:00',
               '2019-01-05 00:00:00+00:00', '2019-01-06 00:00:00+00:00',
               '2019-01-07 00:00:00+00:00', '2019-01-08 00:00:00+00:00',
               '2019-01-09 00:00:00+00:00', '2019-01-10 00:00:00+00:00',
               ...
               '2023-01-23 00:00:00+00:00', '2023-01-24 00:00:00+00:00',
               '2023-01-25 00:00:00+00:00', '2023-01-26 00:00:00+00:00',
               '2023-01-27 00:00:00+00:00', '2023-01-28 00:00:00+00:00',
               '2023-01-29 00:00:00+00:00', '2023-01-30 00:00:00+00:00',
               '2023-01-31 00:00:00+00:00', '2023-02-01 00:00:00+00:00'],
              dtype='datetime64[ns, UTC]', name='Open time', length=1493, freq='D')

Other splitter.take options

In [23]:
close_slices = splitter.take(d1_data.close, into="reset_stacked_by_set")
close_slices
Out[23]:
set
train    split         0         1        2         3  ...
test     split         0        1         2         3  ...
dtype: object
In [24]:
close_slices = splitter.take(d1_data.close, into="reset_stacked")
close_slices
Out[24]:
split 0 1 2 3 4 5 6
set train test train test train test train test train test train test train test
0 3797.14 10854.10 10854.10 7254.74 7254.74 9296.49 9296.49 22719.71 22719.71 35483.72 35483.72 47632.38 47632.38 22136.41
1 3858.56 10624.93 10624.93 7316.14 7316.14 9249.49 9249.49 23810.79 23810.79 35600.16 35600.16 46131.20 46131.20 22583.72
2 3766.78 10842.85 10842.85 7388.24 7388.24 9162.21 9162.21 23232.76 23232.76 31608.93 31608.93 46834.48 46834.48 20401.31
3 3792.01 11940.00 11940.00 7246.00 7246.00 9012.00 9012.00 23729.20 23729.20 32509.56 32509.56 46681.23 46681.23 20468.81
4 3770.96 11145.67 11145.67 7195.23 7195.23 9116.35 9116.35 24712.47 24712.47 33678.07 33678.07 46914.16 46914.16 18970.79
... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
175 11820.86 7501.44 7501.44 9310.23 9310.23 21335.52 21335.52 40516.29 40516.29 49389.99 49389.99 30109.93 30109.93 17088.96
176 13093.80 7317.09 7317.09 9358.95 9358.95 22797.16 22797.16 40144.04 40144.04 50053.90 50053.90 29091.88 29091.88 16836.64
177 11329.99 7255.77 7255.77 9294.69 9294.69 23107.39 23107.39 38349.01 38349.01 46702.75 46702.75 28424.70 28424.70 17224.10
178 12400.63 7204.63 7204.63 9685.69 9685.69 23821.61 23821.61 38092.97 38092.97 48343.28 48343.28 26574.53 26574.53 17128.56
179 11903.13 7202.00 7202.00 9624.89 9624.89 23455.52 23455.52 35819.84 35819.84 48864.98 48864.98 22487.41 22487.41 17127.49

180 rows × 14 columns

In [25]:
close_slices = splitter.take(d1_data.close)
close_slices
Out[25]:
split  set  
0      train    Open time
2019-01-01 00:00:00+00:00     3797.1...
       test     Open time
2019-06-30 00:00:00+00:00    10854.1...
1      train    Open time
2019-06-30 00:00:00+00:00    10854.1...
       test     Open time
2019-12-27 00:00:00+00:00    7254.74...
2      train    Open time
2019-12-27 00:00:00+00:00    7254.74...
       test     Open time
2020-06-24 00:00:00+00:00     9296.4...
3      train    Open time
2020-06-24 00:00:00+00:00     9296.4...
       test     Open time
2020-12-21 00:00:00+00:00    22719.7...
4      train    Open time
2020-12-21 00:00:00+00:00    22719.7...
       test     Open time
2021-06-19 00:00:00+00:00    35483.7...
5      train    Open time
2021-06-19 00:00:00+00:00    35483.7...
       test     Open time
2021-12-16 00:00:00+00:00    47632.3...
6      train    Open time
2021-12-16 00:00:00+00:00    47632.3...
       test     Open time
2022-06-14 00:00:00+00:00    22136.4...
dtype: object
In [26]:
print("Total Nr. of Splits:",len(close_slices.index))
df_splits = pd.DataFrame(close_slices.index.tolist(), columns=["split", "period"])
unique_splits = df_splits["split"].unique().tolist()
print("Unique Splits:", unique_splits)
df_splits
Total Nr. of (split, set) slices: 14
Unique Splits: [0, 1, 2, 3, 4, 5, 6]
Out[26]:
split period
0 0 train
1 0 test
2 1 train
3 1 test
4 2 train
5 2 test
6 3 train
7 3 test
8 4 train
9 4 test
10 5 train
11 5 test
12 6 train
13 6 test
In [27]:
def get_total_return(close_prices):
    return close_prices.vbt.to_returns().vbt.returns.total()

base_line_returns = close_slices.apply(get_total_return)
base_line_returns
Out[27]:
split  set  
0      train    2.134762
       test    -0.336472
1      train   -0.336472
       test     0.326704
2      train    0.326704
       test     1.523051
3      train    1.523051
       test     0.576598
4      train    0.576598
       test     0.377110
5      train    0.377110
       test    -0.527897
6      train   -0.527897
       test    -0.226275
dtype: float64
In [28]:
train_slices = [slice(close_slices[i, "train"].index[0], close_slices[i, "train"].index[-1]) for i in unique_splits]
train_slices
Out[28]:
[slice(Timestamp('2019-01-01 00:00:00+0000', tz='UTC', freq='D'), Timestamp('2019-06-29 00:00:00+0000', tz='UTC', freq='D'), None),
 slice(Timestamp('2019-06-30 00:00:00+0000', tz='UTC', freq='D'), Timestamp('2019-12-26 00:00:00+0000', tz='UTC', freq='D'), None),
 slice(Timestamp('2019-12-27 00:00:00+0000', tz='UTC', freq='D'), Timestamp('2020-06-23 00:00:00+0000', tz='UTC', freq='D'), None),
 slice(Timestamp('2020-06-24 00:00:00+0000', tz='UTC', freq='D'), Timestamp('2020-12-20 00:00:00+0000', tz='UTC', freq='D'), None),
 slice(Timestamp('2020-12-21 00:00:00+0000', tz='UTC', freq='D'), Timestamp('2021-06-18 00:00:00+0000', tz='UTC', freq='D'), None),
 slice(Timestamp('2021-06-19 00:00:00+0000', tz='UTC', freq='D'), Timestamp('2021-12-15 00:00:00+0000', tz='UTC', freq='D'), None),
 slice(Timestamp('2021-12-16 00:00:00+0000', tz='UTC', freq='D'), Timestamp('2022-06-13 00:00:00+0000', tz='UTC', freq='D'), None)]
In [29]:
test_slices = [slice(close_slices[i, "test"].index[0], close_slices[i, "test"].index[-1]) for i in unique_splits]
test_slices
Out[29]:
[slice(Timestamp('2019-06-30 00:00:00+0000', tz='UTC', freq='D'), Timestamp('2019-12-26 00:00:00+0000', tz='UTC', freq='D'), None),
 slice(Timestamp('2019-12-27 00:00:00+0000', tz='UTC', freq='D'), Timestamp('2020-06-23 00:00:00+0000', tz='UTC', freq='D'), None),
 slice(Timestamp('2020-06-24 00:00:00+0000', tz='UTC', freq='D'), Timestamp('2020-12-20 00:00:00+0000', tz='UTC', freq='D'), None),
 slice(Timestamp('2020-12-21 00:00:00+0000', tz='UTC', freq='D'), Timestamp('2021-06-18 00:00:00+0000', tz='UTC', freq='D'), None),
 slice(Timestamp('2021-06-19 00:00:00+0000', tz='UTC', freq='D'), Timestamp('2021-12-15 00:00:00+0000', tz='UTC', freq='D'), None),
 slice(Timestamp('2021-12-16 00:00:00+0000', tz='UTC', freq='D'), Timestamp('2022-06-13 00:00:00+0000', tz='UTC', freq='D'), None),
 slice(Timestamp('2022-06-14 00:00:00+0000', tz='UTC', freq='D'), Timestamp('2022-12-10 00:00:00+0000', tz='UTC', freq='D'), None)]

Performance on train splits

In [30]:
train_perf = splitter.apply(
    apply_func = optimal_2BB, ## apply your strategy function to the splitter object, followed by its arguments
    lower_tf = vbt.Param([1, 5, 15, 30, 60, 120, 240, 720], condition = "x <= higher_tf"),
    higher_tf = vbt.Param([1, 5, 15, 30, 60, 120, 240, 720, 1440]),    
    ltf_rsi_timeperiod = vbt.Param(create_list_numbers(18, 22, 1)),
    bb_price_timeperiod = vbt.Param(create_list_numbers(18, 22, 1)),
    bb_rsi_timeperiod = vbt.Param(create_list_numbers(18, 22, 1)),
    bb_price_nbdevup = vbt.Param(create_list_numbers(1.5, 2.5, step = 0.25)),
    bb_price_nbdevdn = vbt.Param(create_list_numbers(1.5, 2.5, step = 0.25)),
    bb_rsi_nbdevup = vbt.Param(create_list_numbers(1.5, 2.5, step = 0.25)),
    bb_rsi_nbdevdn = vbt.Param(create_list_numbers(1.5, 2.5, step = 0.25)),
    output_metric = "sharpe_ratio",
    #### Arguments of splitter.apply() not related to strategy
    index = vbt.Takeable(splitter.index), ## DatetimeIndex from the splitter object
    set_ = "train",  ## Specify the set to be used for this CV simulation - train or test
    _random_subset = 500, ## Nr. of random parameter combinations per train split (overrides the decorator's random_subset=1000)
    merge_func ="concat", ## concat the results
    execute_kwargs=dict(show_progress=True), ## execute_kwargs controls the execution of each split/set/range - show a progress bar over splits
    _execute_kwargs=dict(show_progress=False, clear_cache=50, collect_garbage=50) ## _execute_kwargs controls the execution of the parameter combinations within each split
    )

train_perf.sort_values(ascending=False)
Out[30]:
split  lower_tf  higher_tf  ltf_rsi_timeperiod  bb_price_timeperiod  bb_rsi_timeperiod  bb_price_nbdevup  bb_price_nbdevdn  bb_rsi_nbdevup  bb_rsi_nbdevdn
4      1         1          20                  18                   20                 1.75              1.50              1.75            2.25              8.036717
                            21                  21                   21                 1.75              2.25              1.50            2.00              7.804521
                            18                  19                   21                 2.00              2.25              1.50            2.25              7.350429
                            20                  19                   19                 2.00              2.25              1.50            2.25              7.342806
                            21                  20                   22                 2.00              2.25              1.75            2.50              7.026099
                                                                                                                                                                ...   
3      5         30         21                  22                   20                 2.00              2.50              1.75            2.50             -3.449168
0      1         1440       18                  18                   18                 1.50              2.25              2.00            2.00                   NaN
                            19                  19                   21                 2.50              2.50              1.75            1.75                   NaN
                            20                  19                   19                 2.00              2.25              1.75            2.50                   NaN
                            22                  22                   18                 2.25              2.50              1.75            1.75                   NaN
Length: 3500, dtype: float64

View train split performance statistics

In [31]:
train_split_describe = pd.concat([train_perf[train_perf.index.get_level_values('split') == i].describe()\
                                for i in unique_splits], axis = 1, 
                                keys = [f"Train_Split_{i}" for i in unique_splits])
train_split_describe                                
Out[31]:
Train_Split_0 Train_Split_1 Train_Split_2 Train_Split_3 Train_Split_4 Train_Split_5 Train_Split_6
count 496.000000 500.000000 500.000000 500.000000 500.000000 500.000000 500.000000
mean -0.430325 -0.256053 -0.012914 -0.297292 0.423548 0.284101 0.127918
std 1.213900 1.050923 0.951679 1.606742 1.404564 1.090453 0.989352
min -3.042384 -2.782880 -2.549706 -3.449168 -3.107068 -2.726325 -2.549975
25% -1.259347 -0.963931 -0.611205 -1.370408 -0.403063 -0.484848 -0.565334
50% -0.741358 -0.297287 -0.181718 -0.578251 0.254005 0.086147 0.068521
75% 0.254324 0.434706 0.399538 0.444610 0.905467 0.966222 0.840372
max 4.418397 2.999906 4.448941 6.546613 8.036717 3.734348 3.144399
In [32]:
## Compute baseline, best and worst returns for the overlaid line plots
train_split_best_returns = train_split_describe.loc['max'].reset_index(drop=True)
train_split_worst_returns = train_split_describe.loc['min'].reset_index(drop=True)
train_splits_baseline_returns = pd.Series([base_line_returns[i, "train"] for i in unique_splits])

## Create Box Plot for train_performance statistics
train_split_fig = train_perf.vbt.boxplot(
    by_level="split",
    trace_kwargs=dict(
        line=dict(color="lightskyblue"),
        opacity=0.4,
        showlegend=False
    ),
    xaxis_title="Train Splits",
    yaxis_title="Sharpe Ratio"
)

train_split_best_returns.vbt.plot(trace_kwargs=dict(name="Best Returns", line=dict(color="limegreen", dash="dash")), fig=train_split_fig)
train_split_worst_returns.vbt.plot(trace_kwargs=dict(name="Worst Returns", line=dict(color="tomato", dash="dash")), fig=train_split_fig)
train_splits_baseline_returns.vbt.plot(trace_kwargs=dict(name="Baseline", line=dict(color="yellow", dash="dash")), fig=train_split_fig)
train_split_fig.show()

Performance on test splits

In [33]:
test_perf = splitter.apply(
    apply_func = optimal_2BB, ## apply your strategy function to the splitter object, followed by its arguments
    lower_tf = vbt.Param([1, 5, 15, 30, 60, 120, 240, 720], condition = "x <= higher_tf"),
    higher_tf = vbt.Param([1, 5, 15, 30, 60, 120, 240, 720, 1440]),    
    ltf_rsi_timeperiod = vbt.Param(create_list_numbers(18, 22, 1)),
    bb_price_timeperiod = vbt.Param(create_list_numbers(18, 22, 1)),
    bb_rsi_timeperiod = vbt.Param(create_list_numbers(18, 22, 1)),
    bb_price_nbdevup = vbt.Param(create_list_numbers(1.5, 2.5, step = 0.25)),
    bb_price_nbdevdn = vbt.Param(create_list_numbers(1.5, 2.5, step = 0.25)),
    bb_rsi_nbdevup = vbt.Param(create_list_numbers(1.5, 2.5, step = 0.25)),
    bb_rsi_nbdevdn = vbt.Param(create_list_numbers(1.5, 2.5, step = 0.25)),
    output_metric = "sharpe_ratio",
    #### Arguments of splitter.apply() not related to strategy
    index = vbt.Takeable(splitter.index), ## DatetimeIndex from the splitter object
    _random_subset = 500, ## Nr. of random parameter combinations per test split (overrides the decorator's random_subset=1000)
    set_ = "test",  ## Specify the set to be used for this CV simulation - train or test
    merge_func ="concat", ## concat the results
    execute_kwargs=dict(show_progress=True), ## execute_kwargs controls the execution of each split/set/range - show a progress bar over splits
    _execute_kwargs=dict(show_progress=False, clear_cache=50, collect_garbage=50) ## _execute_kwargs controls the execution of the parameter combinations within each split
    )
    
test_perf.sort_values(ascending=False)
Out[33]:
split  lower_tf  higher_tf  ltf_rsi_timeperiod  bb_price_timeperiod  bb_rsi_timeperiod  bb_price_nbdevup  bb_price_nbdevdn  bb_rsi_nbdevup  bb_rsi_nbdevdn
4      1         1          22                  20                   21                 2.25              2.50              1.50            1.50              7.777249
                            18                  22                   20                 2.25              2.00              1.75            1.75              7.665653
                            19                  20                   20                 2.00              2.25              2.00            2.25              7.652166
3      1         1          18                  18                   20                 2.25              2.25              1.75            1.75              7.082363
4      1         1          22                  19                   22                 1.75              2.25              2.00            1.75              7.035542
                                                                                                                                                                ...   
0      1         1440       19                  18                   20                 2.50              2.00              2.00            2.00                   NaN
                                                22                   21                 1.75              2.50              2.00            2.25                   NaN
                            22                  18                   18                 1.75              1.75              1.50            2.00                   NaN
       15        720        20                  21                   22                 1.50              2.25              2.00            2.00                   NaN
3      120       1440       20                  18                   20                 2.50              1.50              2.00            2.25                   NaN
Length: 3500, dtype: float64

View test split performance statistics

In [34]:
test_split_describe = pd.concat([test_perf[test_perf.index.get_level_values('split') == i].describe()\
                                for i in unique_splits], axis = 1, 
                                keys = [f"Test_Split_{i}" for i in unique_splits])
test_split_describe                                
Out[34]:
Test_Split_0 Test_Split_1 Test_Split_2 Test_Split_3 Test_Split_4 Test_Split_5 Test_Split_6
count 496.000000 500.000000 500.000000 499.000000 500.000000 500.000000 500.000000
mean -0.407962 -0.239648 -0.037181 -0.348490 0.414004 0.343005 0.155692
std 1.192991 1.040834 0.977560 1.614325 1.407563 1.207516 1.061513
min -2.745665 -2.802081 -2.625053 -3.548850 -3.107068 -2.663871 -3.188824
25% -1.268158 -0.928053 -0.643853 -1.352547 -0.393175 -0.488930 -0.635615
50% -0.683554 -0.237556 -0.206665 -0.608184 0.267323 0.117486 0.109689
75% 0.170201 0.382643 0.398758 0.251647 0.904010 0.995893 0.870143
max 3.443967 2.653158 3.703606 7.082363 7.777249 4.995166 3.511205
In [35]:
## Compute baseline, best and worst returns for the overlaid line plots
test_split_best_returns = test_split_describe.loc['max'].reset_index(drop=True)
test_split_worst_returns = test_split_describe.loc['min'].reset_index(drop=True)
test_splits_baseline_returns = pd.Series([base_line_returns[i, "test"] for i in unique_splits])

## Create Box Plot for test_performance statistics
test_split_fig = test_perf.vbt.boxplot(
    by_level="split",
    trace_kwargs=dict(
        line=dict(color="lightskyblue"),
        opacity=0.4,
        showlegend=False
    ),
    xaxis_title="Test Splits",
    yaxis_title="Sharpe Ratio"
)

test_split_best_returns.vbt.plot(trace_kwargs=dict(name="Best Returns", line=dict(color="limegreen", dash="dash")), fig=test_split_fig)
test_split_worst_returns.vbt.plot(trace_kwargs=dict(name="Worst Returns", line=dict(color="tomato", dash="dash")), fig=test_split_fig)
test_splits_baseline_returns.vbt.plot(trace_kwargs=dict(name="Baseline", line=dict(color="yellow", dash="dash")), fig=test_split_fig)
test_split_fig.show()