In [1]:
import numpy as np
import pandas as pd
import vectorbtpro as vbt
In [2]:
## Acquire BTCUSDT 1m crypto data from Binance
data = vbt.BinanceData.fetch(
    ["BTCUSDT"],
    start="2019-01-01 UTC",
    end="2023-02-02 UTC",
    timeframe="1m"
)

## Save acquired data locally for persistence
data.to_hdf("/Users/dilip.rajkumar/Documents/vbtpro_tuts_private/data/Binance_BTCUSDT_OHLCV_3Y_m1.h5")
In [3]:
## Load m1 data - BTCUSDT
m1_data = vbt.BinanceData.from_hdf('../data/Binance_BTCUSDT_OHLCV_3Y_m1.h5')
print(m1_data.columns)
m1_data.get()
Index(['BTCUSDT'], dtype='object', name='symbol')
Out[3]:
| Open time | Open | High | Low | Close | Volume | Quote volume | Trade count | Taker base volume | Taker quote volume |
|---|---|---|---|---|---|---|---|---|---|
| 2019-01-01 00:00:00+00:00 | 3701.23 | 3703.72 | 3701.09 | 3702.46 | 17.100110 | 6.329971e+04 | 180 | 5.746515 | 2.127570e+04 |
| 2019-01-01 00:01:00+00:00 | 3702.44 | 3702.63 | 3695.66 | 3697.04 | 23.700604 | 8.768108e+04 | 148 | 15.120491 | 5.593539e+04 |
| 2019-01-01 00:02:00+00:00 | 3699.42 | 3702.04 | 3696.08 | 3698.14 | 14.488615 | 5.360224e+04 | 80 | 12.700389 | 4.699097e+04 |
| 2019-01-01 00:03:00+00:00 | 3697.49 | 3698.19 | 3695.97 | 3696.51 | 8.499966 | 3.142328e+04 | 75 | 4.199726 | 1.552737e+04 |
| 2019-01-01 00:04:00+00:00 | 3697.20 | 3697.62 | 3695.00 | 3696.32 | 21.782886 | 8.051433e+04 | 93 | 15.080810 | 5.574050e+04 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2023-02-01 23:55:00+00:00 | 23704.51 | 23711.98 | 23693.64 | 23707.48 | 123.787420 | 2.934020e+06 | 3405 | 65.091570 | 1.542877e+06 |
| 2023-02-01 23:56:00+00:00 | 23708.59 | 23719.69 | 23704.29 | 23714.49 | 157.832940 | 3.742687e+06 | 4230 | 90.425430 | 2.144293e+06 |
| 2023-02-01 23:57:00+00:00 | 23713.36 | 23726.83 | 23712.72 | 23722.29 | 170.786230 | 4.051256e+06 | 4601 | 90.996750 | 2.158566e+06 |
| 2023-02-01 23:58:00+00:00 | 23721.50 | 23736.12 | 23712.27 | 23716.12 | 254.517100 | 6.037764e+06 | 5560 | 132.888520 | 3.152625e+06 |
| 2023-02-01 23:59:00+00:00 | 23715.63 | 23733.10 | 23710.86 | 23732.66 | 137.082290 | 3.251743e+06 | 4520 | 75.053000 | 1.780439e+06 |
2145889 rows × 9 columns
Storing resampled price data in mtf_data dictionary¶
In [4]:
m5_data  = m1_data.resample('5T')   # Convert 1 minute to 5 minutes
m15_data = m1_data.resample('15T')  # Convert 1 minute to 15 minutes
m30_data = m1_data.resample('30T')  # Convert 1 minute to 30 minutes
h1_data  = m1_data.resample("1H")   # Convert 1 minute to 1 hour
h2_data  = m1_data.resample("2H")   # Convert 1 minute to 2 hours
h4_data  = m1_data.resample('4H')   # Convert 1 minute to 4 hours
h12_data = m1_data.resample('12H')  # Convert 1 minute to 12 hours
d1_data  = m1_data.resample('1D')   # Convert 1 minute to daily data

mtf_data = {
    "1T": m1_data, "5T": m5_data, "15T": m15_data, "30T": m30_data,
    "1H": h1_data, "2H": h2_data, "4H": h4_data, "12H": h12_data, "1D": d1_data
}

freq_dict = {
    "1T": 1, "5T": 5, "15T": 15, "30T": 30, "1H": 60,
    "2H": 120, "4H": 240, "8H": 480, "12H": 720, "1D": 1440
}
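As a quick sanity check on the resampling (illustrative; the exact timestamps are assumptions based on the data shown above), the first 1-hour close should equal the last 1-minute close inside that hour:

```python
## Illustrative check: the first 1H close equals the last m1 close within that hour
h1_close = mtf_data["1H"].get("Close")
m1_close = mtf_data["1T"].get("Close")
assert h1_close.iloc[0] == m1_close.loc["2019-01-01 00:00":"2019-01-01 00:59"].iloc[-1]
```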
Helper Functions¶
In [5]:
def remapped_tf(input_value: int) -> str:
    """Map an integer number of minutes to a string timeframe format."""
    tf_freq = {1: "1T", 5: "5T", 15: "15T", 30: "30T", 60: "1H",
               120: "2H", 240: "4H", 720: "12H", 1440: "1D"}
    return tf_freq.get(input_value)
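A couple of illustrative calls. Note that 480 (8H) is present in freq_dict above but absent from tf_freq, so it maps to None:

```python
print(remapped_tf(60))    # "1H"
print(remapped_tf(1440))  # "1D"
print(remapped_tf(480))   # None - 480 (8H) has no entry in tf_freq
```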
In [6]:
def flatten_list(list_2D: list):
    """Flatten a list of lists of strings into a single flat list."""
    return list_2D if len(list_2D) == 0 else [item for sublist in list_2D for item in sublist]
In [7]:
def create_list_numbers(r1, r2, step):
    """Create a list of numbers between two bounds (r1, r2), inclusive,
    incrementing by the specified `step` value."""
    if isinstance(r1, float) and isinstance(r2, float):
        return list(np.round(np.arange(r1, r2 + step, step), 2))
    return list(np.arange(r1, r2 + step, step))
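For illustration, the two flavours of parameter grid used later in this tutorial come out as:

```python
print(create_list_numbers(18, 22, 1))            # [18, 19, 20, 21, 22]
print(create_list_numbers(1.5, 2.5, step=0.25))  # [1.5, 1.75, 2.0, 2.25, 2.5]
```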
You might remember this create_resamplers function from our first tutorial, where we used it for upsampling.
In [8]:
def create_resamplers(result_dict_keys_list: list, source_indices: list,
                      source_frequencies: list, target_index: pd.Series, target_freq: str):
    """
    Create a dictionary of vbt pro resampler objects.

    Parameters
    ==========
    result_dict_keys_list : list of strings, used as keys of the output dictionary
    source_indices        : list of datetime indices of the higher timeframes
    source_frequencies    : list of short-form frequency strings, e.g. ["1D", "4H"]
    target_index          : pd.Series, target time series index for the resampler objects
    target_freq           : str, target time frequency for the resampler objects

    Returns
    ==========
    resamplers_dict : dict of vbt pro resampler objects
    """
    resamplers = []
    for si, sf in zip(source_indices, source_frequencies):
        resamplers.append(vbt.Resampler(source_index=si, target_index=target_index,
                                        source_freq=sf, target_freq=target_freq))
    return dict(zip(result_dict_keys_list, resamplers))
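A minimal usage sketch, assuming the mtf_data dictionary built above; it creates a single resampler that maps the 1-hour index onto the 15-minute index:

```python
resamplers = create_resamplers(
    result_dict_keys_list = ["1H_15T"],
    source_indices = [mtf_data["1H"].get("Close").index],
    source_frequencies = ["1H"],
    target_index = mtf_data["15T"].get("Close").index,
    target_freq = "15T",
)
print(resamplers.keys())  # dict_keys(['1H_15T'])
```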
In [9]:
@vbt.parameterized(merge_func="concat", random_subset=1000, show_progress=True)
def optimal_2BB(lower_tf: int = 1, higher_tf: int = 5,
                ltf_rsi_timeperiod: int = 21,
                bb_price_timeperiod: int = 14, bb_rsi_timeperiod: int = 14,
                bb_price_nbdevup: int = 2, bb_price_nbdevdn: int = 2,
                bb_rsi_nbdevup: int = 2, bb_rsi_nbdevdn: int = 2,
                output_metric: str | list = "total_return",
                index=None):
    lower_tf = remapped_tf(lower_tf)
    higher_tf = remapped_tf(higher_tf)
    # print("New Lower TF:", lower_tf, "New Higher TF:", higher_tf)
    if index is None:
        ltf_data = mtf_data[lower_tf]
        htf_data = mtf_data[higher_tf]
    else:
        # print(f"Start Index: {index[0]} || End Index: {index[-1]}")
        ltf_data = mtf_data[lower_tf].loc[index[0]:index[-1]]
        htf_data = mtf_data[higher_tf].loc[index[0]:index[-1]]

    ## Get OHLC prices for lower and higher timeframes
    ltf_open, ltf_high, ltf_low, ltf_close = ltf_data.get('Open'), ltf_data.get('High'), ltf_data.get('Low'), ltf_data.get('Close')
    htf_open, htf_high, htf_low, htf_close = htf_data.get('Open'), htf_data.get('High'), htf_data.get('Low'), htf_data.get('Close')

    ## Indicators: RSI on the lower timeframe, BBANDS on the RSI and on the higher-timeframe price
    ltf_rsi = vbt.talib("RSI", timeperiod=ltf_rsi_timeperiod).run(ltf_close, skipna=True).real.ffill()
    ltf_bbands_rsi = vbt.talib("BBANDS").run(ltf_rsi, timeperiod=bb_rsi_timeperiod,
                                             nbdevup=bb_rsi_nbdevup, nbdevdn=bb_rsi_nbdevdn, skipna=True)
    htf_bbands_price = vbt.talib("BBANDS").run(htf_close, timeperiod=bb_price_timeperiod,
                                               nbdevup=bb_price_nbdevup, nbdevdn=bb_price_nbdevdn, skipna=True)

    ## Initialize dictionary
    data = {}
    col_values = [ltf_close, ltf_rsi, ltf_bbands_rsi.upperband, ltf_bbands_rsi.middleband, ltf_bbands_rsi.lowerband]
    col_keys = ["ltf_close", "ltf_rsi", "ltf_bbands_rsi_upper", "ltf_bbands_rsi_middle", "ltf_bbands_rsi_lower"]

    ## Store each lower-timeframe series in the data dict
    for key, time_series in zip(col_keys, col_values):
        data[key] = time_series.ffill()

    resampler_dict_keys = [higher_tf + "_" + lower_tf]
    list_resamplers = create_resamplers(result_dict_keys_list=resampler_dict_keys,
                                        source_indices=[htf_close.index],
                                        source_frequencies=[higher_tf],
                                        target_index=ltf_close.index,
                                        target_freq=lower_tf)
    # print(list_resamplers)

    ## Use along with the manual indicator-creation method for MTF
    series_to_resample = [[htf_open, htf_high, htf_low, htf_close,
                           htf_bbands_price.upperband, htf_bbands_price.middleband, htf_bbands_price.lowerband]]
    resample_data_keys = [["htf_open", "htf_high", "htf_low", "htf_close",
                           "htf_bbands_price_upper", "htf_bbands_price_middle", "htf_bbands_price_lower"]]
    df_cols_order = col_keys + flatten_list(resample_data_keys)

    ## Create resampled time series data aligned to the lower (base) timeframe frequency
    # print("COLUMNS ORDER:", df_cols_order)
    for lst_series, lst_keys, resampler in zip(series_to_resample, resample_data_keys, resampler_dict_keys):
        for key, time_series in zip(lst_keys, lst_series):
            if key.lower().endswith('open'):
                # print(f'Resampling {key} differently using vbt.resample_opening using "{resampler}" resampler')
                resampled_time_series = time_series.vbt.resample_opening(list_resamplers[resampler])
            else:
                resampled_time_series = time_series.vbt.resample_closing(list_resamplers[resampler])
            data[key] = resampled_time_series

    ## Construct a multi-timeframe DataFrame
    mtf_df = pd.DataFrame(data)[df_cols_order]
    # print("DataFrame Output:\n", mtf_df.head())

    ## Long entry conditions
    c1_long_entry = (mtf_df['htf_low'] <= mtf_df['htf_bbands_price_lower'])
    c2_long_entry = (mtf_df['ltf_rsi'] <= mtf_df['ltf_bbands_rsi_lower'])

    ## Long exit conditions
    c1_long_exit = (mtf_df['htf_high'] >= mtf_df['htf_bbands_price_upper'])
    c2_long_exit = (mtf_df['ltf_rsi'] >= mtf_df['ltf_bbands_rsi_upper'])

    ## Create entry and exit columns using the above conditions
    mtf_df['entry'] = c1_long_entry & c2_long_entry
    mtf_df['exit'] = c1_long_exit & c2_long_exit
    mtf_df['signal'] = 0
    mtf_df['signal'] = np.where(mtf_df['entry'], 1, 0)
    mtf_df['signal'] = np.where(mtf_df['exit'], -1, mtf_df['signal'])

    entries = mtf_df.signal == 1.0
    exits = mtf_df.signal == -1.0
    pf = vbt.Portfolio.from_signals(
        close=ltf_close,
        entries=entries,
        exits=exits,
        direction="both",  ## This setting trades both long and short signals
        freq=pd.Timedelta(minutes=freq_dict[lower_tf]),
        init_cash=100000
    )
    if isinstance(output_metric, str):    ## When tuning a single metric
        return pf.deep_getattr(output_metric)
    elif isinstance(output_metric, list): ## When tuning a list of metrics
        return pd.Series({k: getattr(pf, k) for k in output_metric})
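To see the entry/exit logic of optimal_2BB in isolation, here is a stripped-down sketch on a synthetic DataFrame (column names mirror mtf_df; the band levels are fixed constants purely for illustration):

```python
import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
toy = pd.DataFrame({
    "htf_low":  rng.normal(100, 5, 500),
    "htf_high": rng.normal(110, 5, 500),
    "ltf_rsi":  rng.uniform(10, 90, 500),
})
toy["htf_bbands_price_lower"], toy["htf_bbands_price_upper"] = 95.0, 115.0
toy["ltf_bbands_rsi_lower"], toy["ltf_bbands_rsi_upper"] = 30.0, 70.0

## Long entry: HTF price pierces its lower band AND LTF RSI pierces its RSI lower band
entries = (toy["htf_low"] <= toy["htf_bbands_price_lower"]) & (toy["ltf_rsi"] <= toy["ltf_bbands_rsi_lower"])
## Long exit / short entry: HTF price pierces its upper band AND LTF RSI pierces its RSI upper band
exits = (toy["htf_high"] >= toy["htf_bbands_price_upper"]) & (toy["ltf_rsi"] >= toy["ltf_bbands_rsi_upper"])
print(entries.sum(), exits.sum())  # counts of raw entry/exit bars
```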
Applying line_profiler to inspect the per-line runtime of the optimal_2BB function¶
In [10]:
# %load_ext line_profiler
## Apply the line_profiler on the unwrapped raw optimal_2BB() function
# %lprun -f optimal_2BB optimal_2BB()
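If you prefer to profile outside of the notebook magics, a hedged script-style equivalent with the line_profiler package looks like this (the per-line stats are most meaningful on the undecorated function, since @vbt.parameterized wraps it):

```python
from line_profiler import LineProfiler

lp = LineProfiler()
profiled = lp(optimal_2BB)  # wrap the function; ideally the raw, undecorated version
profiled()                  # run once with default arguments
lp.print_stats()            # per-line timings
```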
In [11]:
pf_results = optimal_2BB(
    lower_tf  = vbt.Param([1, 5, 15, 30, 60, 120, 240, 720], condition="x <= higher_tf"),
    higher_tf = vbt.Param([1, 5, 15, 30, 60, 120, 240, 720, 1440]),
    ltf_rsi_timeperiod  = vbt.Param(create_list_numbers(18, 22, 1)),
    bb_price_timeperiod = vbt.Param(create_list_numbers(18, 22, 1)),
    bb_rsi_timeperiod   = vbt.Param(create_list_numbers(18, 22, 1)),
    bb_price_nbdevup = vbt.Param(create_list_numbers(1.5, 2.5, step=0.25)),
    bb_price_nbdevdn = vbt.Param(create_list_numbers(1.5, 2.5, step=0.25)),
    bb_rsi_nbdevup   = vbt.Param(create_list_numbers(1.5, 2.5, step=0.25)),
    bb_rsi_nbdevdn   = vbt.Param(create_list_numbers(1.5, 2.5, step=0.25)),
    output_metric = "total_return"
)
In [12]:
## BTCUSDT
print(f"Best Total Returns: {round(pf_results.max(), 2)} %")
print(f"Parameter Combinations with Best Total Returns:{pf_results.idxmax()}")
print(f"Worst Total Returns: {round(pf_results.min(), 2)} %")
print(f"Parameter Combinations with Worst Total Returns:{pf_results.idxmin()}")
Best Total Returns: 8917.26 %
Parameter Combinations with Best Total Returns:(1, 1, 19, 20, 22, 1.75, 2.25, 1.5, 2.0)
Worst Total Returns: -4.41 %
Parameter Combinations with Worst Total Returns:(120, 1440, 18, 19, 22, 2.5, 2.0, 2.25, 2.5)
In [13]:
pf_results.sort_values(ascending=False)
Out[13]:
lower_tf higher_tf ltf_rsi_timeperiod bb_price_timeperiod bb_rsi_timeperiod bb_price_nbdevup bb_price_nbdevdn bb_rsi_nbdevup bb_rsi_nbdevdn
1 1 19 20 22 1.75 2.25 1.50 2.00 8917.264415
22 19 20 2.50 2.25 1.50 2.00 3166.917315
21 22 21 2.25 2.00 2.00 1.75 1877.640187
19 18 22 1.50 2.00 2.25 1.50 1685.686751
18 20 19 1.75 2.00 2.25 1.50 1618.576841
...
240 720 19 19 19 2.50 2.25 2.25 2.25 -3.272245
21 18 20 1.50 2.25 2.00 2.50 -3.326493
120 720 21 19 22 2.00 2.25 2.25 2.00 -3.330802
240 1440 21 22 18 2.50 1.50 2.50 2.50 -3.853700
120 1440 18 19 22 2.50 2.00 2.25 2.50 -4.410079
Length: 1000, dtype: float64
In [36]:
pf_results = optimal_2BB(
    lower_tf  = vbt.Param([5, 30], condition="x <= higher_tf"),
    higher_tf = vbt.Param([1, 5, 15]),
    ltf_rsi_timeperiod  = vbt.Param(create_list_numbers(18, 22, 1)),
    bb_price_timeperiod = vbt.Param(create_list_numbers(18, 22, 1)),
    bb_rsi_timeperiod   = vbt.Param(create_list_numbers(18, 22, 1)),
    bb_price_nbdevup = vbt.Param(create_list_numbers(1.5, 2.5, step=0.25)),
    bb_price_nbdevdn = vbt.Param(create_list_numbers(1.5, 2.5, step=0.25)),
    bb_rsi_nbdevup   = vbt.Param(create_list_numbers(1.5, 2.5, step=0.25)),
    bb_rsi_nbdevdn   = vbt.Param(create_list_numbers(1.5, 2.5, step=0.25)),
    output_metric = ["total_profit", "total_return", "max_drawdown", "sharpe_ratio"]
)
In [15]:
## Use this block when returning data for more than one metric
pf_results_df = pf_results.unstack(level=-1)
pf_results_df = pf_results_df[['total_return', 'max_drawdown', 'sharpe_ratio']].sort_values(
    by=['total_return', 'max_drawdown'], ascending=False)
pf_results_df.reset_index(inplace=True)
pf_results_df
Out[15]:
| | lower_tf | higher_tf | ltf_rsi_timeperiod | bb_price_timeperiod | bb_rsi_timeperiod | bb_price_nbdevup | bb_price_nbdevdn | bb_rsi_nbdevup | bb_rsi_nbdevdn | total_return | max_drawdown | sharpe_ratio |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 5 | 5 | 18 | 18 | 19 | 2.25 | 1.50 | 1.75 | 2.00 | 504.490043 | -0.389288 | 2.328832 |
| 1 | 5 | 5 | 18 | 18 | 20 | 1.75 | 1.75 | 1.75 | 1.75 | 410.278460 | -0.437595 | 2.261473 |
| 2 | 5 | 5 | 18 | 18 | 18 | 2.25 | 1.50 | 1.50 | 2.00 | 371.557575 | -0.449186 | 2.232431 |
| 3 | 5 | 5 | 21 | 18 | 19 | 1.50 | 2.00 | 2.00 | 2.00 | 298.156929 | -0.436903 | 2.162062 |
| 4 | 5 | 5 | 22 | 19 | 20 | 1.75 | 1.75 | 1.75 | 2.25 | 225.497056 | -0.468920 | 2.075027 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 995 | 5 | 15 | 18 | 20 | 20 | 1.75 | 2.25 | 2.50 | 2.50 | -0.967112 | -0.976728 | -0.634604 |
| 996 | 5 | 15 | 18 | 22 | 20 | 2.25 | 2.00 | 2.00 | 2.50 | -0.968102 | -0.989068 | -0.627487 |
| 997 | 5 | 15 | 19 | 20 | 19 | 1.75 | 2.25 | 2.50 | 2.50 | -0.969068 | -0.980316 | -0.652226 |
| 998 | 5 | 15 | 19 | 19 | 18 | 2.50 | 2.50 | 2.25 | 2.50 | -0.975576 | -0.989047 | -0.728846 |
| 999 | 5 | 15 | 20 | 22 | 20 | 2.25 | 2.50 | 2.25 | 2.50 | -0.977365 | -0.991565 | -0.704983 |
1000 rows × 12 columns
Check that the lower_tf <= higher_tf condition was actually enforced (no row should violate it)
In [16]:
print("Length of DF:",len(pf_results_df[pf_results_df['lower_tf'] > pf_results_df['higher_tf']]))
Length of DF: 0
In [51]:
## Best Results and Parameter Combinations
print(f"Best Total Returns: {round(pf_results_df['total_return'].max(), 2)} %")
print(f"Parameter Combinations with Best Total Returns:")
pd.DataFrame(pf_results_df.iloc[pf_results_df['total_return'].idxmax()]).T
Best Total Returns: 504.49 %
Parameter Combinations with Best Total Returns:
Out[51]:
| | lower_tf | higher_tf | ltf_rsi_timeperiod | bb_price_timeperiod | bb_rsi_timeperiod | bb_price_nbdevup | bb_price_nbdevdn | bb_rsi_nbdevup | bb_rsi_nbdevdn | total_return | max_drawdown | sharpe_ratio |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 5.0 | 5.0 | 18.0 | 18.0 | 19.0 | 2.25 | 1.5 | 1.75 | 2.0 | 504.490043 | -0.389288 | 2.328832 |
In [52]:
print(f"Worst Total Returns: {round(pf_results_df['total_return'].min(), 2)} %") print(f"Parameter Combinations with Worst Total Returns:") pd.DataFrame(pf_results_df.iloc[pf_results_df['total_return'].idxmin()]).T
Worst Total Returns: -0.98 %
Parameter Combinations with Worst Total Returns:
Out[52]:
| | lower_tf | higher_tf | ltf_rsi_timeperiod | bb_price_timeperiod | bb_rsi_timeperiod | bb_price_nbdevup | bb_price_nbdevdn | bb_rsi_nbdevup | bb_rsi_nbdevdn | total_return | max_drawdown | sharpe_ratio |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 999 | 5.0 | 15.0 | 20.0 | 22.0 | 20.0 | 2.25 | 2.5 | 2.25 | 2.5 | -0.977365 | -0.991565 | -0.704983 |
Cross Validation¶
Cross-validation is an important part of the backtesting pipeline: it tests whether a parameter combination that performs well on one period holds up on unseen data, rather than being an artifact of overfitting.
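To preview the geometry of the rolling split before building it: with the 1493-bar daily index, a window length of 360 and split = 0.5, each window holds 180 train and 180 test bars, and (judging from the splits shown below) consecutive windows advance by the test length. A small sketch of that arithmetic (the stride rule is an assumption inferred from the output):

```python
import math

n_bars, length, split_frac = 1493, 360, 0.5   # daily bars, window size, train share
train_len = int(length * split_frac)          # 180 train bars per window
test_len  = length - train_len                # 180 test bars per window
n_splits  = math.floor((n_bars - length) / test_len) + 1
print(train_len, test_len, n_splits)          # 180 180 7
```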
In [18]:
## Global plot settings
vbt.settings.set_theme("dark")
vbt.settings['plotting']['layout']['width'] = 1600
In [19]:
splitter = vbt.Splitter.from_rolling(
    index = d1_data.index,
    length = 360,
    split = 0.5,
    set_labels = ["train", "test"]
)
In [20]:
splitter.plot().show()
In [21]:
splitter.splits
Out[21]:
| split | train | test |
|---|---|---|
| 0 | slice(0, 180, None) | slice(180, 360, None) |
| 1 | slice(180, 360, None) | slice(360, 540, None) |
| 2 | slice(360, 540, None) | slice(540, 720, None) |
| 3 | slice(540, 720, None) | slice(720, 900, None) |
| 4 | slice(720, 900, None) | slice(900, 1080, None) |
| 5 | slice(900, 1080, None) | slice(1080, 1260, None) |
| 6 | slice(1080, 1260, None) | slice(1260, 1440, None) |
In [22]:
splitter.index
Out[22]:
DatetimeIndex(['2019-01-01 00:00:00+00:00', '2019-01-02 00:00:00+00:00',
'2019-01-03 00:00:00+00:00', '2019-01-04 00:00:00+00:00',
'2019-01-05 00:00:00+00:00', '2019-01-06 00:00:00+00:00',
'2019-01-07 00:00:00+00:00', '2019-01-08 00:00:00+00:00',
'2019-01-09 00:00:00+00:00', '2019-01-10 00:00:00+00:00',
...
'2023-01-23 00:00:00+00:00', '2023-01-24 00:00:00+00:00',
'2023-01-25 00:00:00+00:00', '2023-01-26 00:00:00+00:00',
'2023-01-27 00:00:00+00:00', '2023-01-28 00:00:00+00:00',
'2023-01-29 00:00:00+00:00', '2023-01-30 00:00:00+00:00',
'2023-01-31 00:00:00+00:00', '2023-02-01 00:00:00+00:00'],
dtype='datetime64[ns, UTC]', name='Open time', length=1493, freq='D')
Other splitter.take options¶
In [23]:
close_slices = splitter.take(d1_data.close, into="reset_stacked_by_set")
close_slices
Out[23]:
set
train    split  0  1  2  3 ...
test     split  0  1  2  3 ...
dtype: object
In [24]:
close_slices = splitter.take(d1_data.close, into="reset_stacked")
close_slices
Out[24]:
| | split 0 train | split 0 test | split 1 train | split 1 test | split 2 train | split 2 test | split 3 train | split 3 test | split 4 train | split 4 test | split 5 train | split 5 test | split 6 train | split 6 test |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 3797.14 | 10854.10 | 10854.10 | 7254.74 | 7254.74 | 9296.49 | 9296.49 | 22719.71 | 22719.71 | 35483.72 | 35483.72 | 47632.38 | 47632.38 | 22136.41 |
| 1 | 3858.56 | 10624.93 | 10624.93 | 7316.14 | 7316.14 | 9249.49 | 9249.49 | 23810.79 | 23810.79 | 35600.16 | 35600.16 | 46131.20 | 46131.20 | 22583.72 |
| 2 | 3766.78 | 10842.85 | 10842.85 | 7388.24 | 7388.24 | 9162.21 | 9162.21 | 23232.76 | 23232.76 | 31608.93 | 31608.93 | 46834.48 | 46834.48 | 20401.31 |
| 3 | 3792.01 | 11940.00 | 11940.00 | 7246.00 | 7246.00 | 9012.00 | 9012.00 | 23729.20 | 23729.20 | 32509.56 | 32509.56 | 46681.23 | 46681.23 | 20468.81 |
| 4 | 3770.96 | 11145.67 | 11145.67 | 7195.23 | 7195.23 | 9116.35 | 9116.35 | 24712.47 | 24712.47 | 33678.07 | 33678.07 | 46914.16 | 46914.16 | 18970.79 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 175 | 11820.86 | 7501.44 | 7501.44 | 9310.23 | 9310.23 | 21335.52 | 21335.52 | 40516.29 | 40516.29 | 49389.99 | 49389.99 | 30109.93 | 30109.93 | 17088.96 |
| 176 | 13093.80 | 7317.09 | 7317.09 | 9358.95 | 9358.95 | 22797.16 | 22797.16 | 40144.04 | 40144.04 | 50053.90 | 50053.90 | 29091.88 | 29091.88 | 16836.64 |
| 177 | 11329.99 | 7255.77 | 7255.77 | 9294.69 | 9294.69 | 23107.39 | 23107.39 | 38349.01 | 38349.01 | 46702.75 | 46702.75 | 28424.70 | 28424.70 | 17224.10 |
| 178 | 12400.63 | 7204.63 | 7204.63 | 9685.69 | 9685.69 | 23821.61 | 23821.61 | 38092.97 | 38092.97 | 48343.28 | 48343.28 | 26574.53 | 26574.53 | 17128.56 |
| 179 | 11903.13 | 7202.00 | 7202.00 | 9624.89 | 9624.89 | 23455.52 | 23455.52 | 35819.84 | 35819.84 | 48864.98 | 48864.98 | 22487.41 | 22487.41 | 17127.49 |
180 rows × 14 columns
In [25]:
close_slices = splitter.take(d1_data.close)
close_slices
Out[25]:
split set
0 train Open time
2019-01-01 00:00:00+00:00 3797.1...
test Open time
2019-06-30 00:00:00+00:00 10854.1...
1 train Open time
2019-06-30 00:00:00+00:00 10854.1...
test Open time
2019-12-27 00:00:00+00:00 7254.74...
2 train Open time
2019-12-27 00:00:00+00:00 7254.74...
test Open time
2020-06-24 00:00:00+00:00 9296.4...
3 train Open time
2020-06-24 00:00:00+00:00 9296.4...
test Open time
2020-12-21 00:00:00+00:00 22719.7...
4 train Open time
2020-12-21 00:00:00+00:00 22719.7...
test Open time
2021-06-19 00:00:00+00:00 35483.7...
5 train Open time
2021-06-19 00:00:00+00:00 35483.7...
test Open time
2021-12-16 00:00:00+00:00 47632.3...
6 train Open time
2021-12-16 00:00:00+00:00 47632.3...
test Open time
2022-06-14 00:00:00+00:00 22136.4...
dtype: object
In [26]:
print("Total Nr. of Splits:",len(close_slices.index)) df_splits = pd.DataFrame(close_slices.index.tolist(), columns=["split", "period"]) unique_splits = df_splits["split"].unique().tolist() print("Unique Splits:", unique_splits) df_splits
Total Nr. of Splits: 14
Unique Splits: [0, 1, 2, 3, 4, 5, 6]
Out[26]:
| | split | period |
|---|---|---|
| 0 | 0 | train |
| 1 | 0 | test |
| 2 | 1 | train |
| 3 | 1 | test |
| 4 | 2 | train |
| 5 | 2 | test |
| 6 | 3 | train |
| 7 | 3 | test |
| 8 | 4 | train |
| 9 | 4 | test |
| 10 | 5 | train |
| 11 | 5 | test |
| 12 | 6 | train |
| 13 | 6 | test |
In [27]:
def get_total_return(close_prices):
    return close_prices.vbt.to_returns().vbt.returns.total()

base_line_returns = close_slices.apply(get_total_return)
base_line_returns
Out[27]:
split set
0 train 2.134762
test -0.336472
1 train -0.336472
test 0.326704
2 train 0.326704
test 1.523051
3 train 1.523051
test 0.576598
4 train 0.576598
test 0.377110
5 train 0.377110
test -0.527897
6 train -0.527897
test -0.226275
dtype: float64
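For intuition, the accessor chain in get_total_return compounds simple returns, which collapses to the last price over the first price minus one; a plain-pandas sketch that should agree with the values above:

```python
def get_total_return_pandas(close_prices):
    ## (1 + r_1) * ... * (1 + r_n) - 1 == close[-1] / close[0] - 1 for simple returns
    return close_prices.iloc[-1] / close_prices.iloc[0] - 1

base_line_returns_check = close_slices.apply(get_total_return_pandas)
```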
In [28]:
train_slices = [slice(close_slices[i, "train"].index[0], close_slices[i, "train"].index[-1])
                for i in unique_splits]
train_slices
Out[28]:
[slice(Timestamp('2019-01-01 00:00:00+0000', tz='UTC', freq='D'), Timestamp('2019-06-29 00:00:00+0000', tz='UTC', freq='D'), None),
slice(Timestamp('2019-06-30 00:00:00+0000', tz='UTC', freq='D'), Timestamp('2019-12-26 00:00:00+0000', tz='UTC', freq='D'), None),
slice(Timestamp('2019-12-27 00:00:00+0000', tz='UTC', freq='D'), Timestamp('2020-06-23 00:00:00+0000', tz='UTC', freq='D'), None),
slice(Timestamp('2020-06-24 00:00:00+0000', tz='UTC', freq='D'), Timestamp('2020-12-20 00:00:00+0000', tz='UTC', freq='D'), None),
slice(Timestamp('2020-12-21 00:00:00+0000', tz='UTC', freq='D'), Timestamp('2021-06-18 00:00:00+0000', tz='UTC', freq='D'), None),
slice(Timestamp('2021-06-19 00:00:00+0000', tz='UTC', freq='D'), Timestamp('2021-12-15 00:00:00+0000', tz='UTC', freq='D'), None),
slice(Timestamp('2021-12-16 00:00:00+0000', tz='UTC', freq='D'), Timestamp('2022-06-13 00:00:00+0000', tz='UTC', freq='D'), None)]
In [29]:
test_slices = [slice(close_slices[i, "test"].index[0], close_slices[i, "test"].index[-1])
               for i in unique_splits]
test_slices
Out[29]:
[slice(Timestamp('2019-06-30 00:00:00+0000', tz='UTC', freq='D'), Timestamp('2019-12-26 00:00:00+0000', tz='UTC', freq='D'), None),
slice(Timestamp('2019-12-27 00:00:00+0000', tz='UTC', freq='D'), Timestamp('2020-06-23 00:00:00+0000', tz='UTC', freq='D'), None),
slice(Timestamp('2020-06-24 00:00:00+0000', tz='UTC', freq='D'), Timestamp('2020-12-20 00:00:00+0000', tz='UTC', freq='D'), None),
slice(Timestamp('2020-12-21 00:00:00+0000', tz='UTC', freq='D'), Timestamp('2021-06-18 00:00:00+0000', tz='UTC', freq='D'), None),
slice(Timestamp('2021-06-19 00:00:00+0000', tz='UTC', freq='D'), Timestamp('2021-12-15 00:00:00+0000', tz='UTC', freq='D'), None),
slice(Timestamp('2021-12-16 00:00:00+0000', tz='UTC', freq='D'), Timestamp('2022-06-13 00:00:00+0000', tz='UTC', freq='D'), None),
slice(Timestamp('2022-06-14 00:00:00+0000', tz='UTC', freq='D'), Timestamp('2022-12-10 00:00:00+0000', tz='UTC', freq='D'), None)]
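These slices can also be used on their own, e.g. to eyeball a single window before running the full cross-validation (illustrative; vbt data objects support .loc, as already used inside optimal_2BB):

```python
## Slice the 1-minute data to the first train window
first_train = train_slices[0]
m1_train_0 = mtf_data["1T"].loc[first_train.start:first_train.stop]
print(m1_train_0.get("Close").shape)
```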
Performance on train splits¶
In [30]:
train_perf = splitter.apply(
    apply_func = optimal_2BB,  ## apply the strategy function to the splitter object, followed by its arguments
    lower_tf  = vbt.Param([1, 5, 15, 30, 60, 120, 240, 720], condition="x <= higher_tf"),
    higher_tf = vbt.Param([1, 5, 15, 30, 60, 120, 240, 720, 1440]),
    ltf_rsi_timeperiod  = vbt.Param(create_list_numbers(18, 22, 1)),
    bb_price_timeperiod = vbt.Param(create_list_numbers(18, 22, 1)),
    bb_rsi_timeperiod   = vbt.Param(create_list_numbers(18, 22, 1)),
    bb_price_nbdevup = vbt.Param(create_list_numbers(1.5, 2.5, step=0.25)),
    bb_price_nbdevdn = vbt.Param(create_list_numbers(1.5, 2.5, step=0.25)),
    bb_rsi_nbdevup   = vbt.Param(create_list_numbers(1.5, 2.5, step=0.25)),
    bb_rsi_nbdevdn   = vbt.Param(create_list_numbers(1.5, 2.5, step=0.25)),
    output_metric = "sharpe_ratio",
    #### Arguments of splitter.apply() not related to the strategy
    index = vbt.Takeable(splitter.index),  ## datetime index from the splitter object
    set_ = "train",                        ## the set to use for this CV simulation - train or test
    _random_subset = 500,                  ## nr. of simulations to run per train split
    merge_func = "concat",                 ## concatenate the results
    execute_kwargs = dict(show_progress=True),  ## execute_kwargs controls the execution of each split/set/range - show a progress bar
    _execute_kwargs = dict(show_progress=False, clear_cache=50, collect_garbage=50)  ## _execute_kwargs controls the execution of the parameter combinations
)
train_perf.sort_values(ascending=False)
Out[30]:
split lower_tf higher_tf ltf_rsi_timeperiod bb_price_timeperiod bb_rsi_timeperiod bb_price_nbdevup bb_price_nbdevdn bb_rsi_nbdevup bb_rsi_nbdevdn
4 1 1 20 18 20 1.75 1.50 1.75 2.25 8.036717
21 21 21 1.75 2.25 1.50 2.00 7.804521
18 19 21 2.00 2.25 1.50 2.25 7.350429
20 19 19 2.00 2.25 1.50 2.25 7.342806
21 20 22 2.00 2.25 1.75 2.50 7.026099
...
3 5 30 21 22 20 2.00 2.50 1.75 2.50 -3.449168
0 1 1440 18 18 18 1.50 2.25 2.00 2.00 NaN
19 19 21 2.50 2.50 1.75 1.75 NaN
20 19 19 2.00 2.25 1.75 2.50 NaN
22 22 18 2.25 2.50 1.75 1.75 NaN
Length: 3500, dtype: float64
View train split performance statistics
In [31]:
train_split_describe = pd.concat(
    [train_perf[train_perf.index.get_level_values('split') == i].describe() for i in unique_splits],
    axis=1, keys=[f"Train_Split_{i}" for i in unique_splits])
train_split_describe
Out[31]:
| | Train_Split_0 | Train_Split_1 | Train_Split_2 | Train_Split_3 | Train_Split_4 | Train_Split_5 | Train_Split_6 |
|---|---|---|---|---|---|---|---|
| count | 496.000000 | 500.000000 | 500.000000 | 500.000000 | 500.000000 | 500.000000 | 500.000000 |
| mean | -0.430325 | -0.256053 | -0.012914 | -0.297292 | 0.423548 | 0.284101 | 0.127918 |
| std | 1.213900 | 1.050923 | 0.951679 | 1.606742 | 1.404564 | 1.090453 | 0.989352 |
| min | -3.042384 | -2.782880 | -2.549706 | -3.449168 | -3.107068 | -2.726325 | -2.549975 |
| 25% | -1.259347 | -0.963931 | -0.611205 | -1.370408 | -0.403063 | -0.484848 | -0.565334 |
| 50% | -0.741358 | -0.297287 | -0.181718 | -0.578251 | 0.254005 | 0.086147 | 0.068521 |
| 75% | 0.254324 | 0.434706 | 0.399538 | 0.444610 | 0.905467 | 0.966222 | 0.840372 |
| max | 4.418397 | 2.999906 | 4.448941 | 6.546613 | 8.036717 | 3.734348 | 3.144399 |
In [32]:
## Compute baseline, best and worst returns for the overlaid line plots
train_split_best_returns = train_split_describe.loc['max'].reset_index(drop=True)
train_split_worst_returns = train_split_describe.loc['min'].reset_index(drop=True)
train_splits_baseline_returns = pd.Series([base_line_returns[i, "train"] for i in unique_splits])

## Create box plot for the train-performance statistics
train_split_fig = train_perf.vbt.boxplot(
    by_level="split",
    trace_kwargs=dict(line=dict(color="lightskyblue"), opacity=0.4, showlegend=False),
    xaxis_title="Train Splits",
    yaxis_title="Sharpe Ratio"
)
train_split_best_returns.vbt.plot(trace_kwargs=dict(name="Best Returns", line=dict(color="limegreen", dash="dash")), fig=train_split_fig)
train_split_worst_returns.vbt.plot(trace_kwargs=dict(name="Worst Returns", line=dict(color="tomato", dash="dash")), fig=train_split_fig)
train_splits_baseline_returns.vbt.plot(trace_kwargs=dict(name="Baseline", line=dict(color="yellow", dash="dash")), fig=train_split_fig)
train_split_fig.show()
Performance on test splits¶
In [33]:
test_perf = splitter.apply(
    apply_func = optimal_2BB,  ## apply the strategy function to the splitter object, followed by its arguments
    lower_tf  = vbt.Param([1, 5, 15, 30, 60, 120, 240, 720], condition="x <= higher_tf"),
    higher_tf = vbt.Param([1, 5, 15, 30, 60, 120, 240, 720, 1440]),
    ltf_rsi_timeperiod  = vbt.Param(create_list_numbers(18, 22, 1)),
    bb_price_timeperiod = vbt.Param(create_list_numbers(18, 22, 1)),
    bb_rsi_timeperiod   = vbt.Param(create_list_numbers(18, 22, 1)),
    bb_price_nbdevup = vbt.Param(create_list_numbers(1.5, 2.5, step=0.25)),
    bb_price_nbdevdn = vbt.Param(create_list_numbers(1.5, 2.5, step=0.25)),
    bb_rsi_nbdevup   = vbt.Param(create_list_numbers(1.5, 2.5, step=0.25)),
    bb_rsi_nbdevdn   = vbt.Param(create_list_numbers(1.5, 2.5, step=0.25)),
    output_metric = "sharpe_ratio",
    #### Arguments of splitter.apply() not related to the strategy
    index = vbt.Takeable(splitter.index),  ## datetime index from the splitter object
    _random_subset = 500,                  ## nr. of simulations to run per test split
    set_ = "test",                         ## the set to use for this CV simulation - train or test
    merge_func = "concat",                 ## concatenate the results
    execute_kwargs = dict(show_progress=True),  ## execute_kwargs controls the execution of each split/set/range - show a progress bar
    _execute_kwargs = dict(show_progress=False, clear_cache=50, collect_garbage=50)  ## _execute_kwargs controls the execution of the parameter combinations
)
test_perf.sort_values(ascending=False)
Out[33]:
split lower_tf higher_tf ltf_rsi_timeperiod bb_price_timeperiod bb_rsi_timeperiod bb_price_nbdevup bb_price_nbdevdn bb_rsi_nbdevup bb_rsi_nbdevdn
4 1 1 22 20 21 2.25 2.50 1.50 1.50 7.777249
18 22 20 2.25 2.00 1.75 1.75 7.665653
19 20 20 2.00 2.25 2.00 2.25 7.652166
3 1 1 18 18 20 2.25 2.25 1.75 1.75 7.082363
4 1 1 22 19 22 1.75 2.25 2.00 1.75 7.035542
...
0 1 1440 19 18 20 2.50 2.00 2.00 2.00 NaN
22 21 1.75 2.50 2.00 2.25 NaN
22 18 18 1.75 1.75 1.50 2.00 NaN
15 720 20 21 22 1.50 2.25 2.00 2.00 NaN
3 120 1440 20 18 20 2.50 1.50 2.00 2.25 NaN
Length: 3500, dtype: float64
View test split performance statistics
In [34]:
test_split_describe = pd.concat(
    [test_perf[test_perf.index.get_level_values('split') == i].describe() for i in unique_splits],
    axis=1, keys=[f"Test_Split_{i}" for i in unique_splits])
test_split_describe
Out[34]:
| | Test_Split_0 | Test_Split_1 | Test_Split_2 | Test_Split_3 | Test_Split_4 | Test_Split_5 | Test_Split_6 |
|---|---|---|---|---|---|---|---|
| count | 496.000000 | 500.000000 | 500.000000 | 499.000000 | 500.000000 | 500.000000 | 500.000000 |
| mean | -0.407962 | -0.239648 | -0.037181 | -0.348490 | 0.414004 | 0.343005 | 0.155692 |
| std | 1.192991 | 1.040834 | 0.977560 | 1.614325 | 1.407563 | 1.207516 | 1.061513 |
| min | -2.745665 | -2.802081 | -2.625053 | -3.548850 | -3.107068 | -2.663871 | -3.188824 |
| 25% | -1.268158 | -0.928053 | -0.643853 | -1.352547 | -0.393175 | -0.488930 | -0.635615 |
| 50% | -0.683554 | -0.237556 | -0.206665 | -0.608184 | 0.267323 | 0.117486 | 0.109689 |
| 75% | 0.170201 | 0.382643 | 0.398758 | 0.251647 | 0.904010 | 0.995893 | 0.870143 |
| max | 3.443967 | 2.653158 | 3.703606 | 7.082363 | 7.777249 | 4.995166 | 3.511205 |
In [35]:
## Compute baseline, best and worst returns for the overlaid line plots
test_split_best_returns = test_split_describe.loc['max'].reset_index(drop=True)
test_split_worst_returns = test_split_describe.loc['min'].reset_index(drop=True)
test_splits_baseline_returns = pd.Series([base_line_returns[i, "test"] for i in unique_splits])

## Create box plot for the test-performance statistics
test_split_fig = test_perf.vbt.boxplot(
    by_level="split",
    trace_kwargs=dict(line=dict(color="lightskyblue"), opacity=0.4, showlegend=False),
    xaxis_title="Test Splits",
    yaxis_title="Sharpe Ratio"
)
test_split_best_returns.vbt.plot(trace_kwargs=dict(name="Best Returns", line=dict(color="limegreen", dash="dash")), fig=test_split_fig)
test_split_worst_returns.vbt.plot(trace_kwargs=dict(name="Worst Returns", line=dict(color="tomato", dash="dash")), fig=test_split_fig)
test_splits_baseline_returns.vbt.plot(trace_kwargs=dict(name="Baseline", line=dict(color="yellow", dash="dash")), fig=test_split_fig)
test_split_fig.show()