{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Pairs trading" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from vectorbtpro import *\n", "# whats_imported()\n", "\n", "vbt.settings.set_theme(\"dark\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Selection" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "SYMBOLS = vbt.BinanceData.list_symbols(\"*USDT\")\n", "POOL_FILE = \"temp/data_pool.h5\"\n", "START = \"2018\"\n", "END = \"2023\"\n", "\n", "# vbt.remove_dir(\"temp\", with_contents=True, missing_ok=True)\n", "vbt.make_dir(\"temp\")\n", "\n", "if not vbt.file_exists(POOL_FILE):\n", " with vbt.ProgressBar(total=len(SYMBOLS)) as pbar:\n", " collected = 0\n", " for symbol in SYMBOLS:\n", " try:\n", " data = vbt.BinanceData.pull(\n", " symbol, \n", " start=START,\n", " end=END,\n", " show_progress=False,\n", " silence_warnings=True\n", " )\n", " data.to_hdf(POOL_FILE)\n", " collected += 1\n", " except Exception:\n", " pass\n", " pbar.set_prefix(f\"{symbol} ({collected})\")\n", " pbar.update()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "SELECT_START = \"2020\"\n", "SELECT_END = \"2021\"\n", "\n", "data = vbt.HDFData.pull(\n", " POOL_FILE, \n", " start=SELECT_START, \n", " end=SELECT_END, \n", " silence_warnings=True\n", ")\n", "\n", "print(len(data.symbols))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "data = data.select([\n", " k \n", " for k, v in data.data.items() \n", " if not v.isnull().any().any()\n", "])\n", "\n", "print(len(data.symbols))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "@vbt.parameterized(\n", " merge_func=\"concat\", \n", " engine=\"pathos\",\n", " distribute=\"chunks\",\n", " n_chunks=\"auto\"\n", ")\n", "def coint_pvalue(close, s1, s2):\n", " import statsmodels.tsa.stattools as ts\n", " import numpy as np\n", " return ts.coint(np.log(close[s1]), np.log(close[s2]))[1]\n", "\n", "COINT_FILE = \"temp/coint_pvalues.pickle\"\n", "\n", "# vbt.remove_file(COINT_FILE, missing_ok=True)\n", "if not vbt.file_exists(COINT_FILE):\n", " coint_pvalues = coint_pvalue(\n", " data.close,\n", " vbt.Param(data.symbols, condition=\"s1 != s2\"),\n", " vbt.Param(data.symbols)\n", " )\n", " vbt.save(coint_pvalues, COINT_FILE)\n", "else:\n", " coint_pvalues = vbt.load(COINT_FILE)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "coint_pvalues = coint_pvalues.sort_values()\n", "\n", "print(coint_pvalues)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "S1, S2 = \"ALGOUSDT\", \"QTUMUSDT\"\n", "\n", "data.plot(column=\"Close\", symbol=[S1, S2], base=1).show_svg()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "S1_log = np.log(data.get(\"Close\", S1))\n", "S2_log = np.log(data.get(\"Close\", S2))\n", "log_diff = (S2_log - S1_log).rename(\"Log diff\")\n", "fig = log_diff.vbt.plot()\n", "fig.add_hline(y=log_diff.mean(), line_color=\"yellow\", line_dash=\"dot\")\n", "fig.show_svg()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Testing" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "DATA_FILE = \"temp/data.pickle\"\n", "\n", "# vbt.remove_file(DATA_FILE, missing_ok=True)\n", "if not vbt.file_exists(DATA_FILE):\n", " data = vbt.BinanceData.pull(\n", " [S1, S2], \n", " start=SELECT_END,\n", " end=END, \n", " timeframe=\"hourly\"\n", " )\n", " vbt.save(data, DATA_FILE)\n", "else:\n", " data = vbt.load(DATA_FILE)\n", "\n", "print(len(data.index))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Level: Researcher" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import scipy.stats as st\n", "\n", "WINDOW = 24 * 30\n", "UPPER = st.norm.ppf(1 - 0.05 / 2)\n", "LOWER = -st.norm.ppf(1 - 0.05 / 2)\n", "\n", "S1_close = data.get(\"Close\", S1)\n", "S2_close = data.get(\"Close\", S2)\n", "ols = vbt.OLS.run(S1_close, S2_close, window=vbt.Default(WINDOW))\n", "spread = ols.error.rename(\"Spread\")\n", "zscore = ols.zscore.rename(\"Z-score\")\n", "print(pd.concat((spread, zscore), axis=1))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "upper_crossed = zscore.vbt.crossed_above(UPPER)\n", "lower_crossed = zscore.vbt.crossed_below(LOWER)\n", "\n", "fig = zscore.vbt.plot()\n", "fig.add_hline(y=UPPER, line_color=\"orangered\", line_dash=\"dot\")\n", "fig.add_hline(y=0, line_color=\"yellow\", line_dash=\"dot\")\n", "fig.add_hline(y=LOWER, line_color=\"limegreen\", line_dash=\"dot\")\n", "upper_crossed.vbt.signals.plot_as_exits(zscore, fig=fig)\n", "lower_crossed.vbt.signals.plot_as_entries(zscore, fig=fig)\n", "fig.show_svg()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "long_entries = data.symbol_wrapper.fill(False)\n", "short_entries = data.symbol_wrapper.fill(False)\n", "\n", "short_entries.loc[upper_crossed, S1] = True\n", "long_entries.loc[upper_crossed, S2] = True\n", "long_entries.loc[lower_crossed, S1] = True\n", "short_entries.loc[lower_crossed, S2] = True\n", "\n", "print(long_entries.sum())\n", "print(short_entries.sum())" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "pf = vbt.Portfolio.from_signals(\n", " data,\n", " entries=long_entries,\n", " short_entries=short_entries,\n", " size=10,\n", " size_type=\"valuepercent100\",\n", " group_by=True,\n", " cash_sharing=True,\n", " call_seq=\"auto\"\n", ")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "fig = pf.plot_allocations()\n", "rebalancing_dates = data.index[np.unique(pf.orders.idx.values)]\n", "for date in rebalancing_dates:\n", " fig.add_vline(x=date, line_color=\"teal\", line_dash=\"dot\")\n", "fig.show_svg()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "pf.stats()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "allocations = data.symbol_wrapper.fill()\n", "allocations.loc[upper_crossed, S1] = -0.1\n", "allocations.loc[upper_crossed, S2] = 0.1\n", "allocations.loc[lower_crossed, S1] = 0.1\n", "allocations.loc[lower_crossed, S2] = -0.1\n", "pfo = vbt.PortfolioOptimizer.from_filled_allocations(allocations)\n", "pfo.plot().show_svg()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "print(pfo.allocations)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "pf = pfo.simulate(data, pf_method=\"from_signals\")\n", "pf.total_return" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "PTS_expr = \"\"\"\n", " PTS:\n", " x = @in_close.iloc[:, 0]\n", " y = @in_close.iloc[:, 1]\n", " ols = vbt.OLS.run(x, y, window=@p_window, hide_params=True)\n", " upper = st.norm.ppf(1 - @p_upper_alpha / 2)\n", " lower = -st.norm.ppf(1 - @p_lower_alpha / 2)\n", " upper_crossed = ols.zscore.vbt.crossed_above(upper)\n", " lower_crossed = ols.zscore.vbt.crossed_below(lower)\n", " long_entries = wrapper.fill(False)\n", " short_entries = wrapper.fill(False)\n", " short_entries.loc[upper_crossed, x.name] = True\n", " long_entries.loc[upper_crossed, y.name] = True\n", " long_entries.loc[lower_crossed, x.name] = True\n", " short_entries.loc[lower_crossed, y.name] = True\n", " long_entries, short_entries\n", "\"\"\"\n", "\n", "PTS = vbt.IF.from_expr(PTS_expr, keep_pd=True, st=st)\n", "vbt.phelp(PTS.run)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "WINDOW_SPACE = np.arange(5, 50).tolist()\n", "ALPHA_SPACE = (np.arange(1, 100) / 1000).tolist()\n", "\n", "long_entries, short_entries = data.run(\n", " PTS, \n", " window=WINDOW_SPACE,\n", " upper_alpha=ALPHA_SPACE,\n", " lower_alpha=ALPHA_SPACE,\n", " param_product=True,\n", " random_subset=1000,\n", " seed=42,\n", " unpack=True\n", ")\n", "print(long_entries.columns)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "pf = vbt.Portfolio.from_signals(\n", " data,\n", " entries=long_entries,\n", " short_entries=short_entries,\n", " size=10,\n", " size_type=\"valuepercent100\",\n", " group_by=vbt.ExceptLevel(\"symbol\"),\n", " cash_sharing=True,\n", " call_seq=\"auto\"\n", ")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "opt_results = pd.concat((\n", " pf.total_return,\n", " pf.trades.expectancy,\n", "), axis=1)\n", "print(opt_results.sort_values(by=\"total_return\", ascending=False))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "best_index = opt_results.idxmax()[\"expectancy\"]\n", "best_long_entries = long_entries[best_index]\n", "best_short_entries = short_entries[best_index]\n", "STOP_SPACE = [np.nan] + np.arange(1, 100).tolist()\n", "\n", "pf = vbt.Portfolio.from_signals(\n", " data,\n", " entries=best_long_entries,\n", " short_entries=best_short_entries,\n", " size=10,\n", " size_type=\"valuepercent100\",\n", " group_by=vbt.ExceptLevel(\"symbol\"),\n", " cash_sharing=True,\n", " call_seq=\"auto\",\n", " sl_stop=vbt.Param(STOP_SPACE),\n", " tsl_stop=vbt.Param(STOP_SPACE),\n", " tp_stop=vbt.Param(STOP_SPACE),\n", " delta_format=\"percent100\",\n", " stop_exit_price=\"close\",\n", " broadcast_kwargs=dict(random_subset=1000, seed=42)\n", ")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "opt_results = pd.concat((\n", " pf.total_return,\n", " pf.trades.expectancy,\n", "), axis=1)\n", "print(opt_results.sort_values(by=\"total_return\", ascending=False))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def plot_metric_by_stop(stop_name, metric_name, stat_name, smooth):\n", " from scipy.signal import savgol_filter\n", "\n", " values = pf.deep_getattr(metric_name)\n", " values = values.vbt.select_levels(stop_name)\n", " values = getattr(values.groupby(values.index), stat_name)()\n", " smooth_values = savgol_filter(values, smooth, 1)\n", " smooth_values = values.vbt.wrapper.wrap(smooth_values)\n", " fig = values.rename(metric_name).vbt.plot()\n", " smooth_values.rename(f\"{metric_name} (smoothed)\").vbt.plot(\n", " trace_kwargs=dict(line=dict(dash=\"dot\", color=\"yellow\")),\n", " fig=fig, \n", " )\n", " return fig\n", "\n", "plot_metric_by_stop(\n", " \"sl_stop\", \n", " \"trades.expectancy\", \n", " \"median\",\n", " 10\n", ").show_svg()\n", "plot_metric_by_stop(\n", " \"tsl_stop\", \n", " \"trades.expectancy\", \n", " \"median\",\n", " 10\n", ").show_svg()\n", "plot_metric_by_stop(\n", " \"tp_stop\", \n", " \"trades.expectancy\", \n", " \"median\",\n", " 10\n", ").show_svg()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Level: Engineer" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "@njit(nogil=True)\n", "def pt_signals_nb(close, window=WINDOW, upper=UPPER, lower=LOWER):\n", " x = np.expand_dims(close[:, 0], 1)\n", " y = np.expand_dims(close[:, 1], 1)\n", " _, _, zscore = vbt.ind_nb.ols_nb(x, y, window)\n", " zscore_1d = zscore[:, 0]\n", " upper_ts = np.full_like(zscore_1d, upper, dtype=np.float_)\n", " lower_ts = np.full_like(zscore_1d, lower, dtype=np.float_)\n", " upper_crossed = vbt.nb.crossed_above_1d_nb(zscore_1d, upper_ts)\n", " lower_crossed = vbt.nb.crossed_above_1d_nb(lower_ts, zscore_1d)\n", " long_entries = np.full_like(close, False, dtype=np.bool_)\n", " short_entries = np.full_like(close, False, dtype=np.bool_)\n", " short_entries[upper_crossed, 0] = True\n", " long_entries[upper_crossed, 1] = True\n", " long_entries[lower_crossed, 0] = True\n", " short_entries[lower_crossed, 1] = True\n", " return long_entries, short_entries" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "long_entries, short_entries = pt_signals_nb(data.close.values)\n", "long_entries = data.symbol_wrapper.wrap(long_entries)\n", "short_entries = data.symbol_wrapper.wrap(short_entries)\n", "\n", "print(long_entries.sum())\n", "print(short_entries.sum())" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "@njit(nogil=True)\n", "def pt_portfolio_nb(\n", " open, \n", " high, \n", " low, \n", " close,\n", " long_entries,\n", " short_entries,\n", " sl_stop=np.nan,\n", " tsl_stop=np.nan,\n", " tp_stop=np.nan,\n", "):\n", " target_shape = close.shape\n", " group_lens = np.array([2])\n", " sim_out = vbt.pf_nb.from_signals_nb(\n", " target_shape=target_shape,\n", " group_lens=group_lens,\n", " auto_call_seq=True,\n", " open=open,\n", " high=high,\n", " low=low,\n", " close=close,\n", " long_entries=long_entries,\n", " short_entries=short_entries,\n", " size=10,\n", " size_type=vbt.pf_enums.SizeType.ValuePercent100,\n", " sl_stop=sl_stop,\n", " tsl_stop=tsl_stop,\n", " tp_stop=tp_stop,\n", " delta_format=vbt.pf_enums.DeltaFormat.Percent100,\n", " stop_exit_price=vbt.pf_enums.StopExitPrice.Close\n", " )\n", " return sim_out" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "sim_out = pt_portfolio_nb(\n", " data.open.values,\n", " data.high.values,\n", " data.low.values,\n", " data.close.values,\n", " long_entries.values,\n", " short_entries.values\n", ")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "pf = vbt.Portfolio(\n", " data.symbol_wrapper.regroup(group_by=True),\n", " sim_out,\n", " open=data.open,\n", " high=data.high,\n", " low=data.low,\n", " close=data.close,\n", " cash_sharing=True,\n", " init_cash=100\n", ")\n", "\n", "print(pf.total_return)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "@njit(nogil=True)\n", "def pt_metrics_nb(close, sim_out):\n", " target_shape = close.shape\n", " group_lens = np.array([2])\n", " filled_close = vbt.nb.fbfill_nb(close)\n", " col_map = vbt.rec_nb.col_map_nb(\n", " col_arr=sim_out.order_records[\"col\"], \n", " n_cols=target_shape[1]\n", " )\n", " total_profit = vbt.pf_nb.total_profit_nb(\n", " target_shape=target_shape,\n", " close=filled_close,\n", " order_records=sim_out.order_records,\n", " col_map=col_map\n", " )\n", " total_profit_grouped = vbt.pf_nb.total_profit_grouped_nb(\n", " total_profit=total_profit,\n", " group_lens=group_lens,\n", " )[0]\n", " total_return = total_profit_grouped / 100\n", " trade_records = vbt.pf_nb.get_exit_trades_nb(\n", " order_records=sim_out.order_records, \n", " close=filled_close, \n", " col_map=col_map\n", " )\n", " trade_records = trade_records[\n", " trade_records[\"status\"] == vbt.pf_enums.TradeStatus.Closed\n", " ]\n", " expectancy = vbt.pf_nb.expectancy_reduce_nb(\n", " pnl_arr=trade_records[\"pnl\"]\n", " )\n", " return total_return, expectancy" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "pt_metrics_nb(data.close.values, sim_out)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "@njit(nogil=True)\n", "def pt_pipeline_nb(\n", " open, \n", " high, \n", " low, \n", " close,\n", " window=WINDOW,\n", " upper=UPPER,\n", " lower=LOWER,\n", " sl_stop=np.nan,\n", " tsl_stop=np.nan,\n", " tp_stop=np.nan,\n", "):\n", " long_entries, short_entries = pt_signals_nb(\n", " close, \n", " window=window, \n", " upper=upper, \n", " lower=lower\n", " )\n", " sim_out = pt_portfolio_nb(\n", " open,\n", " high,\n", " low,\n", " close,\n", " long_entries,\n", " short_entries,\n", " sl_stop=sl_stop,\n", " tsl_stop=tsl_stop,\n", " tp_stop=tp_stop\n", " )\n", " return pt_metrics_nb(close, sim_out)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "pt_pipeline_nb(\n", " data.open.values,\n", " data.high.values,\n", " data.low.values,\n", " data.close.values\n", ")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "%%timeit\n", "pt_pipeline_nb(\n", " data.open.values,\n", " data.high.values,\n", " data.low.values,\n", " data.close.values\n", ")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "param_pt_pipeline = vbt.parameterized(\n", " pt_pipeline_nb, \n", " merge_func=\"concat\",\n", " seed=42,\n", " engine=\"threadpool\",\n", " chunk_len=\"auto\"\n", ")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "UPPER_SPACE = [st.norm.ppf(1 - x / 2) for x in ALPHA_SPACE]\n", "LOWER_SPACE = [-st.norm.ppf(1 - x / 2) for x in ALPHA_SPACE]\n", "POPT_FILE = \"temp/param_opt.pickle\"\n", "\n", "# vbt.remove_file(POPT_FILE, missing_ok=True)\n", "if not vbt.file_exists(POPT_FILE):\n", " param_opt = param_pt_pipeline(\n", " data.open.values,\n", " data.high.values,\n", " data.low.values,\n", " data.close.values,\n", " window=vbt.Param(WINDOW_SPACE),\n", " upper=vbt.Param(UPPER_SPACE),\n", " lower=vbt.Param(LOWER_SPACE)\n", " )\n", " vbt.save(param_opt, POPT_FILE)\n", "else:\n", " param_opt = vbt.load(POPT_FILE)\n", "\n", "total_return, expectancy = param_opt" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "print(total_return)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "grouped_metric = total_return.groupby(level=[\"upper\", \"lower\"]).mean()\n", "grouped_metric.vbt.heatmap(\n", " trace_kwargs=dict(colorscale=\"RdBu\", zmid=0),\n", " yaxis=dict(autorange=\"reversed\")\n", ").show_svg()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "@njit(nogil=True)\n", "def pt_pipeline_mult_nb(\n", " n_params: int,\n", " open: tp.Array2d, \n", " high: tp.Array2d, \n", " low: tp.Array2d, \n", " close: tp.Array2d,\n", " window: tp.FlexArray1dLike = WINDOW,\n", " upper: tp.FlexArray1dLike = UPPER,\n", " lower: tp.FlexArray1dLike = LOWER,\n", " sl_stop: tp.FlexArray1dLike = np.nan,\n", " tsl_stop: tp.FlexArray1dLike = np.nan,\n", " tp_stop: tp.FlexArray1dLike = np.nan,\n", "):\n", " window_ = vbt.to_1d_array_nb(np.asarray(window))\n", " upper_ = vbt.to_1d_array_nb(np.asarray(upper))\n", " lower_ = vbt.to_1d_array_nb(np.asarray(lower))\n", " sl_stop_ = vbt.to_1d_array_nb(np.asarray(sl_stop))\n", " tsl_stop_ = vbt.to_1d_array_nb(np.asarray(tsl_stop))\n", " tp_stop_ = vbt.to_1d_array_nb(np.asarray(tp_stop))\n", "\n", " total_return = np.empty(n_params, dtype=np.float_)\n", " expectancy = np.empty(n_params, dtype=np.float_)\n", "\n", " for i in range(n_params):\n", " total_return[i], expectancy[i] = pt_pipeline_nb(\n", " open,\n", " high,\n", " low,\n", " close,\n", " window=vbt.flex_select_1d_nb(window_, i),\n", " upper=vbt.flex_select_1d_nb(upper_, i),\n", " lower=vbt.flex_select_1d_nb(lower_, i),\n", " sl_stop=vbt.flex_select_1d_nb(sl_stop_, i),\n", " tsl_stop=vbt.flex_select_1d_nb(tsl_stop_, i),\n", " tp_stop=vbt.flex_select_1d_nb(tp_stop_, i),\n", " )\n", " return total_return, expectancy" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "pt_pipeline_mult_nb(\n", " 3,\n", " data.open.values,\n", " data.high.values,\n", " data.low.values,\n", " data.close.values,\n", " window=np.array([10, 20, 30])\n", ")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "chunked_pt_pipeline = vbt.chunked(\n", " pt_pipeline_mult_nb,\n", " size=vbt.ArgSizer(arg_query=\"n_params\"),\n", " arg_take_spec=dict(\n", " n_params=vbt.CountAdapter(),\n", " open=None,\n", " high=None,\n", " low=None,\n", " close=None,\n", " window=vbt.FlexArraySlicer(),\n", " upper=vbt.FlexArraySlicer(),\n", " lower=vbt.FlexArraySlicer(),\n", " sl_stop=vbt.FlexArraySlicer(),\n", " tsl_stop=vbt.FlexArraySlicer(),\n", " tp_stop=vbt.FlexArraySlicer()\n", " ),\n", " chunk_len=1000,\n", " merge_func=\"concat\",\n", " execute_kwargs=dict(\n", " chunk_len=\"auto\",\n", " engine=\"threadpool\",\n", " )\n", ")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "param_product, param_index = vbt.combine_params(\n", " dict(\n", " window=vbt.Param(WINDOW_SPACE),\n", " upper=vbt.Param(UPPER_SPACE),\n", " lower=vbt.Param(LOWER_SPACE)\n", " )\n", ")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "COPT_FILE = \"temp/chunked_opt.pickle\"\n", "\n", "# vbt.remove_file(COPT_FILE, missing_ok=True)\n", "if not vbt.file_exists(COPT_FILE):\n", " chunked_opt = chunked_pt_pipeline(\n", " len(param_index),\n", " data.open.values,\n", " data.high.values,\n", " data.low.values,\n", " data.close.values,\n", " window=param_product[\"window\"],\n", " upper=param_product[\"upper\"],\n", " lower=param_product[\"lower\"]\n", " )\n", " vbt.save(chunked_opt, COPT_FILE)\n", "else:\n", " chunked_opt = vbt.load(COPT_FILE)\n", "\n", "total_return, expectancy = chunked_opt" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "total_return = pd.Series(total_return, index=param_index)\n", "expectancy = pd.Series(expectancy, index=param_index)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "GRID_LEN = len(WINDOW_SPACE) * \\\n", " len(UPPER_SPACE) * \\\n", " len(LOWER_SPACE) * \\\n", " len(STOP_SPACE) ** 3\n", "print(GRID_LEN)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "GRID = dict(\n", " window=WINDOW_SPACE,\n", " upper=UPPER_SPACE,\n", " lower=LOWER_SPACE,\n", " sl_stop=STOP_SPACE,\n", " tsl_stop=STOP_SPACE,\n", " tp_stop=STOP_SPACE,\n", ")\n", "vbt.pprint(vbt.pick_from_param_grid(GRID, 123_456_789))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "FOUND_FILE = \"temp/found.pickle\"\n", "BEST_N = 100\n", "BEST_TH = 1.0\n", "CHUNK_LEN = 10_000\n", "\n", "# vbt.remove_file(FOUND_FILE, missing_ok=True)\n", "if vbt.file_exists(FOUND_FILE):\n", " found = vbt.load(FOUND_FILE)\n", "else:\n", " found = None\n", "with (\n", " vbt.ProgressBar(\n", " desc=\"Found\", \n", " initial=0 if found is None else len(found),\n", " total=BEST_N\n", " ) as pbar1,\n", " vbt.ProgressBar(\n", " desc=\"Processed\"\n", " ) as pbar2\n", "):\n", " while found is None or len(found) < BEST_N:\n", " param_df = pd.DataFrame([\n", " vbt.pick_from_param_grid(GRID) \n", " for _ in range(CHUNK_LEN)\n", " ])\n", " param_index = pd.MultiIndex.from_frame(param_df)\n", " _, expectancy = chunked_pt_pipeline(\n", " CHUNK_LEN,\n", " data.open.values,\n", " data.high.values,\n", " data.low.values,\n", " data.close.values,\n", " window=param_df[\"window\"],\n", " upper=param_df[\"upper\"],\n", " lower=param_df[\"lower\"],\n", " sl_stop=param_df[\"sl_stop\"],\n", " tsl_stop=param_df[\"tsl_stop\"],\n", " tp_stop=param_df[\"tp_stop\"],\n", " _chunk_len=None,\n", " _execute_kwargs=dict(\n", " chunk_len=None\n", " )\n", " )\n", " expectancy = pd.Series(expectancy, index=param_index)\n", " best_mask = expectancy >= BEST_TH\n", " if best_mask.any():\n", " best = expectancy[best_mask]\n", " if found is None:\n", " found = best\n", " else:\n", " found = pd.concat((found, best))\n", " found = found[~found.index.duplicated(keep=\"first\")]\n", " vbt.save(found, FOUND_FILE)\n", " pbar1.update_to(len(found))\n", " pbar1.refresh()\n", " pbar2.update(len(expectancy))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def get_param_median(param):\n", " return found.index.get_level_values(param).to_series().median()\n", "\n", "pt_pipeline_nb(\n", " data.open.values, \n", " data.high.values, \n", " data.low.values, \n", " data.close.values,\n", " window=int(get_param_median(\"window\")),\n", " upper=get_param_median(\"upper\"),\n", " lower=get_param_median(\"lower\"),\n", " sl_stop=get_param_median(\"sl_stop\"),\n", " tsl_stop=get_param_median(\"tsl_stop\"),\n", " tp_stop=get_param_median(\"tp_stop\")\n", ")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import optuna\n", "\n", "optuna.logging.disable_default_handler()\n", "optuna.logging.set_verbosity(optuna.logging.WARNING)\n", "\n", "def objective(trial):\n", " window = trial.suggest_categorical(\"window\", WINDOW_SPACE)\n", " upper = trial.suggest_categorical(\"upper\", UPPER_SPACE)\n", " lower = trial.suggest_categorical(\"lower\", LOWER_SPACE)\n", " sl_stop = trial.suggest_categorical(\"sl_stop\", STOP_SPACE)\n", " tsl_stop = trial.suggest_categorical(\"tsl_stop\", STOP_SPACE)\n", " tp_stop = trial.suggest_categorical(\"tp_stop\", STOP_SPACE)\n", " total_return, expectancy = pt_pipeline_nb(\n", " data.open.values,\n", " data.high.values,\n", " data.low.values,\n", " data.close.values,\n", " window=window,\n", " upper=upper,\n", " lower=lower,\n", " sl_stop=sl_stop,\n", " tsl_stop=tsl_stop,\n", " tp_stop=tp_stop\n", " )\n", " if np.isnan(total_return):\n", " raise optuna.TrialPruned()\n", " if np.isnan(expectancy):\n", " raise optuna.TrialPruned()\n", " return total_return, expectancy\n", "\n", "study = optuna.create_study(directions=[\"maximize\", \"maximize\"])\n", "study.optimize(objective, n_trials=1000)\n", "\n", "trials_df = study.trials_dataframe(attrs=[\"params\", \"values\"])\n", "trials_df.set_index([\n", " \"params_window\", \n", " \"params_upper\", \n", " \"params_lower\",\n", " \"params_sl_stop\",\n", " \"params_tsl_stop\",\n", " \"params_tp_stop\"\n", "], inplace=True)\n", "trials_df.index.rename([\n", " \"window\", \n", " \"upper\", \n", " \"lower\",\n", " \"sl_stop\",\n", " \"tsl_stop\",\n", " \"tp_stop\"\n", "], inplace=True)\n", "trials_df.columns = [\"total_return\", \"expectancy\"]\n", "trials_df = trials_df[~trials_df.index.duplicated(keep=\"first\")]\n", "print(trials_df.sort_values(by=\"total_return\", ascending=False))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Level: Architect" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "InOutputs = namedtuple(\"InOutputs\", [\"spread\", \"zscore\"])\n", "\n", "@njit(nogil=True, boundscheck=True)\n", "def can_execute_nb(c, wait_days):\n", " if c.order_counts[c.col] == 0:\n", " return True\n", " last_order = c.order_records[c.order_counts[c.col] - 1, c.col]\n", " ns_delta = c.index[c.i] - c.index[last_order.idx]\n", " if ns_delta >= wait_days * vbt.dt_nb.d_ns:\n", " return True\n", " return False\n", "\n", "@njit(nogil=True, boundscheck=True)\n", "def create_signals_nb(c, upper, lower, wait_days):\n", " _upper = vbt.pf_nb.select_nb(c, upper)\n", " _lower = vbt.pf_nb.select_nb(c, lower)\n", " _wait_days = vbt.pf_nb.select_nb(c, wait_days)\n", "\n", " if c.i > 0:\n", " prev_zscore = c.in_outputs.zscore[c.i - 1, c.group]\n", " zscore = c.in_outputs.zscore[c.i, c.group]\n", " if prev_zscore < _upper and zscore > _upper:\n", " if can_execute_nb(c, _wait_days):\n", " if c.col % 2 == 0:\n", " return False, False, True, False\n", " return True, False, False, False\n", " if prev_zscore > _lower and zscore < _lower:\n", " if can_execute_nb(c, _wait_days):\n", " if c.col % 2 == 0:\n", " return True, False, False, False\n", " return False, False, True, False\n", " return False, False, False, False\n", "\n", "@njit(nogil=True, boundscheck=True)\n", "def signal_func_nb(c, window, upper, lower, wait_days):\n", " _window = vbt.pf_nb.select_nb(c, window)\n", " \n", " if c.col % 2 == 0:\n", " x = vbt.pf_nb.select_nb(c, c.close, col=c.col)\n", " y = vbt.pf_nb.select_nb(c, c.close, col=c.col + 1)\n", " c.in_outputs.spread[c.i, c.group] = np.log(y) - np.log(x)\n", " \n", " window_start = c.i - _window + 1\n", " window_end = c.i + 1\n", " if window_start >= 0:\n", " s = c.in_outputs.spread[window_start : window_end, c.group]\n", " s_mean = np.nanmean(s)\n", " s_std = np.nanstd(s)\n", " c.in_outputs.zscore[c.i, c.group] = (s[-1] - s_mean) / s_std\n", " return create_signals_nb(c, upper, lower, wait_days)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "WAIT_DAYS = 30\n", "\n", "def iter_pt_portfolio(\n", " window=WINDOW, \n", " upper=UPPER, \n", " lower=LOWER, \n", " wait_days=WAIT_DAYS,\n", " signal_func_nb=signal_func_nb,\n", " more_signal_args=(),\n", " **kwargs\n", "):\n", " return vbt.Portfolio.from_signals(\n", " data,\n", " broadcast_named_args=dict(\n", " window=window,\n", " upper=upper,\n", " lower=lower,\n", " wait_days=wait_days\n", " ),\n", " in_outputs=vbt.RepEval(\"\"\"\n", " InOutputs(\n", " np.full((target_shape[0], target_shape[1] // 2), np.nan), \n", " np.full((target_shape[0], target_shape[1] // 2), np.nan)\n", " )\n", " \"\"\", context=dict(InOutputs=InOutputs)),\n", " signal_func_nb=signal_func_nb,\n", " signal_args=(\n", " vbt.Rep(\"window\"),\n", " vbt.Rep(\"upper\"),\n", " vbt.Rep(\"lower\"),\n", " vbt.Rep(\"wait_days\"),\n", " *more_signal_args\n", " ),\n", " size=10,\n", " size_type=\"valuepercent100\",\n", " group_by=vbt.ExceptLevel(\"symbol\"),\n", " cash_sharing=True,\n", " call_seq=\"auto\",\n", " delta_format=\"percent100\",\n", " stop_exit_price=\"close\",\n", " **kwargs\n", " )" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "pf = iter_pt_portfolio()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "fig = vbt.make_subplots(\n", " rows=2, \n", " cols=1, \n", " vertical_spacing=0,\n", " shared_xaxes=True\n", ")\n", "zscore = pf.get_in_output(\"zscore\").rename(\"Z-score\")\n", "zscore.vbt.plot(\n", " add_trace_kwargs=dict(row=1, col=1),\n", " fig=fig\n", ")\n", "fig.add_hline(row=1, y=UPPER, line_color=\"orangered\", line_dash=\"dot\")\n", "fig.add_hline(row=1, y=0, line_color=\"yellow\", line_dash=\"dot\")\n", "fig.add_hline(row=1, y=LOWER, line_color=\"limegreen\", line_dash=\"dot\")\n", "orders = pf.orders.regroup(group_by=False).iloc[:, 0]\n", "exit_mask = orders.side_sell.get_pd_mask(idx_arr=\"signal_idx\")\n", "entry_mask = orders.side_buy.get_pd_mask(idx_arr=\"signal_idx\")\n", "upper_crossed = zscore.vbt.crossed_above(UPPER)\n", "lower_crossed = zscore.vbt.crossed_below(LOWER)\n", "(upper_crossed & ~exit_mask).vbt.signals.plot_as_exits(\n", " pf.get_in_output(\"zscore\"),\n", " trace_kwargs=dict(\n", " name=\"Exits (ignored)\", \n", " marker=dict(color=\"lightgray\"), \n", " opacity=0.5\n", " ),\n", " add_trace_kwargs=dict(row=1, col=1),\n", " fig=fig\n", ")\n", "(lower_crossed & ~entry_mask).vbt.signals.plot_as_entries(\n", " pf.get_in_output(\"zscore\"),\n", " trace_kwargs=dict(\n", " name=\"Entries (ignored)\", \n", " marker=dict(color=\"lightgray\"), \n", " opacity=0.5\n", " ),\n", " add_trace_kwargs=dict(row=1, col=1),\n", " fig=fig\n", ")\n", "exit_mask.vbt.signals.plot_as_exits(\n", " pf.get_in_output(\"zscore\"),\n", " add_trace_kwargs=dict(row=1, col=1),\n", " fig=fig\n", ")\n", "entry_mask.vbt.signals.plot_as_entries(\n", " pf.get_in_output(\"zscore\"),\n", " add_trace_kwargs=dict(row=1, col=1),\n", " fig=fig\n", ")\n", "pf.plot_allocations(\n", " add_trace_kwargs=dict(row=2, col=1),\n", " fig=fig\n", ")\n", "rebalancing_dates = data.index[np.unique(orders.idx.values)]\n", "for date in rebalancing_dates:\n", " fig.add_vline(row=2, x=date, line_color=\"teal\", line_dash=\"dot\")\n", "fig.update_layout(height=600)\n", "fig.show_svg()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "WAIT_SPACE = np.arange(30, 370, 5).tolist()\n", "\n", "pf = iter_pt_portfolio(wait_days=vbt.Param(WAIT_SPACE))\n", "pf.orders.count().vbt.scatterplot(\n", " xaxis_title=\"Wait days\",\n", " yaxis_title=\"Order count\"\n", ").show_svg()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "with (vbt.Timer() as timer, vbt.MemTracer() as tracer):\n", " iter_pt_portfolio(wait_days=vbt.Param(WAIT_SPACE))\n", "print(timer.elapsed())\n", "print(tracer.peak_usage())" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "zscore_state_dt = np.dtype(\n", " [\n", " (\"cumsum\", np.float_),\n", " (\"cumsum_sq\", np.float_),\n", " (\"nancnt\", np.int_)\n", " ],\n", " align=True,\n", ")\n", "\n", "@njit(nogil=True, boundscheck=True)\n", "def stream_signal_func_nb(\n", " c, \n", " window, \n", " upper, \n", " lower, \n", " wait_days, \n", " zscore_state\n", "):\n", " _window = vbt.pf_nb.select_nb(c, window)\n", " \n", " if c.col % 2 == 0:\n", " x = vbt.pf_nb.select_nb(c, c.close, col=c.col)\n", " y = vbt.pf_nb.select_nb(c, c.close, col=c.col + 1)\n", " c.in_outputs.spread[c.i, c.group] = np.log(y) - np.log(x)\n", " \n", " value = c.in_outputs.spread[c.i, c.group]\n", " pre_i = c.i - _window\n", " if pre_i >= 0:\n", " pre_window_value = c.in_outputs.spread[pre_i, c.group]\n", " else:\n", " pre_window_value = np.nan\n", " zscore_in_state = vbt.enums.RollZScoreAIS(\n", " i=c.i,\n", " value=value,\n", " pre_window_value=pre_window_value,\n", " cumsum=zscore_state[\"cumsum\"][c.group],\n", " cumsum_sq=zscore_state[\"cumsum_sq\"][c.group],\n", " nancnt=zscore_state[\"nancnt\"][c.group],\n", " window=_window,\n", " minp=_window,\n", " ddof=0\n", " )\n", " zscore_out_state = vbt.nb.rolling_zscore_acc_nb(zscore_in_state)\n", " c.in_outputs.zscore[c.i, c.group] = zscore_out_state.value\n", " zscore_state[\"cumsum\"][c.group] = zscore_out_state.cumsum\n", " zscore_state[\"cumsum_sq\"][c.group] = zscore_out_state.cumsum_sq\n", " zscore_state[\"nancnt\"][c.group] = zscore_out_state.nancnt\n", " \n", " return create_signals_nb(c, upper, lower, wait_days)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from functools import partial\n", "\n", "stream_pt_portfolio = partial(\n", " iter_pt_portfolio,\n", " signal_func_nb=stream_signal_func_nb,\n", " more_signal_args=(\n", " vbt.RepEval(\n", " \"\"\"\n", " zscore_state = np.empty(target_shape[1] // 2, dtype=zscore_state_dt)\n", " zscore_state[\"cumsum\"] = 0.0\n", " zscore_state[\"cumsum_sq\"] = 0.0\n", " zscore_state[\"nancnt\"] = 0\n", " zscore_state\n", " \"\"\", \n", " context=dict(zscore_state_dt=zscore_state_dt)\n", " ),\n", " )\n", ")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "stream_pf = stream_pt_portfolio()\n", "print(stream_pf.total_return)\n", "\n", "pf = iter_pt_portfolio()\n", "print(pf.total_return)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "stream_pt_portfolio(wait_days=vbt.Param(WAIT_SPACE)) # compilation\n", "with (vbt.Timer() as timer, vbt.MemTracer() as tracer):\n", " stream_pt_portfolio(wait_days=vbt.Param(WAIT_SPACE))\n", "print(timer.elapsed())\n", "print(tracer.peak_usage())" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "chunked_stream_pt_portfolio = partial(\n", " stream_pt_portfolio,\n", " chunked=dict(\n", " engine=\"threadpool\",\n", " arg_take_spec=dict(\n", " signal_args=vbt.ArgsTaker(\n", " vbt.flex_array_gl_slicer,\n", " vbt.flex_array_gl_slicer,\n", " vbt.flex_array_gl_slicer,\n", " vbt.flex_array_gl_slicer,\n", " vbt.ArraySlicer(axis=0)\n", " ),\n", " in_outputs=vbt.SequenceTaker([\n", " vbt.ArraySlicer(axis=1),\n", " vbt.ArraySlicer(axis=1)\n", " ])\n", " )\n", " )\n", ")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "chunked_stream_pt_portfolio(wait_days=vbt.Param(WAIT_SPACE)) # compilation\n", "with (vbt.Timer() as timer, vbt.MemTracer() as tracer):\n", " chunked_stream_pt_portfolio(wait_days=vbt.Param(WAIT_SPACE))\n", "\n", "print(timer.elapsed())\n", "print(tracer.peak_usage())" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.12" } }, "nbformat": 4, "nbformat_minor": 4 }