Initial commit after copying files from flawed repository

This commit is contained in:
David Brazda
2024-08-30 20:49:53 +02:00
commit c11ed9d474
47 changed files with 40520 additions and 0 deletions

View File

@ -0,0 +1,771 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Notebook for the article [Stop Loss, Trailing Stop, or Take Profit? 2 Million Backtests Shed Light](https://polakowo.medium.com/stop-loss-trailing-stop-or-take-profit-2-million-backtests-shed-light-dde23bda40be)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from vectorbtpro import *\n",
"# whats_imported()\n",
"\n",
"import ipywidgets\n",
"\n",
"vbt.settings.set_theme('dark')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"seed = 42\n",
"symbols = [\n",
" \"BTC-USD\", \"ETH-USD\", \"XRP-USD\", \"BCH-USD\", \"LTC-USD\", \n",
" \"BNB-USD\", \"EOS-USD\", \"XLM-USD\", \"XMR-USD\", \"ADA-USD\"\n",
"]\n",
"start_date = vbt.utc_timestamp(\"2018-01-01\")\n",
"end_date = vbt.utc_timestamp(\"2021-01-01\")\n",
"time_delta = end_date - start_date\n",
"window_len = vbt.timedelta(\"180d\")\n",
"window_cnt = 400\n",
"exit_types = [\"SL\", \"TS\", \"TP\", \"Random\", \"Holding\"]\n",
"step = 0.01\n",
"stops = np.arange(step, 1 + step, step)\n",
"\n",
"vbt.settings.wrapping[\"freq\"] = \"d\"\n",
"vbt.settings.plotting[\"layout\"][\"template\"] = \"vbt_dark\"\n",
"vbt.settings.portfolio[\"init_cash\"] = 100.\n",
"\n",
"print(pd.Series({\n",
" \"Start date\": start_date,\n",
" \"End date\": end_date,\n",
" \"Time period (days)\": time_delta.days,\n",
" \"Assets\": len(symbols),\n",
" \"Window length\": window_len,\n",
" \"Windows\": window_cnt,\n",
" \"Exit types\": len(exit_types),\n",
" \"Stop values\": len(stops),\n",
" \"Tests per asset\": window_cnt * len(stops) * len(exit_types),\n",
" \"Tests per window\": len(symbols) * len(stops) * len(exit_types),\n",
" \"Tests per exit type\": len(symbols) * window_cnt * len(stops),\n",
" \"Tests per stop type and value\": len(symbols) * window_cnt,\n",
" \"Tests total\": len(symbols) * window_cnt * len(stops) * len(exit_types)\n",
"}))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"cols = [\"Open\", \"Low\", \"High\", \"Close\", \"Volume\"]\n",
"yfdata = vbt.YFData.pull(symbols, start=start_date, end=end_date)\n",
"\n",
"print(yfdata.data.keys())\n",
"print(yfdata.data[\"BTC-USD\"].shape)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"yfdata.plot(symbol=\"BTC-USD\").show_svg()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ohlcv = yfdata.concat()\n",
"\n",
"print(ohlcv.keys())\n",
"print(ohlcv[\"Open\"].shape)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"splitter = vbt.Splitter.from_n_rolling(\n",
" ohlcv[\"Open\"].index, \n",
" n=window_cnt,\n",
" length=window_len.days\n",
")\n",
"\n",
"split_ohlcv = {}\n",
"for k, v in ohlcv.items():\n",
" split_ohlcv[k] = splitter.take(v, into=\"reset_stacked\")\n",
"print(split_ohlcv[\"Open\"].shape)\n",
"\n",
"split_indexes = splitter.take(ohlcv[\"Open\"].index)\n",
"print(split_indexes)\n",
"print(split_indexes[10])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(split_ohlcv[\"Open\"].columns)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"entries = pd.DataFrame.vbt.signals.empty_like(split_ohlcv[\"Open\"])\n",
"entries.iloc[0, :] = True\n",
"\n",
"print(entries.shape)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# We use OHLCSTX instead of built-in stop-loss in Portfolio.from_signals\n",
"# because we want to analyze signals before simulation + it's easier to construct param grids\n",
"# For reality check, run the same setup using Portfolio.from_signals alone\n",
"\n",
"sl_ohlcstx = vbt.OHLCSTX.run(\n",
" entries, \n",
" entry_price=split_ohlcv[\"Close\"], \n",
" open=split_ohlcv[\"Open\"], \n",
" high=split_ohlcv[\"High\"], \n",
" low=split_ohlcv[\"Low\"], \n",
" close=split_ohlcv[\"Close\"], \n",
" sl_stop=list(stops),\n",
" stop_type=None\n",
")\n",
"sl_exits = sl_ohlcstx.exits.copy()\n",
"sl_price = sl_ohlcstx.close.copy()\n",
"sl_price[sl_exits] = sl_ohlcstx.stop_price\n",
"del sl_ohlcstx\n",
"\n",
"print(sl_exits.shape)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"tsl_ohlcstx = vbt.OHLCSTX.run(\n",
" entries, \n",
" entry_price=split_ohlcv[\"Close\"], \n",
" open=split_ohlcv[\"Open\"], \n",
" high=split_ohlcv[\"High\"], \n",
" low=split_ohlcv[\"Low\"], \n",
" close=split_ohlcv[\"Close\"], \n",
" tsl_stop=list(stops),\n",
" stop_type=None\n",
")\n",
"tsl_exits = tsl_ohlcstx.exits.copy()\n",
"tsl_price = tsl_ohlcstx.close.copy()\n",
"tsl_price[tsl_exits] = tsl_ohlcstx.stop_price\n",
"del tsl_ohlcstx\n",
"\n",
"print(tsl_exits.shape)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"tp_ohlcstx = vbt.OHLCSTX.run(\n",
" entries, \n",
" entry_price=split_ohlcv[\"Close\"], \n",
" open=split_ohlcv[\"Open\"], \n",
" high=split_ohlcv[\"High\"], \n",
" low=split_ohlcv[\"Low\"], \n",
" close=split_ohlcv[\"Close\"], \n",
" tp_stop=list(stops),\n",
" stop_type=None\n",
")\n",
"tp_exits = tp_ohlcstx.exits.copy()\n",
"tp_price = tp_ohlcstx.close.copy()\n",
"tp_price[tp_exits] = tp_ohlcstx.stop_price\n",
"del tp_ohlcstx\n",
"\n",
"print(tp_exits.shape)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def rename_stop_level(df):\n",
" return df.vbt.rename_levels({\n",
" \"ohlcstx_sl_stop\": \"stop_value\",\n",
" \"ohlcstx_tsl_stop\": \"stop_value\",\n",
" \"ohlcstx_tp_stop\": \"stop_value\"\n",
" }, strict=False)\n",
"\n",
"sl_exits = rename_stop_level(sl_exits)\n",
"tsl_exits = rename_stop_level(tsl_exits)\n",
"tp_exits = rename_stop_level(tp_exits)\n",
"\n",
"sl_price = rename_stop_level(sl_price)\n",
"tsl_price = rename_stop_level(tsl_price)\n",
"tp_price = rename_stop_level(tp_price)\n",
"\n",
"print(sl_exits.columns)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(pd.Series({\n",
" \"SL\": sl_exits.vbt.signals.total().mean(),\n",
" \"TS\": tsl_exits.vbt.signals.total().mean(),\n",
" \"TP\": tp_exits.vbt.signals.total().mean()\n",
"}, name=\"avg_num_signals\"))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def groupby_stop_value(df):\n",
" return df.vbt.signals.total().groupby(\"stop_value\").mean()\n",
"\n",
"pd.DataFrame({\n",
" \"Stop Loss\": groupby_stop_value(sl_exits),\n",
" \"Trailing Stop\": groupby_stop_value(tsl_exits),\n",
" \"Take Profit\": groupby_stop_value(tp_exits)\n",
"}).vbt.plot(\n",
" xaxis_title=\"Stop value\", \n",
" yaxis_title=\"Avg number of signals\"\n",
").show_svg()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"sl_exits.iloc[-1, :] = True\n",
"tsl_exits.iloc[-1, :] = True\n",
"tp_exits.iloc[-1, :] = True\n",
"\n",
"sl_exits = sl_exits.vbt.signals.first_after(entries)\n",
"tsl_exits = tsl_exits.vbt.signals.first_after(entries)\n",
"tp_exits = tp_exits.vbt.signals.first_after(entries)\n",
"\n",
"print(pd.Series({\n",
" \"SL\": sl_exits.vbt.signals.total().mean(),\n",
" \"TS\": tsl_exits.vbt.signals.total().mean(),\n",
" \"TP\": tp_exits.vbt.signals.total().mean()\n",
"}, name=\"avg_num_signals\"))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"hold_exits = pd.DataFrame.vbt.signals.empty_like(sl_exits)\n",
"hold_exits.iloc[-1, :] = True\n",
"hold_price = vbt.broadcast_to(split_ohlcv[\"Close\"], sl_price)\n",
"\n",
"print(hold_exits.shape)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"rand_exits = hold_exits.vbt.shuffle(seed=seed)\n",
"rand_price = hold_price\n",
"\n",
"print(rand_exits.shape)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"exits = pd.DataFrame.vbt.concat(\n",
" sl_exits, \n",
" tsl_exits, \n",
" tp_exits, \n",
" rand_exits, \n",
" hold_exits, \n",
" keys=pd.Index(exit_types, name=\"exit_type\")\n",
")\n",
"del sl_exits\n",
"del tsl_exits\n",
"del tp_exits\n",
"del rand_exits\n",
"del hold_exits\n",
"\n",
"print(exits.shape)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"price = pd.DataFrame.vbt.concat(\n",
" sl_price, \n",
" tsl_price, \n",
" tp_price, \n",
" rand_price, \n",
" hold_price, \n",
" keys=pd.Index(exit_types, name=\"exit_type\")\n",
")\n",
"del sl_price\n",
"del tsl_price\n",
"del tp_price\n",
"del rand_price\n",
"del hold_price\n",
"\n",
"print(price.shape)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(exits.columns)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(exits.vbt.getsize())"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(price.vbt.getsize())"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"avg_distance = entries.vbt.signals.between_ranges(target=exits)\\\n",
" .duration.mean()\\\n",
" .groupby([\"exit_type\", \"stop_value\"])\\\n",
" .mean()\\\n",
" .unstack(level=\"exit_type\")\n",
"\n",
"print(avg_distance.mean())"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"avg_distance[exit_types].vbt.plot(\n",
" xaxis_title=\"Stop value\", \n",
" yaxis_title=\"Avg distance to entry\"\n",
").show_svg()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%time\n",
"pf = vbt.Portfolio.from_signals(\n",
" split_ohlcv[\"Close\"], \n",
" entries, \n",
" exits, \n",
" price=price\n",
")\n",
"\n",
"print(len(pf.orders))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"total_return = pf.total_return\n",
"del pf\n",
"\n",
"print(total_return.shape)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import gc\n",
"\n",
"total_returns = []\n",
"for i in vbt.ProgressBar(range(len(exit_types))):\n",
" chunk_mask = exits.columns.get_level_values(\"exit_type\") == exit_types[i]\n",
" chunk_pf = vbt.Portfolio.from_signals(\n",
" split_ohlcv[\"Close\"], \n",
" entries, \n",
" exits.loc[:, chunk_mask],\n",
" price=price.loc[:, chunk_mask]\n",
" )\n",
" total_returns.append(chunk_pf.total_return)\n",
" \n",
" del chunk_pf\n",
" gc.collect()\n",
" \n",
"total_return = pd.concat(total_returns)\n",
"\n",
"print(total_return.shape)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"return_by_type = total_return.unstack(level=\"exit_type\")[exit_types]\n",
"\n",
"print(return_by_type[\"Holding\"].describe(percentiles=[]))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"purple_color = vbt.settings[\"plotting\"][\"color_schema\"][\"purple\"]\n",
"return_by_type[\"Holding\"].vbt.histplot(\n",
" xaxis_title=\"Total return\",\n",
" xaxis_tickformat=\".2%\",\n",
" yaxis_title=\"Count\",\n",
" trace_kwargs=dict(marker_color=purple_color)\n",
").show_svg()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(pd.DataFrame({\n",
" \"Mean\": return_by_type.mean(),\n",
" \"Median\": return_by_type.median(),\n",
" \"Std\": return_by_type.std(),\n",
"}))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"return_by_type.vbt.boxplot(\n",
" trace_kwargs=dict(boxpoints=False),\n",
" yaxis_title=\"Total return\",\n",
" yaxis_tickformat=\".2%\"\n",
").show_svg()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print((return_by_type > 0).mean().rename(\"win_rate\"))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"init_cash = vbt.settings.portfolio[\"init_cash\"]\n",
"\n",
"def get_expectancy(return_by_type, level_name):\n",
" grouped = return_by_type.groupby(level_name, axis=0)\n",
" win_rate = grouped.apply(lambda x: (x > 0).mean())\n",
" avg_win = grouped.apply(lambda x: init_cash * x[x > 0].mean())\n",
" avg_win = avg_win.fillna(0)\n",
" avg_loss = grouped.apply(lambda x: init_cash * x[x < 0].mean())\n",
" avg_loss = avg_loss.fillna(0)\n",
" return win_rate * avg_win - (1 - win_rate) * np.abs(avg_loss)\n",
" \n",
"expectancy_by_stop = get_expectancy(return_by_type, \"stop_value\")\n",
"\n",
"print(expectancy_by_stop.mean())"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"expectancy_by_stop.vbt.plot(\n",
" xaxis_title=\"Stop value\", \n",
" yaxis_title=\"Expectancy\"\n",
").show_svg()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"return_values = np.sort(return_by_type[\"Holding\"].values)\n",
"idxs = np.ceil(np.linspace(0, len(return_values) - 1, 21)).astype(int)\n",
"bins = return_values[idxs][:-1]\n",
"\n",
"def bin_return(return_by_type):\n",
" classes = pd.cut(return_by_type[\"Holding\"], bins=bins, right=True)\n",
" new_level = np.array(classes.apply(lambda x: x.right))\n",
" new_level = pd.Index(new_level, name=\"bin_right\")\n",
" return return_by_type.vbt.add_levels(new_level, axis=0)\n",
"\n",
"binned_return_by_type = bin_return(return_by_type)\n",
"\n",
"expectancy_by_bin = get_expectancy(binned_return_by_type, \"bin_right\")\n",
"\n",
"expectancy_by_bin.vbt.plot(\n",
" trace_kwargs=dict(mode=\"lines\"),\n",
" xaxis_title=\"Total return of holding\",\n",
" xaxis_tickformat=\".2%\",\n",
" yaxis_title=\"Expectancy\"\n",
").show_svg()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"range_starts = pd.DatetimeIndex(list(map(lambda x: x[0], split_indexes)))\n",
"range_ends = pd.DatetimeIndex(list(map(lambda x: x[-1], split_indexes)))\n",
"\n",
"symbol_lvl = return_by_type.index.get_level_values(\"symbol\")\n",
"split_lvl = return_by_type.index.get_level_values(\"split\")\n",
"range_start_lvl = range_starts[split_lvl]\n",
"range_end_lvl = range_ends[split_lvl]\n",
"\n",
"asset_multi_select = ipywidgets.SelectMultiple(\n",
" options=symbols,\n",
" value=symbols,\n",
" rows=len(symbols),\n",
" description=\"Symbols\"\n",
")\n",
"dates = np.unique(yfdata.wrapper.index)\n",
"date_range_slider = ipywidgets.SelectionRangeSlider(\n",
" options=dates,\n",
" index=(0, len(dates)-1),\n",
" orientation=\"horizontal\",\n",
" readout=False,\n",
" continuous_update=False\n",
")\n",
"range_start_label = ipywidgets.Label()\n",
"range_end_label = ipywidgets.Label()\n",
"metric_dropdown = ipywidgets.Dropdown(\n",
" options=[\"Mean\", \"Median\", \"Win Rate\", \"Expectancy\"],\n",
" value=\"Expectancy\"\n",
")\n",
"stop_scatter = vbt.Scatter(\n",
" trace_names=exit_types,\n",
" x_labels=stops, \n",
" xaxis_title=\"Stop value\", \n",
" yaxis_title=\"Expectancy\"\n",
")\n",
"stop_scatter_img = ipywidgets.Image(\n",
" format=\"png\",\n",
" width=stop_scatter.fig.layout.width,\n",
" height=stop_scatter.fig.layout.height\n",
")\n",
"bin_scatter = vbt.Scatter(\n",
" trace_names=exit_types,\n",
" x_labels=expectancy_by_bin.index, \n",
" trace_kwargs=dict(mode=\"lines\"),\n",
" xaxis_title=\"Total return of holding\",\n",
" xaxis_tickformat=\"%\",\n",
" yaxis_title=\"Expectancy\"\n",
")\n",
"bin_scatter_img = ipywidgets.Image(\n",
" format=\"png\",\n",
" width=bin_scatter.fig.layout.width,\n",
" height=bin_scatter.fig.layout.height\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def update_scatter(*args, **kwargs):\n",
" _symbols = asset_multi_select.value\n",
" _from = date_range_slider.value[0]\n",
" _to = date_range_slider.value[1]\n",
" _metric_name = metric_dropdown.value\n",
" \n",
" range_mask = (range_start_lvl >= _from) & (range_end_lvl <= _to)\n",
" asset_mask = symbol_lvl.isin(_symbols)\n",
" filt = return_by_type[range_mask & asset_mask]\n",
" \n",
" filt_binned = bin_return(filt)\n",
" if _metric_name == \"Mean\":\n",
" filt_metric = filt.groupby(\"stop_value\").mean()\n",
" filt_bin_metric = filt_binned.groupby(\"bin_right\").mean()\n",
" elif _metric_name == \"Median\":\n",
" filt_metric = filt.groupby(\"stop_value\").median()\n",
" filt_bin_metric = filt_binned.groupby(\"bin_right\").median()\n",
" elif _metric_name == \"Win Rate\":\n",
" filt_metric = (filt > 0).groupby(\"stop_value\").mean()\n",
" filt_bin_metric = (filt_binned > 0).groupby(\"bin_right\").mean()\n",
" elif _metric_name == \"Expectancy\":\n",
" filt_metric = get_expectancy(filt, \"stop_value\")\n",
" filt_bin_metric = get_expectancy(filt_binned, \"bin_right\")\n",
" \n",
" stop_scatter.fig.update_layout(yaxis_title=_metric_name)\n",
" stop_scatter.update(filt_metric)\n",
" stop_scatter_img.value = stop_scatter.fig.to_image(format=\"png\")\n",
" \n",
" bin_scatter.fig.update_layout(yaxis_title=_metric_name)\n",
" bin_scatter.update(filt_bin_metric)\n",
" bin_scatter_img.value = bin_scatter.fig.to_image(format=\"png\")\n",
" \n",
" range_start_label.value = np.datetime_as_string(_from.to_datetime64(), unit=\"D\")\n",
" range_end_label.value = np.datetime_as_string(_to.to_datetime64(), unit=\"D\")\n",
" \n",
"asset_multi_select.observe(update_scatter, names=\"value\")\n",
"date_range_slider.observe(update_scatter, names=\"value\")\n",
"metric_dropdown.observe(update_scatter, names=\"value\")\n",
"update_scatter()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"dashboard = ipywidgets.VBox([\n",
" asset_multi_select,\n",
" ipywidgets.HBox([\n",
" range_start_label,\n",
" date_range_slider,\n",
" range_end_label\n",
" ]),\n",
" metric_dropdown,\n",
" stop_scatter_img,\n",
" bin_scatter_img\n",
"])\n",
"dashboard"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"dashboard.close()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.12"
},
"widgets": {
"application/vnd.jupyter.widget-state+json": {
"state": {},
"version_major": 2,
"version_minor": 0
}
}
},
"nbformat": 4,
"nbformat_minor": 4
}