{ "cells": [ { "cell_type": "markdown", "id": "c69786c2-7c5b-4b1e-bd0c-52380c8df261", "metadata": {}, "source": [ "# Cross-validation" ] }, { "cell_type": "code", "execution_count": null, "id": "b2751478-95ce-47d2-9e80-4d47ed0c7b36", "metadata": {}, "outputs": [], "source": [ "from vectorbtpro import *\n", "# whats_imported()\n", "\n", "vbt.settings.set_theme(\"dark\")" ] }, { "cell_type": "code", "execution_count": null, "id": "6b45492f-d152-43e2-88a9-ab95bbffd546", "metadata": {}, "outputs": [], "source": [ "data = vbt.BinanceData.pull(\"BTCUSDT\", end=\"2022-11-01 UTC\")\n", "data.index" ] }, { "cell_type": "code", "execution_count": null, "id": "45122110-905e-4549-92f4-205c98bd9faa", "metadata": {}, "outputs": [], "source": [ "@vbt.parameterized(merge_func=\"concat\")\n", "def sma_crossover_perf(data, fast_window, slow_window):\n", " fast_sma = data.run(\"sma\", fast_window, short_name=\"fast_sma\")\n", " slow_sma = data.run(\"sma\", slow_window, short_name=\"slow_sma\")\n", " entries = fast_sma.real_crossed_above(slow_sma)\n", " exits = fast_sma.real_crossed_below(slow_sma)\n", " pf = vbt.Portfolio.from_signals(\n", " data, entries, exits, direction=\"both\")\n", " return pf.sharpe_ratio" ] }, { "cell_type": "code", "execution_count": null, "id": "93b5c9e1-7d18-4268-9c2f-91a5404bba0a", "metadata": {}, "outputs": [], "source": [ "perf = sma_crossover_perf(\n", " data[\"2020\":\"2020\"],\n", " vbt.Param(np.arange(5, 50), condition=\"x < slow_window\"),\n", " vbt.Param(np.arange(5, 50)),\n", " _execute_kwargs=dict(\n", " clear_cache=50,\n", " collect_garbage=50\n", " )\n", ")\n", "perf" ] }, { "cell_type": "code", "execution_count": null, "id": "804d9d36-d0c6-46ab-b0c4-5d00285f6b36", "metadata": {}, "outputs": [], "source": [ "perf.sort_values(ascending=False)" ] }, { "cell_type": "code", "execution_count": null, "id": "5f9a5d58-aebe-4a34-8847-0bc91817e083", "metadata": {}, "outputs": [], "source": [ "best_fast_window, best_slow_window = perf.idxmax()\n", 
"sma_crossover_perf(\n", " data[\"2021\":\"2021\"],\n", " best_fast_window,\n", " best_slow_window\n", ")" ] }, { "cell_type": "code", "execution_count": null, "id": "933ad509-009a-4f7a-9de2-6e04d23a9c6f", "metadata": {}, "outputs": [], "source": [ "data[\"2021\":\"2021\"].run(\"from_holding\").sharpe_ratio" ] }, { "cell_type": "code", "execution_count": null, "id": "075e0d7a-bb55-4d69-bcbe-277497058dc2", "metadata": {}, "outputs": [], "source": [ "start_index = data.index[0]\n", "period = pd.Timedelta(days=180)\n", "all_is_bounds = {}\n", "all_is_bl_perf = {}\n", "all_is_perf = {}\n", "all_oos_bounds = {}\n", "all_oos_bl_perf = {}\n", "all_oos_perf = {}\n", "split_idx = 0\n", "period_idx = 0\n", "\n", "with vbt.ProgressBar() as pbar:\n", " while start_index + 2 * period <= data.index[-1]:\n", " pbar.set_prefix(str(start_index))\n", "\n", " is_start_index = start_index\n", " is_end_index = start_index + period - pd.Timedelta(nanoseconds=1)\n", " is_data = data[is_start_index : is_end_index]\n", " is_bl_perf = is_data.run(\"from_holding\").sharpe_ratio\n", " is_perf = sma_crossover_perf(\n", " is_data,\n", " vbt.Param(np.arange(5, 50), condition=\"x < slow_window\"),\n", " vbt.Param(np.arange(5, 50)),\n", " _execute_kwargs=dict(\n", " clear_cache=50,\n", " collect_garbage=50\n", " )\n", " )\n", "\n", " oos_start_index = start_index + period\n", " oos_end_index = start_index + 2 * period - pd.Timedelta(nanoseconds=1)\n", " oos_data = data[oos_start_index : oos_end_index]\n", " oos_bl_perf = oos_data.run(\"from_holding\").sharpe_ratio\n", " best_fw, best_sw = is_perf.idxmax()\n", " oos_perf = sma_crossover_perf(oos_data, best_fw, best_sw)\n", " oos_perf_index = is_perf.index[is_perf.index == (best_fw, best_sw)]\n", " oos_perf = pd.Series([oos_perf], index=oos_perf_index)\n", "\n", " all_is_bounds[period_idx] = (is_start_index, is_end_index)\n", " all_oos_bounds[period_idx + 1] = (oos_start_index, oos_end_index)\n", " all_is_bl_perf[(split_idx, period_idx)] = 
is_bl_perf\n", "        all_oos_bl_perf[(split_idx, period_idx + 1)] = oos_bl_perf\n", "        all_is_perf[(split_idx, period_idx)] = is_perf\n", "        all_oos_perf[(split_idx, period_idx + 1)] = oos_perf\n", "        start_index = start_index + period\n", "        split_idx += 1\n", "        period_idx += 1\n", "        pbar.update()" ] }, { "cell_type": "code", "execution_count": null, "id": "e9cb8334-c44c-4580-819a-6eb501e6bb1b", "metadata": {}, "outputs": [], "source": [ "is_period_ranges = pd.DataFrame.from_dict(\n", "    all_is_bounds, \n", "    orient=\"index\",\n", "    columns=[\"start\", \"end\"]\n", ")\n", "is_period_ranges.index.name = \"period\"\n", "oos_period_ranges = pd.DataFrame.from_dict(\n", "    all_oos_bounds, \n", "    orient=\"index\",\n", "    columns=[\"start\", \"end\"]\n", ")\n", "oos_period_ranges.index.name = \"period\"\n", "period_ranges = pd.concat((is_period_ranges, oos_period_ranges))\n", "period_ranges = period_ranges.drop_duplicates()\n", "print(period_ranges)" ] }, { "cell_type": "code", "execution_count": null, "id": "9800c089-7514-4a16-a72c-65ab7455ae26", "metadata": {}, "outputs": [], "source": [ "is_bl_perf = pd.Series(all_is_bl_perf)\n", "is_bl_perf.index.names = [\"split\", \"period\"]\n", "oos_bl_perf = pd.Series(all_oos_bl_perf)\n", "oos_bl_perf.index.names = [\"split\", \"period\"]\n", "bl_perf = pd.concat((\n", "    is_bl_perf.vbt.select_levels(\"period\"), \n", "    oos_bl_perf.vbt.select_levels(\"period\")\n", "))\n", "# De-duplicate on the period label rather than on the Sharpe value, so that\n", "# two different periods with an equal (or NaN) baseline are both kept.\n", "bl_perf = bl_perf[~bl_perf.index.duplicated()]\n", "bl_perf" ] }, { "cell_type": "code", "execution_count": null, "id": "44f04653-946d-47ec-8857-f063f07e7cc3", "metadata": {}, "outputs": [], "source": [ "is_perf = pd.concat(all_is_perf, names=[\"split\", \"period\"])\n", "is_perf" ] }, { "cell_type": "code", "execution_count": null, "id": "f20ae091-36b8-4811-b789-2a99dc9f0f81", "metadata": {}, "outputs": [], "source": [ "oos_perf = pd.concat(all_oos_perf, names=[\"split\", \"period\"])\n", "oos_perf" ] }, { "cell_type": "code", "execution_count": null, "id": 
"e73f8adc-bf80-4955-a86e-ebcb5f8254b5", "metadata": {}, "outputs": [], "source": [ "is_best_mask = is_perf.index.vbt.drop_levels(\"period\").isin(\n", " oos_perf.index.vbt.drop_levels(\"period\"))\n", "is_best_perf = is_perf[is_best_mask]\n", "is_best_perf" ] }, { "cell_type": "code", "execution_count": null, "id": "b77ffdd0-dbd2-4473-9eb4-ad7d36fc7625", "metadata": {}, "outputs": [], "source": [ "print(pd.concat((\n", " is_perf.describe(),\n", " is_best_perf.describe(),\n", " is_bl_perf.describe(),\n", " oos_perf.describe(),\n", " oos_bl_perf.describe()\n", "), axis=1, keys=[\n", " \"IS\", \n", " \"IS (Best)\", \n", " \"IS (Baseline)\", \n", " \"OOS (Test)\", \n", " \"OOS (Baseline)\"\n", "]))" ] }, { "cell_type": "code", "execution_count": null, "id": "a0f37d37-8051-4f48-b6bd-ad9ee882bd0b", "metadata": {}, "outputs": [], "source": [ "fig = is_perf.vbt.boxplot(\n", " by_level=\"period\",\n", " trace_kwargs=dict(\n", " line=dict(color=\"lightskyblue\"), \n", " opacity=0.4,\n", " showlegend=False\n", " ),\n", " xaxis_title=\"Period\", \n", " yaxis_title=\"Sharpe\",\n", ")\n", "fig = is_best_perf.vbt.select_levels(\"period\").vbt.plot(\n", " trace_kwargs=dict(\n", " name=\"Best\", \n", " line=dict(color=\"limegreen\", dash=\"dash\")\n", " ), \n", " fig=fig\n", ")\n", "fig = bl_perf.vbt.plot(\n", " trace_kwargs=dict(\n", " name=\"Baseline\", \n", " line=dict(color=\"orange\", dash=\"dash\")\n", " ), \n", " fig=fig\n", ")\n", "fig = oos_perf.vbt.select_levels(\"period\").vbt.plot(\n", " trace_kwargs=dict(\n", " name=\"Test\", \n", " line=dict(color=\"orangered\")\n", " ), \n", " fig=fig\n", ")\n", "fig.show_svg()" ] }, { "cell_type": "code", "execution_count": null, "id": "a3e52247-958a-40ba-af30-7a5f1a55b744", "metadata": {}, "outputs": [], "source": [ "is_perf_split6 = is_perf.xs(6, level=\"split\")\n", "is_perf_split6.describe()" ] }, { "cell_type": "code", "execution_count": null, "id": "53a0d648-a49e-48c5-b7a2-23fb8b18db03", "metadata": {}, "outputs": [], 
"source": [ "first_left_bound = period_ranges.loc[6, \"start\"]\n", "first_right_bound = period_ranges.loc[6, \"end\"]\n", "data[first_left_bound : first_right_bound].plot().show_svg()" ] }, { "cell_type": "code", "execution_count": null, "id": "11a80453-5f53-4a7e-92b8-79834dd546af", "metadata": {}, "outputs": [], "source": [ "oos_perf.xs(6, level=\"period\")" ] }, { "cell_type": "code", "execution_count": null, "id": "8b01f9be-a5b8-433d-aab3-c0c51219ce56", "metadata": {}, "outputs": [], "source": [ "is_perf_split6.quantile(0.25)" ] }, { "cell_type": "markdown", "id": "c0b946fb-81de-4183-9d47-9defc4f0f44e", "metadata": {}, "source": [ "## Splitter" ] }, { "cell_type": "code", "execution_count": null, "id": "113d8739-60b1-40d3-b031-984d7c7adf17", "metadata": {}, "outputs": [], "source": [ "splitter = vbt.Splitter.from_rolling(\n", " data.index, \n", " length=360, \n", " split=0.5,\n", " set_labels=[\"IS\", \"OOS\"]\n", ")\n", "splitter.plot().show_svg()" ] }, { "cell_type": "markdown", "id": "9e0edb2e-8e5b-4554-8c3b-8a84b54279fe", "metadata": {}, "source": [ "### Schema" ] }, { "cell_type": "code", "execution_count": null, "id": "92af91ef-5b3e-4869-9ffd-cf5451714e94", "metadata": {}, "outputs": [], "source": [ "print(splitter.splits)" ] }, { "cell_type": "code", "execution_count": null, "id": "31376ef0-288f-461e-8fa7-2023c85248bf", "metadata": {}, "outputs": [], "source": [ "splitter.index" ] }, { "cell_type": "code", "execution_count": null, "id": "ed969ea9-de5b-4501-9093-7628fb3241ae", "metadata": {}, "outputs": [], "source": [ "splitter.wrapper.index" ] }, { "cell_type": "code", "execution_count": null, "id": "c204a8e8-a640-4056-bbdd-2bfb81bbd7a7", "metadata": {}, "outputs": [], "source": [ "splitter.wrapper.columns" ] }, { "cell_type": "code", "execution_count": null, "id": "ac099a17-7a4a-4227-8782-f71f089e5943", "metadata": {}, "outputs": [], "source": [ "oos_splitter = splitter[\"OOS\"]\n", "print(oos_splitter.splits)" ] }, { "cell_type": "markdown", "id": 
"f043f402-5a79-4a74-a355-12bac03e0235", "metadata": {}, "source": [ "#### Range format" ] }, { "cell_type": "code", "execution_count": null, "id": "87ab6012-b22b-4a7d-81f7-0b2d8f6ffce9", "metadata": {}, "outputs": [], "source": [ "index = vbt.date_range(\"2020\", periods=14)\n", "index[slice(1, 7)]" ] }, { "cell_type": "code", "execution_count": null, "id": "8c6462f8-0f46-4237-a499-b63cd961b55b", "metadata": {}, "outputs": [], "source": [ "index[1], index[6]" ] }, { "cell_type": "markdown", "id": "c2289e7e-6dbc-46cd-9026-09716dd283c4", "metadata": {}, "source": [ "##### Relative" ] }, { "cell_type": "code", "execution_count": null, "id": "980722dd-e524-4ab7-9e75-4871fdbcf31d", "metadata": {}, "outputs": [], "source": [ "rel_range = vbt.RelRange(offset=10, length=40)\n", "rel_range" ] }, { "cell_type": "code", "execution_count": null, "id": "9e6c987f-ead6-47f2-978a-2c46bbe995b0", "metadata": {}, "outputs": [], "source": [ "rel_range.to_slice(total_len=len(splitter.index), prev_end=100)" ] }, { "cell_type": "markdown", "id": "f835ba92-9d19-484f-aead-6c11c99a4ad6", "metadata": {}, "source": [ "#### Array format" ] }, { "cell_type": "code", "execution_count": null, "id": "41c25ca4-bf22-4e11-bade-d115f4b692f0", "metadata": {}, "outputs": [], "source": [ "index = vbt.date_range(\"2020\", \"2021\", freq=\"1min\")\n", "range_ = np.arange(len(index))\n", "range_.nbytes / 1024 / 1024" ] }, { "cell_type": "code", "execution_count": null, "id": "91d67769-695f-4a29-83f6-1b09d77f9397", "metadata": {}, "outputs": [], "source": [ "range_ = np.full(len(index), True)\n", "range_.nbytes / 1024 / 1024" ] }, { "cell_type": "code", "execution_count": null, "id": "95a932ae-317d-4168-81ba-4045716d6eda", "metadata": {}, "outputs": [], "source": [ "splitter.splits_arr.dtype" ] }, { "cell_type": "code", "execution_count": null, "id": "f73dab8c-8488-48bc-8824-9f263116fa9b", "metadata": {}, "outputs": [], "source": [ "id(slice(0, 180, None))" ] }, { "cell_type": "code", "execution_count": 
null, "id": "328d2ec2-d2cc-4550-b0ae-273287cdd05f", "metadata": {}, "outputs": [], "source": [ "range_00 = np.arange(0, 5)\n", "range_01 = np.arange(5, 15)\n", "range_10 = np.arange(15, 30)\n", "range_11 = np.arange(30, 50)\n", "\n", "ind_splitter = vbt.Splitter.from_splits(\n", " data.index,\n", " [[range_00, range_01], [range_10, range_11]],\n", " fix_ranges=False\n", ")\n", "print(ind_splitter.splits)" ] }, { "cell_type": "code", "execution_count": null, "id": "deae0ac0-0218-4835-aedd-6dbc5696cc75", "metadata": {}, "outputs": [], "source": [ "ind_splitter.splits.loc[0, \"set_1\"]" ] }, { "cell_type": "code", "execution_count": null, "id": "35f60dad-3a36-4c62-b07d-c36fb7a1b9da", "metadata": {}, "outputs": [], "source": [ "ind_splitter.splits.loc[0, \"set_1\"].range_" ] }, { "cell_type": "markdown", "id": "6326885f-9be3-4416-a868-187bfc2808d6", "metadata": {}, "source": [ "### Preparation" ] }, { "cell_type": "markdown", "id": "79a85a1f-eae2-49ff-b239-574ff90eadc4", "metadata": {}, "source": [ "#### Splits" ] }, { "cell_type": "code", "execution_count": null, "id": "acad6284-91bb-48d0-86bd-f3324236fa40", "metadata": {}, "outputs": [], "source": [ "vbt.Splitter.split_range(\n", " slice(None),\n", " (vbt.RelRange(length=0.75), vbt.RelRange()),\n", " index=data.index\n", ")" ] }, { "cell_type": "code", "execution_count": null, "id": "4674b573-34fd-4d15-bc81-0adda3784086", "metadata": {}, "outputs": [], "source": [ "splitter.split_range(\n", " slice(None),\n", " (vbt.RelRange(length=0.75), vbt.RelRange())\n", ")" ] }, { "cell_type": "code", "execution_count": null, "id": "244f8814-d37e-4804-97fc-4586223f308d", "metadata": {}, "outputs": [], "source": [ "data[slice(0, 1426, None)]" ] }, { "cell_type": "code", "execution_count": null, "id": "a2637af2-a0d0-47bf-bf97-a9cfc9143a06", "metadata": {}, "outputs": [], "source": [ "vbt.Splitter.split_range(\n", " slice(None), \n", " 0.75, \n", " index=data.index\n", ")" ] }, { "cell_type": "code", "execution_count": null, "id": 
"d79151bf-1d01-436b-a4b4-3f73e1016e4f", "metadata": {}, "outputs": [], "source": [ "vbt.Splitter.split_range(\n", " slice(None), \n", " -0.25,\n", " index=data.index\n", ")" ] }, { "cell_type": "code", "execution_count": null, "id": "ab0e6009-1b0e-4f0f-838a-cfe4a87f7406", "metadata": {}, "outputs": [], "source": [ "int(0.75 * len(data.index))" ] }, { "cell_type": "code", "execution_count": null, "id": "c87ffde6-b565-457a-bb97-0be21789705e", "metadata": {}, "outputs": [], "source": [ "len(data.index) - int(0.25 * len(data.index))" ] }, { "cell_type": "code", "execution_count": null, "id": "653cc6d7-2236-4b12-ba92-a115cab15656", "metadata": {}, "outputs": [], "source": [ "vbt.Splitter.split_range(\n", " slice(None), \n", " (vbt.RelRange(), vbt.RelRange(length=0.25)),\n", " backwards=True,\n", " index=data.index\n", ")" ] }, { "cell_type": "code", "execution_count": null, "id": "0065dda0-0bfd-4ce0-b6bc-693c7f0960a5", "metadata": {}, "outputs": [], "source": [ "vbt.Splitter.split_range(\n", " slice(None), \n", " (1.0, 30), \n", " backwards=True,\n", " index=data.index\n", ")" ] }, { "cell_type": "code", "execution_count": null, "id": "a4c1bc39-70e6-4b94-97d5-1472c705e70b", "metadata": {}, "outputs": [], "source": [ "vbt.Splitter.split_range(\n", " slice(None), \n", " (\n", " vbt.RelRange(length=0.4, length_space=\"all\"), \n", " vbt.RelRange(length=0.4, length_space=\"all\"),\n", " vbt.RelRange()\n", " ),\n", " index=data.index\n", ")" ] }, { "cell_type": "code", "execution_count": null, "id": "d2c201c9-f114-4f3b-ad47-bcee006d7953", "metadata": {}, "outputs": [], "source": [ "vbt.Splitter.split_range(\n", " slice(None),\n", " (vbt.RelRange(length=0.75), vbt.RelRange(offset=1)),\n", " index=data.index\n", ")" ] }, { "cell_type": "code", "execution_count": null, "id": "6f23a330-cb2f-4a15-8a09-08504c4cd48a", "metadata": {}, "outputs": [], "source": [ "vbt.Splitter.split_range(\n", " slice(None), \n", " (\n", " vbt.RelRange(length=0.75), \n", " vbt.RelRange(length=1, 
is_gap=True),\n", " vbt.RelRange()\n", " ),\n", " index=data.index\n", ")" ] }, { "cell_type": "code", "execution_count": null, "id": "958e0e1c-2735-4568-b05b-b29c8b4f7560", "metadata": {}, "outputs": [], "source": [ "vbt.Splitter.split_range(\n", " slice(None), \n", " (np.array([3, 4, 5]), np.array([6, 8, 10])),\n", " index=data.index\n", ")" ] }, { "cell_type": "code", "execution_count": null, "id": "a60e0aaa-d581-4530-a747-3067c8678650", "metadata": {}, "outputs": [], "source": [ "vbt.Splitter.split_range(\n", " slice(None), \n", " (np.array([3, 4, 5]), np.array([6, 8, 10])),\n", " range_format=\"indices\",\n", " index=data.index\n", ")" ] }, { "cell_type": "code", "execution_count": null, "id": "4249fa3b-ef6f-41b7-8d8d-81723e6ce48a", "metadata": {}, "outputs": [], "source": [ "vbt.Splitter.split_range(\n", " slice(None), \n", " (slice(\"2020\", \"2021\"), slice(\"2021\", \"2022\")),\n", " index=data.index\n", ")" ] }, { "cell_type": "code", "execution_count": null, "id": "bfbae80f-a314-41f9-8c49-2111f4df01d9", "metadata": {}, "outputs": [], "source": [ "data.index[867:1233]" ] }, { "cell_type": "code", "execution_count": null, "id": "debcb754-e691-4e59-b318-61486af3fbf4", "metadata": {}, "outputs": [], "source": [ "data.index[1233:1598]" ] }, { "cell_type": "code", "execution_count": null, "id": "7d1340f2-d1b9-4fbe-9898-d41620e3d30d", "metadata": {}, "outputs": [], "source": [ "vbt.Splitter.split_range(\n", " slice(None), \n", " (\n", " vbt.RelRange(length=\"180 days\"), \n", " vbt.RelRange(offset=\"1 day\", length=\"90 days\")\n", " ),\n", " index=data.index\n", ")" ] }, { "cell_type": "markdown", "id": "8638db99-b5a3-4ce8-adcc-f3ebf7c59b71", "metadata": {}, "source": [ "#### Method" ] }, { "cell_type": "code", "execution_count": null, "id": "b6a99b43-4326-4d37-85fc-2f6b0076ebd4", "metadata": {}, "outputs": [], "source": [ "manual_splitter = vbt.Splitter.from_splits(\n", " data.index,\n", " [\n", " (vbt.RelRange(), vbt.RelRange(offset=0.5, length=0.25, 
length_space=\"all\")),\n", " (vbt.RelRange(), vbt.RelRange(offset=0.25, length=0.25, length_space=\"all\")),\n", " (vbt.RelRange(), vbt.RelRange(offset=0, length=0.25, length_space=\"all\")),\n", " ],\n", " split_range_kwargs=dict(backwards=True),\n", " set_labels=[\"IS\", \"OOS\"]\n", ")\n", "print(manual_splitter.splits)" ] }, { "cell_type": "code", "execution_count": null, "id": "871ceeb6-533d-48ce-803c-49106fc6d807", "metadata": {}, "outputs": [], "source": [ "manual_splitter.plot().show_svg()" ] }, { "cell_type": "markdown", "id": "22246385-ae17-47db-8815-71788c60453d", "metadata": {}, "source": [ "### Generation" ] }, { "cell_type": "markdown", "id": "e6dbecb8-fd62-4a3d-9682-24deb81a8da0", "metadata": {}, "source": [ "#### Rolling" ] }, { "cell_type": "code", "execution_count": null, "id": "d0ab8ccc-dfe7-4612-8a39-3017795e0486", "metadata": {}, "outputs": [], "source": [ "vbt.Splitter.from_rolling(\n", " data.index,\n", " length=360,\n", ").plot().show_svg()" ] }, { "cell_type": "code", "execution_count": null, "id": "02d5d230-30c0-4156-a493-6ef4157d727a", "metadata": {}, "outputs": [], "source": [ "vbt.Splitter.from_rolling(\n", " data.index, \n", " length=360,\n", " offset=90\n", ").plot().show_svg()" ] }, { "cell_type": "code", "execution_count": null, "id": "ba270114-7b18-45d0-a542-7bcfe3c0ddc3", "metadata": {}, "outputs": [], "source": [ "vbt.Splitter.from_rolling(\n", " data.index, \n", " length=360,\n", " offset=-0.5\n", ").plot().show_svg()" ] }, { "cell_type": "code", "execution_count": null, "id": "f47fb9c2-eebd-40f0-b901-da3dd54ae721", "metadata": {}, "outputs": [], "source": [ "vbt.Splitter.from_rolling(\n", " data.index, \n", " length=360,\n", " split=0.5\n", ").plot().show_svg()" ] }, { "cell_type": "code", "execution_count": null, "id": "66314ec5-2a84-4343-8b08-9712bccc2d37", "metadata": {}, "outputs": [], "source": [ "vbt.Splitter.from_rolling(\n", " data.index,\n", " length=360,\n", " split=0.5,\n", " offset_anchor_set=None\n", 
").plot().show_svg()" ] }, { "cell_type": "code", "execution_count": null, "id": "2c0661b1-df61-4792-9da6-720d6866e7c5", "metadata": {}, "outputs": [], "source": [ "vbt.Splitter.from_n_rolling(\n", " data.index,\n", " n=5,\n", " split=0.5\n", ").plot().show_svg()" ] }, { "cell_type": "code", "execution_count": null, "id": "34c752b3-ed6c-4752-b835-e6a0003ff1e5", "metadata": {}, "outputs": [], "source": [ "vbt.Splitter.from_n_rolling(\n", " data.index,\n", " n=3,\n", " length=360,\n", " split=0.5\n", ").plot().show_svg()" ] }, { "cell_type": "code", "execution_count": null, "id": "1b40afcc-b3d1-47c3-9f84-f30d1840333e", "metadata": {}, "outputs": [], "source": [ "vbt.Splitter.from_n_rolling(\n", " data.index,\n", " n=7,\n", " length=360,\n", " split=0.5\n", ").plot().show_svg()" ] }, { "cell_type": "code", "execution_count": null, "id": "21f304b2-205c-4cb3-a231-905e266d437a", "metadata": {}, "outputs": [], "source": [ "vbt.Splitter.from_expanding(\n", " data.index, \n", " min_length=360,\n", " offset=180,\n", " split=-180\n", ").plot().show_svg()" ] }, { "cell_type": "code", "execution_count": null, "id": "59a2a68e-b863-4526-aa79-6694b825ac4f", "metadata": {}, "outputs": [], "source": [ "vbt.Splitter.from_n_expanding(\n", " data.index, \n", " n=5,\n", " min_length=360,\n", " split=-180\n", ").plot().show_svg()" ] }, { "cell_type": "markdown", "id": "2bd6889b-8da8-4bc6-be03-23a4e7eca992", "metadata": {}, "source": [ "#### Anchored" ] }, { "cell_type": "code", "execution_count": null, "id": "1ef7b028-471b-4033-a3c4-1aaa3354a237", "metadata": {}, "outputs": [], "source": [ "vbt.Splitter.from_ranges(\n", " data.index,\n", " every=\"Y\",\n", " split=0.5\n", ").plot().show_svg()" ] }, { "cell_type": "code", "execution_count": null, "id": "2eed590b-93be-4ae5-967d-8b9c04bf3990", "metadata": {}, "outputs": [], "source": [ "vbt.Splitter.from_ranges(\n", " data.index,\n", " every=\"Q\",\n", " lookback_period=\"Y\",\n", " split=0.5\n", ").plot().show_svg()" ] }, { "cell_type": 
"code", "execution_count": null, "id": "69154548-b1e8-4ecd-95fd-9be2f5466ed4", "metadata": {}, "outputs": [], "source": [ "vbt.Splitter.from_ranges(\n", " data.index,\n", " every=\"Q\",\n", " lookback_period=\"Y\",\n", " split=(\n", " vbt.RepEval(\"index.month != index.month[-1]\"),\n", " vbt.RepEval(\"index.month == index.month[-1]\")\n", " )\n", ").plot().show_svg()" ] }, { "cell_type": "code", "execution_count": null, "id": "1562819f-2715-41b2-977e-6e046aba4e0b", "metadata": {}, "outputs": [], "source": [ "def qyear(index):\n", " return index.to_period(\"Q\")\n", "\n", "vbt.Splitter.from_ranges(\n", " data.index,\n", " start=0,\n", " fixed_start=True,\n", " every=\"Q\",\n", " closed_end=True,\n", " split=(\n", " lambda index: qyear(index) != qyear(index)[-1],\n", " lambda index: qyear(index) == qyear(index)[-1]\n", " )\n", ").plot().show_svg()" ] }, { "cell_type": "code", "execution_count": null, "id": "432485a6-f8f4-4970-8cec-56689fc1969d", "metadata": {}, "outputs": [], "source": [ "vbt.Splitter.from_grouper(\n", " data.index,\n", " by=\"Y\",\n", " split=0.5\n", ").plot().show_svg()" ] }, { "cell_type": "code", "execution_count": null, "id": "5cd1d516-094e-4f32-a202-6d49e700386c", "metadata": {}, "outputs": [], "source": [ "def is_split_complete(index, split):\n", " first_range = split[0]\n", " first_index = index[first_range][0]\n", " last_range = split[-1]\n", " last_index = index[last_range][-1]\n", " return first_index.is_year_start and last_index.is_year_end\n", "\n", "vbt.Splitter.from_grouper(\n", " data.index,\n", " by=\"Y\",\n", " split=0.5,\n", " split_check_template=vbt.RepFunc(is_split_complete)\n", ").plot().show_svg()" ] }, { "cell_type": "code", "execution_count": null, "id": "6cc38232-edfb-4395-899f-47ed3aa0723c", "metadata": {}, "outputs": [], "source": [ "def format_split_labels(index, splits_arr):\n", " years = map(lambda x: index[x[0]][0].year, splits_arr)\n", " return pd.Index(years, name=\"split_year\")\n", "\n", 
"vbt.Splitter.from_grouper(\n", " data.index,\n", " by=\"Y\",\n", " split=0.5,\n", " split_check_template=vbt.RepFunc(is_split_complete),\n", " split_labels=vbt.RepFunc(format_split_labels)\n", ").plot().show_svg()" ] }, { "cell_type": "code", "execution_count": null, "id": "11d702be-5df5-42de-9491-9b38ac7aae06", "metadata": {}, "outputs": [], "source": [ "vbt.Splitter.from_grouper(\n", " data.index,\n", " by=data.index.year,\n", " split=0.5,\n", " split_check_template=vbt.RepFunc(is_split_complete)\n", ").plot().show_svg()" ] }, { "cell_type": "markdown", "id": "38d4f68a-0372-48e5-bc1a-c45171942e7e", "metadata": {}, "source": [ "#### Random" ] }, { "cell_type": "code", "execution_count": null, "id": "e72d4e77-c387-4c7e-bcd5-07e8c15ac15c", "metadata": {}, "outputs": [], "source": [ "vbt.Splitter.from_n_random(\n", " data.index,\n", " n=50,\n", " min_length=360,\n", " seed=42,\n", " split=0.5\n", ").plot().show_svg()" ] }, { "cell_type": "code", "execution_count": null, "id": "dbeea5da-bceb-4e8e-866b-2a0e608995f2", "metadata": {}, "outputs": [], "source": [ "vbt.Splitter.from_n_random(\n", " data.index,\n", " n=50,\n", " min_length=60,\n", " max_length=480,\n", " seed=42,\n", " split=0.5\n", ").plot().show_svg()" ] }, { "cell_type": "code", "execution_count": null, "id": "b2c41a6c-b579-499c-98af-b7332cdaa7ab", "metadata": {}, "outputs": [], "source": [ "def start_p_func(i, indices):\n", " return indices / indices.sum()\n", "\n", "vbt.Splitter.from_n_random(\n", " data.index,\n", " n=50,\n", " min_length=60,\n", " max_length=480,\n", " seed=42,\n", " start_p_func=start_p_func,\n", " split=0.5\n", ").plot().show_svg()" ] }, { "cell_type": "markdown", "id": "bf44a54a-5349-4b2e-a92a-03ecdda01e00", "metadata": {}, "source": [ "#### Scikit-learn" ] }, { "cell_type": "code", "execution_count": null, "id": "3992f4a8-d943-4256-939f-6f3be3767df6", "metadata": {}, "outputs": [], "source": [ "from sklearn.model_selection import KFold\n", "\n", "vbt.Splitter.from_sklearn(\n", " 
data.index, \n", "    KFold(n_splits=5)\n", ").plot().show_svg()" ] }, { "cell_type": "markdown", "id": "43c93cc4-b494-4119-923a-c01e5e84b458", "metadata": {}, "source": [ "#### Dynamic" ] }, { "cell_type": "code", "execution_count": null, "id": "a55967e9-58b6-46a4-92eb-47de2c99ee40", "metadata": {}, "outputs": [], "source": [ "def split_func(index, prev_start):\n", "    \"\"\"Return the next [first, second] pair of date slices, or None to stop.\n", "\n", "    The split starts one `MonthBegin` after `prev_start` (or after `index[0]`\n", "    when `prev_start` is None) and spans one year; the boundary between the\n", "    two slices is `new_start + MonthBegin(9)`.\n", "    \"\"\"\n", "    if prev_start is None:\n", "        prev_start = index[0]\n", "    new_start = prev_start + pd.offsets.MonthBegin(1)\n", "    new_end = new_start + pd.DateOffset(years=1)\n", "    # Stop once the one-year window would extend past the end of the index\n", "    if new_end > index[-1] + index.freq:\n", "        return None\n", "    return [\n", "        slice(new_start, new_start + pd.offsets.MonthBegin(9)),\n", "        slice(new_start + pd.offsets.MonthBegin(9), new_end)\n", "    ]\n", "\n", "vbt.Splitter.from_split_func(\n", "    data.index,\n", "    split_func=split_func,\n", "    split_args=(vbt.Rep(\"index\"), vbt.Rep(\"prev_start\")),\n", "    range_bounds_kwargs=dict(index_bounds=True)\n", ").plot().show_svg()" ] }, { "cell_type": "code", "execution_count": null, "id": "3dcf9638-36e8-4304-99e4-c30e1970cfd5", "metadata": {}, "outputs": [], "source": [ "def get_next_monday(from_date):\n", "    \"\"\"Return midnight of the Monday from which the next business week starts.\"\"\"\n", "    # weekday() is a method: comparing the bound method itself to 0 is always\n", "    # False, which made this branch unreachable.\n", "    if from_date.weekday() == 0 and from_date.ceil(\"H\").hour <= 9:\n", "        return from_date.floor(\"D\")\n", "    return from_date.floor(\"D\") + pd.offsets.Week(n=0, weekday=0)\n", "\n", "def get_next_business_range(from_date):\n", "    \"\"\"Return a slice from Monday 09:00 to Friday 17:00 of the week given by `get_next_monday`.\"\"\"\n", "    monday_0000 = get_next_monday(from_date)\n", "    monday_0900 = monday_0000 + pd.DateOffset(hours=9)\n", "    friday_1700 = monday_0900 + pd.DateOffset(days=4, hours=8)\n", "    return slice(monday_0900, friday_1700)\n", "\n", "def split_func(index, bounds):\n", "    \"\"\"Return the next (train, test) pair of business-week slices, or None to stop.\"\"\"\n", "    if len(bounds) == 0:\n", "        from_date = index[0]\n", "    else:\n", "        # Presumably the start of the previous split's second (test) range,\n", "        # so the previous test week becomes the new train week - verify\n", "        # against the layout of `bounds`.\n", "        from_date = bounds[-1][1][0]\n", "    train_range = get_next_business_range(from_date)\n", "    test_range = get_next_business_range(train_range.stop)\n", "    # Stop once the test week would extend past the end of the index\n", "    if test_range.stop > index[-1] + index.freq:\n", "        return None\n", "    return train_range, test_range\n", "\n", "vbt.Splitter.from_split_func(\n", " 
vbt.date_range(\"2020-01\", \"2020-03\", freq=\"15min\"),\n", " split_func=split_func,\n", " split_args=(vbt.Rep(\"index\"), vbt.Rep(\"bounds\")),\n", " range_bounds_kwargs=dict(index_bounds=True)\n", ").plot().show_svg()" ] }, { "cell_type": "markdown", "id": "4ab3b2d1-5fb5-4978-95e0-2ee627abe829", "metadata": {}, "source": [ "### Validation" ] }, { "cell_type": "code", "execution_count": null, "id": "0b84d3f9-22a1-46b3-9593-4ebe18060fc1", "metadata": {}, "outputs": [], "source": [ "splitter = vbt.Splitter.from_ranges(\n", " data.index,\n", " every=\"Y\",\n", " closed_end=True,\n", " split=0.5,\n", " set_labels=[\"IS\", \"OOS\"]\n", ")\n", "splitter.plot().show_svg()" ] }, { "cell_type": "markdown", "id": "589b9b90-e825-4cf6-a402-4720fd82e6b0", "metadata": {}, "source": [ "#### Bounds" ] }, { "cell_type": "code", "execution_count": null, "id": "9aa53234-fab9-4db4-b0f6-955f8c66f467", "metadata": {}, "outputs": [], "source": [ "bounds_arr = splitter.get_bounds_arr()\n", "bounds_arr.shape" ] }, { "cell_type": "code", "execution_count": null, "id": "f364195f-9ef1-43ee-b8a9-af7835d147a2", "metadata": {}, "outputs": [], "source": [ "print(bounds_arr)" ] }, { "cell_type": "code", "execution_count": null, "id": "b3051c7d-3186-49a4-9092-d837f79c09c8", "metadata": {}, "outputs": [], "source": [ "bounds = splitter.get_bounds(index_bounds=True)\n", "bounds.shape" ] }, { "cell_type": "code", "execution_count": null, "id": "9d76d199-12db-4574-a8f4-0b87107d8a0d", "metadata": {}, "outputs": [], "source": [ "print(bounds)" ] }, { "cell_type": "code", "execution_count": null, "id": "a0818b81-9735-481d-b510-14140ba6c576", "metadata": {}, "outputs": [], "source": [ "bounds.loc[(0, \"OOS\"), \"end\"]" ] }, { "cell_type": "code", "execution_count": null, "id": "21737903-0707-410c-a1c1-133178784fcf", "metadata": {}, "outputs": [], "source": [ "bounds.loc[(1, \"IS\"), \"start\"]" ] }, { "cell_type": "markdown", "id": "c5b5d61b-66fe-4057-b699-64d1863d061c", "metadata": {}, "source": [ 
"#### Masks" ] }, { "cell_type": "code", "execution_count": null, "id": "32b1ad13-4523-41d9-b424-637f1b25b4a2", "metadata": {}, "outputs": [], "source": [ "mask = splitter.get_mask()\n", "mask.shape" ] }, { "cell_type": "code", "execution_count": null, "id": "a21e4d35-4403-4d5e-ad67-0086da9444ce", "metadata": {}, "outputs": [], "source": [ "print(mask)" ] }, { "cell_type": "code", "execution_count": null, "id": "328a93e7-7b14-42d8-acc6-3eba2a72ec4f", "metadata": {}, "outputs": [], "source": [ "mask[\"2021\":\"2021\"].any()" ] }, { "cell_type": "code", "execution_count": null, "id": "8bccca0a-65be-4810-bd5e-5222edfba6c4", "metadata": {}, "outputs": [], "source": [ "print(mask.resample(vbt.offset(\"Y\")).sum())" ] }, { "cell_type": "code", "execution_count": null, "id": "629ea5be-73c7-4799-8d07-ab28a90df329", "metadata": {}, "outputs": [], "source": [ "# Iterate under a separate name so the notebook-level `mask` is not rebound\n", "# to the last split's mask.\n", "results = [\n", "    split_mask.resample(vbt.offset(\"Y\")).sum()\n", "    for split_mask in splitter.get_iter_split_masks()\n", "]\n", "print(pd.concat(results, axis=1, keys=splitter.split_labels))" ] }, { "cell_type": "markdown", "id": "ec5cbff7-c630-4d35-a607-cf4f7c482f0f", "metadata": {}, "source": [ "#### Coverage" ] }, { "cell_type": "code", "execution_count": null, "id": "76bb4bb3-027b-465f-be37-fb44cb567bc6", "metadata": {}, "outputs": [], "source": [ "splitter.get_split_coverage()" ] }, { "cell_type": "code", "execution_count": null, "id": "071e5e14-def1-4133-8682-d4e214810079", "metadata": {}, "outputs": [], "source": [ "splitter.get_set_coverage()" ] }, { "cell_type": "code", "execution_count": null, "id": "d885fac2-0a56-4e4e-81d0-52a29b8cd572", "metadata": {}, "outputs": [], "source": [ "splitter.get_range_coverage()" ] }, { "cell_type": "code", "execution_count": null, "id": "7c4301ee-8998-44d5-b112-0e463f0213ea", "metadata": {}, "outputs": [], "source": [ "splitter.get_coverage()" ] }, { "cell_type": "code", "execution_count": null, "id": "4d328277-71e9-49c1-8b54-d7ecf08682e2", "metadata": {}, "outputs": [], 
"source": [ "splitter.index_bounds.loc[(2, \"OOS\"), \"start\"].is_leap_year" ] }, { "cell_type": "code", "execution_count": null, "id": "a8e94289-4550-43bf-9cca-f13cb6e195ad", "metadata": {}, "outputs": [], "source": [ "splitter.get_range_coverage(relative=True)" ] }, { "cell_type": "code", "execution_count": null, "id": "0c9e5444-4276-42bc-b0c6-0799c7606345", "metadata": {}, "outputs": [], "source": [ "splitter.get_set_coverage(relative=True)" ] }, { "cell_type": "code", "execution_count": null, "id": "cf6715aa-0a33-4202-9b38-3dc8f6b24ebc", "metadata": {}, "outputs": [], "source": [ "splitter.get_split_coverage(overlapping=True)" ] }, { "cell_type": "code", "execution_count": null, "id": "1567e6fe-85e5-4075-b4a4-f0490d69bfb4", "metadata": {}, "outputs": [], "source": [ "splitter.get_set_coverage(overlapping=True)" ] }, { "cell_type": "code", "execution_count": null, "id": "a1935522-f1f7-460d-939b-8134fe1287e1", "metadata": {}, "outputs": [], "source": [ "splitter.get_coverage(overlapping=True)" ] }, { "cell_type": "code", "execution_count": null, "id": "2466892b-1521-4b7d-94bd-54ab21adf816", "metadata": {}, "outputs": [], "source": [ "splitter.plot_coverage().show_svg()" ] }, { "cell_type": "code", "execution_count": null, "id": "b4f46cc9-edeb-42bb-b9d1-fbb26d9510e9", "metadata": {}, "outputs": [], "source": [ "print(splitter.get_overlap_matrix(by=\"range\", normalize=False))" ] }, { "cell_type": "markdown", "id": "0f125754-29e4-4d22-9f0f-defb3db268ce", "metadata": {}, "source": [ "#### Grouping" ] }, { "cell_type": "code", "execution_count": null, "id": "7af58745-fa11-4a7a-bd1e-833df3d3a8b4", "metadata": {}, "outputs": [], "source": [ "print(splitter.get_bounds(index_bounds=True, set_group_by=True))" ] }, { "cell_type": "markdown", "id": "b5d73373-ed8d-4943-860f-05342ca2c511", "metadata": {}, "source": [ "### Manipulation" ] }, { "cell_type": "code", "execution_count": null, "id": "2d1d4116-8e59-47d2-9364-c44aa908d2f0", "metadata": {}, "outputs": [], "source": [ 
"splitter = vbt.Splitter.from_grouper(\n", " data.index, \n", " by=data.index.year.rename(\"split_year\")\n", ")" ] }, { "cell_type": "code", "execution_count": null, "id": "6b9c5b86-7aec-4496-94d9-4c592766e167", "metadata": {}, "outputs": [], "source": [ "splitter.stats()" ] }, { "cell_type": "code", "execution_count": null, "id": "0aa022d7-6eb4-471f-aa5a-ab26c5deee58", "metadata": {}, "outputs": [], "source": [ "splitter.plots().show_svg()" ] }, { "cell_type": "code", "execution_count": null, "id": "98537c1f-efbb-4574-9f57-5bdb07b452b9", "metadata": {}, "outputs": [], "source": [ "splitter = splitter.iloc[1:-1]\n", "splitter.stats()" ] }, { "cell_type": "code", "execution_count": null, "id": "a442aad5-ead3-4d4e-b049-7e3090200ddf", "metadata": {}, "outputs": [], "source": [ "def new_split(index):\n", " return [\n", " np.isin(index.quarter, [1, 2]), \n", " index.quarter == 3, \n", " index.quarter == 4\n", " ]\n", "\n", "splitter = splitter.split_set(\n", " vbt.RepFunc(new_split),\n", " new_set_labels=[\"train\", \"valid\", \"test\"]\n", ")" ] }, { "cell_type": "code", "execution_count": null, "id": "a938abed-831a-4cb2-9729-9b015d819e38", "metadata": {}, "outputs": [], "source": [ "splitter.stats()" ] }, { "cell_type": "code", "execution_count": null, "id": "0f9125b7-6d85-4117-ad6b-37b24bf3c50d", "metadata": {}, "outputs": [], "source": [ "splitter.plots().show_svg()" ] }, { "cell_type": "markdown", "id": "35f5cfe5-0626-4372-a90b-df68e155d994", "metadata": {}, "source": [ "#### Homework" ] }, { "cell_type": "code", "execution_count": null, "id": "3d9fb4df-5ca0-43f5-869c-4721c3993402", "metadata": {}, "outputs": [], "source": [ "splitter = splitter.merge_sets(columns=[\"valid\", \"test\"], new_set_label=\"test\")\n", "splitter.plots().show_svg()" ] }, { "cell_type": "markdown", "id": "d00fdcdf-0bd1-49ed-ade9-411a2ae3d79c", "metadata": {}, "source": [ "## Applications" ] }, { "cell_type": "markdown", "id": "3ebb6158-e395-49e9-bcc8-cd258c17783f", "metadata": {}, 
"source": [ "### Taking" ] }, { "cell_type": "markdown", "id": "1aa23c97-e221-48ed-8a5e-484fda11e955", "metadata": {}, "source": [ "#### Without stacking" ] }, { "cell_type": "code", "execution_count": null, "id": "ed524fc3-b782-43dc-a8a6-0bced61a3708", "metadata": {}, "outputs": [], "source": [ "close_slices = splitter.take(data.close)\n", "close_slices" ] }, { "cell_type": "code", "execution_count": null, "id": "74d768fe-ce73-41c1-945f-c21fbb7fd662", "metadata": {}, "outputs": [], "source": [ "close_slices[2020, \"test\"]" ] }, { "cell_type": "code", "execution_count": null, "id": "79ac7f33-08aa-4bb7-87d2-f8e80c94db0c", "metadata": {}, "outputs": [], "source": [ "def get_total_return(sr):\n", " return sr.vbt.to_returns().vbt.returns.total()\n", "\n", "close_slices.apply(get_total_return)" ] }, { "cell_type": "markdown", "id": "02213852-3258-46d6-97a4-32a3a5f19820", "metadata": {}, "source": [ "##### Complex objects" ] }, { "cell_type": "code", "execution_count": null, "id": "c2d3208f-2454-4501-8cd2-e94694238a56", "metadata": {}, "outputs": [], "source": [ "trendlb = data.run(\"trendlb\", 1.0, 0.5)\n", "trendlb.plot().show_svg()" ] }, { "cell_type": "code", "execution_count": null, "id": "4926dbe3-ecc6-445d-868a-46a88af5756c", "metadata": {}, "outputs": [], "source": [ "grouper = pd.Index(trendlb.labels.map({1: \"U\", 0: \"D\"}), name=\"trend\")\n", "trend_splitter = vbt.Splitter.from_grouper(data.index, grouper)\n", "trend_splitter.plot().show_svg()" ] }, { "cell_type": "code", "execution_count": null, "id": "f41b604c-84ca-4ea2-a293-0d40878eb65c", "metadata": {}, "outputs": [], "source": [ "hold_pf = vbt.Portfolio.from_holding(data)\n", "hold_returns_acc = hold_pf.returns_acc\n", "\n", "fast_sma, slow_sma = vbt.talib(\"SMA\").run_combs(\n", " data.close, np.arange(5, 50), short_names=[\"fast_sma\", \"slow_sma\"])\n", "entries = fast_sma.real_crossed_above(slow_sma)\n", "exits = fast_sma.real_crossed_below(slow_sma)\n", "strat_pf = vbt.Portfolio.from_signals(\n", 
" data, entries, exits, direction=\"both\")\n", "strat_returns_acc = strat_pf.returns_acc" ] }, { "cell_type": "code", "execution_count": null, "id": "84335fef-a4c9-44a3-8f68-61ab14e6d01d", "metadata": {}, "outputs": [], "source": [ "hold_returns_acc_slices = trend_splitter.take(hold_returns_acc)\n", "strat_returns_acc_slices = trend_splitter.take(strat_returns_acc)" ] }, { "cell_type": "code", "execution_count": null, "id": "faae3461-56b6-4f50-aad8-f196745375e3", "metadata": {}, "outputs": [], "source": [ "hold_returns_acc_slices[\"U\"].sharpe_ratio()" ] }, { "cell_type": "code", "execution_count": null, "id": "91ae1996-5fce-459d-a08c-58950ca7ac83", "metadata": {}, "outputs": [], "source": [ "strat_returns_acc_slices[\"U\"].sharpe_ratio().vbt.heatmap(\n", " x_level=\"fast_sma_timeperiod\", \n", " y_level=\"slow_sma_timeperiod\",\n", " symmetric=True\n", ").show_svg()" ] }, { "cell_type": "code", "execution_count": null, "id": "ea0824b5-c6b5-48a5-9abd-3b8d582afeca", "metadata": {}, "outputs": [], "source": [ "hold_returns_acc_slices[\"D\"].sharpe_ratio()" ] }, { "cell_type": "code", "execution_count": null, "id": "dff164c5-5840-4bda-b25a-b290be490d2e", "metadata": {}, "outputs": [], "source": [ "strat_returns_acc_slices[\"D\"].sharpe_ratio().vbt.heatmap(\n", " x_level=\"fast_sma_timeperiod\", \n", " y_level=\"slow_sma_timeperiod\",\n", " symmetric=True\n", ").show_svg()" ] }, { "cell_type": "code", "execution_count": null, "id": "42039308-cbeb-4a20-928c-8c1b4588b84e", "metadata": {}, "outputs": [], "source": [ "trend_splitter = trend_splitter.break_up_splits(\"by_gap\", sort=True)\n", "trend_splitter.plot().show_svg()" ] }, { "cell_type": "code", "execution_count": null, "id": "6e564cf0-f138-4552-afc6-0ce34502e3e8", "metadata": {}, "outputs": [], "source": [ "strat_pf_slices = strat_pf.split(trend_splitter)\n", "strat_pf_slices" ] }, { "cell_type": "code", "execution_count": null, "id": "247f3770-249d-4612-bb7c-4c3d64e141f9", "metadata": {}, "outputs": [], 
"source": [ "trend_range_perf = strat_pf_slices.apply(lambda pf: pf.sharpe_ratio)\n", "median_trend_perf = trend_range_perf.median(axis=1)\n", "median_trend_perf" ] }, { "cell_type": "code", "execution_count": null, "id": "483eaa0d-cde1-4c30-873e-cc8d668139f8", "metadata": {}, "outputs": [], "source": [ "trend_perf_ts = data.symbol_wrapper.fill().rename(\"trend_perf\")\n", "for label, sr in trend_splitter.bounds.iterrows():\n", " trend_perf_ts.iloc[sr[\"start\"]:sr[\"end\"]] = median_trend_perf[label]\n", "data.close.vbt.overlay_with_heatmap(trend_perf_ts).show_svg()" ] }, { "cell_type": "markdown", "id": "7f013d68-bbc6-43b4-ae38-0414781602a4", "metadata": {}, "source": [ "#### Column stacking" ] }, { "cell_type": "code", "execution_count": null, "id": "c899aa78-29d8-4833-b307-2949132e0b78", "metadata": {}, "outputs": [], "source": [ "close_stacked = pd.concat(\n", " close_slices.values.tolist(), \n", " axis=1, \n", " keys=close_slices.index\n", ")\n", "print(close_stacked)" ] }, { "cell_type": "code", "execution_count": null, "id": "4e83e120-ee01-4664-81bb-39b4c084d3b6", "metadata": {}, "outputs": [], "source": [ "get_total_return(close_stacked)" ] }, { "cell_type": "code", "execution_count": null, "id": "20abc610-b2a9-4ecd-a724-f3c267e71636", "metadata": {}, "outputs": [], "source": [ "close_stacked = splitter.take(data.close, into=\"stacked\")\n", "close_stacked.shape" ] }, { "cell_type": "code", "execution_count": null, "id": "5d67665d-ea55-400c-ace4-bd5a2adaef12", "metadata": {}, "outputs": [], "source": [ "close_stacked = splitter.take(data.close, into=\"reset_stacked\")\n", "print(close_stacked)" ] }, { "cell_type": "code", "execution_count": null, "id": "c6a65f09-264e-40d4-a01b-f8ef208e331d", "metadata": {}, "outputs": [], "source": [ "close_stacked = splitter.take(data.close, into=\"from_end_stacked\")\n", "print(close_stacked)" ] }, { "cell_type": "code", "execution_count": null, "id": "25c891ae-ffab-4ef1-a97d-0de12a1c0f63", "metadata": {}, "outputs": [], 
"source": [ "close_stacked = splitter.take(data.close, into=\"reset_stacked_by_set\")\n", "close_stacked" ] }, { "cell_type": "code", "execution_count": null, "id": "78f294b1-a07b-4f1e-a43f-bf6b33ec0d13", "metadata": {}, "outputs": [], "source": [ "print(close_stacked[\"train\"])" ] }, { "cell_type": "code", "execution_count": null, "id": "94613e5d-2c2f-48ba-9714-e95b6d706b7a", "metadata": {}, "outputs": [], "source": [ "print(182 * 8)\n", "print(1461 * 8)\n", "print(1 - 1456 / 11688)" ] }, { "cell_type": "code", "execution_count": null, "id": "f72feaad-a8d0-4249-9719-7e0f5811f9c5", "metadata": {}, "outputs": [], "source": [ "index_slices = splitter.take(data.index)\n", "index_slices" ] }, { "cell_type": "code", "execution_count": null, "id": "0e0b9776-b7a4-4dc2-bcf5-3f73c47d9d5d", "metadata": {}, "outputs": [], "source": [ "close_stacked_wb = splitter.take(\n", " data.close, \n", " into=\"reset_stacked_by_set\",\n", " attach_bounds=\"index\",\n", " right_inclusive=True\n", ")\n", "print(close_stacked_wb[\"train\"])" ] }, { "cell_type": "code", "execution_count": null, "id": "48b59d45-5ddc-4777-969b-1582b4660055", "metadata": {}, "outputs": [], "source": [ "@vbt.parameterized(merge_func=\"concat\")\n", "def set_sma_crossover_perf(close, fast_window, slow_window, freq):\n", " fast_sma = vbt.talib(\"sma\").run(\n", " close, fast_window, short_name=\"fast_sma\", hide_params=True) \n", " slow_sma = vbt.talib(\"sma\").run(\n", " close, slow_window, short_name=\"slow_sma\", hide_params=True) \n", " entries = fast_sma.real_crossed_above(slow_sma)\n", " exits = fast_sma.real_crossed_below(slow_sma)\n", " pf = vbt.Portfolio.from_signals(\n", " close, entries, exits, freq=freq, direction=\"both\")\n", " return pf.sharpe_ratio" ] }, { "cell_type": "code", "execution_count": null, "id": "21ec3795-d820-4f7d-b6fa-de6710fac0cd", "metadata": {}, "outputs": [], "source": [ "train_perf = set_sma_crossover_perf(\n", " close_stacked[\"train\"],\n", " vbt.Param(np.arange(5, 50), 
condition=\"x < slow_window\"),\n", " vbt.Param(np.arange(5, 50)),\n", " data.index.freq,\n", " _execute_kwargs=dict(\n", " clear_cache=50,\n", " collect_garbage=50\n", " )\n", ")" ] }, { "cell_type": "code", "execution_count": null, "id": "63202321-0689-4aca-9e82-7ece4fd18a0e", "metadata": {}, "outputs": [], "source": [ "train_perf" ] }, { "cell_type": "code", "execution_count": null, "id": "75669ef5-ad2a-4122-8be9-c510ca252fb0", "metadata": {}, "outputs": [], "source": [ "train_perf.vbt.heatmap(\n", " x_level=\"fast_window\",\n", " y_level=\"slow_window\",\n", " slider_level=\"split_year\",\n", " symmetric=True\n", ").show_svg() # replace with show()" ] }, { "cell_type": "code", "execution_count": null, "id": "66734f37-624d-43ed-83ae-a9b33ace0085", "metadata": {}, "outputs": [], "source": [ "@njit\n", "def prox_median_nb(arr):\n", " if (~np.isnan(arr)).sum() < 20:\n", " return np.nan\n", " return np.nanmedian(arr)\n", "\n", "prox_perf_list = []\n", "for split_label, perf_sr in train_perf.groupby(\"split_year\"):\n", " perf_df = perf_sr.vbt.unstack_to_df(0, [1, 2])\n", " prox_perf_df = perf_df.vbt.proximity_apply(2, prox_median_nb)\n", " prox_perf_sr = prox_perf_df.stack([0, 1])\n", " prox_perf_list.append(prox_perf_sr.reindex(perf_sr.index))\n", "\n", "train_prox_perf = pd.concat(prox_perf_list)\n", "train_prox_perf" ] }, { "cell_type": "code", "execution_count": null, "id": "d64b2b46-598e-4211-8e66-fd2824156f5a", "metadata": {}, "outputs": [], "source": [ "train_prox_perf.vbt.heatmap(\n", " x_level=\"fast_window\",\n", " y_level=\"slow_window\",\n", " slider_level=\"split_year\",\n", " symmetric=True\n", ").show_svg() # replace with show()" ] }, { "cell_type": "code", "execution_count": null, "id": "a2bd48a4-74c9-4e9d-a2a4-c1a726ca1619", "metadata": {}, "outputs": [], "source": [ "best_params = train_prox_perf.groupby(\"split_year\").idxmax()\n", "best_params = train_prox_perf[best_params].index\n", "train_prox_perf[best_params]" ] }, { "cell_type": "code", 
"execution_count": null, "id": "c99e5a80-3fa4-453b-ad33-88c825f2dfa7", "metadata": {}, "outputs": [], "source": [ "test_perf = set_sma_crossover_perf(\n", " vbt.RepEval(\n", " \"test_close.iloc[:, [config_idx]]\", \n", " context=dict(test_close=close_stacked[\"test\"])\n", " ),\n", " vbt.Param(best_params.get_level_values(\"fast_window\"), level=0),\n", " vbt.Param(best_params.get_level_values(\"slow_window\"), level=0),\n", " data.index.freq\n", ")\n", "test_perf" ] }, { "cell_type": "code", "execution_count": null, "id": "ee16a258-2788-402b-bf2b-c065254c1c3e", "metadata": {}, "outputs": [], "source": [ "def get_index_sharpe(index):\n", " return data.loc[index].run(\"from_holding\").sharpe_ratio\n", "\n", "index_slices.xs(\"test\", level=\"set\").apply(get_index_sharpe)" ] }, { "cell_type": "markdown", "id": "dd73c429-db22-40ff-8a34-f773ae0e7762", "metadata": {}, "source": [ "#### Row stacking" ] }, { "cell_type": "code", "execution_count": null, "id": "e890c631-33b8-4822-914d-35432379cedf", "metadata": {}, "outputs": [], "source": [ "block_size = int(3.15 * len(data.index) ** (1 / 3))\n", "block_splitter = vbt.Splitter.from_rolling(\n", " data.index, \n", " length=block_size, \n", " offset=1,\n", " offset_anchor=\"prev_start\"\n", ")\n", "block_splitter.n_splits" ] }, { "cell_type": "code", "execution_count": null, "id": "5a04c2d4-c53a-4a42-9cf1-0f912b7df08e", "metadata": {}, "outputs": [], "source": [ "size = int(block_splitter.n_splits / block_size)\n", "sample_splitter = block_splitter.shuffle_splits(size=size, replace=True)\n", "sample_splitter.plot().show_svg()" ] }, { "cell_type": "code", "execution_count": null, "id": "42ae502d-3e72-458c-9d8a-09d475968013", "metadata": {}, "outputs": [], "source": [ "returns = data.returns\n", "sample_rets = sample_splitter.take(returns, into=\"stacked\", stack_axis=0)\n", "sample_rets" ] }, { "cell_type": "code", "execution_count": null, "id": "59eb80cc-0738-4024-873b-98009964c5d4", "metadata": {}, "outputs": [], 
"source": [ "sample_rets.index = data.index[:len(sample_rets)]\n", "sample_cumrets = data.close[0] * (sample_rets + 1).cumprod()\n", "sample_cumrets.vbt.plot().show_svg()" ] }, { "cell_type": "code", "execution_count": null, "id": "78e88fe1-8433-4ce8-9beb-4fc24878745a", "metadata": {}, "outputs": [], "source": [ "samples_rets_list = []\n", "for i in vbt.ProgressBar(range(1000)):\n", " sample_spl = block_splitter.shuffle_splits(size=size, replace=True)\n", " sample_rets = sample_spl.take(returns, into=\"stacked\", stack_axis=0)\n", " sample_rets.index = returns.index[:len(sample_rets)]\n", " sample_rets.name = i\n", " samples_rets_list.append(sample_rets)\n", "sample_rets_stacked = pd.concat(samples_rets_list, axis=1)" ] }, { "cell_type": "code", "execution_count": null, "id": "e909ffcc-d18f-4351-9626-714c81c18321", "metadata": {}, "outputs": [], "source": [ "sample_sharpe = sample_rets_stacked.vbt.returns.sharpe_ratio()\n", "sample_sharpe.vbt.boxplot(horizontal=True).show_svg()" ] }, { "cell_type": "code", "execution_count": null, "id": "0815456c-9b75-4ab8-b499-faa9947406ea", "metadata": {}, "outputs": [], "source": [ "sample_sharpe.quantile(0.025), sample_sharpe.quantile(0.975)" ] }, { "cell_type": "markdown", "id": "1fee36b8-a009-4b73-aeaa-18f918c92c55", "metadata": {}, "source": [ "### Applying" ] }, { "cell_type": "code", "execution_count": null, "id": "b8ee38db-2b98-488c-b53c-f539e0bccedb", "metadata": {}, "outputs": [], "source": [ "splitter.apply(\n", " get_total_return,\n", " vbt.Takeable(data.close),\n", " merge_func=\"concat\"\n", ")" ] }, { "cell_type": "code", "execution_count": null, "id": "6d86ce45-9b6b-43e2-8e2e-79944cd4f413", "metadata": {}, "outputs": [], "source": [ "splitter.apply(\n", " get_total_return,\n", " vbt.RepFunc(lambda range_: data.close[range_]),\n", " merge_func=\"concat\"\n", ")" ] }, { "cell_type": "code", "execution_count": null, "id": "7ae1a59f-4466-4383-8a51-668c24f4321e", "metadata": {}, "outputs": [], "source": [ "def 
get_total_return(range_, data):\n", " return data.returns[range_].vbt.returns.total()\n", "\n", "splitter.apply(\n", " get_total_return,\n", " vbt.Rep(\"range_\"),\n", " data,\n", " merge_func=\"concat\"\n", ")" ] }, { "cell_type": "code", "execution_count": null, "id": "35f59608-ee6f-4e16-a9a7-ad9dcaaab0cd", "metadata": {}, "outputs": [], "source": [ "def get_total_return(data):\n", " return data.returns.vbt.returns.total()\n", "\n", "splitter.apply(\n", " get_total_return,\n", " vbt.Takeable(data),\n", " merge_func=\"concat\"\n", ")" ] }, { "cell_type": "code", "execution_count": null, "id": "11b5fa1d-a6a7-4703-a897-be517ecc396b", "metadata": {}, "outputs": [], "source": [ "splitter.apply(\n", " get_total_return,\n", " vbt.Takeable(data),\n", " set_group_by=True,\n", " merge_func=\"concat\"\n", ")" ] }, { "cell_type": "code", "execution_count": null, "id": "62816646-c23f-4c12-8dfb-c0e327badeb6", "metadata": {}, "outputs": [], "source": [ "splitter.apply(\n", " get_total_return,\n", " vbt.Takeable(data),\n", " split=[2020, 2021],\n", " set_=\"train\",\n", " merge_func=\"concat\"\n", ")" ] }, { "cell_type": "code", "execution_count": null, "id": "f8f03eb7-2835-45c7-9879-7b951c7a4647", "metadata": {}, "outputs": [], "source": [ "train_perf = splitter.apply(\n", " sma_crossover_perf,\n", " vbt.Takeable(data),\n", " vbt.Param(np.arange(5, 50), condition=\"x < slow_window\"),\n", " vbt.Param(np.arange(5, 50)),\n", " _execute_kwargs=dict(\n", " clear_cache=50,\n", " collect_garbage=50\n", " ),\n", " set_=\"train\",\n", " merge_func=\"concat\",\n", ")" ] }, { "cell_type": "code", "execution_count": null, "id": "0ebb869c-1edf-491f-ad26-bcba207c0f26", "metadata": {}, "outputs": [], "source": [ "train_perf" ] }, { "cell_type": "code", "execution_count": null, "id": "261c2dd1-2f5e-480e-a2c8-7593ac26771b", "metadata": {}, "outputs": [], "source": [ "best_params = train_perf.groupby(\"split_year\").idxmax()\n", "best_params = train_perf[best_params].index\n", 
"train_perf[best_params]" ] }, { "cell_type": "code", "execution_count": null, "id": "1749022b-1d3c-4ccb-b6a1-9fe0a06e52a5", "metadata": {}, "outputs": [], "source": [ "best_fast_windows = best_params.get_level_values(\"fast_window\")\n", "best_slow_windows = best_params.get_level_values(\"slow_window\")\n", "\n", "test_perf = splitter.apply(\n", " sma_crossover_perf,\n", " vbt.Takeable(data),\n", " vbt.RepFunc(lambda split_idx: best_fast_windows[split_idx]),\n", " vbt.RepFunc(lambda split_idx: best_slow_windows[split_idx]),\n", " set_=\"test\",\n", " merge_func=\"concat\"\n", ")\n", "test_perf" ] }, { "cell_type": "markdown", "id": "432e35f6-98b2-4278-9616-54a2206eb182", "metadata": {}, "source": [ "#### Iteration schemes" ] }, { "cell_type": "code", "execution_count": null, "id": "e079d58d-f171-41ef-b685-80b502cb31b3", "metadata": {}, "outputs": [], "source": [ "def cv_sma_crossover(\n", " data, \n", " fast_windows, \n", " slow_windows, \n", " split_idx,\n", " set_idx,\n", " train_perf_list\n", "):\n", " if set_idx == 0:\n", " train_perf = sma_crossover_perf(\n", " data,\n", " vbt.Param(fast_windows, condition=\"x < slow_window\"),\n", " vbt.Param(slow_windows),\n", " _execute_kwargs=dict(\n", " clear_cache=50,\n", " collect_garbage=50\n", " )\n", " )\n", " train_perf_list.append(train_perf)\n", " best_params = train_perf.idxmax()\n", " return train_perf[[best_params]]\n", " else:\n", " train_perf = train_perf_list[split_idx]\n", " best_params = train_perf.idxmax()\n", " test_perf = sma_crossover_perf(\n", " data,\n", " vbt.Param([best_params[0]]),\n", " vbt.Param([best_params[1]]),\n", " )\n", " return test_perf\n", " \n", "train_perf_list = []\n", "cv_perf = splitter.apply(\n", " cv_sma_crossover,\n", " vbt.Takeable(data),\n", " np.arange(5, 50),\n", " np.arange(5, 50),\n", " vbt.Rep(\"split_idx\"),\n", " vbt.Rep(\"set_idx\"),\n", " train_perf_list,\n", " iteration=\"set_major\",\n", " merge_func=\"concat\",\n", ")" ] }, { "cell_type": "code", 
"execution_count": null, "id": "c7f1f4a1-fe05-44c2-b2c3-fcbb432bdeaa", "metadata": {}, "outputs": [], "source": [ "train_perf = pd.concat(train_perf_list, keys=splitter.split_labels)\n", "train_perf" ] }, { "cell_type": "code", "execution_count": null, "id": "dd134bf0-8c5d-4365-871b-a5a0aa232e4b", "metadata": {}, "outputs": [], "source": [ "cv_perf" ] }, { "cell_type": "markdown", "id": "8da1d36b-6b51-4429-be5c-ed0acbcfdf24", "metadata": {}, "source": [ "#### Merging" ] }, { "cell_type": "code", "execution_count": null, "id": "23da7a75-db32-451d-8a7d-8bf729584e99", "metadata": {}, "outputs": [], "source": [ "def get_entries_and_exits(data, fast_window, slow_window):\n", " fast_sma = data.run(\"sma\", fast_window, short_name=\"fast_sma\")\n", " slow_sma = data.run(\"sma\", slow_window, short_name=\"slow_sma\")\n", " entries = fast_sma.real_crossed_above(slow_sma)\n", " exits = fast_sma.real_crossed_below(slow_sma)\n", " return entries, exits\n", "\n", "entries, exits = splitter.apply(\n", " get_entries_and_exits,\n", " vbt.Takeable(data),\n", " 20,\n", " 30,\n", " merge_func=\"column_stack\"\n", ")\n", "\n", "print(entries)" ] }, { "cell_type": "code", "execution_count": null, "id": "2822ee69-5a35-4700-b025-442d2e9396fc", "metadata": {}, "outputs": [], "source": [ "entries, exits = splitter.apply(\n", " get_entries_and_exits,\n", " vbt.Takeable(data),\n", " 20,\n", " 30,\n", " merge_all=False,\n", " merge_func=\"row_stack\"\n", ")\n", "\n", "entries.loc[2018]" ] }, { "cell_type": "code", "execution_count": null, "id": "80266dbb-b700-4389-8956-5a253b18b816", "metadata": {}, "outputs": [], "source": [ "def get_signal_count(*args, **kwargs):\n", " entries, exits = get_entries_and_exits(*args, **kwargs)\n", " return entries.vbt.signals.total(), exits.vbt.signals.total()\n", "\n", "entry_count, exit_count = splitter.apply(\n", " get_signal_count,\n", " vbt.Takeable(data),\n", " 20,\n", " 30,\n", " merge_func=\"concat\",\n", " attach_bounds=\"index\"\n", ")\n", 
"entry_count" ] }, { "cell_type": "code", "execution_count": null, "id": "56842a7c-290c-41ca-8e39-2fe148b9a352", "metadata": {}, "outputs": [], "source": [ "def plot_entries_and_exits(results, data, keys):\n", " set_labels = keys.get_level_values(\"set\")\n", " fig = data.plot(plot_volume=False)\n", " train_seen = False\n", " test_seen = False\n", "\n", " for i in range(len(results)):\n", " entries, exits = results[i]\n", " set_label = set_labels[i]\n", " if set_label == \"train\":\n", " entries.vbt.signals.plot_as_entries(\n", " data.close,\n", " trace_kwargs=dict(\n", " marker=dict(color=\"limegreen\"), \n", " name=f\"Entries ({set_label})\",\n", " legendgroup=f\"Entries ({set_label})\",\n", " showlegend=not train_seen\n", " ),\n", " fig=fig\n", " ),\n", " exits.vbt.signals.plot_as_exits(\n", " data.close,\n", " trace_kwargs=dict(\n", " marker=dict(color=\"orange\"), \n", " name=f\"Exits ({set_label})\",\n", " legendgroup=f\"Exits ({set_label})\",\n", " showlegend=not train_seen\n", " ),\n", " fig=fig\n", " )\n", " train_seen = True\n", " else:\n", " entries.vbt.signals.plot_as_entries(\n", " data.close,\n", " trace_kwargs=dict(\n", " marker=dict(color=\"skyblue\"), \n", " name=f\"Entries ({set_label})\",\n", " legendgroup=f\"Entries ({set_label})\",\n", " showlegend=not test_seen\n", " ),\n", " fig=fig\n", " ),\n", " exits.vbt.signals.plot_as_exits(\n", " data.close,\n", " trace_kwargs=dict(\n", " marker=dict(color=\"magenta\"), \n", " name=f\"Exits ({set_label})\",\n", " legendgroup=f\"Entries ({set_label})\",\n", " showlegend=not test_seen\n", " ),\n", " fig=fig\n", " )\n", " test_seen = True\n", " return fig\n", "\n", "splitter.apply(\n", " get_entries_and_exits,\n", " vbt.Takeable(data),\n", " 20,\n", " 30,\n", " merge_func=plot_entries_and_exits,\n", " merge_kwargs=dict(data=data, keys=vbt.Rep(\"keys\")),\n", ").show_svg()" ] }, { "cell_type": "markdown", "id": "93378394-5bd8-49e4-ad0a-e9d0eb25f58d", "metadata": {}, "source": [ "#### Decorators" ] }, { 
"cell_type": "code", "execution_count": null, "id": "63999a23-9b3e-41e9-ba9e-aefc4775c60c", "metadata": {}, "outputs": [], "source": [ "@vbt.split(splitter=splitter)\n", "def get_split_total_return(data):\n", " return data.returns.vbt.returns.total()\n", "\n", "get_split_total_return(vbt.Takeable(data))" ] }, { "cell_type": "code", "execution_count": null, "id": "47f5be24-bd93-4f16-9849-2774d7d29617", "metadata": {}, "outputs": [], "source": [ "def get_total_return(data):\n", " return data.returns.vbt.returns.total()\n", "\n", "get_split_total_return = vbt.split(\n", " get_total_return, \n", " splitter=splitter\n", ")\n", "get_split_total_return(vbt.Takeable(data))" ] }, { "cell_type": "code", "execution_count": null, "id": "e508211b-3517-4925-ae8d-f248e94579d3", "metadata": {}, "outputs": [], "source": [ "@vbt.split\n", "def get_split_total_return(data):\n", " return data.returns.vbt.returns.total()\n", "\n", "get_split_total_return(vbt.Takeable(data), _splitter=splitter)" ] }, { "cell_type": "code", "execution_count": null, "id": "bfaab288-43f0-454e-88d2-4b8b36b3932c", "metadata": {}, "outputs": [], "source": [ "get_split_total_return(\n", " vbt.Takeable(data.loc[\"2020\":\"2020\"]), \n", " _splitter=\"from_rolling\", \n", " _splitter_kwargs=dict(length=\"30d\")\n", ")" ] }, { "cell_type": "code", "execution_count": null, "id": "20d941b7-4419-4dfb-81f0-72f1b0839888", "metadata": {}, "outputs": [], "source": [ "get_total_return_by_month = vbt.split(\n", " get_total_return,\n", " splitter=\"from_grouper\", \n", " splitter_kwargs=dict(by=vbt.RepEval(\"index.to_period('M')\")),\n", " takeable_args=[\"data\"]\n", ")\n", "\n", "get_total_return_by_month(data.loc[\"2020\":\"2020\"])" ] }, { "cell_type": "code", "execution_count": null, "id": "d61a83eb-c72c-4257-a195-a34a82378f56", "metadata": {}, "outputs": [], "source": [ "cv_sma_crossover_perf = vbt.split(\n", " sma_crossover_perf, \n", " splitter=\"from_single\",\n", " splitter_kwargs=dict(split=0.6, 
set_labels=[\"train\", \"test\"]),\n", " takeable_args=[\"data\"],\n", " merge_func=\"concat\",\n", ")" ] }, { "cell_type": "code", "execution_count": null, "id": "27387cd2-f223-4cbd-b2e5-65aef60e512c", "metadata": {}, "outputs": [], "source": [ "train_perf = cv_sma_crossover_perf(\n", " data.loc[\"2020\":\"2021\"],\n", " vbt.Param(np.arange(5, 50), condition=\"x < slow_window\"),\n", " vbt.Param(np.arange(5, 50)),\n", " p_execute_kwargs=dict(\n", " clear_cache=50,\n", " collect_garbage=50\n", " ),\n", " _forward_kwargs_as={\n", " \"p_execute_kwargs\": \"_execute_kwargs\"\n", " },\n", " _apply_kwargs=dict(set_=\"train\")\n", ")" ] }, { "cell_type": "code", "execution_count": null, "id": "9878d18a-7b05-4b83-9ca5-8d291dae2d5d", "metadata": {}, "outputs": [], "source": [ "train_perf" ] }, { "cell_type": "code", "execution_count": null, "id": "9b4665fe-f78c-4257-922d-f05b351171ec", "metadata": {}, "outputs": [], "source": [ "test_perf = cv_sma_crossover_perf(\n", " data.loc[\"2020\":\"2021\"],\n", " train_perf.idxmax()[0],\n", " train_perf.idxmax()[1],\n", " _apply_kwargs=dict(set_=\"test\")\n", ")" ] }, { "cell_type": "code", "execution_count": null, "id": "7a921ee5-4dd4-4c8e-be52-430a912059a6", "metadata": {}, "outputs": [], "source": [ "test_perf" ] }, { "cell_type": "code", "execution_count": null, "id": "8ebf0a46-778d-4186-bc7a-dfa4e897c872", "metadata": {}, "outputs": [], "source": [ "@njit(nogil=True)\n", "def sma_crossover_perf_nb(close, fast_window, slow_window, ann_factor):\n", " fast_sma = vbt.nb.ma_nb(close, fast_window)\n", " slow_sma = vbt.nb.ma_nb(close, slow_window)\n", " entries = vbt.nb.crossed_above_nb(fast_sma, slow_sma)\n", " exits = vbt.nb.crossed_above_nb(slow_sma, fast_sma)\n", " sim_out = vbt.pf_nb.from_signals_nb(\n", " target_shape=close.shape,\n", " group_lens=np.full(close.shape[1], 1),\n", " close=close,\n", " long_entries=entries,\n", " short_entries=exits,\n", " save_returns=True\n", " )\n", " return vbt.ret_nb.sharpe_ratio_nb(\n", " 
sim_out.in_outputs.returns, \n", " ann_factor\n", " )" ] }, { "cell_type": "code", "execution_count": null, "id": "01f161db-2548-4627-a385-40a2d84c9034", "metadata": {}, "outputs": [], "source": [ "sma_crossover_perf_nb(vbt.to_2d_array(data.close), 20, 30, 365)" ] }, { "cell_type": "code", "execution_count": null, "id": "9e35b428-f5e6-4000-afc7-8ef9bd3aeffc", "metadata": {}, "outputs": [], "source": [ "cv_sma_crossover_perf = vbt.cv_split(\n", " sma_crossover_perf_nb,\n", " splitter=\"from_rolling\",\n", " splitter_kwargs=dict(\n", " length=360, \n", " split=0.5, \n", " set_labels=[\"train\", \"test\"]\n", " ),\n", " takeable_args=[\"close\"],\n", " merge_func=\"concat\",\n", " parameterized_kwargs=dict(\n", " engine=\"dask\", \n", " chunk_len=\"auto\",\n", " )\n", ")\n", "\n", "grid_perf, best_perf = cv_sma_crossover_perf(\n", " vbt.to_2d_array(data.close),\n", " vbt.Param(np.arange(5, 50), condition=\"x < slow_window\"),\n", " vbt.Param(np.arange(5, 50)),\n", " pd.Timedelta(days=365) // data.index.freq,\n", " _merge_kwargs=dict(wrapper=data.symbol_wrapper),\n", " _index=data.index,\n", " _return_grid=\"all\"\n", ")" ] }, { "cell_type": "code", "execution_count": null, "id": "ca4288fd-79ea-4b0b-a007-7ca2d8d85472", "metadata": {}, "outputs": [], "source": [ "grid_perf" ] }, { "cell_type": "code", "execution_count": null, "id": "75fd72d5-7004-46a5-a47d-d45ee3b95381", "metadata": {}, "outputs": [], "source": [ "best_perf" ] }, { "cell_type": "code", "execution_count": null, "id": "87e59f91-a076-4b75-a1a6-64ad04f35cbe", "metadata": {}, "outputs": [], "source": [ "best_train_perf = best_perf.xs(\"train\", level=\"set\")\n", "best_test_perf = best_perf.xs(\"test\", level=\"set\")\n", "best_train_perf.corr(best_test_perf)" ] }, { "cell_type": "code", "execution_count": null, "id": "e548f6f6-ac62-46c3-b6ef-40467a4564fe", "metadata": {}, "outputs": [], "source": [ "param_cross_set_corr = grid_perf\\\n", " .unstack(\"set\")\\\n", " .groupby([\"fast_window\", 
\"slow_window\"])\\\n", " .apply(lambda x: x[\"train\"].corr(x[\"test\"]))\n", "param_cross_set_corr.vbt.heatmap(symmetric=True).show_svg()" ] }, { "cell_type": "code", "execution_count": null, "id": "4a40f160-6159-4b52-bc15-13f68c36be31", "metadata": {}, "outputs": [], "source": [ "grid_test_perf = grid_perf.xs(\"test\", level=\"set\")\n", "grid_df = grid_test_perf.rename(\"grid\").reset_index()\n", "del grid_df[\"fast_window\"]\n", "del grid_df[\"slow_window\"]\n", "best_df = best_test_perf.rename(\"best\").reset_index()\n", "del best_df[\"fast_window\"]\n", "del best_df[\"slow_window\"]\n", "merged_df = pd.merge(grid_df, best_df, on=[\"split\", \"symbol\"])\n", "grid_better_mask = merged_df[\"grid\"] > merged_df[\"best\"]\n", "grid_better_mask.index = grid_test_perf.index\n", "grid_better_cnt = grid_better_mask.groupby([\"split\", \"symbol\"]).mean()\n", "grid_better_cnt" ] }, { "cell_type": "code", "execution_count": null, "id": "c0f127bb-3b1a-4425-a1bc-ec6be6a27f3e", "metadata": {}, "outputs": [], "source": [ "cv_splitter = cv_sma_crossover_perf(\n", " _index=data.index, \n", " _return_splitter=True\n", ")\n", "stacked_close = cv_splitter.take(\n", " data.close, \n", " into=\"reset_stacked\",\n", " set_=\"test\"\n", ")\n", "hold_pf = vbt.Portfolio.from_holding(stacked_close, freq=\"daily\")\n", "hold_perf = hold_pf.sharpe_ratio\n", "hold_perf" ] }, { "cell_type": "markdown", "id": "935ca582-ef09-40f9-b6ff-c303c98989b1", "metadata": {}, "source": [ "### Modeling" ] }, { "cell_type": "code", "execution_count": null, "id": "0f9cc747-d0b4-470f-a262-7cfc7ad1d62e", "metadata": {}, "outputs": [], "source": [ "X = data.run(\"talib\")\n", "X.shape" ] }, { "cell_type": "code", "execution_count": null, "id": "7fdb5b1c-e4cc-4472-bdb1-4629492306e7", "metadata": {}, "outputs": [], "source": [ "trendlb = data.run(\"trendlb\", 1.0, 0.5, mode=\"binary\")\n", "y = trendlb.labels\n", "y.shape" ] }, { "cell_type": "code", "execution_count": null, "id": 
"ce1e42f0-7f1f-474c-865b-2fabd6dfc907", "metadata": {}, "outputs": [], "source": [ "X = X.replace([-np.inf, np.inf], np.nan)\n", "invalid_column_mask = X.isnull().all(axis=0) | (X.nunique() == 1)\n", "X = X.loc[:, ~invalid_column_mask]\n", "invalid_row_mask = X.isnull().any(axis=1) | y.isnull()\n", "X = X.loc[~invalid_row_mask]\n", "y = y.loc[~invalid_row_mask]\n", "X.shape, y.shape" ] }, { "cell_type": "code", "execution_count": null, "id": "bf1fe438-bf27-4fc3-9d9b-5c71a7b046ae", "metadata": {}, "outputs": [], "source": [ "from sklearn.ensemble import RandomForestClassifier\n", "\n", "clf = RandomForestClassifier(random_state=42)" ] }, { "cell_type": "code", "execution_count": null, "id": "f64bbb84-5340-4554-a78c-5a58bdbe5010", "metadata": {}, "outputs": [], "source": [ "cv = vbt.SplitterCV(\n", " \"from_expanding\", \n", " min_length=360, \n", " offset=180, \n", " split=-180,\n", " set_labels=[\"train\", \"test\"]\n", ")\n", "\n", "cv_splitter = cv.get_splitter(X)\n", "cv_splitter.plot().show_svg()" ] }, { "cell_type": "code", "execution_count": null, "id": "5c26f554-c0ff-4973-a625-d95549bbbe36", "metadata": {}, "outputs": [], "source": [ "from sklearn.model_selection import cross_val_score\n", "\n", "cross_val_score(clf, X, y, cv=cv, scoring=\"accuracy\")" ] }, { "cell_type": "code", "execution_count": null, "id": "580ae7c7-d108-454f-903f-937c8dcf058e", "metadata": {}, "outputs": [], "source": [ "X_slices = cv_splitter.take(X)\n", "y_slices = cv_splitter.take(y)" ] }, { "cell_type": "code", "execution_count": null, "id": "b9186a59-798a-404a-b763-f6360d098dc9", "metadata": {}, "outputs": [], "source": [ "test_labels = []\n", "test_preds = []\n", "for split in X_slices.index.unique(level=\"split\"):\n", " X_train_slice = X_slices[(split, \"train\")]\n", " y_train_slice = y_slices[(split, \"train\")]\n", " X_test_slice = X_slices[(split, \"test\")]\n", " y_test_slice = y_slices[(split, \"test\")]\n", " slice_clf = clf.fit(X_train_slice, y_train_slice)\n", " 
test_pred = slice_clf.predict(X_test_slice)\n", " test_pred = pd.Series(test_pred, index=y_test_slice.index)\n", " test_labels.append(y_test_slice)\n", " test_preds.append(test_pred)\n", " \n", "test_labels = pd.concat(test_labels).rename(\"labels\")\n", "test_preds = pd.concat(test_preds).rename(\"preds\")" ] }, { "cell_type": "code", "execution_count": null, "id": "411d961d-7e24-427a-92e1-07c6fbc52f4d", "metadata": {}, "outputs": [], "source": [ "data.close.vbt.overlay_with_heatmap(test_labels).show_svg()" ] }, { "cell_type": "code", "execution_count": null, "id": "a904c9f0-9514-4c17-bc14-a2d9980debe6", "metadata": {}, "outputs": [], "source": [ "data.close.vbt.overlay_with_heatmap(test_preds).show_svg()" ] }, { "cell_type": "code", "execution_count": null, "id": "c0592fce-7a89-4fc6-9206-1ca7b3a51700", "metadata": {}, "outputs": [], "source": [ "pf = vbt.Portfolio.from_signals(\n", " data.close[test_preds.index], \n", " test_preds == 1, \n", " test_preds == 0, \n", " direction=\"both\"\n", ")\n", "pf.stats()" ] }, { "cell_type": "code", "execution_count": null, "id": "70cc579d-fb48-41ff-935a-1aa035c8a1f8", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.8" } }, "nbformat": 4, "nbformat_minor": 5 }