3246 lines
81 KiB
Plaintext
3246 lines
81 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "c69786c2-7c5b-4b1e-bd0c-52380c8df261",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Cross-validation"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "b2751478-95ce-47d2-9e80-4d47ed0c7b36",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from vectorbtpro import *\n",
|
|
"# whats_imported()\n",
|
|
"\n",
|
|
"vbt.settings.set_theme(\"dark\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "6b45492f-d152-43e2-88a9-ab95bbffd546",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"data = vbt.BinanceData.pull(\"BTCUSDT\", end=\"2022-11-01 UTC\")\n",
|
|
"data.index"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "45122110-905e-4549-92f4-205c98bd9faa",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"@vbt.parameterized(merge_func=\"concat\")\n",
|
|
"def sma_crossover_perf(data, fast_window, slow_window):\n",
|
|
" fast_sma = data.run(\"sma\", fast_window, short_name=\"fast_sma\")\n",
|
|
" slow_sma = data.run(\"sma\", slow_window, short_name=\"slow_sma\")\n",
|
|
" entries = fast_sma.real_crossed_above(slow_sma)\n",
|
|
" exits = fast_sma.real_crossed_below(slow_sma)\n",
|
|
" pf = vbt.Portfolio.from_signals(\n",
|
|
" data, entries, exits, direction=\"both\")\n",
|
|
" return pf.sharpe_ratio"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "93b5c9e1-7d18-4268-9c2f-91a5404bba0a",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"perf = sma_crossover_perf(\n",
|
|
" data[\"2020\":\"2020\"],\n",
|
|
" vbt.Param(np.arange(5, 50), condition=\"x < slow_window\"),\n",
|
|
" vbt.Param(np.arange(5, 50)),\n",
|
|
" _execute_kwargs=dict(\n",
|
|
" clear_cache=50,\n",
|
|
" collect_garbage=50\n",
|
|
" )\n",
|
|
")\n",
|
|
"perf"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "804d9d36-d0c6-46ab-b0c4-5d00285f6b36",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"perf.sort_values(ascending=False)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "5f9a5d58-aebe-4a34-8847-0bc91817e083",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"best_fast_window, best_slow_window = perf.idxmax()\n",
|
|
"sma_crossover_perf(\n",
|
|
" data[\"2021\":\"2021\"],\n",
|
|
" best_fast_window,\n",
|
|
" best_slow_window\n",
|
|
")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "933ad509-009a-4f7a-9de2-6e04d23a9c6f",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"data[\"2021\":\"2021\"].run(\"from_holding\").sharpe_ratio"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "075e0d7a-bb55-4d69-bcbe-277497058dc2",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"start_index = data.index[0]\n",
|
|
"period = pd.Timedelta(days=180)\n",
|
|
"all_is_bounds = {}\n",
|
|
"all_is_bl_perf = {}\n",
|
|
"all_is_perf = {}\n",
|
|
"all_oos_bounds = {}\n",
|
|
"all_oos_bl_perf = {}\n",
|
|
"all_oos_perf = {}\n",
|
|
"split_idx = 0\n",
|
|
"period_idx = 0\n",
|
|
"\n",
|
|
"with vbt.ProgressBar() as pbar:\n",
|
|
" while start_index + 2 * period <= data.index[-1]:\n",
|
|
" pbar.set_prefix(str(start_index))\n",
|
|
"\n",
|
|
" is_start_index = start_index\n",
|
|
" is_end_index = start_index + period - pd.Timedelta(nanoseconds=1)\n",
|
|
" is_data = data[is_start_index : is_end_index]\n",
|
|
" is_bl_perf = is_data.run(\"from_holding\").sharpe_ratio\n",
|
|
" is_perf = sma_crossover_perf(\n",
|
|
" is_data,\n",
|
|
" vbt.Param(np.arange(5, 50), condition=\"x < slow_window\"),\n",
|
|
" vbt.Param(np.arange(5, 50)),\n",
|
|
" _execute_kwargs=dict(\n",
|
|
" clear_cache=50,\n",
|
|
" collect_garbage=50\n",
|
|
" )\n",
|
|
" )\n",
|
|
"\n",
|
|
" oos_start_index = start_index + period\n",
|
|
" oos_end_index = start_index + 2 * period - pd.Timedelta(nanoseconds=1)\n",
|
|
" oos_data = data[oos_start_index : oos_end_index]\n",
|
|
" oos_bl_perf = oos_data.run(\"from_holding\").sharpe_ratio\n",
|
|
" best_fw, best_sw = is_perf.idxmax()\n",
|
|
" oos_perf = sma_crossover_perf(oos_data, best_fw, best_sw)\n",
|
|
" oos_perf_index = is_perf.index[is_perf.index == (best_fw, best_sw)]\n",
|
|
" oos_perf = pd.Series([oos_perf], index=oos_perf_index)\n",
|
|
"\n",
|
|
" all_is_bounds[period_idx] = (is_start_index, is_end_index)\n",
|
|
" all_oos_bounds[period_idx + 1] = (oos_start_index, oos_end_index)\n",
|
|
" all_is_bl_perf[(split_idx, period_idx)] = is_bl_perf\n",
|
|
" all_oos_bl_perf[(split_idx, period_idx + 1)] = oos_bl_perf\n",
|
|
" all_is_perf[(split_idx, period_idx)] = is_perf\n",
|
|
" all_oos_perf[(split_idx, period_idx + 1)] = oos_perf\n",
|
|
" start_index = start_index + period\n",
|
|
" split_idx += 1\n",
|
|
" period_idx += 1\n",
|
|
" pbar.update()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "e9cb8334-c44c-4580-819a-6eb501e6bb1b",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"is_period_ranges = pd.DataFrame.from_dict(\n",
|
|
" all_is_bounds, \n",
|
|
" orient=\"index\",\n",
|
|
" columns=[\"start\", \"end\"]\n",
|
|
")\n",
|
|
"is_period_ranges.index.name = \"period\"\n",
|
|
"oos_period_ranges = pd.DataFrame.from_dict(\n",
|
|
" all_oos_bounds, \n",
|
|
" orient=\"index\",\n",
|
|
" columns=[\"start\", \"end\"]\n",
|
|
")\n",
|
|
"oos_period_ranges.index.name = \"period\"\n",
|
|
"period_ranges = pd.concat((is_period_ranges, oos_period_ranges))\n",
|
|
"period_ranges = period_ranges.drop_duplicates()\n",
|
|
"print(period_ranges)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "9800c089-7514-4a16-a72c-65ab7455ae26",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"is_bl_perf = pd.Series(all_is_bl_perf)\n",
|
|
"is_bl_perf.index.names = [\"split\", \"period\"]\n",
|
|
"oos_bl_perf = pd.Series(all_oos_bl_perf)\n",
|
|
"oos_bl_perf.index.names = [\"split\", \"period\"]\n",
|
|
"bl_perf = pd.concat((\n",
|
|
" is_bl_perf.vbt.select_levels(\"period\"), \n",
|
|
" oos_bl_perf.vbt.select_levels(\"period\")\n",
|
|
"))\n",
|
|
"bl_perf = bl_perf.drop_duplicates()\n",
|
|
"bl_perf"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "44f04653-946d-47ec-8857-f063f07e7cc3",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"is_perf = pd.concat(all_is_perf, names=[\"split\", \"period\"])\n",
|
|
"is_perf"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "f20ae091-36b8-4811-b789-2a99dc9f0f81",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"oos_perf = pd.concat(all_oos_perf, names=[\"split\", \"period\"])\n",
|
|
"oos_perf"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "e73f8adc-bf80-4955-a86e-ebcb5f8254b5",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"is_best_mask = is_perf.index.vbt.drop_levels(\"period\").isin(\n",
|
|
" oos_perf.index.vbt.drop_levels(\"period\"))\n",
|
|
"is_best_perf = is_perf[is_best_mask]\n",
|
|
"is_best_perf"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "b77ffdd0-dbd2-4473-9eb4-ad7d36fc7625",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"print(pd.concat((\n",
|
|
" is_perf.describe(),\n",
|
|
" is_best_perf.describe(),\n",
|
|
" is_bl_perf.describe(),\n",
|
|
" oos_perf.describe(),\n",
|
|
" oos_bl_perf.describe()\n",
|
|
"), axis=1, keys=[\n",
|
|
" \"IS\", \n",
|
|
" \"IS (Best)\", \n",
|
|
" \"IS (Baseline)\", \n",
|
|
" \"OOS (Test)\", \n",
|
|
" \"OOS (Baseline)\"\n",
|
|
"]))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "a0f37d37-8051-4f48-b6bd-ad9ee882bd0b",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"fig = is_perf.vbt.boxplot(\n",
|
|
" by_level=\"period\",\n",
|
|
" trace_kwargs=dict(\n",
|
|
" line=dict(color=\"lightskyblue\"), \n",
|
|
" opacity=0.4,\n",
|
|
" showlegend=False\n",
|
|
" ),\n",
|
|
" xaxis_title=\"Period\", \n",
|
|
" yaxis_title=\"Sharpe\",\n",
|
|
")\n",
|
|
"fig = is_best_perf.vbt.select_levels(\"period\").vbt.plot(\n",
|
|
" trace_kwargs=dict(\n",
|
|
" name=\"Best\", \n",
|
|
" line=dict(color=\"limegreen\", dash=\"dash\")\n",
|
|
" ), \n",
|
|
" fig=fig\n",
|
|
")\n",
|
|
"fig = bl_perf.vbt.plot(\n",
|
|
" trace_kwargs=dict(\n",
|
|
" name=\"Baseline\", \n",
|
|
" line=dict(color=\"orange\", dash=\"dash\")\n",
|
|
" ), \n",
|
|
" fig=fig\n",
|
|
")\n",
|
|
"fig = oos_perf.vbt.select_levels(\"period\").vbt.plot(\n",
|
|
" trace_kwargs=dict(\n",
|
|
" name=\"Test\", \n",
|
|
" line=dict(color=\"orangered\")\n",
|
|
" ), \n",
|
|
" fig=fig\n",
|
|
")\n",
|
|
"fig.show_svg()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "a3e52247-958a-40ba-af30-7a5f1a55b744",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"is_perf_split6 = is_perf.xs(6, level=\"split\")\n",
|
|
"is_perf_split6.describe()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "53a0d648-a49e-48c5-b7a2-23fb8b18db03",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"first_left_bound = period_ranges.loc[6, \"start\"]\n",
|
|
"first_right_bound = period_ranges.loc[6, \"end\"]\n",
|
|
"data[first_left_bound : first_right_bound].plot().show_svg()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "11a80453-5f53-4a7e-92b8-79834dd546af",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"oos_perf.xs(6, level=\"period\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "8b01f9be-a5b8-433d-aab3-c0c51219ce56",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"is_perf_split6.quantile(0.25)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "c0b946fb-81de-4183-9d47-9defc4f0f44e",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Splitter"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "113d8739-60b1-40d3-b031-984d7c7adf17",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"splitter = vbt.Splitter.from_rolling(\n",
|
|
" data.index, \n",
|
|
" length=360, \n",
|
|
" split=0.5,\n",
|
|
" set_labels=[\"IS\", \"OOS\"]\n",
|
|
")\n",
|
|
"splitter.plot().show_svg()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "9e0edb2e-8e5b-4554-8c3b-8a84b54279fe",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Schema"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "92af91ef-5b3e-4869-9ffd-cf5451714e94",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"print(splitter.splits)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "31376ef0-288f-461e-8fa7-2023c85248bf",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"splitter.index"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "ed969ea9-de5b-4501-9093-7628fb3241ae",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"splitter.wrapper.index"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "c204a8e8-a640-4056-bbdd-2bfb81bbd7a7",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"splitter.wrapper.columns"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "ac099a17-7a4a-4227-8782-f71f089e5943",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"oos_splitter = splitter[\"OOS\"]\n",
|
|
"print(oos_splitter.splits)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "f043f402-5a79-4a74-a355-12bac03e0235",
|
|
"metadata": {},
|
|
"source": [
|
|
"#### Range format"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "87ab6012-b22b-4a7d-81f7-0b2d8f6ffce9",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"index = vbt.date_range(\"2020\", periods=14)\n",
|
|
"index[slice(1, 7)]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "8c6462f8-0f46-4237-a499-b63cd961b55b",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"index[1], index[6]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "c2289e7e-6dbc-46cd-9026-09716dd283c4",
|
|
"metadata": {},
|
|
"source": [
|
|
"##### Relative"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "980722dd-e524-4ab7-9e75-4871fdbcf31d",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"rel_range = vbt.RelRange(offset=10, length=40)\n",
|
|
"rel_range"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "9e6c987f-ead6-47f2-978a-2c46bbe995b0",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"rel_range.to_slice(total_len=len(splitter.index), prev_end=100)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "f835ba92-9d19-484f-aead-6c11c99a4ad6",
|
|
"metadata": {},
|
|
"source": [
|
|
"#### Array format"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "41c25ca4-bf22-4e11-bade-d115f4b692f0",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"index = vbt.date_range(\"2020\", \"2021\", freq=\"1min\")\n",
|
|
"range_ = np.arange(len(index))\n",
|
|
"range_.nbytes / 1024 / 1024"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "91d67769-695f-4a29-83f6-1b09d77f9397",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"range_ = np.full(len(index), True)\n",
|
|
"range_.nbytes / 1024 / 1024"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "95a932ae-317d-4168-81ba-4045716d6eda",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"splitter.splits_arr.dtype"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "f73dab8c-8488-48bc-8824-9f263116fa9b",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"id(slice(0, 180, None))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "328d2ec2-d2cc-4550-b0ae-273287cdd05f",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"range_00 = np.arange(0, 5)\n",
|
|
"range_01 = np.arange(5, 15)\n",
|
|
"range_10 = np.arange(15, 30)\n",
|
|
"range_11 = np.arange(30, 50)\n",
|
|
"\n",
|
|
"ind_splitter = vbt.Splitter.from_splits(\n",
|
|
" data.index,\n",
|
|
" [[range_00, range_01], [range_10, range_11]],\n",
|
|
" fix_ranges=False\n",
|
|
")\n",
|
|
"print(ind_splitter.splits)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "deae0ac0-0218-4835-aedd-6dbc5696cc75",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"ind_splitter.splits.loc[0, \"set_1\"]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "35f60dad-3a36-4c62-b07d-c36fb7a1b9da",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"ind_splitter.splits.loc[0, \"set_1\"].range_"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "6326885f-9be3-4416-a868-187bfc2808d6",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Preparation"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "79a85a1f-eae2-49ff-b239-574ff90eadc4",
|
|
"metadata": {},
|
|
"source": [
|
|
"#### Splits"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "acad6284-91bb-48d0-86bd-f3324236fa40",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"vbt.Splitter.split_range(\n",
|
|
" slice(None),\n",
|
|
" (vbt.RelRange(length=0.75), vbt.RelRange()),\n",
|
|
" index=data.index\n",
|
|
")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "4674b573-34fd-4d15-bc81-0adda3784086",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"splitter.split_range(\n",
|
|
" slice(None),\n",
|
|
" (vbt.RelRange(length=0.75), vbt.RelRange())\n",
|
|
")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "244f8814-d37e-4804-97fc-4586223f308d",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"data[slice(0, 1426, None)]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "a2637af2-a0d0-47bf-bf97-a9cfc9143a06",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"vbt.Splitter.split_range(\n",
|
|
" slice(None), \n",
|
|
" 0.75, \n",
|
|
" index=data.index\n",
|
|
")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "d79151bf-1d01-436b-a4b4-3f73e1016e4f",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"vbt.Splitter.split_range(\n",
|
|
" slice(None), \n",
|
|
" -0.25,\n",
|
|
" index=data.index\n",
|
|
")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "ab0e6009-1b0e-4f0f-838a-cfe4a87f7406",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"int(0.75 * len(data.index))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "c87ffde6-b565-457a-bb97-0be21789705e",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"len(data.index) - int(0.25 * len(data.index))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "653cc6d7-2236-4b12-ba92-a115cab15656",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"vbt.Splitter.split_range(\n",
|
|
" slice(None), \n",
|
|
" (vbt.RelRange(), vbt.RelRange(length=0.25)),\n",
|
|
" backwards=True,\n",
|
|
" index=data.index\n",
|
|
")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "0065dda0-0bfd-4ce0-b6bc-693c7f0960a5",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"vbt.Splitter.split_range(\n",
|
|
" slice(None), \n",
|
|
" (1.0, 30), \n",
|
|
" backwards=True,\n",
|
|
" index=data.index\n",
|
|
")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "a4c1bc39-70e6-4b94-97d5-1472c705e70b",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"vbt.Splitter.split_range(\n",
|
|
" slice(None), \n",
|
|
" (\n",
|
|
" vbt.RelRange(length=0.4, length_space=\"all\"), \n",
|
|
" vbt.RelRange(length=0.4, length_space=\"all\"),\n",
|
|
" vbt.RelRange()\n",
|
|
" ),\n",
|
|
" index=data.index\n",
|
|
")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "d2c201c9-f114-4f3b-ad47-bcee006d7953",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"vbt.Splitter.split_range(\n",
|
|
" slice(None),\n",
|
|
" (vbt.RelRange(length=0.75), vbt.RelRange(offset=1)),\n",
|
|
" index=data.index\n",
|
|
")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "6f23a330-cb2f-4a15-8a09-08504c4cd48a",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"vbt.Splitter.split_range(\n",
|
|
" slice(None), \n",
|
|
" (\n",
|
|
" vbt.RelRange(length=0.75), \n",
|
|
" vbt.RelRange(length=1, is_gap=True),\n",
|
|
" vbt.RelRange()\n",
|
|
" ),\n",
|
|
" index=data.index\n",
|
|
")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "958e0e1c-2735-4568-b05b-b29c8b4f7560",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"vbt.Splitter.split_range(\n",
|
|
" slice(None), \n",
|
|
" (np.array([3, 4, 5]), np.array([6, 8, 10])),\n",
|
|
" index=data.index\n",
|
|
")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "a60e0aaa-d581-4530-a747-3067c8678650",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"vbt.Splitter.split_range(\n",
|
|
" slice(None), \n",
|
|
" (np.array([3, 4, 5]), np.array([6, 8, 10])),\n",
|
|
" range_format=\"indices\",\n",
|
|
" index=data.index\n",
|
|
")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "4249fa3b-ef6f-41b7-8d8d-81723e6ce48a",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"vbt.Splitter.split_range(\n",
|
|
" slice(None), \n",
|
|
" (slice(\"2020\", \"2021\"), slice(\"2021\", \"2022\")),\n",
|
|
" index=data.index\n",
|
|
")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "bfbae80f-a314-41f9-8c49-2111f4df01d9",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"data.index[867:1233]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "debcb754-e691-4e59-b318-61486af3fbf4",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"data.index[1233:1598]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "7d1340f2-d1b9-4fbe-9898-d41620e3d30d",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"vbt.Splitter.split_range(\n",
|
|
" slice(None), \n",
|
|
" (\n",
|
|
" vbt.RelRange(length=\"180 days\"), \n",
|
|
" vbt.RelRange(offset=\"1 day\", length=\"90 days\")\n",
|
|
" ),\n",
|
|
" index=data.index\n",
|
|
")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "8638db99-b5a3-4ce8-adcc-f3ebf7c59b71",
|
|
"metadata": {},
|
|
"source": [
|
|
"#### Method"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "b6a99b43-4326-4d37-85fc-2f6b0076ebd4",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"manual_splitter = vbt.Splitter.from_splits(\n",
|
|
" data.index,\n",
|
|
" [\n",
|
|
" (vbt.RelRange(), vbt.RelRange(offset=0.5, length=0.25, length_space=\"all\")),\n",
|
|
" (vbt.RelRange(), vbt.RelRange(offset=0.25, length=0.25, length_space=\"all\")),\n",
|
|
" (vbt.RelRange(), vbt.RelRange(offset=0, length=0.25, length_space=\"all\")),\n",
|
|
" ],\n",
|
|
" split_range_kwargs=dict(backwards=True),\n",
|
|
" set_labels=[\"IS\", \"OOS\"]\n",
|
|
")\n",
|
|
"print(manual_splitter.splits)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "871ceeb6-533d-48ce-803c-49106fc6d807",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"manual_splitter.plot().show_svg()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "22246385-ae17-47db-8815-71788c60453d",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Generation"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "e6dbecb8-fd62-4a3d-9682-24deb81a8da0",
|
|
"metadata": {},
|
|
"source": [
|
|
"#### Rolling"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "d0ab8ccc-dfe7-4612-8a39-3017795e0486",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"vbt.Splitter.from_rolling(\n",
|
|
" data.index,\n",
|
|
" length=360,\n",
|
|
").plot().show_svg()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "02d5d230-30c0-4156-a493-6ef4157d727a",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"vbt.Splitter.from_rolling(\n",
|
|
" data.index, \n",
|
|
" length=360,\n",
|
|
" offset=90\n",
|
|
").plot().show_svg()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "ba270114-7b18-45d0-a542-7bcfe3c0ddc3",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"vbt.Splitter.from_rolling(\n",
|
|
" data.index, \n",
|
|
" length=360,\n",
|
|
" offset=-0.5\n",
|
|
").plot().show_svg()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "f47fb9c2-eebd-40f0-b901-da3dd54ae721",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"vbt.Splitter.from_rolling(\n",
|
|
" data.index, \n",
|
|
" length=360,\n",
|
|
" split=0.5\n",
|
|
").plot().show_svg()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "66314ec5-2a84-4343-8b08-9712bccc2d37",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"vbt.Splitter.from_rolling(\n",
|
|
" data.index,\n",
|
|
" length=360,\n",
|
|
" split=0.5,\n",
|
|
" offset_anchor_set=None\n",
|
|
").plot().show_svg()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "2c0661b1-df61-4792-9da6-720d6866e7c5",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"vbt.Splitter.from_n_rolling(\n",
|
|
" data.index,\n",
|
|
" n=5,\n",
|
|
" split=0.5\n",
|
|
").plot().show_svg()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "34c752b3-ed6c-4752-b835-e6a0003ff1e5",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"vbt.Splitter.from_n_rolling(\n",
|
|
" data.index,\n",
|
|
" n=3,\n",
|
|
" length=360,\n",
|
|
" split=0.5\n",
|
|
").plot().show_svg()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "1b40afcc-b3d1-47c3-9f84-f30d1840333e",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"vbt.Splitter.from_n_rolling(\n",
|
|
" data.index,\n",
|
|
" n=7,\n",
|
|
" length=360,\n",
|
|
" split=0.5\n",
|
|
").plot().show_svg()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "21f304b2-205c-4cb3-a231-905e266d437a",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"vbt.Splitter.from_expanding(\n",
|
|
" data.index, \n",
|
|
" min_length=360,\n",
|
|
" offset=180,\n",
|
|
" split=-180\n",
|
|
").plot().show_svg()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "59a2a68e-b863-4526-aa79-6694b825ac4f",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"vbt.Splitter.from_n_expanding(\n",
|
|
" data.index, \n",
|
|
" n=5,\n",
|
|
" min_length=360,\n",
|
|
" split=-180\n",
|
|
").plot().show_svg()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "2bd6889b-8da8-4bc6-be03-23a4e7eca992",
|
|
"metadata": {},
|
|
"source": [
|
|
"#### Anchored"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "1ef7b028-471b-4033-a3c4-1aaa3354a237",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"vbt.Splitter.from_ranges(\n",
|
|
" data.index,\n",
|
|
" every=\"Y\",\n",
|
|
" split=0.5\n",
|
|
").plot().show_svg()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "2eed590b-93be-4ae5-967d-8b9c04bf3990",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"vbt.Splitter.from_ranges(\n",
|
|
" data.index,\n",
|
|
" every=\"Q\",\n",
|
|
" lookback_period=\"Y\",\n",
|
|
" split=0.5\n",
|
|
").plot().show_svg()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "69154548-b1e8-4ecd-95fd-9be2f5466ed4",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"vbt.Splitter.from_ranges(\n",
|
|
" data.index,\n",
|
|
" every=\"Q\",\n",
|
|
" lookback_period=\"Y\",\n",
|
|
" split=(\n",
|
|
" vbt.RepEval(\"index.month != index.month[-1]\"),\n",
|
|
" vbt.RepEval(\"index.month == index.month[-1]\")\n",
|
|
" )\n",
|
|
").plot().show_svg()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "1562819f-2715-41b2-977e-6e046aba4e0b",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def qyear(index):\n",
|
|
" return index.to_period(\"Q\")\n",
|
|
"\n",
|
|
"vbt.Splitter.from_ranges(\n",
|
|
" data.index,\n",
|
|
" start=0,\n",
|
|
" fixed_start=True,\n",
|
|
" every=\"Q\",\n",
|
|
" closed_end=True,\n",
|
|
" split=(\n",
|
|
" lambda index: qyear(index) != qyear(index)[-1],\n",
|
|
" lambda index: qyear(index) == qyear(index)[-1]\n",
|
|
" )\n",
|
|
").plot().show_svg()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "432485a6-f8f4-4970-8cec-56689fc1969d",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"vbt.Splitter.from_grouper(\n",
|
|
" data.index,\n",
|
|
" by=\"Y\",\n",
|
|
" split=0.5\n",
|
|
").plot().show_svg()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "5cd1d516-094e-4f32-a202-6d49e700386c",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def is_split_complete(index, split):\n",
|
|
" first_range = split[0]\n",
|
|
" first_index = index[first_range][0]\n",
|
|
" last_range = split[-1]\n",
|
|
" last_index = index[last_range][-1]\n",
|
|
" return first_index.is_year_start and last_index.is_year_end\n",
|
|
"\n",
|
|
"vbt.Splitter.from_grouper(\n",
|
|
" data.index,\n",
|
|
" by=\"Y\",\n",
|
|
" split=0.5,\n",
|
|
" split_check_template=vbt.RepFunc(is_split_complete)\n",
|
|
").plot().show_svg()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "6cc38232-edfb-4395-899f-47ed3aa0723c",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def format_split_labels(index, splits_arr):\n",
|
|
" years = map(lambda x: index[x[0]][0].year, splits_arr)\n",
|
|
" return pd.Index(years, name=\"split_year\")\n",
|
|
"\n",
|
|
"vbt.Splitter.from_grouper(\n",
|
|
" data.index,\n",
|
|
" by=\"Y\",\n",
|
|
" split=0.5,\n",
|
|
" split_check_template=vbt.RepFunc(is_split_complete),\n",
|
|
" split_labels=vbt.RepFunc(format_split_labels)\n",
|
|
").plot().show_svg()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "11d702be-5df5-42de-9491-9b38ac7aae06",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"vbt.Splitter.from_grouper(\n",
|
|
" data.index,\n",
|
|
" by=data.index.year,\n",
|
|
" split=0.5,\n",
|
|
" split_check_template=vbt.RepFunc(is_split_complete)\n",
|
|
").plot().show_svg()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "38d4f68a-0372-48e5-bc1a-c45171942e7e",
|
|
"metadata": {},
|
|
"source": [
|
|
"#### Random"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "e72d4e77-c387-4c7e-bcd5-07e8c15ac15c",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"vbt.Splitter.from_n_random(\n",
|
|
" data.index,\n",
|
|
" n=50,\n",
|
|
" min_length=360,\n",
|
|
" seed=42,\n",
|
|
" split=0.5\n",
|
|
").plot().show_svg()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "dbeea5da-bceb-4e8e-866b-2a0e608995f2",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"vbt.Splitter.from_n_random(\n",
|
|
" data.index,\n",
|
|
" n=50,\n",
|
|
" min_length=60,\n",
|
|
" max_length=480,\n",
|
|
" seed=42,\n",
|
|
" split=0.5\n",
|
|
").plot().show_svg()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "b2c41a6c-b579-499c-98af-b7332cdaa7ab",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def start_p_func(i, indices):\n",
|
|
" return indices / indices.sum()\n",
|
|
"\n",
|
|
"vbt.Splitter.from_n_random(\n",
|
|
" data.index,\n",
|
|
" n=50,\n",
|
|
" min_length=60,\n",
|
|
" max_length=480,\n",
|
|
" seed=42,\n",
|
|
" start_p_func=start_p_func,\n",
|
|
" split=0.5\n",
|
|
").plot().show_svg()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "bf44a54a-5349-4b2e-a92a-03ecdda01e00",
|
|
"metadata": {},
|
|
"source": [
|
|
"#### Scikit-learn"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "3992f4a8-d943-4256-939f-6f3be3767df6",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from sklearn.model_selection import KFold\n",
|
|
"\n",
|
|
"vbt.Splitter.from_sklearn(\n",
|
|
" data.index, \n",
|
|
" KFold(n_splits=5)\n",
|
|
").plot().show_svg()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "43c93cc4-b494-4119-923a-c01e5e84b458",
|
|
"metadata": {},
|
|
"source": [
|
|
"#### Dynamic"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "a55967e9-58b6-46a4-92eb-47de2c99ee40",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def split_func(index, prev_start):\n",
|
|
" if prev_start is None:\n",
|
|
" prev_start = index[0]\n",
|
|
" new_start = prev_start + pd.offsets.MonthBegin(1)\n",
|
|
" new_end = new_start + pd.DateOffset(years=1)\n",
|
|
" if new_end > index[-1] + index.freq:\n",
|
|
" return None\n",
|
|
" return [\n",
|
|
" slice(new_start, new_start + pd.offsets.MonthBegin(9)),\n",
|
|
" slice(new_start + pd.offsets.MonthBegin(9), new_end)\n",
|
|
" ]\n",
|
|
"\n",
|
|
"vbt.Splitter.from_split_func(\n",
|
|
" data.index,\n",
|
|
" split_func=split_func,\n",
|
|
" split_args=(vbt.Rep(\"index\"), vbt.Rep(\"prev_start\")),\n",
|
|
" range_bounds_kwargs=dict(index_bounds=True)\n",
|
|
").plot().show_svg()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "3dcf9638-36e8-4304-99e4-c30e1970cfd5",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def get_next_monday(from_date):\n",
|
|
"    \"\"\"Return midnight of the Monday on or after `from_date`.\"\"\"\n",
|
|
"    # `weekday` is a method on pandas timestamps: it has to be called, otherwise\n",
|
|
"    # the bound method itself is compared to 0 and this guard can never fire.\n",
|
|
"    if from_date.weekday() == 0 and from_date.ceil(\"h\").hour <= 9:\n",
|
|
"        return from_date.floor(\"D\")\n",
|
|
"    # With n=0 the offset rolls forward to Monday and leaves a Monday unchanged.\n",
|
|
"    return from_date.floor(\"D\") + pd.offsets.Week(n=0, weekday=0)\n",
|
|
"\n",
|
|
"def get_next_business_range(from_date):\n",
|
|
" monday_0000 = get_next_monday(from_date)\n",
|
|
" monday_0900 = monday_0000 + pd.DateOffset(hours=9)\n",
|
|
" friday_1700 = monday_0900 + pd.DateOffset(days=4, hours=8)\n",
|
|
" return slice(monday_0900, friday_1700)\n",
|
|
"\n",
|
|
"def split_func(index, bounds):\n",
|
|
" if len(bounds) == 0:\n",
|
|
" from_date = index[0]\n",
|
|
" else:\n",
|
|
" from_date = bounds[-1][1][0]\n",
|
|
" train_range = get_next_business_range(from_date)\n",
|
|
" test_range = get_next_business_range(train_range.stop)\n",
|
|
" if test_range.stop > index[-1] + index.freq:\n",
|
|
" return None\n",
|
|
" return train_range, test_range\n",
|
|
"\n",
|
|
"vbt.Splitter.from_split_func(\n",
|
|
" vbt.date_range(\"2020-01\", \"2020-03\", freq=\"15min\"),\n",
|
|
" split_func=split_func,\n",
|
|
" split_args=(vbt.Rep(\"index\"), vbt.Rep(\"bounds\")),\n",
|
|
" range_bounds_kwargs=dict(index_bounds=True)\n",
|
|
").plot().show_svg()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "4ab3b2d1-5fb5-4978-95e0-2ee627abe829",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Validation"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "0b84d3f9-22a1-46b3-9593-4ebe18060fc1",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"splitter = vbt.Splitter.from_ranges(\n",
|
|
" data.index,\n",
|
|
" every=\"Y\",\n",
|
|
" closed_end=True,\n",
|
|
" split=0.5,\n",
|
|
" set_labels=[\"IS\", \"OOS\"]\n",
|
|
")\n",
|
|
"splitter.plot().show_svg()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "589b9b90-e825-4cf6-a402-4720fd82e6b0",
|
|
"metadata": {},
|
|
"source": [
|
|
"#### Bounds"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "9aa53234-fab9-4db4-b0f6-955f8c66f467",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"bounds_arr = splitter.get_bounds_arr()\n",
|
|
"bounds_arr.shape"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "f364195f-9ef1-43ee-b8a9-af7835d147a2",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"print(bounds_arr)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "b3051c7d-3186-49a4-9092-d837f79c09c8",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"bounds = splitter.get_bounds(index_bounds=True)\n",
|
|
"bounds.shape"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "9d76d199-12db-4574-a8f4-0b87107d8a0d",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"print(bounds)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "a0818b81-9735-481d-b510-14140ba6c576",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"bounds.loc[(0, \"OOS\"), \"end\"]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "21737903-0707-410c-a1c1-133178784fcf",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"bounds.loc[(1, \"IS\"), \"start\"]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "c5b5d61b-66fe-4057-b699-64d1863d061c",
|
|
"metadata": {},
|
|
"source": [
|
|
"#### Masks"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "32b1ad13-4523-41d9-b424-637f1b25b4a2",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"mask = splitter.get_mask()\n",
|
|
"mask.shape"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "a21e4d35-4403-4d5e-ad67-0086da9444ce",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"print(mask)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "328a93e7-7b14-42d8-acc6-3eba2a72ec4f",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"mask[\"2021\":\"2021\"].any()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "8bccca0a-65be-4810-bd5e-5222edfba6c4",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"print(mask.resample(vbt.offset(\"Y\")).sum())"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "629ea5be-73c7-4799-8d07-ab28a90df329",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Use a comprehension-scoped name so the `mask` frame from `splitter.get_mask()`\n",
|
|
"# is not overwritten by the per-split masks iterated here.\n",
|
|
"results = [\n",
|
|
"    split_mask.resample(vbt.offset(\"Y\")).sum()\n",
|
|
"    for split_mask in splitter.get_iter_split_masks()\n",
|
|
"]\n",
|
|
"print(pd.concat(results, axis=1, keys=splitter.split_labels))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "ec5cbff7-c630-4d35-a607-cf4f7c482f0f",
|
|
"metadata": {},
|
|
"source": [
|
|
"#### Coverage"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "76bb4bb3-027b-465f-be37-fb44cb567bc6",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"splitter.get_split_coverage()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "071e5e14-def1-4133-8682-d4e214810079",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"splitter.get_set_coverage()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "d885fac2-0a56-4e4e-81d0-52a29b8cd572",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"splitter.get_range_coverage()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "7c4301ee-8998-44d5-b112-0e463f0213ea",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"splitter.get_coverage()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "4d328277-71e9-49c1-8b54-d7ecf08682e2",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"splitter.index_bounds.loc[(2, \"OOS\"), \"start\"].is_leap_year"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "a8e94289-4550-43bf-9cca-f13cb6e195ad",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"splitter.get_range_coverage(relative=True)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "0c9e5444-4276-42bc-b0c6-0799c7606345",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"splitter.get_set_coverage(relative=True)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "cf6715aa-0a33-4202-9b38-3dc8f6b24ebc",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"splitter.get_split_coverage(overlapping=True)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "1567e6fe-85e5-4075-b4a4-f0490d69bfb4",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"splitter.get_set_coverage(overlapping=True)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "a1935522-f1f7-460d-939b-8134fe1287e1",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"splitter.get_coverage(overlapping=True)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "2466892b-1521-4b7d-94bd-54ab21adf816",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"splitter.plot_coverage().show_svg()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "b4f46cc9-edeb-42bb-b9d1-fbb26d9510e9",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"print(splitter.get_overlap_matrix(by=\"range\", normalize=False))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "0f125754-29e4-4d22-9f0f-defb3db268ce",
|
|
"metadata": {},
|
|
"source": [
|
|
"#### Grouping"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "7af58745-fa11-4a7a-bd1e-833df3d3a8b4",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"print(splitter.get_bounds(index_bounds=True, set_group_by=True))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "b5d73373-ed8d-4943-860f-05342ca2c511",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Manipulation"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "2d1d4116-8e59-47d2-9364-c44aa908d2f0",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"splitter = vbt.Splitter.from_grouper(\n",
|
|
" data.index, \n",
|
|
" by=data.index.year.rename(\"split_year\")\n",
|
|
")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "6b9c5b86-7aec-4496-94d9-4c592766e167",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"splitter.stats()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "0aa022d7-6eb4-471f-aa5a-ab26c5deee58",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"splitter.plots().show_svg()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "98537c1f-efbb-4574-9f57-5bdb07b452b9",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"splitter = splitter.iloc[1:-1]\n",
|
|
"splitter.stats()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "a442aad5-ead3-4d4e-b049-7e3090200ddf",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def new_split(index):\n",
|
|
" return [\n",
|
|
" np.isin(index.quarter, [1, 2]), \n",
|
|
" index.quarter == 3, \n",
|
|
" index.quarter == 4\n",
|
|
" ]\n",
|
|
"\n",
|
|
"splitter = splitter.split_set(\n",
|
|
" vbt.RepFunc(new_split),\n",
|
|
" new_set_labels=[\"train\", \"valid\", \"test\"]\n",
|
|
")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "a938abed-831a-4cb2-9729-9b015d819e38",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"splitter.stats()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "0f9125b7-6d85-4117-ad6b-37b24bf3c50d",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"splitter.plots().show_svg()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "35f5cfe5-0626-4372-a90b-df68e155d994",
|
|
"metadata": {},
|
|
"source": [
|
|
"#### Homework"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "3d9fb4df-5ca0-43f5-869c-4721c3993402",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"splitter = splitter.merge_sets(columns=[\"valid\", \"test\"], new_set_label=\"test\")\n",
|
|
"splitter.plots().show_svg()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "d00fdcdf-0bd1-49ed-ade9-411a2ae3d79c",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Applications"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "3ebb6158-e395-49e9-bcc8-cd258c17783f",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Taking"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "1aa23c97-e221-48ed-8a5e-484fda11e955",
|
|
"metadata": {},
|
|
"source": [
|
|
"#### Without stacking"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "ed524fc3-b782-43dc-a8a6-0bced61a3708",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"close_slices = splitter.take(data.close)\n",
|
|
"close_slices"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "74d768fe-ce73-41c1-945f-c21fbb7fd662",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"close_slices[2020, \"test\"]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "79ac7f33-08aa-4bb7-87d2-f8e80c94db0c",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def get_total_return(sr):\n",
|
|
" return sr.vbt.to_returns().vbt.returns.total()\n",
|
|
"\n",
|
|
"close_slices.apply(get_total_return)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "02213852-3258-46d6-97a4-32a3a5f19820",
|
|
"metadata": {},
|
|
"source": [
|
|
"##### Complex objects"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "c2d3208f-2454-4501-8cd2-e94694238a56",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"trendlb = data.run(\"trendlb\", 1.0, 0.5)\n",
|
|
"trendlb.plot().show_svg()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "4926dbe3-ecc6-445d-868a-46a88af5756c",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"grouper = pd.Index(trendlb.labels.map({1: \"U\", 0: \"D\"}), name=\"trend\")\n",
|
|
"trend_splitter = vbt.Splitter.from_grouper(data.index, grouper)\n",
|
|
"trend_splitter.plot().show_svg()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "f41b604c-84ca-4ea2-a293-0d40878eb65c",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"hold_pf = vbt.Portfolio.from_holding(data)\n",
|
|
"hold_returns_acc = hold_pf.returns_acc\n",
|
|
"\n",
|
|
"fast_sma, slow_sma = vbt.talib(\"SMA\").run_combs(\n",
|
|
" data.close, np.arange(5, 50), short_names=[\"fast_sma\", \"slow_sma\"])\n",
|
|
"entries = fast_sma.real_crossed_above(slow_sma)\n",
|
|
"exits = fast_sma.real_crossed_below(slow_sma)\n",
|
|
"strat_pf = vbt.Portfolio.from_signals(\n",
|
|
" data, entries, exits, direction=\"both\")\n",
|
|
"strat_returns_acc = strat_pf.returns_acc"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "84335fef-a4c9-44a3-8f68-61ab14e6d01d",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"hold_returns_acc_slices = trend_splitter.take(hold_returns_acc)\n",
|
|
"strat_returns_acc_slices = trend_splitter.take(strat_returns_acc)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "faae3461-56b6-4f50-aad8-f196745375e3",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"hold_returns_acc_slices[\"U\"].sharpe_ratio()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "91ae1996-5fce-459d-a08c-58950ca7ac83",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"strat_returns_acc_slices[\"U\"].sharpe_ratio().vbt.heatmap(\n",
|
|
" x_level=\"fast_sma_timeperiod\", \n",
|
|
" y_level=\"slow_sma_timeperiod\",\n",
|
|
" symmetric=True\n",
|
|
").show_svg()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "ea0824b5-c6b5-48a5-9abd-3b8d582afeca",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"hold_returns_acc_slices[\"D\"].sharpe_ratio()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "dff164c5-5840-4bda-b25a-b290be490d2e",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"strat_returns_acc_slices[\"D\"].sharpe_ratio().vbt.heatmap(\n",
|
|
" x_level=\"fast_sma_timeperiod\", \n",
|
|
" y_level=\"slow_sma_timeperiod\",\n",
|
|
" symmetric=True\n",
|
|
").show_svg()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "42039308-cbeb-4a20-928c-8c1b4588b84e",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"trend_splitter = trend_splitter.break_up_splits(\"by_gap\", sort=True)\n",
|
|
"trend_splitter.plot().show_svg()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "6e564cf0-f138-4552-afc6-0ce34502e3e8",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"strat_pf_slices = strat_pf.split(trend_splitter)\n",
|
|
"strat_pf_slices"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "247f3770-249d-4612-bb7c-4c3d64e141f9",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"trend_range_perf = strat_pf_slices.apply(lambda pf: pf.sharpe_ratio)\n",
|
|
"median_trend_perf = trend_range_perf.median(axis=1)\n",
|
|
"median_trend_perf"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "483eaa0d-cde1-4c30-873e-cc8d668139f8",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"trend_perf_ts = data.symbol_wrapper.fill().rename(\"trend_perf\")\n",
|
|
"for label, sr in trend_splitter.bounds.iterrows():\n",
|
|
" trend_perf_ts.iloc[sr[\"start\"]:sr[\"end\"]] = median_trend_perf[label]\n",
|
|
"data.close.vbt.overlay_with_heatmap(trend_perf_ts).show_svg()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "7f013d68-bbc6-43b4-ae38-0414781602a4",
|
|
"metadata": {},
|
|
"source": [
|
|
"#### Column stacking"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "c899aa78-29d8-4833-b307-2949132e0b78",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"close_stacked = pd.concat(\n",
|
|
" close_slices.values.tolist(), \n",
|
|
" axis=1, \n",
|
|
" keys=close_slices.index\n",
|
|
")\n",
|
|
"print(close_stacked)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "4e83e120-ee01-4664-81bb-39b4c084d3b6",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"get_total_return(close_stacked)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "20abc610-b2a9-4ecd-a724-f3c267e71636",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"close_stacked = splitter.take(data.close, into=\"stacked\")\n",
|
|
"close_stacked.shape"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "5d67665d-ea55-400c-ace4-bd5a2adaef12",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"close_stacked = splitter.take(data.close, into=\"reset_stacked\")\n",
|
|
"print(close_stacked)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "c6a65f09-264e-40d4-a01b-f8ef208e331d",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"close_stacked = splitter.take(data.close, into=\"from_end_stacked\")\n",
|
|
"print(close_stacked)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "25c891ae-ffab-4ef1-a97d-0de12a1c0f63",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"close_stacked = splitter.take(data.close, into=\"reset_stacked_by_set\")\n",
|
|
"close_stacked"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "78f294b1-a07b-4f1e-a43f-bf6b33ec0d13",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"print(close_stacked[\"train\"])"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "94613e5d-2c2f-48ba-9714-e95b6d706b7a",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"print(182 * 8)\n",
|
|
"print(1461 * 8)\n",
|
|
"print(1 - 1456 / 11688)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "f72feaad-a8d0-4249-9719-7e0f5811f9c5",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"index_slices = splitter.take(data.index)\n",
|
|
"index_slices"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "0e0b9776-b7a4-4dc2-bcf5-3f73c47d9d5d",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"close_stacked_wb = splitter.take(\n",
|
|
" data.close, \n",
|
|
" into=\"reset_stacked_by_set\",\n",
|
|
" attach_bounds=\"index\",\n",
|
|
" right_inclusive=True\n",
|
|
")\n",
|
|
"print(close_stacked_wb[\"train\"])"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "48b59d45-5ddc-4777-969b-1582b4660055",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"@vbt.parameterized(merge_func=\"concat\")\n",
|
|
"def set_sma_crossover_perf(close, fast_window, slow_window, freq):\n",
|
|
" fast_sma = vbt.talib(\"sma\").run(\n",
|
|
" close, fast_window, short_name=\"fast_sma\", hide_params=True) \n",
|
|
" slow_sma = vbt.talib(\"sma\").run(\n",
|
|
" close, slow_window, short_name=\"slow_sma\", hide_params=True) \n",
|
|
" entries = fast_sma.real_crossed_above(slow_sma)\n",
|
|
" exits = fast_sma.real_crossed_below(slow_sma)\n",
|
|
" pf = vbt.Portfolio.from_signals(\n",
|
|
" close, entries, exits, freq=freq, direction=\"both\")\n",
|
|
" return pf.sharpe_ratio"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "21ec3795-d820-4f7d-b6fa-de6710fac0cd",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"train_perf = set_sma_crossover_perf(\n",
|
|
" close_stacked[\"train\"],\n",
|
|
" vbt.Param(np.arange(5, 50), condition=\"x < slow_window\"),\n",
|
|
" vbt.Param(np.arange(5, 50)),\n",
|
|
" data.index.freq,\n",
|
|
" _execute_kwargs=dict(\n",
|
|
" clear_cache=50,\n",
|
|
" collect_garbage=50\n",
|
|
" )\n",
|
|
")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "63202321-0689-4aca-9e82-7ece4fd18a0e",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"train_perf"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "75669ef5-ad2a-4122-8be9-c510ca252fb0",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"train_perf.vbt.heatmap(\n",
|
|
" x_level=\"fast_window\",\n",
|
|
" y_level=\"slow_window\",\n",
|
|
" slider_level=\"split_year\",\n",
|
|
" symmetric=True\n",
|
|
").show_svg() # replace with show()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "66734f37-624d-43ed-83ae-a9b33ace0085",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"@njit\n",
|
|
"def prox_median_nb(arr):\n",
|
|
" if (~np.isnan(arr)).sum() < 20:\n",
|
|
" return np.nan\n",
|
|
" return np.nanmedian(arr)\n",
|
|
"\n",
|
|
"prox_perf_list = []\n",
|
|
"for split_label, perf_sr in train_perf.groupby(\"split_year\"):\n",
|
|
" perf_df = perf_sr.vbt.unstack_to_df(0, [1, 2])\n",
|
|
" prox_perf_df = perf_df.vbt.proximity_apply(2, prox_median_nb)\n",
|
|
" prox_perf_sr = prox_perf_df.stack([0, 1])\n",
|
|
" prox_perf_list.append(prox_perf_sr.reindex(perf_sr.index))\n",
|
|
"\n",
|
|
"train_prox_perf = pd.concat(prox_perf_list)\n",
|
|
"train_prox_perf"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "d64b2b46-598e-4211-8e66-fd2824156f5a",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"train_prox_perf.vbt.heatmap(\n",
|
|
" x_level=\"fast_window\",\n",
|
|
" y_level=\"slow_window\",\n",
|
|
" slider_level=\"split_year\",\n",
|
|
" symmetric=True\n",
|
|
").show_svg() # replace with show()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "a2bd48a4-74c9-4e9d-a2a4-c1a726ca1619",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"best_params = train_prox_perf.groupby(\"split_year\").idxmax()\n",
|
|
"best_params = train_prox_perf[best_params].index\n",
|
|
"train_prox_perf[best_params]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "c99e5a80-3fa4-453b-ad33-88c825f2dfa7",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"test_perf = set_sma_crossover_perf(\n",
|
|
" vbt.RepEval(\n",
|
|
" \"test_close.iloc[:, [config_idx]]\", \n",
|
|
" context=dict(test_close=close_stacked[\"test\"])\n",
|
|
" ),\n",
|
|
" vbt.Param(best_params.get_level_values(\"fast_window\"), level=0),\n",
|
|
" vbt.Param(best_params.get_level_values(\"slow_window\"), level=0),\n",
|
|
" data.index.freq\n",
|
|
")\n",
|
|
"test_perf"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "ee16a258-2788-402b-bf2b-c065254c1c3e",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def get_index_sharpe(index):\n",
|
|
" return data.loc[index].run(\"from_holding\").sharpe_ratio\n",
|
|
"\n",
|
|
"index_slices.xs(\"test\", level=\"set\").apply(get_index_sharpe)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "dd73c429-db22-40ff-8a34-f773ae0e7762",
|
|
"metadata": {},
|
|
"source": [
|
|
"#### Row stacking"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "e890c631-33b8-4822-914d-35432379cedf",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"block_size = int(3.15 * len(data.index) ** (1 / 3))\n",
|
|
"block_splitter = vbt.Splitter.from_rolling(\n",
|
|
" data.index, \n",
|
|
" length=block_size, \n",
|
|
" offset=1,\n",
|
|
" offset_anchor=\"prev_start\"\n",
|
|
")\n",
|
|
"block_splitter.n_splits"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "5a04c2d4-c53a-4a42-9cf1-0f912b7df08e",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"size = int(block_splitter.n_splits / block_size)\n",
|
|
"sample_splitter = block_splitter.shuffle_splits(size=size, replace=True)\n",
|
|
"sample_splitter.plot().show_svg()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "42ae502d-3e72-458c-9d8a-09d475968013",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"returns = data.returns\n",
|
|
"sample_rets = sample_splitter.take(returns, into=\"stacked\", stack_axis=0)\n",
|
|
"sample_rets"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "59eb80cc-0738-4024-873b-98009964c5d4",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"sample_rets.index = data.index[:len(sample_rets)]\n",
|
|
"# Take the first close by position: an integer key on a datetime-indexed Series\n",
|
|
"# is deprecated as a positional lookup in pandas 2.x, so use `.iloc` explicitly.\n",
|
|
"sample_cumrets = data.close.iloc[0] * (sample_rets + 1).cumprod()\n",
|
|
"sample_cumrets.vbt.plot().show_svg()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "78e88fe1-8433-4ce8-9beb-4fc24878745a",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"samples_rets_list = []\n",
|
|
"for i in vbt.ProgressBar(range(1000)):\n",
|
|
" sample_spl = block_splitter.shuffle_splits(size=size, replace=True)\n",
|
|
" sample_rets = sample_spl.take(returns, into=\"stacked\", stack_axis=0)\n",
|
|
" sample_rets.index = returns.index[:len(sample_rets)]\n",
|
|
" sample_rets.name = i\n",
|
|
" samples_rets_list.append(sample_rets)\n",
|
|
"sample_rets_stacked = pd.concat(samples_rets_list, axis=1)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "e909ffcc-d18f-4351-9626-714c81c18321",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"sample_sharpe = sample_rets_stacked.vbt.returns.sharpe_ratio()\n",
|
|
"sample_sharpe.vbt.boxplot(horizontal=True).show_svg()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "0815456c-9b75-4ab8-b499-faa9947406ea",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"sample_sharpe.quantile(0.025), sample_sharpe.quantile(0.975)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "1fee36b8-a009-4b73-aeaa-18f918c92c55",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Applying"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "b8ee38db-2b98-488c-b53c-f539e0bccedb",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"splitter.apply(\n",
|
|
" get_total_return,\n",
|
|
" vbt.Takeable(data.close),\n",
|
|
" merge_func=\"concat\"\n",
|
|
")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "6d86ce45-9b6b-43e2-8e2e-79944cd4f413",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"splitter.apply(\n",
|
|
" get_total_return,\n",
|
|
" vbt.RepFunc(lambda range_: data.close[range_]),\n",
|
|
" merge_func=\"concat\"\n",
|
|
")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "7ae1a59f-4466-4383-8a51-668c24f4321e",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def get_total_return(range_, data):\n",
|
|
" return data.returns[range_].vbt.returns.total()\n",
|
|
"\n",
|
|
"splitter.apply(\n",
|
|
" get_total_return,\n",
|
|
" vbt.Rep(\"range_\"),\n",
|
|
" data,\n",
|
|
" merge_func=\"concat\"\n",
|
|
")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "35f59608-ee6f-4e16-a9a7-ad9dcaaab0cd",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def get_total_return(data):\n",
|
|
" return data.returns.vbt.returns.total()\n",
|
|
"\n",
|
|
"splitter.apply(\n",
|
|
" get_total_return,\n",
|
|
" vbt.Takeable(data),\n",
|
|
" merge_func=\"concat\"\n",
|
|
")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "11b5fa1d-a6a7-4703-a897-be517ecc396b",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"splitter.apply(\n",
|
|
" get_total_return,\n",
|
|
" vbt.Takeable(data),\n",
|
|
" set_group_by=True,\n",
|
|
" merge_func=\"concat\"\n",
|
|
")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "62816646-c23f-4c12-8dfb-c0e327badeb6",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"splitter.apply(\n",
|
|
" get_total_return,\n",
|
|
" vbt.Takeable(data),\n",
|
|
" split=[2020, 2021],\n",
|
|
" set_=\"train\",\n",
|
|
" merge_func=\"concat\"\n",
|
|
")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "f8f03eb7-2835-45c7-9879-7b951c7a4647",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"train_perf = splitter.apply(\n",
|
|
" sma_crossover_perf,\n",
|
|
" vbt.Takeable(data),\n",
|
|
" vbt.Param(np.arange(5, 50), condition=\"x < slow_window\"),\n",
|
|
" vbt.Param(np.arange(5, 50)),\n",
|
|
" _execute_kwargs=dict(\n",
|
|
" clear_cache=50,\n",
|
|
" collect_garbage=50\n",
|
|
" ),\n",
|
|
" set_=\"train\",\n",
|
|
" merge_func=\"concat\",\n",
|
|
")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "0ebb869c-1edf-491f-ad26-bcba207c0f26",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"train_perf"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "261c2dd1-2f5e-480e-a2c8-7593ac26771b",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"best_params = train_perf.groupby(\"split_year\").idxmax()\n",
|
|
"best_params = train_perf[best_params].index\n",
|
|
"train_perf[best_params]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "1749022b-1d3c-4ccb-b6a1-9fe0a06e52a5",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"best_fast_windows = best_params.get_level_values(\"fast_window\")\n",
|
|
"best_slow_windows = best_params.get_level_values(\"slow_window\")\n",
|
|
"\n",
|
|
"test_perf = splitter.apply(\n",
|
|
" sma_crossover_perf,\n",
|
|
" vbt.Takeable(data),\n",
|
|
" vbt.RepFunc(lambda split_idx: best_fast_windows[split_idx]),\n",
|
|
" vbt.RepFunc(lambda split_idx: best_slow_windows[split_idx]),\n",
|
|
" set_=\"test\",\n",
|
|
" merge_func=\"concat\"\n",
|
|
")\n",
|
|
"test_perf"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "432e35f6-98b2-4278-9616-54a2206eb182",
|
|
"metadata": {},
|
|
"source": [
|
|
"#### Iteration schemes"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "e079d58d-f171-41ef-b685-80b502cb31b3",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def cv_sma_crossover(\n",
|
|
" data, \n",
|
|
" fast_windows, \n",
|
|
" slow_windows, \n",
|
|
" split_idx,\n",
|
|
" set_idx,\n",
|
|
" train_perf_list\n",
|
|
"):\n",
|
|
" if set_idx == 0:\n",
|
|
" train_perf = sma_crossover_perf(\n",
|
|
" data,\n",
|
|
" vbt.Param(fast_windows, condition=\"x < slow_window\"),\n",
|
|
" vbt.Param(slow_windows),\n",
|
|
" _execute_kwargs=dict(\n",
|
|
" clear_cache=50,\n",
|
|
" collect_garbage=50\n",
|
|
" )\n",
|
|
" )\n",
|
|
" train_perf_list.append(train_perf)\n",
|
|
" best_params = train_perf.idxmax()\n",
|
|
" return train_perf[[best_params]]\n",
|
|
" else:\n",
|
|
" train_perf = train_perf_list[split_idx]\n",
|
|
" best_params = train_perf.idxmax()\n",
|
|
" test_perf = sma_crossover_perf(\n",
|
|
" data,\n",
|
|
" vbt.Param([best_params[0]]),\n",
|
|
" vbt.Param([best_params[1]]),\n",
|
|
" )\n",
|
|
" return test_perf\n",
|
|
" \n",
|
|
"train_perf_list = []\n",
|
|
"cv_perf = splitter.apply(\n",
|
|
" cv_sma_crossover,\n",
|
|
" vbt.Takeable(data),\n",
|
|
" np.arange(5, 50),\n",
|
|
" np.arange(5, 50),\n",
|
|
" vbt.Rep(\"split_idx\"),\n",
|
|
" vbt.Rep(\"set_idx\"),\n",
|
|
" train_perf_list,\n",
|
|
" iteration=\"set_major\",\n",
|
|
" merge_func=\"concat\",\n",
|
|
")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "c7f1f4a1-fe05-44c2-b2c3-fcbb432bdeaa",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"train_perf = pd.concat(train_perf_list, keys=splitter.split_labels)\n",
|
|
"train_perf"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "dd134bf0-8c5d-4365-871b-a5a0aa232e4b",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"cv_perf"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "8da1d36b-6b51-4429-be5c-ed0acbcfdf24",
|
|
"metadata": {},
|
|
"source": [
|
|
"#### Merging"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "23da7a75-db32-451d-8a7d-8bf729584e99",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def get_entries_and_exits(data, fast_window, slow_window):\n",
|
|
" fast_sma = data.run(\"sma\", fast_window, short_name=\"fast_sma\")\n",
|
|
" slow_sma = data.run(\"sma\", slow_window, short_name=\"slow_sma\")\n",
|
|
" entries = fast_sma.real_crossed_above(slow_sma)\n",
|
|
" exits = fast_sma.real_crossed_below(slow_sma)\n",
|
|
" return entries, exits\n",
|
|
"\n",
|
|
"entries, exits = splitter.apply(\n",
|
|
" get_entries_and_exits,\n",
|
|
" vbt.Takeable(data),\n",
|
|
" 20,\n",
|
|
" 30,\n",
|
|
" merge_func=\"column_stack\"\n",
|
|
")\n",
|
|
"\n",
|
|
"print(entries)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "2822ee69-5a35-4700-b025-442d2e9396fc",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"entries, exits = splitter.apply(\n",
|
|
" get_entries_and_exits,\n",
|
|
" vbt.Takeable(data),\n",
|
|
" 20,\n",
|
|
" 30,\n",
|
|
" merge_all=False,\n",
|
|
" merge_func=\"row_stack\"\n",
|
|
")\n",
|
|
"\n",
|
|
"entries.loc[2018]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "80266dbb-b700-4389-8956-5a253b18b816",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def get_signal_count(*args, **kwargs):\n",
|
|
" entries, exits = get_entries_and_exits(*args, **kwargs)\n",
|
|
" return entries.vbt.signals.total(), exits.vbt.signals.total()\n",
|
|
"\n",
|
|
"entry_count, exit_count = splitter.apply(\n",
|
|
" get_signal_count,\n",
|
|
" vbt.Takeable(data),\n",
|
|
" 20,\n",
|
|
" 30,\n",
|
|
" merge_func=\"concat\",\n",
|
|
" attach_bounds=\"index\"\n",
|
|
")\n",
|
|
"entry_count"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "56842a7c-290c-41ca-8e39-2fe148b9a352",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def plot_entries_and_exits(results, data, keys):\n",
|
|
" set_labels = keys.get_level_values(\"set\")\n",
|
|
" fig = data.plot(plot_volume=False)\n",
|
|
" train_seen = False\n",
|
|
" test_seen = False\n",
|
|
"\n",
|
|
" for i in range(len(results)):\n",
|
|
" entries, exits = results[i]\n",
|
|
" set_label = set_labels[i]\n",
|
|
" if set_label == \"train\":\n",
|
|
" entries.vbt.signals.plot_as_entries(\n",
|
|
" data.close,\n",
|
|
" trace_kwargs=dict(\n",
|
|
" marker=dict(color=\"limegreen\"), \n",
|
|
" name=f\"Entries ({set_label})\",\n",
|
|
" legendgroup=f\"Entries ({set_label})\",\n",
|
|
" showlegend=not train_seen\n",
|
|
" ),\n",
|
|
" fig=fig\n",
|
|
" ),\n",
|
|
" exits.vbt.signals.plot_as_exits(\n",
|
|
" data.close,\n",
|
|
" trace_kwargs=dict(\n",
|
|
" marker=dict(color=\"orange\"), \n",
|
|
" name=f\"Exits ({set_label})\",\n",
|
|
" legendgroup=f\"Exits ({set_label})\",\n",
|
|
" showlegend=not train_seen\n",
|
|
" ),\n",
|
|
" fig=fig\n",
|
|
" )\n",
|
|
" train_seen = True\n",
|
|
" else:\n",
|
|
" entries.vbt.signals.plot_as_entries(\n",
|
|
" data.close,\n",
|
|
" trace_kwargs=dict(\n",
|
|
" marker=dict(color=\"skyblue\"), \n",
|
|
" name=f\"Entries ({set_label})\",\n",
|
|
" legendgroup=f\"Entries ({set_label})\",\n",
|
|
" showlegend=not test_seen\n",
|
|
" ),\n",
|
|
" fig=fig\n",
|
|
" ),\n",
|
|
" exits.vbt.signals.plot_as_exits(\n",
|
|
" data.close,\n",
|
|
" trace_kwargs=dict(\n",
|
|
" marker=dict(color=\"magenta\"), \n",
|
|
" name=f\"Exits ({set_label})\",\n",
|
|
" legendgroup=f\"Exits ({set_label})\",\n",
|
|
" showlegend=not test_seen\n",
|
|
" ),\n",
|
|
" fig=fig\n",
|
|
" )\n",
|
|
" test_seen = True\n",
|
|
" return fig\n",
|
|
"\n",
|
|
"splitter.apply(\n",
|
|
" get_entries_and_exits,\n",
|
|
" vbt.Takeable(data),\n",
|
|
" 20,\n",
|
|
" 30,\n",
|
|
" merge_func=plot_entries_and_exits,\n",
|
|
" merge_kwargs=dict(data=data, keys=vbt.Rep(\"keys\")),\n",
|
|
").show_svg()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "93378394-5bd8-49e4-ad0a-e9d0eb25f58d",
|
|
"metadata": {},
|
|
"source": [
|
|
"#### Decorators"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "63999a23-9b3e-41e9-ba9e-aefc4775c60c",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"@vbt.split(splitter=splitter)\n",
|
|
"def get_split_total_return(data):\n",
|
|
" return data.returns.vbt.returns.total()\n",
|
|
"\n",
|
|
"get_split_total_return(vbt.Takeable(data))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "47f5be24-bd93-4f16-9849-2774d7d29617",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def get_total_return(data):\n",
|
|
" return data.returns.vbt.returns.total()\n",
|
|
"\n",
|
|
"get_split_total_return = vbt.split(\n",
|
|
" get_total_return, \n",
|
|
" splitter=splitter\n",
|
|
")\n",
|
|
"get_split_total_return(vbt.Takeable(data))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "e508211b-3517-4925-ae8d-f248e94579d3",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"@vbt.split\n",
|
|
"def get_split_total_return(data):\n",
|
|
" return data.returns.vbt.returns.total()\n",
|
|
"\n",
|
|
"get_split_total_return(vbt.Takeable(data), _splitter=splitter)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "bfaab288-43f0-454e-88d2-4b8b36b3932c",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"get_split_total_return(\n",
|
|
" vbt.Takeable(data.loc[\"2020\":\"2020\"]), \n",
|
|
" _splitter=\"from_rolling\", \n",
|
|
" _splitter_kwargs=dict(length=\"30d\")\n",
|
|
")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "20d941b7-4419-4dfb-81f0-72f1b0839888",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"get_total_return_by_month = vbt.split(\n",
|
|
" get_total_return,\n",
|
|
" splitter=\"from_grouper\", \n",
|
|
" splitter_kwargs=dict(by=vbt.RepEval(\"index.to_period('M')\")),\n",
|
|
" takeable_args=[\"data\"]\n",
|
|
")\n",
|
|
"\n",
|
|
"get_total_return_by_month(data.loc[\"2020\":\"2020\"])"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "d61a83eb-c72c-4257-a195-a34a82378f56",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"cv_sma_crossover_perf = vbt.split(\n",
|
|
" sma_crossover_perf, \n",
|
|
" splitter=\"from_single\",\n",
|
|
" splitter_kwargs=dict(split=0.6, set_labels=[\"train\", \"test\"]),\n",
|
|
" takeable_args=[\"data\"],\n",
|
|
" merge_func=\"concat\",\n",
|
|
")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "27387cd2-f223-4cbd-b2e5-65aef60e512c",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"train_perf = cv_sma_crossover_perf(\n",
|
|
" data.loc[\"2020\":\"2021\"],\n",
|
|
" vbt.Param(np.arange(5, 50), condition=\"x < slow_window\"),\n",
|
|
" vbt.Param(np.arange(5, 50)),\n",
|
|
" p_execute_kwargs=dict(\n",
|
|
" clear_cache=50,\n",
|
|
" collect_garbage=50\n",
|
|
" ),\n",
|
|
" _forward_kwargs_as={\n",
|
|
" \"p_execute_kwargs\": \"_execute_kwargs\"\n",
|
|
" },\n",
|
|
" _apply_kwargs=dict(set_=\"train\")\n",
|
|
")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "9878d18a-7b05-4b83-9ca5-8d291dae2d5d",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"train_perf"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "9b4665fe-f78c-4257-922d-f05b351171ec",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"test_perf = cv_sma_crossover_perf(\n",
|
|
" data.loc[\"2020\":\"2021\"],\n",
|
|
" train_perf.idxmax()[0],\n",
|
|
" train_perf.idxmax()[1],\n",
|
|
" _apply_kwargs=dict(set_=\"test\")\n",
|
|
")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "7a921ee5-4dd4-4c8e-be52-430a912059a6",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"test_perf"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "8ebf0a46-778d-4186-bc7a-dfa4e897c872",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"@njit(nogil=True)\n",
|
|
"def sma_crossover_perf_nb(close, fast_window, slow_window, ann_factor):\n",
|
|
" fast_sma = vbt.nb.ma_nb(close, fast_window)\n",
|
|
" slow_sma = vbt.nb.ma_nb(close, slow_window)\n",
|
|
" entries = vbt.nb.crossed_above_nb(fast_sma, slow_sma)\n",
|
|
" exits = vbt.nb.crossed_above_nb(slow_sma, fast_sma)\n",
|
|
" sim_out = vbt.pf_nb.from_signals_nb(\n",
|
|
" target_shape=close.shape,\n",
|
|
" group_lens=np.full(close.shape[1], 1),\n",
|
|
" close=close,\n",
|
|
" long_entries=entries,\n",
|
|
" short_entries=exits,\n",
|
|
" save_returns=True\n",
|
|
" )\n",
|
|
" return vbt.ret_nb.sharpe_ratio_nb(\n",
|
|
" sim_out.in_outputs.returns, \n",
|
|
" ann_factor\n",
|
|
" )"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "01f161db-2548-4627-a385-40a2d84c9034",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"sma_crossover_perf_nb(vbt.to_2d_array(data.close), 20, 30, 365)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "9e35b428-f5e6-4000-afc7-8ef9bd3aeffc",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"cv_sma_crossover_perf = vbt.cv_split(\n",
|
|
" sma_crossover_perf_nb,\n",
|
|
" splitter=\"from_rolling\",\n",
|
|
" splitter_kwargs=dict(\n",
|
|
" length=360, \n",
|
|
" split=0.5, \n",
|
|
" set_labels=[\"train\", \"test\"]\n",
|
|
" ),\n",
|
|
" takeable_args=[\"close\"],\n",
|
|
" merge_func=\"concat\",\n",
|
|
" parameterized_kwargs=dict(\n",
|
|
" engine=\"dask\", \n",
|
|
" chunk_len=\"auto\",\n",
|
|
" )\n",
|
|
")\n",
|
|
"\n",
|
|
"grid_perf, best_perf = cv_sma_crossover_perf(\n",
|
|
" vbt.to_2d_array(data.close),\n",
|
|
" vbt.Param(np.arange(5, 50), condition=\"x < slow_window\"),\n",
|
|
" vbt.Param(np.arange(5, 50)),\n",
|
|
" pd.Timedelta(days=365) // data.index.freq,\n",
|
|
" _merge_kwargs=dict(wrapper=data.symbol_wrapper),\n",
|
|
" _index=data.index,\n",
|
|
" _return_grid=\"all\"\n",
|
|
")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "ca4288fd-79ea-4b0b-a007-7ca2d8d85472",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"grid_perf"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "75fd72d5-7004-46a5-a47d-d45ee3b95381",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"best_perf"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "87e59f91-a076-4b75-a1a6-64ad04f35cbe",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"best_train_perf = best_perf.xs(\"train\", level=\"set\")\n",
|
|
"best_test_perf = best_perf.xs(\"test\", level=\"set\")\n",
|
|
"best_train_perf.corr(best_test_perf)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "e548f6f6-ac62-46c3-b6ef-40467a4564fe",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"param_cross_set_corr = grid_perf\\\n",
|
|
" .unstack(\"set\")\\\n",
|
|
" .groupby([\"fast_window\", \"slow_window\"])\\\n",
|
|
" .apply(lambda x: x[\"train\"].corr(x[\"test\"]))\n",
|
|
"param_cross_set_corr.vbt.heatmap(symmetric=True).show_svg()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "4a40f160-6159-4b52-bc15-13f68c36be31",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"grid_test_perf = grid_perf.xs(\"test\", level=\"set\")\n",
|
|
"grid_df = grid_test_perf.rename(\"grid\").reset_index()\n",
|
|
"del grid_df[\"fast_window\"]\n",
|
|
"del grid_df[\"slow_window\"]\n",
|
|
"best_df = best_test_perf.rename(\"best\").reset_index()\n",
|
|
"del best_df[\"fast_window\"]\n",
|
|
"del best_df[\"slow_window\"]\n",
|
|
"merged_df = pd.merge(grid_df, best_df, on=[\"split\", \"symbol\"])\n",
|
|
"grid_better_mask = merged_df[\"grid\"] > merged_df[\"best\"]\n",
|
|
"grid_better_mask.index = grid_test_perf.index\n",
|
|
"grid_better_cnt = grid_better_mask.groupby([\"split\", \"symbol\"]).mean()\n",
|
|
"grid_better_cnt"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "c0f127bb-3b1a-4425-a1bc-ec6be6a27f3e",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"cv_splitter = cv_sma_crossover_perf(\n",
|
|
" _index=data.index, \n",
|
|
" _return_splitter=True\n",
|
|
")\n",
|
|
"stacked_close = cv_splitter.take(\n",
|
|
" data.close, \n",
|
|
" into=\"reset_stacked\",\n",
|
|
" set_=\"test\"\n",
|
|
")\n",
|
|
"hold_pf = vbt.Portfolio.from_holding(stacked_close, freq=\"daily\")\n",
|
|
"hold_perf = hold_pf.sharpe_ratio\n",
|
|
"hold_perf"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "935ca582-ef09-40f9-b6ff-c303c98989b1",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Modeling"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "0f9cc747-d0b4-470f-a262-7cfc7ad1d62e",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"X = data.run(\"talib\")\n",
|
|
"X.shape"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "7fdb5b1c-e4cc-4472-bdb1-4629492306e7",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"trendlb = data.run(\"trendlb\", 1.0, 0.5, mode=\"binary\")\n",
|
|
"y = trendlb.labels\n",
|
|
"y.shape"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "ce1e42f0-7f1f-474c-865b-2fabd6dfc907",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"X = X.replace([-np.inf, np.inf], np.nan)\n",
|
|
"invalid_column_mask = X.isnull().all(axis=0) | (X.nunique() == 1)\n",
|
|
"X = X.loc[:, ~invalid_column_mask]\n",
|
|
"invalid_row_mask = X.isnull().any(axis=1) | y.isnull()\n",
|
|
"X = X.loc[~invalid_row_mask]\n",
|
|
"y = y.loc[~invalid_row_mask]\n",
|
|
"X.shape, y.shape"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "bf1fe438-bf27-4fc3-9d9b-5c71a7b046ae",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from sklearn.ensemble import RandomForestClassifier\n",
|
|
"\n",
|
|
"clf = RandomForestClassifier(random_state=42)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "f64bbb84-5340-4554-a78c-5a58bdbe5010",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"cv = vbt.SplitterCV(\n",
|
|
" \"from_expanding\", \n",
|
|
" min_length=360, \n",
|
|
" offset=180, \n",
|
|
" split=-180,\n",
|
|
" set_labels=[\"train\", \"test\"]\n",
|
|
")\n",
|
|
"\n",
|
|
"cv_splitter = cv.get_splitter(X)\n",
|
|
"cv_splitter.plot().show_svg()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "5c26f554-c0ff-4973-a625-d95549bbbe36",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from sklearn.model_selection import cross_val_score\n",
|
|
"\n",
|
|
"cross_val_score(clf, X, y, cv=cv, scoring=\"accuracy\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "580ae7c7-d108-454f-903f-937c8dcf058e",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"X_slices = cv_splitter.take(X)\n",
|
|
"y_slices = cv_splitter.take(y)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "b9186a59-798a-404a-b763-f6360d098dc9",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"test_labels = []\n",
|
|
"test_preds = []\n",
|
|
"for split in X_slices.index.unique(level=\"split\"):\n",
|
|
" X_train_slice = X_slices[(split, \"train\")]\n",
|
|
" y_train_slice = y_slices[(split, \"train\")]\n",
|
|
" X_test_slice = X_slices[(split, \"test\")]\n",
|
|
" y_test_slice = y_slices[(split, \"test\")]\n",
|
|
" slice_clf = clf.fit(X_train_slice, y_train_slice)\n",
|
|
" test_pred = slice_clf.predict(X_test_slice)\n",
|
|
" test_pred = pd.Series(test_pred, index=y_test_slice.index)\n",
|
|
" test_labels.append(y_test_slice)\n",
|
|
" test_preds.append(test_pred)\n",
|
|
" \n",
|
|
"test_labels = pd.concat(test_labels).rename(\"labels\")\n",
|
|
"test_preds = pd.concat(test_preds).rename(\"preds\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "411d961d-7e24-427a-92e1-07c6fbc52f4d",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"data.close.vbt.overlay_with_heatmap(test_labels).show_svg()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "a904c9f0-9514-4c17-bc14-a2d9980debe6",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"data.close.vbt.overlay_with_heatmap(test_preds).show_svg()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "c0592fce-7a89-4fc6-9206-1ca7b3a51700",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"pf = vbt.Portfolio.from_signals(\n",
|
|
" data.close[test_preds.index], \n",
|
|
" test_preds == 1, \n",
|
|
" test_preds == 0, \n",
|
|
" direction=\"both\"\n",
|
|
")\n",
|
|
"pf.stats()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "70cc579d-fb48-41ff-935a-1aa035c8a1f8",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3 (ipykernel)",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.11.8"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|