{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "c69786c2-7c5b-4b1e-bd0c-52380c8df261",
   "metadata": {},
   "source": [
    "# Cross-validation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b2751478-95ce-47d2-9e80-4d47ed0c7b36",
   "metadata": {},
   "outputs": [],
   "source": [
    "from vectorbtpro import *\n",
    "# whats_imported()\n",
    "\n",
    "vbt.settings.set_theme(\"dark\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6b45492f-d152-43e2-88a9-ab95bbffd546",
   "metadata": {},
   "outputs": [],
   "source": [
    "data = vbt.BinanceData.pull(\"BTCUSDT\", end=\"2022-11-01 UTC\")\n",
    "data.index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "45122110-905e-4549-92f4-205c98bd9faa",
   "metadata": {},
   "outputs": [],
   "source": [
    "@vbt.parameterized(merge_func=\"concat\")\n",
    "def sma_crossover_perf(data, fast_window, slow_window):\n",
    "    fast_sma = data.run(\"sma\", fast_window, short_name=\"fast_sma\")\n",
    "    slow_sma = data.run(\"sma\", slow_window, short_name=\"slow_sma\")\n",
    "    entries = fast_sma.real_crossed_above(slow_sma)\n",
    "    exits = fast_sma.real_crossed_below(slow_sma)\n",
    "    pf = vbt.Portfolio.from_signals(\n",
    "        data, entries, exits, direction=\"both\")\n",
    "    return pf.sharpe_ratio"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "93b5c9e1-7d18-4268-9c2f-91a5404bba0a",
   "metadata": {},
   "outputs": [],
   "source": [
    "perf = sma_crossover_perf(\n",
    "    data[\"2020\":\"2020\"],\n",
    "    vbt.Param(np.arange(5, 50), condition=\"x < slow_window\"),\n",
    "    vbt.Param(np.arange(5, 50)),\n",
    "    _execute_kwargs=dict(\n",
    "        clear_cache=50,\n",
    "        collect_garbage=50\n",
    "    )\n",
    ")\n",
    "perf"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "804d9d36-d0c6-46ab-b0c4-5d00285f6b36",
   "metadata": {},
   "outputs": [],
   "source": [
    "perf.sort_values(ascending=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5f9a5d58-aebe-4a34-8847-0bc91817e083",
   "metadata": {},
   "outputs": [],
   "source": [
    "best_fast_window, best_slow_window = perf.idxmax()\n",
    "sma_crossover_perf(\n",
    "    data[\"2021\":\"2021\"],\n",
    "    best_fast_window,\n",
    "    best_slow_window\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "933ad509-009a-4f7a-9de2-6e04d23a9c6f",
   "metadata": {},
   "outputs": [],
   "source": [
    "data[\"2021\":\"2021\"].run(\"from_holding\").sharpe_ratio"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "075e0d7a-bb55-4d69-bcbe-277497058dc2",
   "metadata": {},
   "outputs": [],
   "source": [
    "start_index = data.index[0]\n",
    "period = pd.Timedelta(days=180)\n",
    "all_is_bounds = {}\n",
    "all_is_bl_perf = {}\n",
    "all_is_perf = {}\n",
    "all_oos_bounds = {}\n",
    "all_oos_bl_perf = {}\n",
    "all_oos_perf = {}\n",
    "split_idx = 0\n",
    "period_idx = 0\n",
    "\n",
    "with vbt.ProgressBar() as pbar:\n",
    "    while start_index + 2 * period <= data.index[-1]:\n",
    "        pbar.set_prefix(str(start_index))\n",
    "\n",
    "        is_start_index = start_index\n",
    "        is_end_index = start_index + period - pd.Timedelta(nanoseconds=1)\n",
    "        is_data = data[is_start_index : is_end_index]\n",
    "        is_bl_perf = is_data.run(\"from_holding\").sharpe_ratio\n",
    "        is_perf = sma_crossover_perf(\n",
    "            is_data,\n",
    "            vbt.Param(np.arange(5, 50), condition=\"x < slow_window\"),\n",
    "            vbt.Param(np.arange(5, 50)),\n",
    "            _execute_kwargs=dict(\n",
    "                clear_cache=50,\n",
    "                collect_garbage=50\n",
    "            )\n",
    "        )\n",
    "\n",
    "        oos_start_index = start_index + period\n",
    "        oos_end_index = start_index + 2 * period - pd.Timedelta(nanoseconds=1)\n",
    "        oos_data = data[oos_start_index : oos_end_index]\n",
    "        oos_bl_perf = oos_data.run(\"from_holding\").sharpe_ratio\n",
    "        best_fw, best_sw = is_perf.idxmax()\n",
    "        oos_perf = sma_crossover_perf(oos_data, best_fw, best_sw)\n",
    "        oos_perf_index = is_perf.index[is_perf.index == (best_fw, best_sw)]\n",
    "        oos_perf = pd.Series([oos_perf], index=oos_perf_index)\n",
    "\n",
    "        all_is_bounds[period_idx] = (is_start_index, is_end_index)\n",
    "        all_oos_bounds[period_idx + 1] = (oos_start_index, oos_end_index)\n",
    "        all_is_bl_perf[(split_idx, period_idx)] = is_bl_perf\n",
    "        all_oos_bl_perf[(split_idx, period_idx + 1)] = oos_bl_perf\n",
    "        all_is_perf[(split_idx, period_idx)] = is_perf\n",
    "        all_oos_perf[(split_idx, period_idx + 1)] = oos_perf\n",
    "        start_index = start_index + period\n",
    "        split_idx += 1\n",
    "        period_idx += 1\n",
    "        pbar.update()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e9cb8334-c44c-4580-819a-6eb501e6bb1b",
   "metadata": {},
   "outputs": [],
   "source": [
    "is_period_ranges = pd.DataFrame.from_dict(\n",
    "    all_is_bounds, \n",
    "    orient=\"index\",\n",
    "    columns=[\"start\", \"end\"]\n",
    ")\n",
    "is_period_ranges.index.name = \"period\"\n",
    "oos_period_ranges = pd.DataFrame.from_dict(\n",
    "    all_oos_bounds, \n",
    "    orient=\"index\",\n",
    "    columns=[\"start\", \"end\"]\n",
    ")\n",
    "oos_period_ranges.index.name = \"period\"\n",
    "period_ranges = pd.concat((is_period_ranges, oos_period_ranges))\n",
    "period_ranges = period_ranges.drop_duplicates()\n",
    "print(period_ranges)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9800c089-7514-4a16-a72c-65ab7455ae26",
   "metadata": {},
   "outputs": [],
   "source": [
    "is_bl_perf = pd.Series(all_is_bl_perf)\n",
    "is_bl_perf.index.names = [\"split\", \"period\"]\n",
    "oos_bl_perf = pd.Series(all_oos_bl_perf)\n",
    "oos_bl_perf.index.names = [\"split\", \"period\"]\n",
    "bl_perf = pd.concat((\n",
    "    is_bl_perf.vbt.select_levels(\"period\"), \n",
    "    oos_bl_perf.vbt.select_levels(\"period\")\n",
    "))\n",
    "bl_perf = bl_perf.drop_duplicates()\n",
    "bl_perf"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "44f04653-946d-47ec-8857-f063f07e7cc3",
   "metadata": {},
   "outputs": [],
   "source": [
    "is_perf = pd.concat(all_is_perf, names=[\"split\", \"period\"])\n",
    "is_perf"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f20ae091-36b8-4811-b789-2a99dc9f0f81",
   "metadata": {},
   "outputs": [],
   "source": [
    "oos_perf = pd.concat(all_oos_perf, names=[\"split\", \"period\"])\n",
    "oos_perf"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e73f8adc-bf80-4955-a86e-ebcb5f8254b5",
   "metadata": {},
   "outputs": [],
   "source": [
    "is_best_mask = is_perf.index.vbt.drop_levels(\"period\").isin(\n",
    "    oos_perf.index.vbt.drop_levels(\"period\"))\n",
    "is_best_perf = is_perf[is_best_mask]\n",
    "is_best_perf"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b77ffdd0-dbd2-4473-9eb4-ad7d36fc7625",
   "metadata": {},
   "outputs": [],
   "source": [
    "print(pd.concat((\n",
    "    is_perf.describe(),\n",
    "    is_best_perf.describe(),\n",
    "    is_bl_perf.describe(),\n",
    "    oos_perf.describe(),\n",
    "    oos_bl_perf.describe()\n",
    "), axis=1, keys=[\n",
    "    \"IS\", \n",
    "    \"IS (Best)\", \n",
    "    \"IS (Baseline)\", \n",
    "    \"OOS (Test)\", \n",
    "    \"OOS (Baseline)\"\n",
    "]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a0f37d37-8051-4f48-b6bd-ad9ee882bd0b",
   "metadata": {},
   "outputs": [],
   "source": [
    "fig = is_perf.vbt.boxplot(\n",
    "    by_level=\"period\",\n",
    "    trace_kwargs=dict(\n",
    "        line=dict(color=\"lightskyblue\"), \n",
    "        opacity=0.4,\n",
    "        showlegend=False\n",
    "    ),\n",
    "    xaxis_title=\"Period\", \n",
    "    yaxis_title=\"Sharpe\",\n",
    ")\n",
    "fig = is_best_perf.vbt.select_levels(\"period\").vbt.plot(\n",
    "    trace_kwargs=dict(\n",
    "        name=\"Best\", \n",
    "        line=dict(color=\"limegreen\", dash=\"dash\")\n",
    "    ), \n",
    "    fig=fig\n",
    ")\n",
    "fig = bl_perf.vbt.plot(\n",
    "    trace_kwargs=dict(\n",
    "        name=\"Baseline\", \n",
    "        line=dict(color=\"orange\", dash=\"dash\")\n",
    "    ), \n",
    "    fig=fig\n",
    ")\n",
    "fig = oos_perf.vbt.select_levels(\"period\").vbt.plot(\n",
    "    trace_kwargs=dict(\n",
    "        name=\"Test\", \n",
    "        line=dict(color=\"orangered\")\n",
    "    ), \n",
    "    fig=fig\n",
    ")\n",
    "fig.show_svg()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a3e52247-958a-40ba-af30-7a5f1a55b744",
   "metadata": {},
   "outputs": [],
   "source": [
    "is_perf_split6 = is_perf.xs(6, level=\"split\")\n",
    "is_perf_split6.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "53a0d648-a49e-48c5-b7a2-23fb8b18db03",
   "metadata": {},
   "outputs": [],
   "source": [
    "first_left_bound = period_ranges.loc[6, \"start\"]\n",
    "first_right_bound = period_ranges.loc[6, \"end\"]\n",
    "data[first_left_bound : first_right_bound].plot().show_svg()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "11a80453-5f53-4a7e-92b8-79834dd546af",
   "metadata": {},
   "outputs": [],
   "source": [
    "oos_perf.xs(6, level=\"period\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8b01f9be-a5b8-433d-aab3-c0c51219ce56",
   "metadata": {},
   "outputs": [],
   "source": [
    "is_perf_split6.quantile(0.25)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c0b946fb-81de-4183-9d47-9defc4f0f44e",
   "metadata": {},
   "source": [
    "## Splitter"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "113d8739-60b1-40d3-b031-984d7c7adf17",
   "metadata": {},
   "outputs": [],
   "source": [
    "splitter = vbt.Splitter.from_rolling(\n",
    "    data.index, \n",
    "    length=360, \n",
    "    split=0.5,\n",
    "    set_labels=[\"IS\", \"OOS\"]\n",
    ")\n",
    "splitter.plot().show_svg()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "9e0edb2e-8e5b-4554-8c3b-8a84b54279fe",
   "metadata": {},
   "source": [
    "### Schema"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "92af91ef-5b3e-4869-9ffd-cf5451714e94",
   "metadata": {},
   "outputs": [],
   "source": [
    "print(splitter.splits)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "31376ef0-288f-461e-8fa7-2023c85248bf",
   "metadata": {},
   "outputs": [],
   "source": [
    "splitter.index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ed969ea9-de5b-4501-9093-7628fb3241ae",
   "metadata": {},
   "outputs": [],
   "source": [
    "splitter.wrapper.index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c204a8e8-a640-4056-bbdd-2bfb81bbd7a7",
   "metadata": {},
   "outputs": [],
   "source": [
    "splitter.wrapper.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ac099a17-7a4a-4227-8782-f71f089e5943",
   "metadata": {},
   "outputs": [],
   "source": [
    "oos_splitter = splitter[\"OOS\"]\n",
    "print(oos_splitter.splits)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f043f402-5a79-4a74-a355-12bac03e0235",
   "metadata": {},
   "source": [
    "#### Range format"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "87ab6012-b22b-4a7d-81f7-0b2d8f6ffce9",
   "metadata": {},
   "outputs": [],
   "source": [
    "index = vbt.date_range(\"2020\", periods=14)\n",
    "index[slice(1, 7)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8c6462f8-0f46-4237-a499-b63cd961b55b",
   "metadata": {},
   "outputs": [],
   "source": [
    "index[1], index[6]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c2289e7e-6dbc-46cd-9026-09716dd283c4",
   "metadata": {},
   "source": [
    "##### Relative"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "980722dd-e524-4ab7-9e75-4871fdbcf31d",
   "metadata": {},
   "outputs": [],
   "source": [
    "rel_range = vbt.RelRange(offset=10, length=40)\n",
    "rel_range"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9e6c987f-ead6-47f2-978a-2c46bbe995b0",
   "metadata": {},
   "outputs": [],
   "source": [
    "rel_range.to_slice(total_len=len(splitter.index), prev_end=100)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f835ba92-9d19-484f-aead-6c11c99a4ad6",
   "metadata": {},
   "source": [
    "#### Array format"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "41c25ca4-bf22-4e11-bade-d115f4b692f0",
   "metadata": {},
   "outputs": [],
   "source": [
    "index = vbt.date_range(\"2020\", \"2021\", freq=\"1min\")\n",
    "range_ = np.arange(len(index))\n",
    "range_.nbytes / 1024 / 1024"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "91d67769-695f-4a29-83f6-1b09d77f9397",
   "metadata": {},
   "outputs": [],
   "source": [
    "range_ = np.full(len(index), True)\n",
    "range_.nbytes / 1024 / 1024"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "95a932ae-317d-4168-81ba-4045716d6eda",
   "metadata": {},
   "outputs": [],
   "source": [
    "splitter.splits_arr.dtype"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f73dab8c-8488-48bc-8824-9f263116fa9b",
   "metadata": {},
   "outputs": [],
   "source": [
    "id(slice(0, 180, None))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "328d2ec2-d2cc-4550-b0ae-273287cdd05f",
   "metadata": {},
   "outputs": [],
   "source": [
    "range_00 = np.arange(0, 5)\n",
    "range_01 = np.arange(5, 15)\n",
    "range_10 = np.arange(15, 30)\n",
    "range_11 = np.arange(30, 50)\n",
    "\n",
    "ind_splitter = vbt.Splitter.from_splits(\n",
    "    data.index,\n",
    "    [[range_00, range_01], [range_10, range_11]],\n",
    "    fix_ranges=False\n",
    ")\n",
    "print(ind_splitter.splits)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "deae0ac0-0218-4835-aedd-6dbc5696cc75",
   "metadata": {},
   "outputs": [],
   "source": [
    "ind_splitter.splits.loc[0, \"set_1\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "35f60dad-3a36-4c62-b07d-c36fb7a1b9da",
   "metadata": {},
   "outputs": [],
   "source": [
    "ind_splitter.splits.loc[0, \"set_1\"].range_"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "6326885f-9be3-4416-a868-187bfc2808d6",
   "metadata": {},
   "source": [
    "### Preparation"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "79a85a1f-eae2-49ff-b239-574ff90eadc4",
   "metadata": {},
   "source": [
    "#### Splits"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "acad6284-91bb-48d0-86bd-f3324236fa40",
   "metadata": {},
   "outputs": [],
   "source": [
    "vbt.Splitter.split_range(\n",
    "    slice(None),\n",
    "    (vbt.RelRange(length=0.75), vbt.RelRange()),\n",
    "    index=data.index\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4674b573-34fd-4d15-bc81-0adda3784086",
   "metadata": {},
   "outputs": [],
   "source": [
    "splitter.split_range(\n",
    "    slice(None),\n",
    "    (vbt.RelRange(length=0.75), vbt.RelRange())\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "244f8814-d37e-4804-97fc-4586223f308d",
   "metadata": {},
   "outputs": [],
   "source": [
    "data[slice(0, 1426, None)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a2637af2-a0d0-47bf-bf97-a9cfc9143a06",
   "metadata": {},
   "outputs": [],
   "source": [
    "vbt.Splitter.split_range(\n",
    "    slice(None), \n",
    "    0.75, \n",
    "    index=data.index\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d79151bf-1d01-436b-a4b4-3f73e1016e4f",
   "metadata": {},
   "outputs": [],
   "source": [
    "vbt.Splitter.split_range(\n",
    "    slice(None), \n",
    "    -0.25,\n",
    "    index=data.index\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ab0e6009-1b0e-4f0f-838a-cfe4a87f7406",
   "metadata": {},
   "outputs": [],
   "source": [
    "int(0.75 * len(data.index))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c87ffde6-b565-457a-bb97-0be21789705e",
   "metadata": {},
   "outputs": [],
   "source": [
    "len(data.index) - int(0.25 * len(data.index))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "653cc6d7-2236-4b12-ba92-a115cab15656",
   "metadata": {},
   "outputs": [],
   "source": [
    "vbt.Splitter.split_range(\n",
    "    slice(None), \n",
    "    (vbt.RelRange(), vbt.RelRange(length=0.25)),\n",
    "    backwards=True,\n",
    "    index=data.index\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0065dda0-0bfd-4ce0-b6bc-693c7f0960a5",
   "metadata": {},
   "outputs": [],
   "source": [
    "vbt.Splitter.split_range(\n",
    "    slice(None), \n",
    "    (1.0, 30), \n",
    "    backwards=True,\n",
    "    index=data.index\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a4c1bc39-70e6-4b94-97d5-1472c705e70b",
   "metadata": {},
   "outputs": [],
   "source": [
    "vbt.Splitter.split_range(\n",
    "    slice(None), \n",
    "    (\n",
    "        vbt.RelRange(length=0.4, length_space=\"all\"), \n",
    "        vbt.RelRange(length=0.4, length_space=\"all\"),\n",
    "        vbt.RelRange()\n",
    "    ),\n",
    "    index=data.index\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d2c201c9-f114-4f3b-ad47-bcee006d7953",
   "metadata": {},
   "outputs": [],
   "source": [
    "vbt.Splitter.split_range(\n",
    "    slice(None),\n",
    "    (vbt.RelRange(length=0.75), vbt.RelRange(offset=1)),\n",
    "    index=data.index\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6f23a330-cb2f-4a15-8a09-08504c4cd48a",
   "metadata": {},
   "outputs": [],
   "source": [
    "vbt.Splitter.split_range(\n",
    "    slice(None), \n",
    "    (\n",
    "        vbt.RelRange(length=0.75), \n",
    "        vbt.RelRange(length=1, is_gap=True),\n",
    "        vbt.RelRange()\n",
    "    ),\n",
    "    index=data.index\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "958e0e1c-2735-4568-b05b-b29c8b4f7560",
   "metadata": {},
   "outputs": [],
   "source": [
    "vbt.Splitter.split_range(\n",
    "    slice(None), \n",
    "    (np.array([3, 4, 5]), np.array([6, 8, 10])),\n",
    "    index=data.index\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a60e0aaa-d581-4530-a747-3067c8678650",
   "metadata": {},
   "outputs": [],
   "source": [
    "vbt.Splitter.split_range(\n",
    "    slice(None), \n",
    "    (np.array([3, 4, 5]), np.array([6, 8, 10])),\n",
    "    range_format=\"indices\",\n",
    "    index=data.index\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4249fa3b-ef6f-41b7-8d8d-81723e6ce48a",
   "metadata": {},
   "outputs": [],
   "source": [
    "vbt.Splitter.split_range(\n",
    "    slice(None), \n",
    "    (slice(\"2020\", \"2021\"), slice(\"2021\", \"2022\")),\n",
    "    index=data.index\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bfbae80f-a314-41f9-8c49-2111f4df01d9",
   "metadata": {},
   "outputs": [],
   "source": [
    "data.index[867:1233]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "debcb754-e691-4e59-b318-61486af3fbf4",
   "metadata": {},
   "outputs": [],
   "source": [
    "data.index[1233:1598]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7d1340f2-d1b9-4fbe-9898-d41620e3d30d",
   "metadata": {},
   "outputs": [],
   "source": [
    "vbt.Splitter.split_range(\n",
    "    slice(None), \n",
    "    (\n",
    "        vbt.RelRange(length=\"180 days\"), \n",
    "        vbt.RelRange(offset=\"1 day\", length=\"90 days\")\n",
    "    ),\n",
    "    index=data.index\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8638db99-b5a3-4ce8-adcc-f3ebf7c59b71",
   "metadata": {},
   "source": [
    "#### Method"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b6a99b43-4326-4d37-85fc-2f6b0076ebd4",
   "metadata": {},
   "outputs": [],
   "source": [
    "manual_splitter = vbt.Splitter.from_splits(\n",
    "    data.index,\n",
    "    [\n",
    "        (vbt.RelRange(), vbt.RelRange(offset=0.5, length=0.25, length_space=\"all\")),\n",
    "        (vbt.RelRange(), vbt.RelRange(offset=0.25, length=0.25, length_space=\"all\")),\n",
    "        (vbt.RelRange(), vbt.RelRange(offset=0, length=0.25, length_space=\"all\")),\n",
    "    ],\n",
    "    split_range_kwargs=dict(backwards=True),\n",
    "    set_labels=[\"IS\", \"OOS\"]\n",
    ")\n",
    "print(manual_splitter.splits)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "871ceeb6-533d-48ce-803c-49106fc6d807",
   "metadata": {},
   "outputs": [],
   "source": [
    "manual_splitter.plot().show_svg()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "22246385-ae17-47db-8815-71788c60453d",
   "metadata": {},
   "source": [
    "### Generation"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e6dbecb8-fd62-4a3d-9682-24deb81a8da0",
   "metadata": {},
   "source": [
    "#### Rolling"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d0ab8ccc-dfe7-4612-8a39-3017795e0486",
   "metadata": {},
   "outputs": [],
   "source": [
    "vbt.Splitter.from_rolling(\n",
    "    data.index,\n",
    "    length=360,\n",
    ").plot().show_svg()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "02d5d230-30c0-4156-a493-6ef4157d727a",
   "metadata": {},
   "outputs": [],
   "source": [
    "vbt.Splitter.from_rolling(\n",
    "    data.index, \n",
    "    length=360,\n",
    "    offset=90\n",
    ").plot().show_svg()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ba270114-7b18-45d0-a542-7bcfe3c0ddc3",
   "metadata": {},
   "outputs": [],
   "source": [
    "vbt.Splitter.from_rolling(\n",
    "    data.index, \n",
    "    length=360,\n",
    "    offset=-0.5\n",
    ").plot().show_svg()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f47fb9c2-eebd-40f0-b901-da3dd54ae721",
   "metadata": {},
   "outputs": [],
   "source": [
    "vbt.Splitter.from_rolling(\n",
    "    data.index, \n",
    "    length=360,\n",
    "    split=0.5\n",
    ").plot().show_svg()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "66314ec5-2a84-4343-8b08-9712bccc2d37",
   "metadata": {},
   "outputs": [],
   "source": [
    "vbt.Splitter.from_rolling(\n",
    "    data.index,\n",
    "    length=360,\n",
    "    split=0.5,\n",
    "    offset_anchor_set=None\n",
    ").plot().show_svg()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2c0661b1-df61-4792-9da6-720d6866e7c5",
   "metadata": {},
   "outputs": [],
   "source": [
    "vbt.Splitter.from_n_rolling(\n",
    "    data.index,\n",
    "    n=5,\n",
    "    split=0.5\n",
    ").plot().show_svg()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "34c752b3-ed6c-4752-b835-e6a0003ff1e5",
   "metadata": {},
   "outputs": [],
   "source": [
    "vbt.Splitter.from_n_rolling(\n",
    "    data.index,\n",
    "    n=3,\n",
    "    length=360,\n",
    "    split=0.5\n",
    ").plot().show_svg()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1b40afcc-b3d1-47c3-9f84-f30d1840333e",
   "metadata": {},
   "outputs": [],
   "source": [
    "vbt.Splitter.from_n_rolling(\n",
    "    data.index,\n",
    "    n=7,\n",
    "    length=360,\n",
    "    split=0.5\n",
    ").plot().show_svg()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "21f304b2-205c-4cb3-a231-905e266d437a",
   "metadata": {},
   "outputs": [],
   "source": [
    "vbt.Splitter.from_expanding(\n",
    "    data.index, \n",
    "    min_length=360,\n",
    "    offset=180,\n",
    "    split=-180\n",
    ").plot().show_svg()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "59a2a68e-b863-4526-aa79-6694b825ac4f",
   "metadata": {},
   "outputs": [],
   "source": [
    "vbt.Splitter.from_n_expanding(\n",
    "    data.index, \n",
    "    n=5,\n",
    "    min_length=360,\n",
    "    split=-180\n",
    ").plot().show_svg()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "2bd6889b-8da8-4bc6-be03-23a4e7eca992",
   "metadata": {},
   "source": [
    "#### Anchored"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1ef7b028-471b-4033-a3c4-1aaa3354a237",
   "metadata": {},
   "outputs": [],
   "source": [
    "vbt.Splitter.from_ranges(\n",
    "    data.index,\n",
    "    every=\"Y\",\n",
    "    split=0.5\n",
    ").plot().show_svg()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2eed590b-93be-4ae5-967d-8b9c04bf3990",
   "metadata": {},
   "outputs": [],
   "source": [
    "vbt.Splitter.from_ranges(\n",
    "    data.index,\n",
    "    every=\"Q\",\n",
    "    lookback_period=\"Y\",\n",
    "    split=0.5\n",
    ").plot().show_svg()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "69154548-b1e8-4ecd-95fd-9be2f5466ed4",
   "metadata": {},
   "outputs": [],
   "source": [
    "vbt.Splitter.from_ranges(\n",
    "    data.index,\n",
    "    every=\"Q\",\n",
    "    lookback_period=\"Y\",\n",
    "    split=(\n",
    "        vbt.RepEval(\"index.month != index.month[-1]\"),\n",
    "        vbt.RepEval(\"index.month == index.month[-1]\")\n",
    "    )\n",
    ").plot().show_svg()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1562819f-2715-41b2-977e-6e046aba4e0b",
   "metadata": {},
   "outputs": [],
   "source": [
    "def qyear(index):\n",
    "    return index.to_period(\"Q\")\n",
    "\n",
    "vbt.Splitter.from_ranges(\n",
    "    data.index,\n",
    "    start=0,\n",
    "    fixed_start=True,\n",
    "    every=\"Q\",\n",
    "    closed_end=True,\n",
    "    split=(\n",
    "        lambda index: qyear(index) != qyear(index)[-1],\n",
    "        lambda index: qyear(index) == qyear(index)[-1]\n",
    "    )\n",
    ").plot().show_svg()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "432485a6-f8f4-4970-8cec-56689fc1969d",
   "metadata": {},
   "outputs": [],
   "source": [
    "vbt.Splitter.from_grouper(\n",
    "    data.index,\n",
    "    by=\"Y\",\n",
    "    split=0.5\n",
    ").plot().show_svg()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5cd1d516-094e-4f32-a202-6d49e700386c",
   "metadata": {},
   "outputs": [],
   "source": [
    "def is_split_complete(index, split):\n",
    "    first_range = split[0]\n",
    "    first_index = index[first_range][0]\n",
    "    last_range = split[-1]\n",
    "    last_index = index[last_range][-1]\n",
    "    return first_index.is_year_start and last_index.is_year_end\n",
    "\n",
    "vbt.Splitter.from_grouper(\n",
    "    data.index,\n",
    "    by=\"Y\",\n",
    "    split=0.5,\n",
    "    split_check_template=vbt.RepFunc(is_split_complete)\n",
    ").plot().show_svg()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6cc38232-edfb-4395-899f-47ed3aa0723c",
   "metadata": {},
   "outputs": [],
   "source": [
    "def format_split_labels(index, splits_arr):\n",
    "    years = map(lambda x: index[x[0]][0].year, splits_arr)\n",
    "    return pd.Index(years, name=\"split_year\")\n",
    "\n",
    "vbt.Splitter.from_grouper(\n",
    "    data.index,\n",
    "    by=\"Y\",\n",
    "    split=0.5,\n",
    "    split_check_template=vbt.RepFunc(is_split_complete),\n",
    "    split_labels=vbt.RepFunc(format_split_labels)\n",
    ").plot().show_svg()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "11d702be-5df5-42de-9491-9b38ac7aae06",
   "metadata": {},
   "outputs": [],
   "source": [
    "vbt.Splitter.from_grouper(\n",
    "    data.index,\n",
    "    by=data.index.year,\n",
    "    split=0.5,\n",
    "    split_check_template=vbt.RepFunc(is_split_complete)\n",
    ").plot().show_svg()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "38d4f68a-0372-48e5-bc1a-c45171942e7e",
   "metadata": {},
   "source": [
    "#### Random"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e72d4e77-c387-4c7e-bcd5-07e8c15ac15c",
   "metadata": {},
   "outputs": [],
   "source": [
    "vbt.Splitter.from_n_random(\n",
    "    data.index,\n",
    "    n=50,\n",
    "    min_length=360,\n",
    "    seed=42,\n",
    "    split=0.5\n",
    ").plot().show_svg()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "dbeea5da-bceb-4e8e-866b-2a0e608995f2",
   "metadata": {},
   "outputs": [],
   "source": [
    "vbt.Splitter.from_n_random(\n",
    "    data.index,\n",
    "    n=50,\n",
    "    min_length=60,\n",
    "    max_length=480,\n",
    "    seed=42,\n",
    "    split=0.5\n",
    ").plot().show_svg()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b2c41a6c-b579-499c-98af-b7332cdaa7ab",
   "metadata": {},
   "outputs": [],
   "source": [
    "def start_p_func(i, indices):\n",
    "    return indices / indices.sum()\n",
    "\n",
    "vbt.Splitter.from_n_random(\n",
    "    data.index,\n",
    "    n=50,\n",
    "    min_length=60,\n",
    "    max_length=480,\n",
    "    seed=42,\n",
    "    start_p_func=start_p_func,\n",
    "    split=0.5\n",
    ").plot().show_svg()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "bf44a54a-5349-4b2e-a92a-03ecdda01e00",
   "metadata": {},
   "source": [
    "#### Scikit-learn"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3992f4a8-d943-4256-939f-6f3be3767df6",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.model_selection import KFold\n",
    "\n",
    "vbt.Splitter.from_sklearn(\n",
    "    data.index, \n",
    "    KFold(n_splits=5)\n",
    ").plot().show_svg()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "43c93cc4-b494-4119-923a-c01e5e84b458",
   "metadata": {},
   "source": [
    "#### Dynamic"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a55967e9-58b6-46a4-92eb-47de2c99ee40",
   "metadata": {},
   "outputs": [],
   "source": [
    "def split_func(index, prev_start):\n",
    "    if prev_start is None:\n",
    "        prev_start = index[0]\n",
    "    new_start = prev_start + pd.offsets.MonthBegin(1)\n",
    "    new_end = new_start + pd.DateOffset(years=1)\n",
    "    if new_end > index[-1] + index.freq:\n",
    "        return None\n",
    "    return [\n",
    "        slice(new_start, new_start + pd.offsets.MonthBegin(9)),\n",
    "        slice(new_start + pd.offsets.MonthBegin(9), new_end)\n",
    "    ]\n",
    "\n",
    "vbt.Splitter.from_split_func(\n",
    "    data.index,\n",
    "    split_func=split_func,\n",
    "    split_args=(vbt.Rep(\"index\"), vbt.Rep(\"prev_start\")),\n",
    "    range_bounds_kwargs=dict(index_bounds=True)\n",
    ").plot().show_svg()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3dcf9638-36e8-4304-99e4-c30e1970cfd5",
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_next_monday(from_date):\n",
    "    if from_date.weekday == 0 and from_date.ceil(\"H\").hour <= 9:\n",
    "        return from_date.floor(\"D\")\n",
    "    return from_date.floor(\"D\") + pd.offsets.Week(n=0, weekday=0)\n",
    "\n",
    "def get_next_business_range(from_date):\n",
    "    monday_0000 = get_next_monday(from_date)\n",
    "    monday_0900 = monday_0000 + pd.DateOffset(hours=9)\n",
    "    friday_1700 = monday_0900 + pd.DateOffset(days=4, hours=8)\n",
    "    return slice(monday_0900, friday_1700)\n",
    "\n",
    "def split_func(index, bounds):\n",
    "    if len(bounds) == 0:\n",
    "        from_date = index[0]\n",
    "    else:\n",
    "        from_date = bounds[-1][1][0]\n",
    "    train_range = get_next_business_range(from_date)\n",
    "    test_range = get_next_business_range(train_range.stop)\n",
    "    if test_range.stop > index[-1] + index.freq:\n",
    "        return None\n",
    "    return train_range, test_range\n",
    "\n",
    "vbt.Splitter.from_split_func(\n",
    "    vbt.date_range(\"2020-01\", \"2020-03\", freq=\"15min\"),\n",
    "    split_func=split_func,\n",
    "    split_args=(vbt.Rep(\"index\"), vbt.Rep(\"bounds\")),\n",
    "    range_bounds_kwargs=dict(index_bounds=True)\n",
    ").plot().show_svg()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4ab3b2d1-5fb5-4978-95e0-2ee627abe829",
   "metadata": {},
   "source": [
    "### Validation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0b84d3f9-22a1-46b3-9593-4ebe18060fc1",
   "metadata": {},
   "outputs": [],
   "source": [
    "splitter = vbt.Splitter.from_ranges(\n",
    "    data.index,\n",
    "    every=\"Y\",\n",
    "    closed_end=True,\n",
    "    split=0.5,\n",
    "    set_labels=[\"IS\", \"OOS\"]\n",
    ")\n",
    "splitter.plot().show_svg()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "589b9b90-e825-4cf6-a402-4720fd82e6b0",
   "metadata": {},
   "source": [
    "#### Bounds"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9aa53234-fab9-4db4-b0f6-955f8c66f467",
   "metadata": {},
   "outputs": [],
   "source": [
    "bounds_arr = splitter.get_bounds_arr()\n",
    "bounds_arr.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f364195f-9ef1-43ee-b8a9-af7835d147a2",
   "metadata": {},
   "outputs": [],
   "source": [
    "print(bounds_arr)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b3051c7d-3186-49a4-9092-d837f79c09c8",
   "metadata": {},
   "outputs": [],
   "source": [
    "bounds = splitter.get_bounds(index_bounds=True)\n",
    "bounds.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9d76d199-12db-4574-a8f4-0b87107d8a0d",
   "metadata": {},
   "outputs": [],
   "source": [
    "print(bounds)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a0818b81-9735-481d-b510-14140ba6c576",
   "metadata": {},
   "outputs": [],
   "source": [
    "bounds.loc[(0, \"OOS\"), \"end\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "21737903-0707-410c-a1c1-133178784fcf",
   "metadata": {},
   "outputs": [],
   "source": [
    "bounds.loc[(1, \"IS\"), \"start\"]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c5b5d61b-66fe-4057-b699-64d1863d061c",
   "metadata": {},
   "source": [
    "#### Masks"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "32b1ad13-4523-41d9-b424-637f1b25b4a2",
   "metadata": {},
   "outputs": [],
   "source": [
    "mask = splitter.get_mask()\n",
    "mask.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a21e4d35-4403-4d5e-ad67-0086da9444ce",
   "metadata": {},
   "outputs": [],
   "source": [
    "print(mask)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "328a93e7-7b14-42d8-acc6-3eba2a72ec4f",
   "metadata": {},
   "outputs": [],
   "source": [
    "mask[\"2021\":\"2021\"].any()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8bccca0a-65be-4810-bd5e-5222edfba6c4",
   "metadata": {},
   "outputs": [],
   "source": [
    "print(mask.resample(vbt.offset(\"Y\")).sum())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "629ea5be-73c7-4799-8d07-ab28a90df329",
   "metadata": {},
   "outputs": [],
   "source": [
    "results = []\n",
    "for mask in splitter.get_iter_split_masks():\n",
    "    results.append(mask.resample(vbt.offset(\"Y\")).sum())\n",
    "print(pd.concat(results, axis=1, keys=splitter.split_labels))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ec5cbff7-c630-4d35-a607-cf4f7c482f0f",
   "metadata": {},
   "source": [
    "#### Coverage"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "76bb4bb3-027b-465f-be37-fb44cb567bc6",
   "metadata": {},
   "outputs": [],
   "source": [
    "splitter.get_split_coverage()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "071e5e14-def1-4133-8682-d4e214810079",
   "metadata": {},
   "outputs": [],
   "source": [
    "splitter.get_set_coverage()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d885fac2-0a56-4e4e-81d0-52a29b8cd572",
   "metadata": {},
   "outputs": [],
   "source": [
    "splitter.get_range_coverage()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7c4301ee-8998-44d5-b112-0e463f0213ea",
   "metadata": {},
   "outputs": [],
   "source": [
    "splitter.get_coverage()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4d328277-71e9-49c1-8b54-d7ecf08682e2",
   "metadata": {},
   "outputs": [],
   "source": [
    "splitter.index_bounds.loc[(2, \"OOS\"), \"start\"].is_leap_year"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a8e94289-4550-43bf-9cca-f13cb6e195ad",
   "metadata": {},
   "outputs": [],
   "source": [
    "splitter.get_range_coverage(relative=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0c9e5444-4276-42bc-b0c6-0799c7606345",
   "metadata": {},
   "outputs": [],
   "source": [
    "splitter.get_set_coverage(relative=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cf6715aa-0a33-4202-9b38-3dc8f6b24ebc",
   "metadata": {},
   "outputs": [],
   "source": [
    "splitter.get_split_coverage(overlapping=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1567e6fe-85e5-4075-b4a4-f0490d69bfb4",
   "metadata": {},
   "outputs": [],
   "source": [
    "splitter.get_set_coverage(overlapping=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a1935522-f1f7-460d-939b-8134fe1287e1",
   "metadata": {},
   "outputs": [],
   "source": [
    "splitter.get_coverage(overlapping=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2466892b-1521-4b7d-94bd-54ab21adf816",
   "metadata": {},
   "outputs": [],
   "source": [
    "splitter.plot_coverage().show_svg()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b4f46cc9-edeb-42bb-b9d1-fbb26d9510e9",
   "metadata": {},
   "outputs": [],
   "source": [
    "print(splitter.get_overlap_matrix(by=\"range\", normalize=False))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0f125754-29e4-4d22-9f0f-defb3db268ce",
   "metadata": {},
   "source": [
    "#### Grouping"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7af58745-fa11-4a7a-bd1e-833df3d3a8b4",
   "metadata": {},
   "outputs": [],
   "source": [
    "print(splitter.get_bounds(index_bounds=True, set_group_by=True))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b5d73373-ed8d-4943-860f-05342ca2c511",
   "metadata": {},
   "source": [
    "### Manipulation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2d1d4116-8e59-47d2-9364-c44aa908d2f0",
   "metadata": {},
   "outputs": [],
   "source": [
    "splitter = vbt.Splitter.from_grouper(\n",
    "    data.index, \n",
    "    by=data.index.year.rename(\"split_year\")\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6b9c5b86-7aec-4496-94d9-4c592766e167",
   "metadata": {},
   "outputs": [],
   "source": [
    "splitter.stats()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0aa022d7-6eb4-471f-aa5a-ab26c5deee58",
   "metadata": {},
   "outputs": [],
   "source": [
    "splitter.plots().show_svg()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "98537c1f-efbb-4574-9f57-5bdb07b452b9",
   "metadata": {},
   "outputs": [],
   "source": [
    "splitter = splitter.iloc[1:-1]\n",
    "splitter.stats()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a442aad5-ead3-4d4e-b049-7e3090200ddf",
   "metadata": {},
   "outputs": [],
   "source": [
    "def new_split(index):\n",
    "    return [\n",
    "        np.isin(index.quarter, [1, 2]), \n",
    "        index.quarter == 3, \n",
    "        index.quarter == 4\n",
    "    ]\n",
    "\n",
    "splitter = splitter.split_set(\n",
    "    vbt.RepFunc(new_split),\n",
    "    new_set_labels=[\"train\", \"valid\", \"test\"]\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a938abed-831a-4cb2-9729-9b015d819e38",
   "metadata": {},
   "outputs": [],
   "source": [
    "splitter.stats()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0f9125b7-6d85-4117-ad6b-37b24bf3c50d",
   "metadata": {},
   "outputs": [],
   "source": [
    "splitter.plots().show_svg()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "35f5cfe5-0626-4372-a90b-df68e155d994",
   "metadata": {},
   "source": [
    "#### Homework"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3d9fb4df-5ca0-43f5-869c-4721c3993402",
   "metadata": {},
   "outputs": [],
   "source": [
    "splitter = splitter.merge_sets(columns=[\"valid\", \"test\"], new_set_label=\"test\")\n",
    "splitter.plots().show_svg()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d00fdcdf-0bd1-49ed-ade9-411a2ae3d79c",
   "metadata": {},
   "source": [
    "## Applications"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3ebb6158-e395-49e9-bcc8-cd258c17783f",
   "metadata": {},
   "source": [
    "### Taking"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "1aa23c97-e221-48ed-8a5e-484fda11e955",
   "metadata": {},
   "source": [
    "#### Without stacking"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ed524fc3-b782-43dc-a8a6-0bced61a3708",
   "metadata": {},
   "outputs": [],
   "source": [
    "close_slices = splitter.take(data.close)\n",
    "close_slices"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "74d768fe-ce73-41c1-945f-c21fbb7fd662",
   "metadata": {},
   "outputs": [],
   "source": [
    "close_slices[2020, \"test\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "79ac7f33-08aa-4bb7-87d2-f8e80c94db0c",
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_total_return(sr):\n",
    "    return sr.vbt.to_returns().vbt.returns.total()\n",
    "\n",
    "close_slices.apply(get_total_return)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "02213852-3258-46d6-97a4-32a3a5f19820",
   "metadata": {},
   "source": [
    "##### Complex objects"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c2d3208f-2454-4501-8cd2-e94694238a56",
   "metadata": {},
   "outputs": [],
   "source": [
    "trendlb = data.run(\"trendlb\", 1.0, 0.5)\n",
    "trendlb.plot().show_svg()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4926dbe3-ecc6-445d-868a-46a88af5756c",
   "metadata": {},
   "outputs": [],
   "source": [
    "grouper = pd.Index(trendlb.labels.map({1: \"U\", 0: \"D\"}), name=\"trend\")\n",
    "trend_splitter = vbt.Splitter.from_grouper(data.index, grouper)\n",
    "trend_splitter.plot().show_svg()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f41b604c-84ca-4ea2-a293-0d40878eb65c",
   "metadata": {},
   "outputs": [],
   "source": [
    "hold_pf = vbt.Portfolio.from_holding(data)\n",
    "hold_returns_acc = hold_pf.returns_acc\n",
    "\n",
    "fast_sma, slow_sma = vbt.talib(\"SMA\").run_combs(\n",
    "    data.close, np.arange(5, 50), short_names=[\"fast_sma\", \"slow_sma\"])\n",
    "entries = fast_sma.real_crossed_above(slow_sma)\n",
    "exits = fast_sma.real_crossed_below(slow_sma)\n",
    "strat_pf = vbt.Portfolio.from_signals(\n",
    "    data, entries, exits, direction=\"both\")\n",
    "strat_returns_acc = strat_pf.returns_acc"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "84335fef-a4c9-44a3-8f68-61ab14e6d01d",
   "metadata": {},
   "outputs": [],
   "source": [
    "hold_returns_acc_slices = trend_splitter.take(hold_returns_acc)\n",
    "strat_returns_acc_slices = trend_splitter.take(strat_returns_acc)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "faae3461-56b6-4f50-aad8-f196745375e3",
   "metadata": {},
   "outputs": [],
   "source": [
    "hold_returns_acc_slices[\"U\"].sharpe_ratio()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "91ae1996-5fce-459d-a08c-58950ca7ac83",
   "metadata": {},
   "outputs": [],
   "source": [
    "strat_returns_acc_slices[\"U\"].sharpe_ratio().vbt.heatmap(\n",
    "    x_level=\"fast_sma_timeperiod\", \n",
    "    y_level=\"slow_sma_timeperiod\",\n",
    "    symmetric=True\n",
    ").show_svg()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ea0824b5-c6b5-48a5-9abd-3b8d582afeca",
   "metadata": {},
   "outputs": [],
   "source": [
    "hold_returns_acc_slices[\"D\"].sharpe_ratio()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "dff164c5-5840-4bda-b25a-b290be490d2e",
   "metadata": {},
   "outputs": [],
   "source": [
    "strat_returns_acc_slices[\"D\"].sharpe_ratio().vbt.heatmap(\n",
    "    x_level=\"fast_sma_timeperiod\", \n",
    "    y_level=\"slow_sma_timeperiod\",\n",
    "    symmetric=True\n",
    ").show_svg()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "42039308-cbeb-4a20-928c-8c1b4588b84e",
   "metadata": {},
   "outputs": [],
   "source": [
    "trend_splitter = trend_splitter.break_up_splits(\"by_gap\", sort=True)\n",
    "trend_splitter.plot().show_svg()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6e564cf0-f138-4552-afc6-0ce34502e3e8",
   "metadata": {},
   "outputs": [],
   "source": [
    "strat_pf_slices = strat_pf.split(trend_splitter)\n",
    "strat_pf_slices"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "247f3770-249d-4612-bb7c-4c3d64e141f9",
   "metadata": {},
   "outputs": [],
   "source": [
    "trend_range_perf = strat_pf_slices.apply(lambda pf: pf.sharpe_ratio)\n",
    "median_trend_perf = trend_range_perf.median(axis=1)\n",
    "median_trend_perf"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "483eaa0d-cde1-4c30-873e-cc8d668139f8",
   "metadata": {},
   "outputs": [],
   "source": [
    "trend_perf_ts = data.symbol_wrapper.fill().rename(\"trend_perf\")\n",
    "for label, sr in trend_splitter.bounds.iterrows():\n",
    "    trend_perf_ts.iloc[sr[\"start\"]:sr[\"end\"]] = median_trend_perf[label]\n",
    "data.close.vbt.overlay_with_heatmap(trend_perf_ts).show_svg()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7f013d68-bbc6-43b4-ae38-0414781602a4",
   "metadata": {},
   "source": [
    "#### Column stacking"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c899aa78-29d8-4833-b307-2949132e0b78",
   "metadata": {},
   "outputs": [],
   "source": [
    "close_stacked = pd.concat(\n",
    "    close_slices.values.tolist(), \n",
    "    axis=1, \n",
    "    keys=close_slices.index\n",
    ")\n",
    "print(close_stacked)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4e83e120-ee01-4664-81bb-39b4c084d3b6",
   "metadata": {},
   "outputs": [],
   "source": [
    "get_total_return(close_stacked)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "20abc610-b2a9-4ecd-a724-f3c267e71636",
   "metadata": {},
   "outputs": [],
   "source": [
    "close_stacked = splitter.take(data.close, into=\"stacked\")\n",
    "close_stacked.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5d67665d-ea55-400c-ace4-bd5a2adaef12",
   "metadata": {},
   "outputs": [],
   "source": [
    "close_stacked = splitter.take(data.close, into=\"reset_stacked\")\n",
    "print(close_stacked)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c6a65f09-264e-40d4-a01b-f8ef208e331d",
   "metadata": {},
   "outputs": [],
   "source": [
    "close_stacked = splitter.take(data.close, into=\"from_end_stacked\")\n",
    "print(close_stacked)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "25c891ae-ffab-4ef1-a97d-0de12a1c0f63",
   "metadata": {},
   "outputs": [],
   "source": [
    "close_stacked = splitter.take(data.close, into=\"reset_stacked_by_set\")\n",
    "close_stacked"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "78f294b1-a07b-4f1e-a43f-bf6b33ec0d13",
   "metadata": {},
   "outputs": [],
   "source": [
    "print(close_stacked[\"train\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "94613e5d-2c2f-48ba-9714-e95b6d706b7a",
   "metadata": {},
   "outputs": [],
   "source": [
    "print(182 * 8)\n",
    "print(1461 * 8)\n",
    "print(1 - 1456 / 11688)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f72feaad-a8d0-4249-9719-7e0f5811f9c5",
   "metadata": {},
   "outputs": [],
   "source": [
    "index_slices = splitter.take(data.index)\n",
    "index_slices"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0e0b9776-b7a4-4dc2-bcf5-3f73c47d9d5d",
   "metadata": {},
   "outputs": [],
   "source": [
    "close_stacked_wb = splitter.take(\n",
    "    data.close, \n",
    "    into=\"reset_stacked_by_set\",\n",
    "    attach_bounds=\"index\",\n",
    "    right_inclusive=True\n",
    ")\n",
    "print(close_stacked_wb[\"train\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "48b59d45-5ddc-4777-969b-1582b4660055",
   "metadata": {},
   "outputs": [],
   "source": [
    "@vbt.parameterized(merge_func=\"concat\")\n",
    "def set_sma_crossover_perf(close, fast_window, slow_window, freq):\n",
    "    fast_sma = vbt.talib(\"sma\").run(\n",
    "        close, fast_window, short_name=\"fast_sma\", hide_params=True) \n",
    "    slow_sma = vbt.talib(\"sma\").run(\n",
    "        close, slow_window, short_name=\"slow_sma\", hide_params=True) \n",
    "    entries = fast_sma.real_crossed_above(slow_sma)\n",
    "    exits = fast_sma.real_crossed_below(slow_sma)\n",
    "    pf = vbt.Portfolio.from_signals(\n",
    "        close, entries, exits, freq=freq, direction=\"both\")\n",
    "    return pf.sharpe_ratio"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "21ec3795-d820-4f7d-b6fa-de6710fac0cd",
   "metadata": {},
   "outputs": [],
   "source": [
    "train_perf = set_sma_crossover_perf(\n",
    "    close_stacked[\"train\"],\n",
    "    vbt.Param(np.arange(5, 50), condition=\"x < slow_window\"),\n",
    "    vbt.Param(np.arange(5, 50)),\n",
    "    data.index.freq,\n",
    "    _execute_kwargs=dict(\n",
    "        clear_cache=50,\n",
    "        collect_garbage=50\n",
    "    )\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "63202321-0689-4aca-9e82-7ece4fd18a0e",
   "metadata": {},
   "outputs": [],
   "source": [
    "train_perf"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "75669ef5-ad2a-4122-8be9-c510ca252fb0",
   "metadata": {},
   "outputs": [],
   "source": [
    "train_perf.vbt.heatmap(\n",
    "    x_level=\"fast_window\",\n",
    "    y_level=\"slow_window\",\n",
    "    slider_level=\"split_year\",\n",
    "    symmetric=True\n",
    ").show_svg()  # replace with show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "66734f37-624d-43ed-83ae-a9b33ace0085",
   "metadata": {},
   "outputs": [],
   "source": [
    "@njit\n",
    "def prox_median_nb(arr):\n",
    "    if (~np.isnan(arr)).sum() < 20:\n",
    "        return np.nan\n",
    "    return np.nanmedian(arr)\n",
    "\n",
    "prox_perf_list = []\n",
    "for split_label, perf_sr in train_perf.groupby(\"split_year\"):\n",
    "    perf_df = perf_sr.vbt.unstack_to_df(0, [1, 2])\n",
    "    prox_perf_df = perf_df.vbt.proximity_apply(2, prox_median_nb)\n",
    "    prox_perf_sr = prox_perf_df.stack([0, 1])\n",
    "    prox_perf_list.append(prox_perf_sr.reindex(perf_sr.index))\n",
    "\n",
    "train_prox_perf = pd.concat(prox_perf_list)\n",
    "train_prox_perf"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d64b2b46-598e-4211-8e66-fd2824156f5a",
   "metadata": {},
   "outputs": [],
   "source": [
    "train_prox_perf.vbt.heatmap(\n",
    "    x_level=\"fast_window\",\n",
    "    y_level=\"slow_window\",\n",
    "    slider_level=\"split_year\",\n",
    "    symmetric=True\n",
    ").show_svg()  # replace with show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a2bd48a4-74c9-4e9d-a2a4-c1a726ca1619",
   "metadata": {},
   "outputs": [],
   "source": [
    "best_params = train_prox_perf.groupby(\"split_year\").idxmax()\n",
    "best_params = train_prox_perf[best_params].index\n",
    "train_prox_perf[best_params]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c99e5a80-3fa4-453b-ad33-88c825f2dfa7",
   "metadata": {},
   "outputs": [],
   "source": [
    "test_perf = set_sma_crossover_perf(\n",
    "    vbt.RepEval(\n",
    "        \"test_close.iloc[:, [config_idx]]\", \n",
    "        context=dict(test_close=close_stacked[\"test\"])\n",
    "    ),\n",
    "    vbt.Param(best_params.get_level_values(\"fast_window\"), level=0),\n",
    "    vbt.Param(best_params.get_level_values(\"slow_window\"), level=0),\n",
    "    data.index.freq\n",
    ")\n",
    "test_perf"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ee16a258-2788-402b-bf2b-c065254c1c3e",
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_index_sharpe(index):\n",
    "    return data.loc[index].run(\"from_holding\").sharpe_ratio\n",
    "\n",
    "index_slices.xs(\"test\", level=\"set\").apply(get_index_sharpe)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "dd73c429-db22-40ff-8a34-f773ae0e7762",
   "metadata": {},
   "source": [
    "#### Row stacking"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e890c631-33b8-4822-914d-35432379cedf",
   "metadata": {},
   "outputs": [],
   "source": [
    "block_size = int(3.15 * len(data.index) ** (1 / 3))\n",
    "block_splitter = vbt.Splitter.from_rolling(\n",
    "    data.index, \n",
    "    length=block_size, \n",
    "    offset=1,\n",
    "    offset_anchor=\"prev_start\"\n",
    ")\n",
    "block_splitter.n_splits"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5a04c2d4-c53a-4a42-9cf1-0f912b7df08e",
   "metadata": {},
   "outputs": [],
   "source": [
    "size = int(block_splitter.n_splits / block_size)\n",
    "sample_splitter = block_splitter.shuffle_splits(size=size, replace=True)\n",
    "sample_splitter.plot().show_svg()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "42ae502d-3e72-458c-9d8a-09d475968013",
   "metadata": {},
   "outputs": [],
   "source": [
    "returns = data.returns\n",
    "sample_rets = sample_splitter.take(returns, into=\"stacked\", stack_axis=0)\n",
    "sample_rets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "59eb80cc-0738-4024-873b-98009964c5d4",
   "metadata": {},
   "outputs": [],
   "source": [
    "sample_rets.index = data.index[:len(sample_rets)]\n",
    "sample_cumrets = data.close[0] * (sample_rets + 1).cumprod()\n",
    "sample_cumrets.vbt.plot().show_svg()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "78e88fe1-8433-4ce8-9beb-4fc24878745a",
   "metadata": {},
   "outputs": [],
   "source": [
    "samples_rets_list = []\n",
    "for i in vbt.ProgressBar(range(1000)):\n",
    "    sample_spl = block_splitter.shuffle_splits(size=size, replace=True)\n",
    "    sample_rets = sample_spl.take(returns, into=\"stacked\", stack_axis=0)\n",
    "    sample_rets.index = returns.index[:len(sample_rets)]\n",
    "    sample_rets.name = i\n",
    "    samples_rets_list.append(sample_rets)\n",
    "sample_rets_stacked = pd.concat(samples_rets_list, axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e909ffcc-d18f-4351-9626-714c81c18321",
   "metadata": {},
   "outputs": [],
   "source": [
    "sample_sharpe = sample_rets_stacked.vbt.returns.sharpe_ratio()\n",
    "sample_sharpe.vbt.boxplot(horizontal=True).show_svg()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0815456c-9b75-4ab8-b499-faa9947406ea",
   "metadata": {},
   "outputs": [],
   "source": [
    "sample_sharpe.quantile(0.025), sample_sharpe.quantile(0.975)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "1fee36b8-a009-4b73-aeaa-18f918c92c55",
   "metadata": {},
   "source": [
    "### Applying"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b8ee38db-2b98-488c-b53c-f539e0bccedb",
   "metadata": {},
   "outputs": [],
   "source": [
    "splitter.apply(\n",
    "    get_total_return,\n",
    "    vbt.Takeable(data.close),\n",
    "    merge_func=\"concat\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6d86ce45-9b6b-43e2-8e2e-79944cd4f413",
   "metadata": {},
   "outputs": [],
   "source": [
    "splitter.apply(\n",
    "    get_total_return,\n",
    "    vbt.RepFunc(lambda range_: data.close[range_]),\n",
    "    merge_func=\"concat\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7ae1a59f-4466-4383-8a51-668c24f4321e",
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_total_return(range_, data):\n",
    "    return data.returns[range_].vbt.returns.total()\n",
    "\n",
    "splitter.apply(\n",
    "    get_total_return,\n",
    "    vbt.Rep(\"range_\"),\n",
    "    data,\n",
    "    merge_func=\"concat\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "35f59608-ee6f-4e16-a9a7-ad9dcaaab0cd",
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_total_return(data):\n",
    "    return data.returns.vbt.returns.total()\n",
    "\n",
    "splitter.apply(\n",
    "    get_total_return,\n",
    "    vbt.Takeable(data),\n",
    "    merge_func=\"concat\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "11b5fa1d-a6a7-4703-a897-be517ecc396b",
   "metadata": {},
   "outputs": [],
   "source": [
    "splitter.apply(\n",
    "    get_total_return,\n",
    "    vbt.Takeable(data),\n",
    "    set_group_by=True,\n",
    "    merge_func=\"concat\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "62816646-c23f-4c12-8dfb-c0e327badeb6",
   "metadata": {},
   "outputs": [],
   "source": [
    "splitter.apply(\n",
    "    get_total_return,\n",
    "    vbt.Takeable(data),\n",
    "    split=[2020, 2021],\n",
    "    set_=\"train\",\n",
    "    merge_func=\"concat\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f8f03eb7-2835-45c7-9879-7b951c7a4647",
   "metadata": {},
   "outputs": [],
   "source": [
    "train_perf = splitter.apply(\n",
    "    sma_crossover_perf,\n",
    "    vbt.Takeable(data),\n",
    "    vbt.Param(np.arange(5, 50), condition=\"x < slow_window\"),\n",
    "    vbt.Param(np.arange(5, 50)),\n",
    "    _execute_kwargs=dict(\n",
    "        clear_cache=50,\n",
    "        collect_garbage=50\n",
    "    ),\n",
    "    set_=\"train\",\n",
    "    merge_func=\"concat\",\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0ebb869c-1edf-491f-ad26-bcba207c0f26",
   "metadata": {},
   "outputs": [],
   "source": [
    "train_perf"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "261c2dd1-2f5e-480e-a2c8-7593ac26771b",
   "metadata": {},
   "outputs": [],
   "source": [
    "best_params = train_perf.groupby(\"split_year\").idxmax()\n",
    "best_params = train_perf[best_params].index\n",
    "train_perf[best_params]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1749022b-1d3c-4ccb-b6a1-9fe0a06e52a5",
   "metadata": {},
   "outputs": [],
   "source": [
    "best_fast_windows = best_params.get_level_values(\"fast_window\")\n",
    "best_slow_windows = best_params.get_level_values(\"slow_window\")\n",
    "\n",
    "test_perf = splitter.apply(\n",
    "    sma_crossover_perf,\n",
    "    vbt.Takeable(data),\n",
    "    vbt.RepFunc(lambda split_idx: best_fast_windows[split_idx]),\n",
    "    vbt.RepFunc(lambda split_idx: best_slow_windows[split_idx]),\n",
    "    set_=\"test\",\n",
    "    merge_func=\"concat\"\n",
    ")\n",
    "test_perf"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "432e35f6-98b2-4278-9616-54a2206eb182",
   "metadata": {},
   "source": [
    "#### Iteration schemes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e079d58d-f171-41ef-b685-80b502cb31b3",
   "metadata": {},
   "outputs": [],
   "source": [
    "def cv_sma_crossover(\n",
    "    data, \n",
    "    fast_windows, \n",
    "    slow_windows, \n",
    "    split_idx,\n",
    "    set_idx,\n",
    "    train_perf_list\n",
    "):\n",
    "    if set_idx == 0:\n",
    "        train_perf = sma_crossover_perf(\n",
    "            data,\n",
    "            vbt.Param(fast_windows, condition=\"x < slow_window\"),\n",
    "            vbt.Param(slow_windows),\n",
    "            _execute_kwargs=dict(\n",
    "                clear_cache=50,\n",
    "                collect_garbage=50\n",
    "            )\n",
    "        )\n",
    "        train_perf_list.append(train_perf)\n",
    "        best_params = train_perf.idxmax()\n",
    "        return train_perf[[best_params]]\n",
    "    else:\n",
    "        train_perf = train_perf_list[split_idx]\n",
    "        best_params = train_perf.idxmax()\n",
    "        test_perf = sma_crossover_perf(\n",
    "            data,\n",
    "            vbt.Param([best_params[0]]),\n",
    "            vbt.Param([best_params[1]]),\n",
    "        )\n",
    "        return test_perf\n",
    "    \n",
    "train_perf_list = []\n",
    "cv_perf = splitter.apply(\n",
    "    cv_sma_crossover,\n",
    "    vbt.Takeable(data),\n",
    "    np.arange(5, 50),\n",
    "    np.arange(5, 50),\n",
    "    vbt.Rep(\"split_idx\"),\n",
    "    vbt.Rep(\"set_idx\"),\n",
    "    train_perf_list,\n",
    "    iteration=\"set_major\",\n",
    "    merge_func=\"concat\",\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c7f1f4a1-fe05-44c2-b2c3-fcbb432bdeaa",
   "metadata": {},
   "outputs": [],
   "source": [
    "train_perf = pd.concat(train_perf_list, keys=splitter.split_labels)\n",
    "train_perf"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "dd134bf0-8c5d-4365-871b-a5a0aa232e4b",
   "metadata": {},
   "outputs": [],
   "source": [
    "cv_perf"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8da1d36b-6b51-4429-be5c-ed0acbcfdf24",
   "metadata": {},
   "source": [
    "#### Merging"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "23da7a75-db32-451d-8a7d-8bf729584e99",
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_entries_and_exits(data, fast_window, slow_window):\n",
    "    \"\"\"Build SMA crossover signals for `data`.\n",
    "\n",
    "    Returns an `(entries, exits)` tuple: entries where the fast SMA crosses\n",
    "    above the slow SMA, exits where it crosses below.\n",
    "    \"\"\"\n",
    "    fast = data.run(\"sma\", fast_window, short_name=\"fast_sma\")\n",
    "    slow = data.run(\"sma\", slow_window, short_name=\"slow_sma\")\n",
    "    return fast.real_crossed_above(slow), fast.real_crossed_below(slow)\n",
    "\n",
    "entries, exits = splitter.apply(\n",
    "    get_entries_and_exits,\n",
    "    vbt.Takeable(data),\n",
    "    20,\n",
    "    30,\n",
    "    merge_func=\"column_stack\"\n",
    ")\n",
    "\n",
    "print(entries)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2822ee69-5a35-4700-b025-442d2e9396fc",
   "metadata": {},
   "outputs": [],
   "source": [
    "entries, exits = splitter.apply(\n",
    "    get_entries_and_exits,\n",
    "    vbt.Takeable(data),\n",
    "    20,\n",
    "    30,\n",
    "    merge_all=False,\n",
    "    merge_func=\"row_stack\"\n",
    ")\n",
    "\n",
    "entries.loc[2018]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "80266dbb-b700-4389-8956-5a253b18b816",
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_signal_count(*args, **kwargs):\n",
    "    \"\"\"Return `(entry_total, exit_total)` for the signals of `get_entries_and_exits`.\n",
    "\n",
    "    All positional and keyword arguments are forwarded unchanged.\n",
    "    \"\"\"\n",
    "    entry_mask, exit_mask = get_entries_and_exits(*args, **kwargs)\n",
    "    entry_total = entry_mask.vbt.signals.total()\n",
    "    exit_total = exit_mask.vbt.signals.total()\n",
    "    return entry_total, exit_total\n",
    "\n",
    "entry_count, exit_count = splitter.apply(\n",
    "    get_signal_count,\n",
    "    vbt.Takeable(data),\n",
    "    20,\n",
    "    30,\n",
    "    merge_func=\"concat\",\n",
    "    attach_bounds=\"index\"\n",
    ")\n",
    "entry_count"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "56842a7c-290c-41ca-8e39-2fe148b9a352",
   "metadata": {},
   "outputs": [],
   "source": [
    "def plot_entries_and_exits(results, data, keys):\n",
    "    \"\"\"Merge function that overlays the signals of every range on one price chart.\n",
    "\n",
    "    Args:\n",
    "        results: Sequence of `(entries, exits)` pairs, one per range.\n",
    "        data: Data instance providing the base plot and the `close` series\n",
    "            that the markers are drawn on.\n",
    "        keys: Index with a `set` level, aligned positionally with `results`.\n",
    "\n",
    "    Returns:\n",
    "        The figure with all entry and exit markers added.\n",
    "    \"\"\"\n",
    "    set_labels = keys.get_level_values(\"set\")\n",
    "    fig = data.plot(plot_volume=False)\n",
    "    # (entry color, exit color); any label other than \"train\" uses the second pair\n",
    "    train_colors = (\"limegreen\", \"orange\")\n",
    "    other_colors = (\"skyblue\", \"magenta\")\n",
    "    seen_labels = set()\n",
    "\n",
    "    for (entries, exits), set_label in zip(results, set_labels):\n",
    "        if set_label == \"train\":\n",
    "            entry_color, exit_color = train_colors\n",
    "        else:\n",
    "            entry_color, exit_color = other_colors\n",
    "        # Only the first range of each set contributes a legend item;\n",
    "        # later ranges join it through the shared legend group\n",
    "        showlegend = set_label not in seen_labels\n",
    "        seen_labels.add(set_label)\n",
    "\n",
    "        entries.vbt.signals.plot_as_entries(\n",
    "            data.close,\n",
    "            trace_kwargs=dict(\n",
    "                marker=dict(color=entry_color),\n",
    "                name=f\"Entries ({set_label})\",\n",
    "                legendgroup=f\"Entries ({set_label})\",\n",
    "                showlegend=showlegend\n",
    "            ),\n",
    "            fig=fig\n",
    "        )\n",
    "        exits.vbt.signals.plot_as_exits(\n",
    "            data.close,\n",
    "            trace_kwargs=dict(\n",
    "                marker=dict(color=exit_color),\n",
    "                name=f\"Exits ({set_label})\",\n",
    "                # Exits get their own group so that toggling the entries\n",
    "                # legend item does not hide them as well\n",
    "                legendgroup=f\"Exits ({set_label})\",\n",
    "                showlegend=showlegend\n",
    "            ),\n",
    "            fig=fig\n",
    "        )\n",
    "    return fig\n",
    "\n",
    "splitter.apply(\n",
    "    get_entries_and_exits,\n",
    "    vbt.Takeable(data),\n",
    "    20,\n",
    "    30,\n",
    "    merge_func=plot_entries_and_exits,\n",
    "    merge_kwargs=dict(data=data, keys=vbt.Rep(\"keys\")),\n",
    ").show_svg()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "93378394-5bd8-49e4-ad0a-e9d0eb25f58d",
   "metadata": {},
   "source": [
    "#### Decorators"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "63999a23-9b3e-41e9-ba9e-aefc4775c60c",
   "metadata": {},
   "outputs": [],
   "source": [
    "@vbt.split(splitter=splitter)\n",
    "def get_split_total_return(data):\n",
    "    \"\"\"Total return of `data`; decorated with the notebook's `splitter`.\"\"\"\n",
    "    returns = data.returns\n",
    "    return returns.vbt.returns.total()\n",
    "\n",
    "get_split_total_return(vbt.Takeable(data))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "47f5be24-bd93-4f16-9849-2774d7d29617",
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_total_return(data):\n",
    "    \"\"\"Total return computed from the returns of `data`.\"\"\"\n",
    "    returns = data.returns\n",
    "    return returns.vbt.returns.total()\n",
    "\n",
    "get_split_total_return = vbt.split(\n",
    "    get_total_return, \n",
    "    splitter=splitter\n",
    ")\n",
    "get_split_total_return(vbt.Takeable(data))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e508211b-3517-4925-ae8d-f248e94579d3",
   "metadata": {},
   "outputs": [],
   "source": [
    "@vbt.split\n",
    "def get_split_total_return(data):\n",
    "    \"\"\"Total return of `data`; the splitter is supplied at call time.\"\"\"\n",
    "    returns = data.returns\n",
    "    return returns.vbt.returns.total()\n",
    "\n",
    "get_split_total_return(vbt.Takeable(data), _splitter=splitter)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bfaab288-43f0-454e-88d2-4b8b36b3932c",
   "metadata": {},
   "outputs": [],
   "source": [
    "get_split_total_return(\n",
    "    vbt.Takeable(data.loc[\"2020\":\"2020\"]), \n",
    "    _splitter=\"from_rolling\", \n",
    "    _splitter_kwargs=dict(length=\"30d\")\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "20d941b7-4419-4dfb-81f0-72f1b0839888",
   "metadata": {},
   "outputs": [],
   "source": [
    "get_total_return_by_month = vbt.split(\n",
    "    get_total_return,\n",
    "    splitter=\"from_grouper\", \n",
    "    splitter_kwargs=dict(by=vbt.RepEval(\"index.to_period('M')\")),\n",
    "    takeable_args=[\"data\"]\n",
    ")\n",
    "\n",
    "get_total_return_by_month(data.loc[\"2020\":\"2020\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d61a83eb-c72c-4257-a195-a34a82378f56",
   "metadata": {},
   "outputs": [],
   "source": [
    "cv_sma_crossover_perf = vbt.split(\n",
    "    sma_crossover_perf, \n",
    "    splitter=\"from_single\",\n",
    "    splitter_kwargs=dict(split=0.6, set_labels=[\"train\", \"test\"]),\n",
    "    takeable_args=[\"data\"],\n",
    "    merge_func=\"concat\",\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "27387cd2-f223-4cbd-b2e5-65aef60e512c",
   "metadata": {},
   "outputs": [],
   "source": [
    "train_perf = cv_sma_crossover_perf(\n",
    "    data.loc[\"2020\":\"2021\"],\n",
    "    vbt.Param(np.arange(5, 50), condition=\"x < slow_window\"),\n",
    "    vbt.Param(np.arange(5, 50)),\n",
    "    p_execute_kwargs=dict(\n",
    "        clear_cache=50,\n",
    "        collect_garbage=50\n",
    "    ),\n",
    "    _forward_kwargs_as={\n",
    "        \"p_execute_kwargs\": \"_execute_kwargs\"\n",
    "    },\n",
    "    _apply_kwargs=dict(set_=\"train\")\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9878d18a-7b05-4b83-9ca5-8d291dae2d5d",
   "metadata": {},
   "outputs": [],
   "source": [
    "train_perf"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9b4665fe-f78c-4257-922d-f05b351171ec",
   "metadata": {},
   "outputs": [],
   "source": [
    "test_perf = cv_sma_crossover_perf(\n",
    "    data.loc[\"2020\":\"2021\"],\n",
    "    train_perf.idxmax()[0],\n",
    "    train_perf.idxmax()[1],\n",
    "    _apply_kwargs=dict(set_=\"test\")\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7a921ee5-4dd4-4c8e-be52-430a912059a6",
   "metadata": {},
   "outputs": [],
   "source": [
    "test_perf"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8ebf0a46-778d-4186-bc7a-dfa4e897c872",
   "metadata": {},
   "outputs": [],
   "source": [
    "@njit(nogil=True)\n",
    "def sma_crossover_perf_nb(close, fast_window, slow_window, ann_factor):\n",
    "    \"\"\"Numba-compiled moving-average crossover backtest returning the Sharpe ratio.\n",
    "\n",
    "    Long entries fire where the fast average crosses above the slow one and\n",
    "    short entries where the slow average crosses above the fast one. `close`\n",
    "    must be two-dimensional since its second dimension is read explicitly.\n",
    "    \"\"\"\n",
    "    fast_sma = vbt.nb.ma_nb(close, fast_window)\n",
    "    slow_sma = vbt.nb.ma_nb(close, slow_window)\n",
    "    long_entries = vbt.nb.crossed_above_nb(fast_sma, slow_sma)\n",
    "    # Swapped arguments: the slow average crossing above the fast one\n",
    "    short_entries = vbt.nb.crossed_above_nb(slow_sma, fast_sma)\n",
    "    n_cols = close.shape[1]\n",
    "    sim_out = vbt.pf_nb.from_signals_nb(\n",
    "        target_shape=close.shape,\n",
    "        # One group of length 1 per column\n",
    "        group_lens=np.full(n_cols, 1),\n",
    "        close=close,\n",
    "        long_entries=long_entries,\n",
    "        short_entries=short_entries,\n",
    "        save_returns=True\n",
    "    )\n",
    "    returns = sim_out.in_outputs.returns\n",
    "    return vbt.ret_nb.sharpe_ratio_nb(returns, ann_factor)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "01f161db-2548-4627-a385-40a2d84c9034",
   "metadata": {},
   "outputs": [],
   "source": [
    "sma_crossover_perf_nb(vbt.to_2d_array(data.close), 20, 30, 365)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9e35b428-f5e6-4000-afc7-8ef9bd3aeffc",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Wrap the compiled function so that it is cross-validated over rolling ranges\n",
    "cv_sma_crossover_perf = vbt.cv_split(\n",
    "    sma_crossover_perf_nb,\n",
    "    splitter=\"from_rolling\",\n",
    "    splitter_kwargs=dict(\n",
    "        length=360, \n",
    "        split=0.5, \n",
    "        set_labels=[\"train\", \"test\"]\n",
    "    ),\n",
    "    # `close` is the only argument marked as takeable\n",
    "    takeable_args=[\"close\"],\n",
    "    merge_func=\"concat\",\n",
    "    parameterized_kwargs=dict(\n",
    "        engine=\"dask\", \n",
    "        chunk_len=\"auto\",\n",
    "    )\n",
    ")\n",
    "\n",
    "grid_perf, best_perf = cv_sma_crossover_perf(\n",
    "    # The compiled function reads `close.shape[1]`, hence the 2-dim array\n",
    "    vbt.to_2d_array(data.close),\n",
    "    vbt.Param(np.arange(5, 50), condition=\"x < slow_window\"),\n",
    "    vbt.Param(np.arange(5, 50)),\n",
    "    # Fourth positional argument, i.e. `ann_factor`: index periods per 365 days\n",
    "    pd.Timedelta(days=365) // data.index.freq,\n",
    "    _merge_kwargs=dict(wrapper=data.symbol_wrapper),\n",
    "    # The array carries no index of its own, so it is passed separately\n",
    "    _index=data.index,\n",
    "    # NOTE(review): presumably what makes the call return the grid results\n",
    "    # in addition to the best ones - confirm\n",
    "    _return_grid=\"all\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ca4288fd-79ea-4b0b-a007-7ca2d8d85472",
   "metadata": {},
   "outputs": [],
   "source": [
    "grid_perf"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "75fd72d5-7004-46a5-a47d-d45ee3b95381",
   "metadata": {},
   "outputs": [],
   "source": [
    "best_perf"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "87e59f91-a076-4b75-a1a6-64ad04f35cbe",
   "metadata": {},
   "outputs": [],
   "source": [
    "best_train_perf = best_perf.xs(\"train\", level=\"set\")\n",
    "best_test_perf = best_perf.xs(\"test\", level=\"set\")\n",
    "best_train_perf.corr(best_test_perf)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e548f6f6-ac62-46c3-b6ef-40467a4564fe",
   "metadata": {},
   "outputs": [],
   "source": [
    "param_cross_set_corr = grid_perf\\\n",
    "    .unstack(\"set\")\\\n",
    "    .groupby([\"fast_window\", \"slow_window\"])\\\n",
    "    .apply(lambda x: x[\"train\"].corr(x[\"test\"]))\n",
    "param_cross_set_corr.vbt.heatmap(symmetric=True).show_svg()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4a40f160-6159-4b52-bc15-13f68c36be31",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Test-set performance of every parameter combination\n",
    "grid_test_perf = grid_perf.xs(\"test\", level=\"set\")\n",
    "grid_df = grid_test_perf.rename(\"grid\").reset_index()\n",
    "# The window columns are not needed for the comparison below\n",
    "del grid_df[\"fast_window\"]\n",
    "del grid_df[\"slow_window\"]\n",
    "best_df = best_test_perf.rename(\"best\").reset_index()\n",
    "del best_df[\"fast_window\"]\n",
    "del best_df[\"slow_window\"]\n",
    "# Put the selected combination's test score next to every grid score\n",
    "merged_df = pd.merge(grid_df, best_df, on=[\"split\", \"symbol\"])\n",
    "grid_better_mask = merged_df[\"grid\"] > merged_df[\"best\"]\n",
    "# NOTE(review): assumes the merge keeps the row count and order of `grid_df` - confirm\n",
    "grid_better_mask.index = grid_test_perf.index\n",
    "# Mean of a boolean mask: a fraction per (split, symbol) rather than a count\n",
    "grid_better_cnt = grid_better_mask.groupby([\"split\", \"symbol\"]).mean()\n",
    "grid_better_cnt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c0f127bb-3b1a-4425-a1bc-ec6be6a27f3e",
   "metadata": {},
   "outputs": [],
   "source": [
    "cv_splitter = cv_sma_crossover_perf(\n",
    "    _index=data.index, \n",
    "    _return_splitter=True\n",
    ")\n",
    "stacked_close = cv_splitter.take(\n",
    "    data.close, \n",
    "    into=\"reset_stacked\",\n",
    "    set_=\"test\"\n",
    ")\n",
    "hold_pf = vbt.Portfolio.from_holding(stacked_close, freq=\"daily\")\n",
    "hold_perf = hold_pf.sharpe_ratio\n",
    "hold_perf"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "935ca582-ef09-40f9-b6ff-c303c98989b1",
   "metadata": {},
   "source": [
    "### Modeling"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0f9cc747-d0b4-470f-a262-7cfc7ad1d62e",
   "metadata": {},
   "outputs": [],
   "source": [
    "X = data.run(\"talib\")\n",
    "X.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7fdb5b1c-e4cc-4472-bdb1-4629492306e7",
   "metadata": {},
   "outputs": [],
   "source": [
    "trendlb = data.run(\"trendlb\", 1.0, 0.5, mode=\"binary\")\n",
    "y = trendlb.labels\n",
    "y.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ce1e42f0-7f1f-474c-865b-2fabd6dfc907",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Treat infinities as missing values\n",
    "X = X.replace([-np.inf, np.inf], np.nan)\n",
    "# Drop columns that are entirely missing or hold a single distinct non-missing value\n",
    "invalid_column_mask = X.isnull().all(axis=0) | (X.nunique() == 1)\n",
    "X = X.loc[:, ~invalid_column_mask]\n",
    "# Drop rows with any missing feature or a missing label, from both X and y\n",
    "invalid_row_mask = X.isnull().any(axis=1) | y.isnull()\n",
    "X = X.loc[~invalid_row_mask]\n",
    "y = y.loc[~invalid_row_mask]\n",
    "X.shape, y.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bf1fe438-bf27-4fc3-9d9b-5c71a7b046ae",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.ensemble import RandomForestClassifier\n",
    "\n",
    "clf = RandomForestClassifier(random_state=42)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f64bbb84-5340-4554-a78c-5a58bdbe5010",
   "metadata": {},
   "outputs": [],
   "source": [
    "cv = vbt.SplitterCV(\n",
    "    \"from_expanding\", \n",
    "    min_length=360, \n",
    "    offset=180, \n",
    "    split=-180,\n",
    "    set_labels=[\"train\", \"test\"]\n",
    ")\n",
    "\n",
    "cv_splitter = cv.get_splitter(X)\n",
    "cv_splitter.plot().show_svg()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5c26f554-c0ff-4973-a625-d95549bbbe36",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.model_selection import cross_val_score\n",
    "\n",
    "cross_val_score(clf, X, y, cv=cv, scoring=\"accuracy\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "580ae7c7-d108-454f-903f-937c8dcf058e",
   "metadata": {},
   "outputs": [],
   "source": [
    "X_slices = cv_splitter.take(X)\n",
    "y_slices = cv_splitter.take(y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b9186a59-798a-404a-b763-f6360d098dc9",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Collect the test labels and the test predictions of every split\n",
    "test_labels = []\n",
    "test_preds = []\n",
    "for split in X_slices.index.unique(level=\"split\"):\n",
    "    X_train_slice = X_slices[(split, \"train\")]\n",
    "    y_train_slice = y_slices[(split, \"train\")]\n",
    "    X_test_slice = X_slices[(split, \"test\")]\n",
    "    y_test_slice = y_slices[(split, \"test\")]\n",
    "    # The same `clf` object is fitted again in every iteration\n",
    "    slice_clf = clf.fit(X_train_slice, y_train_slice)\n",
    "    test_pred = slice_clf.predict(X_test_slice)\n",
    "    # Give the predictions the index of the test labels\n",
    "    test_pred = pd.Series(test_pred, index=y_test_slice.index)\n",
    "    test_labels.append(y_test_slice)\n",
    "    test_preds.append(test_pred)\n",
    "    \n",
    "# Stitch the per-split pieces together and name the results\n",
    "test_labels = pd.concat(test_labels).rename(\"labels\")\n",
    "test_preds = pd.concat(test_preds).rename(\"preds\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "411d961d-7e24-427a-92e1-07c6fbc52f4d",
   "metadata": {},
   "outputs": [],
   "source": [
    "data.close.vbt.overlay_with_heatmap(test_labels).show_svg()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a904c9f0-9514-4c17-bc14-a2d9980debe6",
   "metadata": {},
   "outputs": [],
   "source": [
    "data.close.vbt.overlay_with_heatmap(test_preds).show_svg()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c0592fce-7a89-4fc6-9206-1ca7b3a51700",
   "metadata": {},
   "outputs": [],
   "source": [
    "pf = vbt.Portfolio.from_signals(\n",
    "    data.close[test_preds.index], \n",
    "    test_preds == 1, \n",
    "    test_preds == 0, \n",
    "    direction=\"both\"\n",
    ")\n",
    "pf.stats()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "70cc579d-fb48-41ff-935a-1aa035c8a1f8",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}