{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Markov Variance Switching\n", "\n", "# 2: Leveraged Exchange Traded Funds\n", "\n", "Kim, C., Nelson, C., and Startz, R. (1998). Testing for mean reversion in heteroskedastic data based on Gibbs-sampling-augmented randomization. Journal of Empirical Finance, (5)2, pp.131-154.\n", "\n", "**Author:** shittles\n", "\n", "**Created:** 2024-10-17\n", "\n", "**Modified:** 2024-10-17\n", "\n", "## Sources\n", "- https://www.statsmodels.org/v0.11.1/examples/notebooks/generated/markov_autoregression.html\n", "- https://www.proshares.com/our-etfs/leveraged-and-inverse/upro\n", "- https://www.bogleheads.org/forum/viewtopic.php?t=272007\n", "- https://www.bogleheads.org/forum/viewtopic.php?t=288192\n", "- https://www.reddit.com/r/LETFs/comments/14lubaz/finally_an_accurate_backtesting_model/\n", "- https://www.reddit.com/r/mauerstrassenwetten/comments/sivtas/zahlgrafs_exzellente_abenteuer_teil_4/\n", "- https://code.launchpad.net/zgea\n", "\n", "## Changelog\n", "- Modified Markov variance switching notebook for portfolio optimisation (2h - 2024-10-17).\n" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "import plotly.express as px\n", "import plotly.graph_objects as go\n", "import statsmodels.api as sm\n", "\n", "from pykalman import KalmanFilter\n", "from sklearn.compose import make_column_transformer\n", "from sklearn.preprocessing import RobustScaler\n", "\n", "from vectorbtpro import *" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "vbt.settings.set_theme(\"dark\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Ingestion\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Use the fund, not the index, because its going to be part of a portfolio.\n", "data = vbt.YFData.pull(\n", " [\"SPY\", \"UPRO\"], start=\"50 years ago\", end=\"today\", timeframe=\"daily\", tz=\"UTC\"\n", ")\n", "\n", "data.stats()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "data.data[\"SPY\"]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "data.data[\"UPRO\"]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "data.index" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "data.plot(symbol=\"SPY\", yaxis=dict(type=\"log\")).show()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "data.plot(symbol=\"UPRO\", yaxis=dict(type=\"log\")).show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Cleaning\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "data.features" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "data.get_feature(\"Dividends\").any()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "data.get_feature(\"Stock Splits\").any()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "data.get_feature(\"Capital Gains\").any()" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "data = data.remove_features([\"Capital Gains\"])" ] }, { "cell_type": "code", 
"execution_count": 14, "metadata": {}, "outputs": [], "source": [ "# A post global financial crisis backtest probably isn't long enough.\n", "data = data.transform(lambda df: df.loc[\"June 25th 2009\" < df.index])" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "# I don't need to resample the data since it's sourced from the same exchange.\n", "# data = data.resample(\"daily\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Modelling\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "data.log_returns.vbt.plot().show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Pre-processing\n" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [], "source": [ "column_transformer = make_column_transformer(\n", " (RobustScaler(), [\"SPY\"]),\n", ")\n", "\n", "sr_log_returns_scaled = pd.Series(\n", " data=column_transformer.fit_transform(pd.DataFrame(data.log_returns[\"SPY\"])).ravel(),\n", " index=data.index,\n", " name=\"SPY\",\n", ")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "sr_log_returns_scaled.vbt.plot().show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Markov Regression\n" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [], "source": [ "k_regimes_kns = 3" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "kns = sm.tsa.MarkovRegression(\n", " sr_log_returns_scaled, k_regimes=k_regimes_kns, trend=\"n\", switching_variance=True\n", ")\n", "results_kns = kns.fit()\n", "\n", "results_kns.summary()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "results_kns.filtered_marginal_probabilities # using data until time t (excluding time t+1, ..., T)\n", "# results_kns.smoothed_marginal_probabilities # using data until time T" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "fig = vbt.make_subplots(\n", " rows=k_regimes_kns,\n", " cols=1,\n", " y_title=\"Filtered Marginal Variance Regime Probabilities\",\n", " # y_title=\"Smoothed Marginal Variance Regime Probabilities\",\n", " shared_xaxes=True,\n", " subplot_titles=[\n", " \"Low-variance\",\n", " \"Medium-variance\",\n", " \"High-variance\",\n", " ], # order changes dependent on fit\n", ")\n", "\n", "for i in range(k_regimes_kns):\n", " fig = results_kns.filtered_marginal_probabilities[i].vbt.plot(\n", " # fig = results_kns.smoothed_marginal_probabilities[i].vbt.plot(\n", " add_trace_kwargs=dict(row=i + 1, col=1), fig=fig\n", " )\n", "\n", "fig.update_layout(showlegend=False)\n", "fig.show()" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [], "source": [ "def plot_annotated_line(\n", " fig: go.Figure,\n", " x: pd.Series,\n", " y: pd.Series,\n", " classes: pd.Series,\n", " dict_class_colours: dict,\n", " dict_class_labels: dict,\n", ") -> go.Figure:\n", " \"\"\"Plot a line chart where each trace is coloured based on its class.\n", "\n", " Yes, plotly really doesn't support this out of the box.\n", "\n", " Args:\n", " fig: Figure.\n", " x: Indices.\n", " y: Close prices.\n", " classes: Regimes.\n", " dict_class_colours: In the format {class: colour}\n", " dict_class_labels: In the format {class: label}\n", "\n", " Returns:\n", " fig: The figure.\n", " \"\"\"\n", " # Plot each segment in its corresponding color.\n", " for i in 
range(len(x) - 1):\n", "        fig.add_trace(\n", "            go.Scatter(\n", "                x=x[i : i + 2],\n", "                y=y[i : i + 2],\n", "                mode=\"lines\",\n", "                line=dict(color=dict_class_colours[classes[i]], width=2),\n", "                showlegend=False,  # added manually\n", "            )\n", "        )\n", "\n", "    # Label each regime.\n", "    for regime, colour in dict_class_colours.items():\n", "        fig.add_trace(\n", "            go.Scatter(\n", "                x=[None],\n", "                y=[None],\n", "                mode=\"lines\",\n", "                line=dict(color=colour, width=2),\n", "                name=dict_class_labels[regime],\n", "            )\n", "        )\n", "\n", "    return fig" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [], "source": [ "sr_variance_regime_forecasts = results_kns.filtered_marginal_probabilities.idxmax(\n", "    axis=1\n", ")\n", "\n", "sr_variance_regime_predictions = results_kns.smoothed_marginal_probabilities.idxmax(\n", "    axis=1\n", ")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "sr_variance_regime_forecasts.vbt.plot().show()\n", "# sr_variance_regime_predictions.vbt.plot().show()" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [], "source": [ "# order changes dependent on fit\n", "dict_variance_regime_labels = {\n", "    0: \"Low\",\n", "    1: \"Medium\",\n", "    2: \"High\",\n", "}\n", "\n", "dict_variance_regime_colours = {\n", "    0: \"green\",\n", "    1: \"orange\",\n", "    2: \"red\",\n", "}" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "fig = vbt.make_figure()\n", "\n", "fig = plot_annotated_line(\n", "    fig,\n", "    data.index,\n", "    np.log(data.data[\"SPY\"][\"Close\"]),\n", "    sr_variance_regime_forecasts,\n", "    # sr_variance_regime_predictions,\n", "    dict_variance_regime_colours,\n", "    dict_variance_regime_labels,\n", ")\n", "\n", "fig.update_layout(\n", "    title=\"Filtered Variance Regime Labels\",\n", "    # title=\"Smoothed Variance Regime Labels\",\n", "    xaxis_title=\"Date\",\n", "    yaxis_title=\"Log Close\",\n", "    showlegend=True,\n", ")\n", "\n", "fig.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Kalman Filter\n", "Experiment with smoothing the regime probabilities.\n" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [], "source": [ "def kalman_smooth(column: np.ndarray) -> np.ndarray:\n", "    \"\"\"Apply a Kalman filter to the column.\n", "\n", "    The Kalman filter class cannot handle the NaNs created by aligning the\n", "    symbols' indices, so only apply it to the relevant slice of the array.\n", "    \"\"\"\n", "    # index = column.index\n", "    # column = column.loc[column.first_valid_index() : column.last_valid_index()]\n", "\n", "    # Filter out NaNs at the start and end of the column.\n", "    valid_mask = ~np.isnan(column)\n", "\n", "    if not valid_mask.any():\n", "        # If all values are NaN, return an array of NaNs with the same length.\n", "        return np.full_like(column, np.nan)\n", "\n", "    # argmax on the boolean mask returns the index of the first True (first valid value).\n", "    first_valid = valid_mask.argmax()\n", "    # Reverse the array to find the index of the last occurrence.\n", "    last_valid = len(valid_mask) - valid_mask[::-1].argmax()\n", "\n", "    column = column[first_valid:last_valid]\n", "\n", "    kf = KalmanFilter(initial_state_mean=0, n_dim_obs=1)\n", "    kf = kf.em(column, n_iter=5)\n", "\n", "    smoothed_state_means, _ = kf.smooth(column)\n", "\n", "    # return pd.Series(\n", "    #     data=smoothed_state_means.ravel(),\n", "    #     index=column.index,\n", "    #     name=column.name,\n", "    # ).reindex(index)\n", "    result = np.full(len(valid_mask), np.nan)\n", "    result[first_valid:last_valid] = smoothed_state_means.ravel()\n", "\n", "    return result" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [], "source": [ "Kalman = vbt.IF(\n", "    class_name=\"Kalman\",\n", "    short_name=\"kf\",\n", "    input_names=[\"column\"],\n", "    output_names=[\"smoothed_state_means\"],\n", ").with_apply_func(\n", "    kalman_smooth,\n", "    takes_1d=True,\n", ")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "ssm = Kalman.run(results_kns.filtered_marginal_probabilities)\n", "\n", "ssm.kf" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "fig = vbt.make_subplots(\n", "    rows=k_regimes_kns,\n", "    cols=1,\n", "    y_title=\"Kalman Filtered Marginal Variance Regime (Not) Probabilities\",\n", "    shared_xaxes=True,\n", "    subplot_titles=[\n", "        \"Low-variance\",\n", "        \"Medium-variance\",\n", "        \"High-variance\",\n", "    ],  # order changes dependent on fit\n", ")\n", "\n", "for i in range(k_regimes_kns):\n", "    fig = ssm.kf[i].vbt.plot(\n", "        add_trace_kwargs=dict(row=i + 1, col=1), fig=fig\n", "    )\n", "\n", "fig.update_layout(showlegend=False)\n", "fig.show()" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [], "source": [ "sr_variance_regime_kalman_forecasts = ssm.kf.idxmax(axis=1)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "sr_variance_regime_kalman_forecasts.vbt.plot().show()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "fig = vbt.make_figure()\n", "\n", "fig = plot_annotated_line(\n", "    fig,\n", "    data.index,\n", "    np.log(data.data[\"SPY\"][\"Close\"]),\n", "    sr_variance_regime_kalman_forecasts,\n", "    dict_variance_regime_colours,\n", "    dict_variance_regime_labels,\n", ")\n", "\n", "fig.update_layout(\n", "    title=\"Kalman Filtered Variance Regime Labels\",\n", "    xaxis_title=\"Date\",\n", "    yaxis_title=\"Log Close\",\n", "    showlegend=True,\n", ")\n", "\n", "fig.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Backtest\n", "\n", "### 100% SPY Allocation\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "pf = vbt.Portfolio.from_holding(close=data.data[\"SPY\"][\"Close\"])\n", "\n", "pf.stats()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "pf.plot(yaxis=dict(type=\"log\")).show()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "fig = vbt.make_figure()\n", "\n", "pf.drawdowns.plot(yaxis=dict(type=\"log\"), fig=fig)\n", "\n", "fig.show()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "fig = vbt.make_figure()\n", "\n", "pf.plot_underwater(pct_scale=True, fig=fig)\n", "\n", "fig.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 100% UPRO Allocation\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "pf = vbt.Portfolio.from_holding(\n", "    close=data.data[\"UPRO\"][\"Close\"], bm_close=data.data[\"SPY\"][\"Close\"]\n", ")\n", "\n", "pf.stats()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "pf.plot(yaxis=dict(type=\"log\")).show()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "fig = vbt.make_figure()\n", "\n", 
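"# Drawdowns of the 100% UPRO holding, plotted on a log value axis.\n", 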
"pf.drawdowns.plot(yaxis=dict(type=\"log\"), fig=fig)\n", "\n", "fig.show()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Nice!\n", "fig = vbt.make_figure()\n", "\n", "pf.plot_underwater(pct_scale=True, fig=fig)\n", "\n", "fig.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Filtered Marginal Probability Allocation\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "ws_points = data.wrapper.get_index_points(every=\"W\")\n", "\n", "ws_points" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "ws_timestamps = data.wrapper.index[ws_points]\n", "\n", "ws_timestamps" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "symbol_wrapper = data.get_symbol_wrapper(freq=\"1D\") \n", "\n", "allocations = symbol_wrapper.fill()\n", "\n", "allocations" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "allocations[\"UPRO\"] = results_kns.filtered_marginal_probabilities[0]\n", "allocations[\"SPY\"] = 1 - allocations[\"UPRO\"]\n", "\n", "allocations" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "pf = vbt.Portfolio.from_orders(\n", " close=data.get(\"Close\"),\n", " bm_close=data.data[\"SPY\"][\"Close\"],\n", " size=allocations,\n", " size_type=\"targetpercent\",\n", " group_by=True, \n", " cash_sharing=True,\n", " call_seq=\"auto\" \n", ")\n", "\n", "pf.stats()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# sim_alloc = pf.get_asset_value(group_by=False).vbt / pf.value\n", "\n", "# sim_alloc.vbt.plot(\n", "# trace_kwargs=dict(stackgroup=\"one\"),\n", "# use_gl=False\n", "# ).show()\n", "\n", "pf.plot_allocations().show()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "pf.plot(yaxis=dict(type=\"log\")).show()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "pf.drawdowns.plot(yaxis=dict(type=\"log\")).show()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "pf.plot_underwater(pct_scale=True).show()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": ".venv", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.0" } }, "nbformat": 4, "nbformat_minor": 2 }