daily update

This commit is contained in:
David Brazda
2024-10-21 20:57:56 +02:00
parent 132172855a
commit e3da60c647
196 changed files with 1722489 additions and 1134 deletions

View File

@ -0,0 +1,855 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Markov Variance Switching\n",
"\n",
"# 2: Leveraged Exchange Traded Funds\n",
"\n",
"Kim, C., Nelson, C., and Startz, R. (1998). Testing for mean reversion in heteroskedastic data based on Gibbs-sampling-augmented randomization. Journal of Empirical Finance, 5(2), pp. 131-154.\n",
"\n",
"**Author:** shittles\n",
"\n",
"**Created:** 2024-10-17\n",
"\n",
"**Modified:** 2024-10-17\n",
"\n",
"## Sources\n",
"- https://www.statsmodels.org/v0.11.1/examples/notebooks/generated/markov_autoregression.html\n",
"- https://www.proshares.com/our-etfs/leveraged-and-inverse/upro\n",
"- https://www.bogleheads.org/forum/viewtopic.php?t=272007\n",
"- https://www.bogleheads.org/forum/viewtopic.php?t=288192\n",
"- https://www.reddit.com/r/LETFs/comments/14lubaz/finally_an_accurate_backtesting_model/\n",
"- https://www.reddit.com/r/mauerstrassenwetten/comments/sivtas/zahlgrafs_exzellente_abenteuer_teil_4/\n",
"- https://code.launchpad.net/zgea\n",
"\n",
"## Changelog\n",
"- Modified Markov variance switching notebook for portfolio optimisation (2h - 2024-10-17).\n"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"import plotly.express as px\n",
"import plotly.graph_objects as go\n",
"import statsmodels.api as sm\n",
"\n",
"from pykalman import KalmanFilter\n",
"from sklearn.compose import make_column_transformer\n",
"from sklearn.preprocessing import RobustScaler\n",
"\n",
"from vectorbtpro import *"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"vbt.settings.set_theme(\"dark\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Ingestion\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Use the fund, not the index, because it's going to be part of a portfolio.\n",
"data = vbt.YFData.pull(\n",
" [\"SPY\", \"UPRO\"], start=\"50 years ago\", end=\"today\", timeframe=\"daily\", tz=\"UTC\"\n",
")\n",
"\n",
"data.stats()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"data.data[\"SPY\"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"data.data[\"UPRO\"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"data.index"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"data.plot(symbol=\"SPY\", yaxis=dict(type=\"log\")).show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"data.plot(symbol=\"UPRO\", yaxis=dict(type=\"log\")).show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Cleaning\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"data.features"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"data.get_feature(\"Dividends\").any()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"data.get_feature(\"Stock Splits\").any()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"data.get_feature(\"Capital Gains\").any()"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"data = data.remove_features([\"Capital Gains\"])"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"# A post global financial crisis backtest probably isn't long enough.\n",
"data = data.transform(lambda df: df.loc[\"June 25th 2009\" < df.index])"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"# I don't need to resample the data since it's sourced from the same exchange.\n",
"# data = data.resample(\"daily\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Modelling\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"data.log_returns.vbt.plot().show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Pre-processing\n"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"column_transformer = make_column_transformer(\n",
" (RobustScaler(), [\"SPY\"]),\n",
")\n",
"\n",
"sr_log_returns_scaled = pd.Series(\n",
" data=column_transformer.fit_transform(pd.DataFrame(data.log_returns[\"SPY\"])).ravel(),\n",
" index=data.index,\n",
" name=\"SPY\",\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"sr_log_returns_scaled.vbt.plot().show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Markov Regression\n"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"k_regimes_kns = 3"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"kns = sm.tsa.MarkovRegression(\n",
" sr_log_returns_scaled, k_regimes=k_regimes_kns, trend=\"n\", switching_variance=True\n",
")\n",
"results_kns = kns.fit()\n",
"\n",
"results_kns.summary()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"results_kns.filtered_marginal_probabilities # using data until time t (excluding time t+1, ..., T)\n",
"# results_kns.smoothed_marginal_probabilities # using data until time T"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"fig = vbt.make_subplots(\n",
" rows=k_regimes_kns,\n",
" cols=1,\n",
" y_title=\"Filtered Marginal Variance Regime Probabilities\",\n",
" # y_title=\"Smoothed Marginal Variance Regime Probabilities\",\n",
" shared_xaxes=True,\n",
" subplot_titles=[\n",
" \"Low-variance\",\n",
" \"Medium-variance\",\n",
" \"High-variance\",\n",
" ], # order changes dependent on fit\n",
")\n",
"\n",
"for i in range(k_regimes_kns):\n",
" fig = results_kns.filtered_marginal_probabilities[i].vbt.plot(\n",
" # fig = results_kns.smoothed_marginal_probabilities[i].vbt.plot(\n",
" add_trace_kwargs=dict(row=i + 1, col=1), fig=fig\n",
" )\n",
"\n",
"fig.update_layout(showlegend=False)\n",
"fig.show()"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
"def plot_annotated_line(\n",
"    fig: go.Figure,\n",
"    x: pd.Series,\n",
"    y: pd.Series,\n",
"    classes: pd.Series,\n",
"    dict_class_colours: dict,\n",
"    dict_class_labels: dict,\n",
") -> go.Figure:\n",
"    \"\"\"Plot a line chart where each segment is coloured based on its class.\n",
"\n",
"    Plotly cannot vary the colour along a single trace, so the line is drawn\n",
"    as one trace per run of consecutive segments that share a class. The\n",
"    segment joining point i to point i + 1 takes the colour of the class at\n",
"    position i, so the class of the final point is never drawn.\n",
"\n",
"    Args:\n",
"        fig: Figure to add the traces to.\n",
"        x: Indices (a pandas Index or Series), e.g. timestamps.\n",
"        y: Values to plot, e.g. close prices. Same length as x.\n",
"        classes: Class of each point, e.g. regimes. Same length as x.\n",
"        dict_class_colours: In the format {class: colour}\n",
"        dict_class_labels: In the format {class: label}\n",
"\n",
"    Returns:\n",
"        fig: The figure.\n",
"\n",
"    Raises:\n",
"        KeyError: If a plotted class has no colour, or a coloured class has\n",
"            no label.\n",
"    \"\"\"\n",
"    # Address everything by position: indexing a Series with an integer is a\n",
"    # label lookup, which is wrong (or deprecated) for a non-default index.\n",
"    x_values = pd.Index(x)\n",
"    y_values = np.asarray(y)\n",
"    class_values = np.asarray(classes)\n",
"\n",
"    # Draw each run of same-class segments as a single trace instead of one\n",
"    # trace per segment, which keeps the number of traces manageable.\n",
"    n_segments = len(x_values) - 1\n",
"    start = 0\n",
"    while start < n_segments:\n",
"        stop = start + 1\n",
"        while stop < n_segments and class_values[stop] == class_values[start]:\n",
"            stop += 1\n",
"\n",
"        fig.add_trace(\n",
"            go.Scatter(\n",
"                # Segments start..stop-1 join points start..stop inclusive.\n",
"                x=x_values[start : stop + 1],\n",
"                y=y_values[start : stop + 1],\n",
"                mode=\"lines\",\n",
"                line=dict(color=dict_class_colours[class_values[start]], width=2),\n",
"                showlegend=False,  # added manually\n",
"            )\n",
"        )\n",
"        start = stop\n",
"\n",
"    # Empty traces exist only to give each class one legend entry.\n",
"    for regime, colour in dict_class_colours.items():\n",
"        fig.add_trace(\n",
"            go.Scatter(\n",
"                x=[None],\n",
"                y=[None],\n",
"                mode=\"lines\",\n",
"                line=dict(color=colour, width=2),\n",
"                name=dict_class_labels[regime],\n",
"            )\n",
"        )\n",
"\n",
"    return fig"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [],
"source": [
"sr_variance_regime_forecasts = results_kns.filtered_marginal_probabilities.idxmax(\n",
" axis=1\n",
")\n",
"\n",
"sr_variance_regime_predictions = results_kns.smoothed_marginal_probabilities.idxmax(\n",
" axis=1\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"sr_variance_regime_forecasts.vbt.plot().show()\n",
"# sr_variance_regime_predictions.vbt.plot().show()"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [],
"source": [
"# The regime numbering is arbitrary and can change between fits, so rank the\n",
"# regimes by their fitted variance instead of assuming 0 < 1 < 2.\n",
"variance_labels = [\"Low\", \"Medium\", \"High\"]\n",
"variance_colours = [\"green\", \"orange\", \"red\"]\n",
"assert k_regimes_kns == len(variance_labels), \"one label is needed per regime\"\n",
"\n",
"# Map each parameter name (e.g. \"sigma2[0]\") to its fitted value.\n",
"fitted_params = dict(zip(results_kns.model.param_names, results_kns.params))\n",
"\n",
"# Regime indices ordered from the lowest to the highest fitted variance.\n",
"regimes_by_variance = np.argsort(\n",
"    [fitted_params[f\"sigma2[{i}]\"] for i in range(k_regimes_kns)]\n",
")\n",
"\n",
"dict_variance_regime_labels = {\n",
"    int(regime): label for regime, label in zip(regimes_by_variance, variance_labels)\n",
"}\n",
"\n",
"dict_variance_regime_colours = {\n",
"    int(regime): colour\n",
"    for regime, colour in zip(regimes_by_variance, variance_colours)\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"fig = vbt.make_figure()\n",
"\n",
"fig = plot_annotated_line(\n",
" fig,\n",
" data.index,\n",
" np.log(data.data[\"SPY\"][\"Close\"]),\n",
" sr_variance_regime_forecasts,\n",
" # sr_variance_regime_predictions,\n",
" dict_variance_regime_colours,\n",
" dict_variance_regime_labels,\n",
")\n",
"\n",
"fig.update_layout(\n",
" title=\"Filtered Variance Regime Labels\",\n",
" # title=\"Smoothed Variance Regime Labels\",\n",
" xaxis_title=\"Date\",\n",
" yaxis_title=\"Log Close\",\n",
" showlegend=True,\n",
")\n",
"\n",
"fig.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Kalman Filter\n",
"Experiment with smoothing the regime probabilities.\n"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [],
"source": [
"def kalman_smooth(column: np.ndarray) -> np.ndarray:\n",
"    \"\"\"Smooth a 1-D array with a Kalman smoother whose parameters are fit by EM.\n",
"\n",
"    Leading and trailing NaNs (created by aligning the indices of symbols with\n",
"    different histories) are stripped before fitting and restored afterwards,\n",
"    so only the relevant slice of the array is smoothed. NaNs between the\n",
"    first and last valid values are masked so that they are treated as\n",
"    missing observations rather than propagating through the filter.\n",
"\n",
"    Args:\n",
"        column: 1-D array of observations.\n",
"\n",
"    Returns:\n",
"        Array of the same length as the input holding the smoothed state\n",
"        means, with NaN wherever the input had leading or trailing NaNs.\n",
"    \"\"\"\n",
"    column = np.asarray(column, dtype=float)\n",
"    valid_mask = ~np.isnan(column)\n",
"\n",
"    if not valid_mask.any():\n",
"        # If all values are NaN, return an array of NaNs with the same length.\n",
"        return np.full(len(column), np.nan)\n",
"\n",
"    # argmax of a boolean array is the position of its first True.\n",
"    first_valid = valid_mask.argmax()\n",
"    # Reverse the mask to find the position one past the last True.\n",
"    last_valid = len(valid_mask) - valid_mask[::-1].argmax()\n",
"\n",
"    # Mask any interior NaNs so they are handled as missing observations.\n",
"    observations = np.ma.masked_invalid(column[first_valid:last_valid])\n",
"\n",
"    kf = KalmanFilter(initial_state_mean=0, n_dim_obs=1)\n",
"    kf = kf.em(observations, n_iter=5)\n",
"\n",
"    smoothed_state_means, _ = kf.smooth(observations)\n",
"\n",
"    # Write the smoothed slice back into a NaN-padded array of the input length.\n",
"    result = np.full(len(valid_mask), np.nan)\n",
"    result[first_valid:last_valid] = smoothed_state_means.ravel()\n",
"\n",
"    return result"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [],
"source": [
"Kalman = vbt.IF(\n",
" class_name=\"Kalman\",\n",
" short_name=\"kf\",\n",
" input_names=[\"column\"],\n",
" output_names=[\"smoothed_state_means\"],\n",
").with_apply_func(\n",
" kalman_smooth,\n",
" takes_1d=True,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ssm = Kalman.run(results_kns.filtered_marginal_probabilities)\n",
"\n",
"ssm.kf"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"fig = vbt.make_subplots(\n",
" rows=k_regimes_kns,\n",
" cols=1,\n",
" y_title=\"Kalman Filtered Marginal Variance Regime (Not) Probabilities\",\n",
" shared_xaxes=True,\n",
" subplot_titles=[\n",
" \"Low-variance\",\n",
" \"Medium-variance\",\n",
" \"High-variance\",\n",
" ], # order changes dependent on fit\n",
")\n",
"\n",
"for i in range(k_regimes_kns):\n",
" fig = ssm.kf[i].vbt.plot(\n",
" add_trace_kwargs=dict(row=i + 1, col=1), fig=fig\n",
" )\n",
"\n",
"fig.update_layout(showlegend=False)\n",
"fig.show()"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [],
"source": [
"sr_variance_regime_kalman_forecasts = ssm.kf.idxmax(axis=1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"sr_variance_regime_kalman_forecasts.vbt.plot().show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"fig = vbt.make_figure()\n",
"\n",
"fig = plot_annotated_line(\n",
" fig,\n",
" data.index,\n",
" np.log(data.data[\"SPY\"][\"Close\"]),\n",
" sr_variance_regime_kalman_forecasts,\n",
" dict_variance_regime_colours,\n",
" dict_variance_regime_labels,\n",
")\n",
"\n",
"fig.update_layout(\n",
" title=\"Kalman Filtered Variance Regime Labels\",\n",
" xaxis_title=\"Date\",\n",
" yaxis_title=\"Log Close\",\n",
" showlegend=True,\n",
")\n",
"\n",
"fig.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Backtest\n",
"\n",
"### 100% SPY Allocation\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"pf = vbt.Portfolio.from_holding(close=data.data[\"SPY\"][\"Close\"])\n",
"\n",
"pf.stats()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"pf.plot(yaxis=dict(type=\"log\")).show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"fig = vbt.make_figure()\n",
"\n",
"pf.drawdowns.plot(yaxis=dict(type=\"log\"), fig=fig)\n",
"\n",
"fig.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"fig = vbt.make_figure()\n",
"\n",
"pf.plot_underwater(pct_scale=True, fig=fig)\n",
"\n",
"fig.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 100% UPRO Allocation\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"pf = vbt.Portfolio.from_holding(\n",
" close=data.data[\"UPRO\"][\"Close\"], bm_close=data.data[\"SPY\"][\"Close\"]\n",
")\n",
"\n",
"pf.stats()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"pf.plot(yaxis=dict(type=\"log\")).show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"fig = vbt.make_figure()\n",
"\n",
"pf.drawdowns.plot(yaxis=dict(type=\"log\"), fig=fig)\n",
"\n",
"fig.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Nice!\n",
"fig = vbt.make_figure()\n",
"\n",
"pf.plot_underwater(pct_scale=True, fig=fig)\n",
"\n",
"fig.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Filtered Marginal Probability Allocation\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ws_points = data.wrapper.get_index_points(every=\"W\")\n",
"\n",
"ws_points"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ws_timestamps = data.wrapper.index[ws_points]\n",
"\n",
"ws_timestamps"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"symbol_wrapper = data.get_symbol_wrapper(freq=\"1D\") \n",
"\n",
"allocations = symbol_wrapper.fill()\n",
"\n",
"allocations"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# The regime numbering is arbitrary and can change between fits, so look up\n",
"# which regime has the smallest fitted variance instead of assuming it is 0.\n",
"fitted_params = dict(zip(results_kns.model.param_names, results_kns.params))\n",
"low_variance_regime = int(\n",
"    np.argmin([fitted_params[f\"sigma2[{i}]\"] for i in range(k_regimes_kns)])\n",
")\n",
"\n",
"# Hold the leveraged fund in proportion to the probability of the\n",
"# low-variance regime and the unleveraged fund with the remainder.\n",
"allocations[\"UPRO\"] = results_kns.filtered_marginal_probabilities[low_variance_regime]\n",
"allocations[\"SPY\"] = 1 - allocations[\"UPRO\"]\n",
"\n",
"allocations"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"pf = vbt.Portfolio.from_orders(\n",
" close=data.get(\"Close\"),\n",
" bm_close=data.data[\"SPY\"][\"Close\"],\n",
" size=allocations,\n",
" size_type=\"targetpercent\",\n",
" group_by=True, \n",
" cash_sharing=True,\n",
" call_seq=\"auto\" \n",
")\n",
"\n",
"pf.stats()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# sim_alloc = pf.get_asset_value(group_by=False).vbt / pf.value\n",
"\n",
"# sim_alloc.vbt.plot(\n",
"# trace_kwargs=dict(stackgroup=\"one\"),\n",
"# use_gl=False\n",
"# ).show()\n",
"\n",
"pf.plot_allocations().show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"pf.plot(yaxis=dict(type=\"log\")).show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"pf.drawdowns.plot(yaxis=dict(type=\"log\")).show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"pf.plot_underwater(pct_scale=True).show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.0"
}
},
"nbformat": 4,
"nbformat_minor": 2
}