Files

David Brazda e3da60c647 daily update

2024-10-21 20:57:56 +02:00

20 KiB

Raw Blame History

Markov Variance Switching¶

2: Leveraged Exchange Traded Funds¶

Kim, C., Nelson, C., and Startz, R. (1998). Testing for mean reversion in heteroskedastic data based on Gibbs-sampling-augmented randomization. Journal of Empirical Finance, 5(2), pp. 131-154.

Author: shittles

Created: 2024-10-17

Modified: 2024-10-17

Sources¶

Changelog¶

Modified Markov variance switching notebook for portfolio optimisation (2h - 2024-10-17).

In [1]:

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import statsmodels.api as sm

from pykalman import KalmanFilter
from sklearn.compose import make_column_transformer
from sklearn.preprocessing import RobustScaler

from vectorbtpro import *

In [2]:

vbt.settings.set_theme("dark")

Ingestion¶

In [ ]:

# Use the fund, not the index, because it's going to be part of a portfolio.
# Request daily bars for SPY and UPRO with tz="UTC", asking for up to 50 years
# of history ending today.
data = vbt.YFData.pull(
    ["SPY", "UPRO"], start="50 years ago", end="today", timeframe="daily", tz="UTC"
)

# Display summary statistics for the pulled data.
data.stats()

In [ ]:

data.data["SPY"]

In [ ]:

data.data["UPRO"]

In [ ]:

data.index

In [ ]:

data.plot(symbol="SPY", yaxis=dict(type="log")).show()

In [ ]:

data.plot(symbol="UPRO", yaxis=dict(type="log")).show()

Cleaning¶

In [ ]:

data.features

In [ ]:

data.get_feature("Dividends").any()

In [ ]:

data.get_feature("Stock Splits").any()

In [ ]:

data.get_feature("Capital Gains").any()

In [13]:

data = data.remove_features(["Capital Gains"])

In [14]:

# A post global financial crisis backtest probably isn't long enough.
# Keep only the rows dated strictly after 25 June 2009.
# NOTE(review): presumably chosen to match the start of UPRO's history - confirm.
data = data.transform(lambda df: df.loc["June 25th 2009" < df.index])

In [15]:

# I don't need to resample the data since it's sourced from the same exchange.
# data = data.resample("daily")

Modelling¶

In [ ]:

data.log_returns.vbt.plot().show()

Pre-processing¶

In [17]:

# Scale only the "SPY" column, using a RobustScaler.
column_transformer = make_column_transformer(
    (RobustScaler(), ["SPY"]),
)

# Fit the scaler on the SPY log returns, flatten the transformed output and
# wrap it back into a Series on the original index.
sr_log_returns_scaled = pd.Series(
    data=column_transformer.fit_transform(pd.DataFrame(data.log_returns["SPY"])).ravel(),
    index=data.index,
    name="SPY",
)

In [ ]:

sr_log_returns_scaled.vbt.plot().show()

Markov Regression¶

In [19]:

k_regimes_kns = 3

In [ ]:

# Markov switching regression on the scaled SPY log returns with
# `k_regimes_kns` regimes, no trend term (trend="n") and a variance that
# switches between regimes.
kns = sm.tsa.MarkovRegression(
    sr_log_returns_scaled, k_regimes=k_regimes_kns, trend="n", switching_variance=True
)
results_kns = kns.fit()

# Display the fit summary.
results_kns.summary()

In [ ]:

results_kns.filtered_marginal_probabilities  # using data until time t (excluding time t+1, ..., T)
# results_kns.smoothed_marginal_probabilities  # using data until time T

In [ ]:

fig = vbt.make_subplots(
    rows=k_regimes_kns,
    cols=1,
    y_title="Filtered Marginal Variance Regime Probabilities",
    # y_title="Smoothed Marginal Variance Regime Probabilities",
    shared_xaxes=True,
    subplot_titles=[
        "Low-variance",
        "Medium-variance",
        "High-variance",
    ],  # order changes dependent on fit
)

for i in range(k_regimes_kns):
    fig = results_kns.filtered_marginal_probabilities[i].vbt.plot(
    # fig = results_kns.smoothed_marginal_probabilities[i].vbt.plot(
        add_trace_kwargs=dict(row=i + 1, col=1), fig=fig
    )

fig.update_layout(showlegend=False)
fig.show()

In [23]:

def plot_annotated_line(
    fig: go.Figure,
    x: pd.Series,
    y: pd.Series,
    classes: pd.Series,
    dict_class_colours: dict,
    dict_class_labels: dict,
) -> go.Figure:
    """Plot a line chart where each segment is coloured based on its class.

    Plotly cannot vary the colour along a single line, so the line is drawn as
    several traces. The segment joining point ``i`` to point ``i + 1`` takes
    the colour of ``classes`` at position ``i``. Consecutive segments that
    share a class are merged into one trace, so the number of traces grows
    with the number of regime changes rather than the number of points.

    Args:
        fig: Figure to add the traces to.
        x: Indices.
        y: Close prices.
        classes: Regimes, aligned by position with ``x`` and ``y``.
        dict_class_colours: In the format {class: colour}
        dict_class_labels: In the format {class: label}

    Returns:
        fig: The figure.

    Raises:
        ValueError: If ``classes`` has fewer entries than there are segments.
        KeyError: If a class has no entry in ``dict_class_colours`` or
            ``dict_class_labels``.
    """
    n_segments = max(len(x) - 1, 0)

    # Look the classes up by position. Integer keys on a Series that has a
    # non-integer index only fall back to positions through a deprecated
    # pandas code path, so work on a plain array instead.
    segment_classes = np.asarray(classes)[:n_segments]
    if len(segment_classes) < n_segments:
        raise ValueError(
            f"classes has {len(segment_classes)} entries but {n_segments} "
            "segments need a class"
        )

    if n_segments > 0:
        # Positions at which a new run of identical classes begins.
        boundaries = np.flatnonzero(segment_classes[1:] != segment_classes[:-1]) + 1
        run_starts = np.concatenate(([0], boundaries)).tolist()
        run_stops = np.concatenate((boundaries, [n_segments])).tolist()

        # Plot each run of segments in its corresponding colour.
        for start, stop in zip(run_starts, run_stops):
            fig.add_trace(
                go.Scatter(
                    # Segments start..stop-1 span the points start..stop.
                    x=x[start : stop + 1],
                    y=y[start : stop + 1],
                    mode="lines",
                    line=dict(
                        color=dict_class_colours[segment_classes[start]], width=2
                    ),
                    showlegend=False,  # added manually
                )
            )

    # Label each regime with an empty trace that only shows up in the legend.
    for regime, colour in dict_class_colours.items():
        fig.add_trace(
            go.Scatter(
                x=[None],
                y=[None],
                mode="lines",
                line=dict(color=colour, width=2),
                name=dict_class_labels[regime],
            )
        )

    return fig

In [24]:

# For each row, pick the regime whose filtered probability is highest.
sr_variance_regime_forecasts = results_kns.filtered_marginal_probabilities.idxmax(
    axis=1
)

# Same, but from the smoothed probabilities.
sr_variance_regime_predictions = results_kns.smoothed_marginal_probabilities.idxmax(
    axis=1
)

In [ ]:

sr_variance_regime_forecasts.vbt.plot().show()
# sr_variance_regime_predictions.vbt.plot().show()

In [26]:

# The regime order changes dependent on fit, so check this mapping against
# the fit summary before relying on it.
# Regime index -> legend label.
dict_variance_regime_labels = {
    0: "Low",
    1: "Medium",
    2: "High",
}

# Regime index -> line colour.
dict_variance_regime_colours = {
    0: "green",
    1: "orange",
    2: "red",
}

In [ ]:

fig = vbt.make_figure()

fig = plot_annotated_line(
    fig,
    data.index,
    np.log(data.data["SPY"]["Close"]),
    sr_variance_regime_forecasts,
    # sr_variance_regime_predictions,
    dict_variance_regime_colours,
    dict_variance_regime_labels,
)

fig.update_layout(
    title="Filtered Variance Regime Labels",
    # title="Smoothed Variance Regime Labels",
    xaxis_title="Date",
    yaxis_title="Log Close",
    showlegend=True,
)

fig.show()

Kalman Filter¶

Experiment with smoothing the regime probabilities.

In [28]:

def kalman_smooth(column: np.ndarray) -> np.ndarray:
    """Apply a Kalman smoother to the column.

    The NaNs created at the start and end of the column by aligning symbol
    indices are trimmed off before fitting, so the filter is only applied to
    the relevant slice of the array. NaNs left inside that slice are masked
    so that pykalman treats them as missing observations instead of
    propagating them through the filter.

    Args:
        column: 1-D array of observations, possibly padded with NaNs.

    Returns:
        An array with the same length as ``column``. It holds the smoothed
        state means between the first and last valid observation and NaN
        everywhere else.
    """
    column = np.asarray(column, dtype=float)
    valid_mask = ~np.isnan(column)

    if not valid_mask.any():
        # If all values are NaN, return an array of NaNs with the same length.
        return np.full(len(column), np.nan)

    # argmax on a boolean array gives the index of the first True value.
    first_valid = valid_mask.argmax()
    # Reverse the array to find the (exclusive) end of the last valid value.
    last_valid = len(valid_mask) - valid_mask[::-1].argmax()

    # Mask any interior NaNs so they count as missing observations.
    observations = np.ma.masked_invalid(column[first_valid:last_valid])

    kf = KalmanFilter(initial_state_mean=0, n_dim_obs=1)
    kf = kf.em(observations, n_iter=5)

    smoothed_state_means, _ = kf.smooth(observations)

    # Put the smoothed slice back into a NaN-padded array of the input length.
    result = np.full(len(valid_mask), np.nan)
    result[first_valid:last_valid] = smoothed_state_means.ravel()

    return result

In [29]:

# Wrap kalman_smooth as a vectorbt indicator with one input ("column") and
# one output ("smoothed_state_means"); the output is read back through the
# short name, e.g. `ssm.kf`.
Kalman = vbt.IF(
    class_name="Kalman",
    short_name="kf",
    input_names=["column"],
    output_names=["smoothed_state_means"],
).with_apply_func(
    kalman_smooth,
    takes_1d=True,  # presumably calls kalman_smooth once per column - confirm
)

In [ ]:

ssm = Kalman.run(results_kns.filtered_marginal_probabilities)

ssm.kf

In [ ]:

fig = vbt.make_subplots(
    rows=k_regimes_kns,
    cols=1,
    y_title="Kalman Filtered Marginal Variance Regime (Not) Probabilities",
    shared_xaxes=True,
    subplot_titles=[
        "Low-variance",
        "Medium-variance",
        "High-variance",
    ],  # order changes dependent on fit
)

for i in range(k_regimes_kns):
    fig = ssm.kf[i].vbt.plot(
        add_trace_kwargs=dict(row=i + 1, col=1), fig=fig
    )

fig.update_layout(showlegend=False)
fig.show()

In [32]:

sr_variance_regime_kalman_forecasts = ssm.kf.idxmax(axis=1)

In [ ]:

sr_variance_regime_kalman_forecasts.vbt.plot().show()

In [ ]:

fig = vbt.make_figure()

fig = plot_annotated_line(
    fig,
    data.index,
    np.log(data.data["SPY"]["Close"]),
    sr_variance_regime_kalman_forecasts,
    dict_variance_regime_colours,
    dict_variance_regime_labels,
)

fig.update_layout(
    title="Kalman Filtered Variance Regime Labels",
    xaxis_title="Date",
    yaxis_title="Log Close",
    showlegend=True,
)

fig.show()

Backtest¶

100% SPY Allocation¶

In [ ]:

pf = vbt.Portfolio.from_holding(close=data.data["SPY"]["Close"])

pf.stats()

In [ ]:

pf.plot(yaxis=dict(type="log")).show()

In [ ]:

fig = vbt.make_figure()

pf.drawdowns.plot(yaxis=dict(type="log"), fig=fig)

fig.show()

In [ ]:

fig = vbt.make_figure()

pf.plot_underwater(pct_scale=True, fig=fig)

fig.show()

100% UPRO Allocation¶

In [ ]:

pf = vbt.Portfolio.from_holding(
    close=data.data["UPRO"]["Close"], bm_close=data.data["SPY"]["Close"]
)

pf.stats()

In [ ]:

pf.plot(yaxis=dict(type="log")).show()

In [ ]:

fig = vbt.make_figure()

pf.drawdowns.plot(yaxis=dict(type="log"), fig=fig)

fig.show()

In [ ]:

# Nice!
fig = vbt.make_figure()

pf.plot_underwater(pct_scale=True, fig=fig)

fig.show()

Filtered Marginal Probability Allocation¶

In [ ]:

ws_points = data.wrapper.get_index_points(every="W")

ws_points

In [ ]:

ws_timestamps = data.wrapper.index[ws_points]

ws_timestamps

In [ ]:

symbol_wrapper = data.get_symbol_wrapper(freq="1D")  

allocations = symbol_wrapper.fill()

allocations

In [ ]:

# Weight UPRO by the filtered probability of regime 0 and put the rest in SPY.
# NOTE(review): this assumes regime 0 is the low-variance regime, but the
# regime order changes dependent on fit - confirm against the fit summary.
allocations["UPRO"] = results_kns.filtered_marginal_probabilities[0]
allocations["SPY"] = 1 - allocations["UPRO"]

allocations

In [ ]:

# Simulate a portfolio that targets the weights in `allocations` on every
# row, using SPY's close as `bm_close`.
pf = vbt.Portfolio.from_orders(
    close=data.get("Close"),
    bm_close=data.data["SPY"]["Close"],
    size=allocations,
    size_type="targetpercent",  # sizes are target weights, not share counts
    group_by=True,  # put both symbols into a single group
    cash_sharing=True,
    call_seq="auto"  # NOTE(review): presumably orders sells before buys - confirm
)

pf.stats()

In [ ]:

# sim_alloc = pf.get_asset_value(group_by=False).vbt / pf.value

# sim_alloc.vbt.plot(
#    trace_kwargs=dict(stackgroup="one"),
#    use_gl=False
# ).show()

pf.plot_allocations().show()

In [ ]:

pf.plot(yaxis=dict(type="log")).show()

In [ ]:

pf.drawdowns.plot(yaxis=dict(type="log")).show()

In [ ]:

pf.plot_underwater(pct_scale=True).show()

In [ ]:

20 KiB Raw Blame History

Markov Variance Switching¶

2: Leveraged Exchange Traded Funds¶

Sources¶

Changelog¶

Ingestion¶

Cleaning¶

Modelling¶

Pre-processing¶

Markov Regression¶

Kalman Filter¶

Backtest¶

100% SPY Allocation¶

100% UPRO Allocation¶

Filtered Marginal Probability Allocation¶

20 KiB

Raw Blame History