diff --git a/research/strat_ORDER_IMBALANCE/v2_SINGLE.ipynb b/research/strat_ORDER_IMBALANCE/v2_SINGLE.ipynb index 8b2386a..3b8278d 100644 --- a/research/strat_ORDER_IMBALANCE/v2_SINGLE.ipynb +++ b/research/strat_ORDER_IMBALANCE/v2_SINGLE.ipynb @@ -13,14 +13,14 @@ "\n", "## Note\n", "\n", - "Order disbalance nepodminuje zmenu ceny (tzn. muze byt order disbalance na buy stranu, ale cena nemusi jit nahoru a naopak)\n", - "Nicmene pokud je disbalance delsi a nedochazi ke zmene ceny - může to něco indikovat. \n", - "Vytvořit si kumulativní disbalance - kumulátory, které se budou načítat, když se budou silné disbalance, bez změny ceny. Tento akumulátor se bude nabíjet disbalancí a vybíjet příslušnou změnou ceny." + "The order imbalance does not necessarily cause a price change (i.e., there can be an order imbalance on the buy side, but the price does not have to go up, and vice versa). However, if there is a prolonged imbalance without a price change, it could indicate something.\n", + "\n", + "Create a cumulative imbalance — accumulators that will build up when there are strong imbalances without a price change. This accumulator will charge up with the imbalance and discharge with the corresponding price change." ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -54,9 +54,37 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "trades_df-BAC-2024-01-01T09_30_00-2024-05-14T16_00_00-CO4B7VPWUZF-100.parquet\n", + "trades_df-BAC-2024-01-11T09:30:00-2024-01-12T16:00:00.parquet\n", + "trades_df-SPY-2024-01-01T09:30:00-2024-05-14T16:00:00.parquet\n", + "trades_df-BAC-2023-01-01T09_30_00-2024-05-25T16_00_00-47BCFOPUVWZ-100.parquet\n", + "ohlcv_df-BAC-2024-01-11T09:30:00-2024-01-12T16:00:00.parquet\n", + "trades_df-BAC-2024-05-15T09_30_00-2024-05-25T16_00_00-47BCFOPUVWZ-100.parquet\n", + "ohlcv_df-BAC-2024-01-01T09_30_00-2024-05-25T16_00_00-47BCFOPUVWZ-100.parquet\n", + "ohlcv_df-SPY-2024-01-01T09:30:00-2024-05-14T16:00:00.parquet\n", + "ohlcv_df-BAC-2024-01-01T09_30_00-2024-05-14T16_00_00-CO4B7VPWUZF-100.parquet\n", + "ohlcv_df-BAC-2023-01-01T09_30_00-2024-05-25T16_00_00-47BCFOPUVWZ-100.parquet\n", + "ohlcv_df-BAC-2023-01-01T09_30_00-2024-05-25T15_30_00-47BCFOPUVWZ-100.parquet\n" + ] + }, + { + "data": { + "text/plain": [ + "351" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Define the market open and close times\n", "market_open = datetime.time(9, 30)\n", @@ -139,11 +167,7 @@ }, { "cell_type": "code", -<<<<<<< HEAD "execution_count": null, -======= - "execution_count": 5, ->>>>>>> parent of 7cc9f86... vbt pipeline edits "metadata": {}, "outputs": [], "source": [ @@ -161,276 +185,7 @@ "execution_count": null, "metadata": {}, "outputs": [], -<<<<<<< HEAD - "source": [ - "m30data.close.lw.plot()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Stationarity test (ADF)\n", - "if the p-value > 0.05, wee need to find the order of differencing. Use returns (current price - previous price)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import time\n", - "from statsmodels.tsa.stattools import adfuller\n", - "\n", - "# Start the timer\n", - "start_time = time.time()\n", - "\n", - "# Perform the Augmented Dickey-Fuller test to check for stationarity\n", - "result = adfuller(m30data.xloc[:500].get(\"Close\"))\n", - "\n", - "# Stop the timer\n", - "end_time = time.time()\n", - "\n", - "# Print the results of the ADF test\n", - "print(result)\n", - "print('ADF Statistic: %f' % result[0])\n", - "print('p-value: %f' % result[1])\n", - "\n", - "# Print the time taken\n", - "print('Time taken: %f seconds' % (end_time - start_time))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Plot of close price vs log returns of close price vs just returns (delta)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "close = m30data.get(\"Close\")\n", - "log_returns = np.log(close) - np.log(close.shift(1))\n", - "returns = close - close.shift(1) #or close.diff()\n", - "/\n", - "\n", - "pane1 = Panel(\n", - " ohlcv=(), #(series, entries, exits, other_markers)\n", - " histogram=[], # [(series, name, \"rgba(53, 94, 59, 0.6)\", opacity)]\n", - " #[(series, name, entries, exits, other_markers)]\n", - " right=[(close, \"close\"),\n", - " ],\n", - " left = [ \n", - " (log_returns, \"log_returns\"),\n", - " (returns, \"returns\"),\n", - " # (ret_log_diff, \"ret_log_diff\"),\n", - " ],\n", - ")\n", - "ch = chart([pane1], size=\"s\", precision=6)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from statsmodels.graphics.tsaplots import plot_acf, plot_pacf\n", - "plot_acf(returns)\n", - "#no autocorrelation" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from statsmodels.tsa.arima.model import ARIMA\n", - "# Fit an ARIMA model\n", - "model = ARIMA(returns, order=(0, 1, 0)) # ARIMA(0, 1, 0) is a simple random walk model\n", - "result = model.fit()\n", - "\n", - "# Print summary of the model\n", - "#print(result.summary())\n", - "\n", - "\n", - "result.resid.lw.plot(session=None)\n", - "\n", - "plot_acf(result.resid, title='ACF of Residuals')\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from statsmodels.graphics.tsaplots import month_plot\n", - "# import matplotlib.pyplot as plt\n", - "# plot_pacf(close, title='PACF of Close', lags=30)\n", - "# plot_acf(close, title='ACF of Close', lags=30)\n", - "month_plot(close)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "close.lw.plot()\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "returns" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "close.vbt.xloc[\"2023-01-03\":\"2024-01-03\"].obj\n", - "# [\"01-03-2023\":\"01-O3-2024\"]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "insample_close = close.vbt.xloc[\"2023-01-03\":\"2024-01-03\"].get()\n", - "insample_returns = returns.vbt.xloc[\"2023-01-03\":\"2024-01-03\"].get()\n", - "outsample_close = close.vbt.xloc[\"2024-01-03\":\"2025-01-03\"].obj\n", - "outsample_returns = returns.vbt.xloc[\"2024-01-03\":\"2025-01-03\"].obj\n", - "\n", - "insample_close.info()\n", - "outsample_close.info()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from statsmodels.tsa.ar_model import AutoReg\n", - "\n", - "\n", - "# Fit the AutoReg model\n", - "model = AutoReg(insample_close, lags=5, trend=\"ct\").fit()\n", - "\n", - "# Generate vectorized predictions\n", - "predictions = model.predict(start=1, end=len(insample_close))\n", - "predictions.index = insample_close.index\n", - "\n", - "# # Generate predictions for the differenced data\n", - "# differenced_predictions = result.predict(start=1, end=len(insample_returns))\n", - "\n", - "# differenced_predictions.index = insample_returns.index\n", - "# # Back-transform the differenced predictions to the original scale\n", - "# predictions = insample_close.shift(1) + differenced_predictions\n", - "\n", - "pane1 = Panel(\n", - " ohlcv=(), #(series, entries, exits, other_markers)\n", - " histogram=[], # [(series, name, \"rgba(53, 94, 59, 0.6)\", opacity)]\n", - " #[(series, name, entries, exits, other_markers)]\n", - " right=[(insample_close, \"insample close\"),\n", - " (predictions, \"prediction next close lag1\")\n", - " ],\n", - " # left = [ \n", - " # (differenced_predictions, \"returns_predicted\"),\n", - " # (insample_returns, \"insample returns\"),],\n", - ")\n", - "ch = chart([pane1], size=\"s\",precision=6, title=\"AutoReg prediction\")\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "\n", - "# Fit the ARIMA model on the differenced data\n", - "model = ARIMA(insample_returns, order=(6, 1, 6)) # Note the differenced data, so d=0 here\n", - "result = model.fit()\n", - "\n", - "# Generate predictions for the differenced data\n", - "differenced_predictions = result.predict(start=1, end=len(insample_returns))\n", - "\n", - "differenced_predictions.index = insample_returns.index\n", - "# Back-transform the differenced predictions to the original scale\n", - "predictions = insample_close.shift(1) + differenced_predictions\n", - "\n", - "pane1 = Panel(\n", - " ohlcv=(), #(series, entries, exits, other_markers)\n", - " histogram=[], # [(series, name, \"rgba(53, 94, 59, 0.6)\", opacity)]\n", - " #[(series, name, entries, exits, other_markers)]\n", - " right=[(insample_close, \"insample close\"),\n", - " (predictions, \"prediction close\")\n", - " ],\n", - " left = [ \n", - " (differenced_predictions, \"returns_predicted\"),\n", - " (insample_returns, \"insample returns\"),],\n", - ")\n", - "ch = chart([pane1], size=\"s\",precision=6, title=\"ARIMA prediction\")\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Initialize an array to store the predictions\n", - "predictions = [np.nan] # No prediction for the first point\n", - "\n", - "# Rolling one-step-ahead forecasts\n", - "for t in range(10, len(close)): # Start from 2 to ensure enough data points\n", - " model = ARIMA(close[:t], order=(1, 1, 1)) # Fit ARIMA model up to time t-1\n", - " result = model.fit()\n", - " forecast = result.forecast(steps=1)\n", - " predictions.append(forecast.iloc[0]) # Store the forecast\n", - "\n", - "# Pad the predictions to align with the original series\n", - "predictions = [np.nan] * (len(close) - len(predictions)) + predictions\n", - "\n", - "# Convert predictions to a Pandas Series\n", - "predictions = pd.Series(predictions, index=close.index)\n", - "\n", - "pane1 = Panel(\n", - " ohlcv=(), #(series, entries, exits, other_markers)\n", - " histogram=[], # [(series, name, \"rgba(53, 94, 59, 0.6)\", opacity)]\n", - " #[(series, name, entries, exits, other_markers)]\n", - " right=[(close, \"real close\"),\n", - " (predictions, \"real close\")\n", - " ],\n", - " left = [ \n", - " (log_returns, \"log_returns\"),\n", - " (returns, \"returns\"),],\n", - ")\n", - "ch = chart([pane1], size=\"s\", session=None, precision=6, title=\"One step ahead ARIMA prediction\")\n", - "\n", - "\n" - ] -======= "source": [] ->>>>>>> parent of 7cc9f86... vbt pipeline edits }, { "cell_type": "code",