eng translation

2024-08-30 21:06:06 +02:00
parent c11ed9d474
commit 51ba16dbe3
1 changed files with 34 additions and 279 deletions
--- a/research/strat_ORDER_IMBALANCE/v2_SINGLE.ipynb
+++ b/research/strat_ORDER_IMBALANCE/v2_SINGLE.ipynb
@@ -13,14 +13,14 @@
    "\n",
    "## Note\n",
    "\n",
-    "Order disbalance nepodminuje zmenu ceny (tzn. muze byt order disbalance na buy stranu, ale cena nemusi jit nahoru a naopak)\n",
+    "The order imbalance does not necessarily cause a price change (i.e., there can be an order imbalance on the buy side, but the price does not have to go up, and vice versa). However, if there is a prolonged imbalance without a price change, it could indicate something.\n",
-    "Nicmene pokud je disbalance delsi a nedochazi ke zmene ceny - může to něco indikovat. \n",
+    "\n",
-    "Vytvořit si kumulativní disbalance - kumulátory, které se budou načítat, když se budou silné disbalance, bez změny ceny. Tento akumulátor se bude nabíjet disbalancí a vybíjet příslušnou změnou ceny."
+    "Create a cumulative imbalance — accumulators that will build up when there are strong imbalances without a price change. This accumulator will charge up with the imbalance and discharge with the corresponding price change."
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -54,9 +54,37 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 3,
   "metadata": {},
-   "outputs": [],
+   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "trades_df-BAC-2024-01-01T09_30_00-2024-05-14T16_00_00-CO4B7VPWUZF-100.parquet\n",
      "trades_df-BAC-2024-01-11T09:30:00-2024-01-12T16:00:00.parquet\n",
      "trades_df-SPY-2024-01-01T09:30:00-2024-05-14T16:00:00.parquet\n",
      "trades_df-BAC-2023-01-01T09_30_00-2024-05-25T16_00_00-47BCFOPUVWZ-100.parquet\n",
      "ohlcv_df-BAC-2024-01-11T09:30:00-2024-01-12T16:00:00.parquet\n",
      "trades_df-BAC-2024-05-15T09_30_00-2024-05-25T16_00_00-47BCFOPUVWZ-100.parquet\n",
      "ohlcv_df-BAC-2024-01-01T09_30_00-2024-05-25T16_00_00-47BCFOPUVWZ-100.parquet\n",
      "ohlcv_df-SPY-2024-01-01T09:30:00-2024-05-14T16:00:00.parquet\n",
      "ohlcv_df-BAC-2024-01-01T09_30_00-2024-05-14T16_00_00-CO4B7VPWUZF-100.parquet\n",
      "ohlcv_df-BAC-2023-01-01T09_30_00-2024-05-25T16_00_00-47BCFOPUVWZ-100.parquet\n",
      "ohlcv_df-BAC-2023-01-01T09_30_00-2024-05-25T15_30_00-47BCFOPUVWZ-100.parquet\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "351"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Define the market open and close times\n",
    "market_open = datetime.time(9, 30)\n",
@@ -139,11 +167,7 @@
  },
  {
   "cell_type": "code",
 <<<<<<< HEAD
   "execution_count": null,
 =======
   "execution_count": 5,
 >>>>>>> parent of 7cc9f86... vbt pipeline edits
   "metadata": {},
   "outputs": [],
   "source": [
@@ -161,276 +185,7 @@
   "execution_count": null,
   "metadata": {},
   "outputs": [],
 <<<<<<< HEAD
   "source": [
    "m30data.close.lw.plot()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Stationarity test (ADF)\n",
    "If the p-value > 0.05, non-stationarity cannot be rejected and we need to find the order of differencing. Use returns (current price - previous price)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import time\n",
    "from statsmodels.tsa.stattools import adfuller\n",
    "\n",
    "# Start the timer\n",
    "start_time = time.time()\n",
    "\n",
    "# Perform the Augmented Dickey-Fuller test to check for stationarity\n",
    "result = adfuller(m30data.xloc[:500].get(\"Close\"))\n",
    "\n",
    "# Stop the timer\n",
    "end_time = time.time()\n",
    "\n",
    "# Print the results of the ADF test\n",
    "print(result)\n",
    "print('ADF Statistic: %f' % result[0])\n",
    "print('p-value: %f' % result[1])\n",
    "\n",
    "# Print the time taken\n",
    "print('Time taken: %f seconds' % (end_time - start_time))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Plot of close price vs log returns of close price vs just returns (delta)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "close = m30data.get(\"Close\")\n",
    "log_returns = np.log(close) - np.log(close.shift(1))\n",
    "returns = close - close.shift(1) #or close.diff()\n",
    "/\n",
    "\n",
    "pane1 = Panel(\n",
    "    ohlcv=(),       #(series, entries, exits, other_markers)\n",
    "    histogram=[],   # [(series, name, \"rgba(53, 94, 59, 0.6)\", opacity)]\n",
    "                    #[(series, name, entries, exits, other_markers)]\n",
    "    right=[(close, \"close\"),\n",
    "            ],\n",
    "    left = [        \n",
    "           (log_returns, \"log_returns\"),\n",
    "           (returns, \"returns\"),\n",
    "        #    (ret_log_diff, \"ret_log_diff\"),\n",
    "           ],\n",
    ")\n",
    "ch = chart([pane1], size=\"s\", precision=6)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from statsmodels.graphics.tsaplots import plot_acf, plot_pacf\n",
    "plot_acf(returns)\n",
    "#no autocorrelation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from statsmodels.tsa.arima.model import ARIMA\n",
    "# Fit an ARIMA model\n",
    "model = ARIMA(returns, order=(0, 1, 0))  # ARIMA(0, 1, 0) is a simple random walk model\n",
    "result = model.fit()\n",
    "\n",
    "# Print summary of the model\n",
    "#print(result.summary())\n",
    "\n",
    "\n",
    "result.resid.lw.plot(session=None)\n",
    "\n",
    "plot_acf(result.resid, title='ACF of Residuals')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from statsmodels.graphics.tsaplots import month_plot\n",
    "# import matplotlib.pyplot as plt\n",
    "# plot_pacf(close, title='PACF of Close', lags=30)\n",
    "# plot_acf(close, title='ACF of Close', lags=30)\n",
    "month_plot(close)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "close.lw.plot()\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "returns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "close.vbt.xloc[\"2023-01-03\":\"2024-01-03\"].obj\n",
    "# [\"01-03-2023\":\"01-03-2024\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "insample_close = close.vbt.xloc[\"2023-01-03\":\"2024-01-03\"].get()\n",
    "insample_returns = returns.vbt.xloc[\"2023-01-03\":\"2024-01-03\"].get()\n",
    "outsample_close = close.vbt.xloc[\"2024-01-03\":\"2025-01-03\"].obj\n",
    "outsample_returns = returns.vbt.xloc[\"2024-01-03\":\"2025-01-03\"].obj\n",
    "\n",
    "insample_close.info()\n",
    "outsample_close.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from statsmodels.tsa.ar_model import AutoReg\n",
    "\n",
    "\n",
    "# Fit the AutoReg model\n",
    "model = AutoReg(insample_close, lags=5, trend=\"ct\").fit()\n",
    "\n",
    "# Generate vectorized predictions\n",
    "predictions = model.predict(start=1, end=len(insample_close))\n",
    "predictions.index = insample_close.index\n",
    "\n",
    "# # Generate predictions for the differenced data\n",
    "# differenced_predictions = result.predict(start=1, end=len(insample_returns))\n",
    "\n",
    "# differenced_predictions.index = insample_returns.index\n",
    "# # Back-transform the differenced predictions to the original scale\n",
    "# predictions = insample_close.shift(1) + differenced_predictions\n",
    "\n",
    "pane1 = Panel(\n",
    "    ohlcv=(),       #(series, entries, exits, other_markers)\n",
    "    histogram=[],   # [(series, name, \"rgba(53, 94, 59, 0.6)\", opacity)]\n",
    "                    #[(series, name, entries, exits, other_markers)]\n",
    "    right=[(insample_close, \"insample close\"),\n",
    "           (predictions, \"prediction next close lag1\")\n",
    "            ],\n",
    "    # left = [        \n",
    "    #        (differenced_predictions, \"returns_predicted\"),\n",
    "    #        (insample_returns, \"insample returns\"),],\n",
    ")\n",
    "ch = chart([pane1], size=\"s\",precision=6, title=\"AutoReg prediction\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "# Fit the ARIMA model on the differenced data\n",
    "model = ARIMA(insample_returns, order=(6, 1, 6))  # NOTE(review): the input is already differenced (returns), yet the order uses d=1 rather than d=0 - confirm which is intended\n",
    "result = model.fit()\n",
    "\n",
    "# Generate predictions for the differenced data\n",
    "differenced_predictions = result.predict(start=1, end=len(insample_returns))\n",
    "\n",
    "differenced_predictions.index = insample_returns.index\n",
    "# Back-transform the differenced predictions to the original scale\n",
    "predictions = insample_close.shift(1) + differenced_predictions\n",
    "\n",
    "pane1 = Panel(\n",
    "    ohlcv=(),       #(series, entries, exits, other_markers)\n",
    "    histogram=[],   # [(series, name, \"rgba(53, 94, 59, 0.6)\", opacity)]\n",
    "                    #[(series, name, entries, exits, other_markers)]\n",
    "    right=[(insample_close, \"insample close\"),\n",
    "           (predictions, \"prediction close\")\n",
    "            ],\n",
    "    left = [        \n",
    "           (differenced_predictions, \"returns_predicted\"),\n",
    "           (insample_returns, \"insample returns\"),],\n",
    ")\n",
    "ch = chart([pane1], size=\"s\",precision=6, title=\"ARIMA prediction\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Initialize an array to store the predictions\n",
    "predictions = [np.nan]  # No prediction for the first point\n",
    "\n",
    "# Rolling one-step-ahead forecasts\n",
    "for t in range(10, len(close)):  # Start from 10 to ensure enough data points\n",
    "    model = ARIMA(close[:t], order=(1, 1, 1))  # Fit ARIMA model up to time t-1\n",
    "    result = model.fit()\n",
    "    forecast = result.forecast(steps=1)\n",
    "    predictions.append(forecast.iloc[0])  # Store the forecast\n",
    "\n",
    "# Pad the predictions to align with the original series\n",
    "predictions = [np.nan] * (len(close) - len(predictions)) + predictions\n",
    "\n",
    "# Convert predictions to a Pandas Series\n",
    "predictions = pd.Series(predictions, index=close.index)\n",
    "\n",
    "pane1 = Panel(\n",
    "    ohlcv=(),       #(series, entries, exits, other_markers)\n",
    "    histogram=[],   # [(series, name, \"rgba(53, 94, 59, 0.6)\", opacity)]\n",
    "                    #[(series, name, entries, exits, other_markers)]\n",
    "    right=[(close, \"real close\"),\n",
    "           (predictions, \"real close\")\n",
    "            ],\n",
    "    left = [        \n",
    "           (log_returns, \"log_returns\"),\n",
    "           (returns, \"returns\"),],\n",
    ")\n",
    "ch = chart([pane1], size=\"s\", session=None, precision=6, title=\"One step ahead ARIMA prediction\")\n",
    "\n",
    "\n"
   ]
 =======
   "source": []
 >>>>>>> parent of 7cc9f86... vbt pipeline edits
  },
  {
   "cell_type": "code",