eng translation

This commit is contained in:
David Brazda
2024-08-30 21:06:06 +02:00
parent c11ed9d474
commit 51ba16dbe3

View File

@ -13,14 +13,14 @@
"\n", "\n",
"## Note\n", "## Note\n",
"\n", "\n",
"Order disbalance nepodminuje zmenu ceny (tzn. muze byt order disbalance na buy stranu, ale cena nemusi jit nahoru a naopak)\n", "The order imbalance does not necessarily cause a price change (i.e., there can be an order imbalance on the buy side, but the price does not have to go up, and vice versa). However, if there is a prolonged imbalance without a price change, it could indicate something.\n",
"Nicmene pokud je disbalance delsi a nedochazi ke zmene ceny - může to něco indikovat. \n", "\n",
"Vytvořit si kumulativní disbalance - kumulátory, které se budou načítat, když se budou silné disbalance, bez změny ceny. Tento akumulátor se bude nabíjet disbalancí a vybíjet příslušnou změnou ceny." "Create a cumulative imbalance — accumulators that will build up when there are strong imbalances without a price change. This accumulator will charge up with the imbalance and discharge with the corresponding price change."
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 2,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -54,9 +54,37 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 3,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"trades_df-BAC-2024-01-01T09_30_00-2024-05-14T16_00_00-CO4B7VPWUZF-100.parquet\n",
"trades_df-BAC-2024-01-11T09:30:00-2024-01-12T16:00:00.parquet\n",
"trades_df-SPY-2024-01-01T09:30:00-2024-05-14T16:00:00.parquet\n",
"trades_df-BAC-2023-01-01T09_30_00-2024-05-25T16_00_00-47BCFOPUVWZ-100.parquet\n",
"ohlcv_df-BAC-2024-01-11T09:30:00-2024-01-12T16:00:00.parquet\n",
"trades_df-BAC-2024-05-15T09_30_00-2024-05-25T16_00_00-47BCFOPUVWZ-100.parquet\n",
"ohlcv_df-BAC-2024-01-01T09_30_00-2024-05-25T16_00_00-47BCFOPUVWZ-100.parquet\n",
"ohlcv_df-SPY-2024-01-01T09:30:00-2024-05-14T16:00:00.parquet\n",
"ohlcv_df-BAC-2024-01-01T09_30_00-2024-05-14T16_00_00-CO4B7VPWUZF-100.parquet\n",
"ohlcv_df-BAC-2023-01-01T09_30_00-2024-05-25T16_00_00-47BCFOPUVWZ-100.parquet\n",
"ohlcv_df-BAC-2023-01-01T09_30_00-2024-05-25T15_30_00-47BCFOPUVWZ-100.parquet\n"
]
},
{
"data": {
"text/plain": [
"351"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"# Define the market open and close times\n", "# Define the market open and close times\n",
"market_open = datetime.time(9, 30)\n", "market_open = datetime.time(9, 30)\n",
@ -139,11 +167,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
<<<<<<< HEAD
"execution_count": null, "execution_count": null,
=======
"execution_count": 5,
>>>>>>> parent of 7cc9f86... vbt pipeline edits
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -161,276 +185,7 @@
"execution_count": null, "execution_count": null,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
<<<<<<< HEAD
"source": [
"m30data.close.lw.plot()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Stationarity test (ADF)\n",
"If the p-value > 0.05, we need to find the order of differencing. Use returns (current price - previous price)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import time\n",
"from statsmodels.tsa.stattools import adfuller\n",
"\n",
"# Start the timer\n",
"start_time = time.time()\n",
"\n",
"# Perform the Augmented Dickey-Fuller test to check for stationarity\n",
"result = adfuller(m30data.xloc[:500].get(\"Close\"))\n",
"\n",
"# Stop the timer\n",
"end_time = time.time()\n",
"\n",
"# Print the results of the ADF test\n",
"print(result)\n",
"print('ADF Statistic: %f' % result[0])\n",
"print('p-value: %f' % result[1])\n",
"\n",
"# Print the time taken\n",
"print('Time taken: %f seconds' % (end_time - start_time))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Plot of close price vs log returns of close price vs just returns (delta)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"close = m30data.get(\"Close\")\n",
"log_returns = np.log(close) - np.log(close.shift(1))\n",
"returns = close - close.shift(1) #or close.diff()\n",
"/\n",
"\n",
"pane1 = Panel(\n",
" ohlcv=(), #(series, entries, exits, other_markers)\n",
" histogram=[], # [(series, name, \"rgba(53, 94, 59, 0.6)\", opacity)]\n",
" #[(series, name, entries, exits, other_markers)]\n",
" right=[(close, \"close\"),\n",
" ],\n",
" left = [ \n",
" (log_returns, \"log_returns\"),\n",
" (returns, \"returns\"),\n",
" # (ret_log_diff, \"ret_log_diff\"),\n",
" ],\n",
")\n",
"ch = chart([pane1], size=\"s\", precision=6)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from statsmodels.graphics.tsaplots import plot_acf, plot_pacf\n",
"plot_acf(returns)\n",
"#no autocorrelation"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from statsmodels.tsa.arima.model import ARIMA\n",
"# Fit an ARIMA model\n",
"model = ARIMA(returns, order=(0, 1, 0)) # ARIMA(0, 1, 0) is a simple random walk model\n",
"result = model.fit()\n",
"\n",
"# Print summary of the model\n",
"#print(result.summary())\n",
"\n",
"\n",
"result.resid.lw.plot(session=None)\n",
"\n",
"plot_acf(result.resid, title='ACF of Residuals')\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from statsmodels.graphics.tsaplots import month_plot\n",
"# import matplotlib.pyplot as plt\n",
"# plot_pacf(close, title='PACF of Close', lags=30)\n",
"# plot_acf(close, title='ACF of Close', lags=30)\n",
"month_plot(close)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"close.lw.plot()\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"returns"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"close.vbt.xloc[\"2023-01-03\":\"2024-01-03\"].obj\n",
"# [\"01-03-2023\":\"01-03-2024\"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"insample_close = close.vbt.xloc[\"2023-01-03\":\"2024-01-03\"].get()\n",
"insample_returns = returns.vbt.xloc[\"2023-01-03\":\"2024-01-03\"].get()\n",
"outsample_close = close.vbt.xloc[\"2024-01-03\":\"2025-01-03\"].obj\n",
"outsample_returns = returns.vbt.xloc[\"2024-01-03\":\"2025-01-03\"].obj\n",
"\n",
"insample_close.info()\n",
"outsample_close.info()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from statsmodels.tsa.ar_model import AutoReg\n",
"\n",
"\n",
"# Fit the AutoReg model\n",
"model = AutoReg(insample_close, lags=5, trend=\"ct\").fit()\n",
"\n",
"# Generate vectorized predictions\n",
"predictions = model.predict(start=1, end=len(insample_close))\n",
"predictions.index = insample_close.index\n",
"\n",
"# # Generate predictions for the differenced data\n",
"# differenced_predictions = result.predict(start=1, end=len(insample_returns))\n",
"\n",
"# differenced_predictions.index = insample_returns.index\n",
"# # Back-transform the differenced predictions to the original scale\n",
"# predictions = insample_close.shift(1) + differenced_predictions\n",
"\n",
"pane1 = Panel(\n",
" ohlcv=(), #(series, entries, exits, other_markers)\n",
" histogram=[], # [(series, name, \"rgba(53, 94, 59, 0.6)\", opacity)]\n",
" #[(series, name, entries, exits, other_markers)]\n",
" right=[(insample_close, \"insample close\"),\n",
" (predictions, \"prediction next close lag1\")\n",
" ],\n",
" # left = [ \n",
" # (differenced_predictions, \"returns_predicted\"),\n",
" # (insample_returns, \"insample returns\"),],\n",
")\n",
"ch = chart([pane1], size=\"s\",precision=6, title=\"AutoReg prediction\")\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n",
"# Fit the ARIMA model on the differenced data\n",
"model = ARIMA(insample_returns, order=(6, 1, 6)) # Note the differenced data, so d=0 here\n",
"result = model.fit()\n",
"\n",
"# Generate predictions for the differenced data\n",
"differenced_predictions = result.predict(start=1, end=len(insample_returns))\n",
"\n",
"differenced_predictions.index = insample_returns.index\n",
"# Back-transform the differenced predictions to the original scale\n",
"predictions = insample_close.shift(1) + differenced_predictions\n",
"\n",
"pane1 = Panel(\n",
" ohlcv=(), #(series, entries, exits, other_markers)\n",
" histogram=[], # [(series, name, \"rgba(53, 94, 59, 0.6)\", opacity)]\n",
" #[(series, name, entries, exits, other_markers)]\n",
" right=[(insample_close, \"insample close\"),\n",
" (predictions, \"prediction close\")\n",
" ],\n",
" left = [ \n",
" (differenced_predictions, \"returns_predicted\"),\n",
" (insample_returns, \"insample returns\"),],\n",
")\n",
"ch = chart([pane1], size=\"s\",precision=6, title=\"ARIMA prediction\")\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Initialize an array to store the predictions\n",
"predictions = [np.nan] # No prediction for the first point\n",
"\n",
"# Rolling one-step-ahead forecasts\n",
"for t in range(10, len(close)): # Start from 10 to ensure enough data points\n",
" model = ARIMA(close[:t], order=(1, 1, 1)) # Fit ARIMA model up to time t-1\n",
" result = model.fit()\n",
" forecast = result.forecast(steps=1)\n",
" predictions.append(forecast.iloc[0]) # Store the forecast\n",
"\n",
"# Pad the predictions to align with the original series\n",
"predictions = [np.nan] * (len(close) - len(predictions)) + predictions\n",
"\n",
"# Convert predictions to a Pandas Series\n",
"predictions = pd.Series(predictions, index=close.index)\n",
"\n",
"pane1 = Panel(\n",
" ohlcv=(), #(series, entries, exits, other_markers)\n",
" histogram=[], # [(series, name, \"rgba(53, 94, 59, 0.6)\", opacity)]\n",
" #[(series, name, entries, exits, other_markers)]\n",
" right=[(close, \"real close\"),\n",
" (predictions, \"real close\")\n",
" ],\n",
" left = [ \n",
" (log_returns, \"log_returns\"),\n",
" (returns, \"returns\"),],\n",
")\n",
"ch = chart([pane1], size=\"s\", session=None, precision=6, title=\"One step ahead ARIMA prediction\")\n",
"\n",
"\n"
]
=======
"source": [] "source": []
>>>>>>> parent of 7cc9f86... vbt pipeline edits
}, },
{ {
"cell_type": "code", "cell_type": "code",