Compare commits
3 Commits
7188b2d003
...
48db2bc9de
| Author | SHA1 | Date | |
|---|---|---|---|
| 48db2bc9de | |||
| 978cd7e2be | |||
| b974ab7d39 |
@ -84,7 +84,7 @@ jupyterlab_server==2.27.1
|
|||||||
jupyterlab_widgets==3.0.13
|
jupyterlab_widgets==3.0.13
|
||||||
kiwisolver==1.4.5
|
kiwisolver==1.4.5
|
||||||
korean-lunar-calendar==0.3.1
|
korean-lunar-calendar==0.3.1
|
||||||
lightweight_charts @ git+https://github.com/drew2323/lightweight-charts-python.git@35f029714b23c18abe791b90a85447e959c72258
|
lightweight_charts @ git+https://github.com/drew2323/lightweight-charts-python.git@7986aa9195d9d3204a998d1a8f5778d95219a08e
|
||||||
llvmlite==0.39.1
|
llvmlite==0.39.1
|
||||||
lxml==5.3.0
|
lxml==5.3.0
|
||||||
markdown-it-py==3.0.0
|
markdown-it-py==3.0.0
|
||||||
@ -121,9 +121,11 @@ platformdirs==4.2.2
|
|||||||
plotly==5.24.0
|
plotly==5.24.0
|
||||||
prometheus_client==0.21.0
|
prometheus_client==0.21.0
|
||||||
prompt_toolkit==3.0.47
|
prompt_toolkit==3.0.47
|
||||||
|
protobuf==5.28.2
|
||||||
proxy-tools==0.1.0
|
proxy-tools==0.1.0
|
||||||
psutil==6.0.0
|
psutil==6.0.0
|
||||||
psycopg2==2.9.9
|
psycopg2==2.9.9
|
||||||
|
psycopg2-binary==2.9.9
|
||||||
ptyprocess==0.7.0
|
ptyprocess==0.7.0
|
||||||
pure_eval==0.2.3
|
pure_eval==0.2.3
|
||||||
pyarrow==11.0.0
|
pyarrow==11.0.0
|
||||||
@ -147,6 +149,7 @@ PyWavelets==1.7.0
|
|||||||
pywebview==5.2
|
pywebview==5.2
|
||||||
PyYAML==6.0.2
|
PyYAML==6.0.2
|
||||||
pyzmq==26.2.0
|
pyzmq==26.2.0
|
||||||
|
ray==2.37.0
|
||||||
referencing==0.35.1
|
referencing==0.35.1
|
||||||
regex==2024.7.24
|
regex==2024.7.24
|
||||||
requests==2.32.3
|
requests==2.32.3
|
||||||
@ -166,6 +169,7 @@ soupsieve==2.6
|
|||||||
SQLAlchemy==2.0.32
|
SQLAlchemy==2.0.32
|
||||||
sseclient-py==1.8.0
|
sseclient-py==1.8.0
|
||||||
stack-data==0.6.3
|
stack-data==0.6.3
|
||||||
|
stratlab_db @ git+https://gitea.stratlab.dev/Stratlab/db.git@0bbe486de7ac410a9375f2ccf7d557a658a662ea
|
||||||
stumpy==1.13.0
|
stumpy==1.13.0
|
||||||
TA-Lib==0.4.32
|
TA-Lib==0.4.32
|
||||||
tenacity==9.0.0
|
tenacity==9.0.0
|
||||||
@ -193,7 +197,7 @@ webencodings==0.5.1
|
|||||||
websocket-client==1.8.0
|
websocket-client==1.8.0
|
||||||
websockets==11.0.3
|
websockets==11.0.3
|
||||||
Werkzeug==3.0.4
|
Werkzeug==3.0.4
|
||||||
widgetsnbextension==4.0.9
|
widgetsnbextension==4.0.13
|
||||||
yarl==1.13.1
|
yarl==1.13.1
|
||||||
yfinance==0.2.43
|
yfinance==0.2.43
|
||||||
zipp==3.20.1
|
zipp==3.20.1
|
||||||
|
|||||||
@ -13,9 +13,38 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": 2,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"trades_df-BAC-2024-01-01T09_30_00-2024-05-14T16_00_00-CO4B7VPWUZF-100.parquet\n",
|
||||||
|
"trades_df-BAC-2024-01-11T09:30:00-2024-01-12T16:00:00.parquet\n",
|
||||||
|
"trades_df-SPY-2024-01-01T09:30:00-2024-05-14T16:00:00.parquet\n",
|
||||||
|
"trades_df-BAC-2023-01-01T09_30_00-2024-05-25T16_00_00-47BCFOPUVWZ-100.parquet\n",
|
||||||
|
"ohlcv_df-BAC-2024-01-11T09:30:00-2024-01-12T16:00:00.parquet\n",
|
||||||
|
"trades_df-BAC-2023-01-01T09:30:00-2024-10-02T16:00:00-['4', '7', 'B', 'C', 'F', 'O', 'P', 'U', 'V', 'W', 'Z']-100.parquet\n",
|
||||||
|
"trades_df-BAC-2024-05-15T09_30_00-2024-05-25T16_00_00-47BCFOPUVWZ-100.parquet\n",
|
||||||
|
"ohlcv_df-BAC-2024-01-01T09_30_00-2024-05-25T16_00_00-47BCFOPUVWZ-100.parquet\n",
|
||||||
|
"ohlcv_df-SPY-2024-01-01T09:30:00-2024-05-14T16:00:00.parquet\n",
|
||||||
|
"ohlcv_df-BAC-2024-01-01T09_30_00-2024-05-14T16_00_00-CO4B7VPWUZF-100.parquet\n",
|
||||||
|
"ohlcv_df-BAC-2023-01-01T09_30_00-2024-05-25T16_00_00-47BCFOPUVWZ-100.parquet\n",
|
||||||
|
"ohlcv_df-BAC-2023-01-01T09_30_00-2024-05-25T15_30_00-47BCFOPUVWZ-100.parquet\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"['4', '7', 'B', 'C', 'F', 'O', 'P', 'U', 'V', 'W', 'Z']"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 2,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"import pandas as pd\n",
|
"import pandas as pd\n",
|
||||||
"import numpy as np\n",
|
"import numpy as np\n",
|
||||||
@ -45,10 +74,10 @@
|
|||||||
"exclude_conditions = cfh.config_handler.get_val('AGG_EXCLUDED_TRADES') #standard ['C','O','4','B','7','V','P','W','U','Z','F']\n",
|
"exclude_conditions = cfh.config_handler.get_val('AGG_EXCLUDED_TRADES') #standard ['C','O','4','B','7','V','P','W','U','Z','F']\n",
|
||||||
"minsize = 100\n",
|
"minsize = 100\n",
|
||||||
"\n",
|
"\n",
|
||||||
"symbol = \"SPY\"\n",
|
"symbol = \"BAC\"\n",
|
||||||
"#datetime in zoneNY \n",
|
"#datetime in zoneNY \n",
|
||||||
"day_start = datetime(2024, 1, 1, 9, 30, 0)\n",
|
"day_start = datetime(2023, 1, 1, 9, 30, 0)\n",
|
||||||
"day_stop = datetime(2024, 1, 14, 16, 00, 0)\n",
|
"day_stop = datetime(2024, 10, 2, 16, 00, 0)\n",
|
||||||
"day_start = zoneNY.localize(day_start)\n",
|
"day_start = zoneNY.localize(day_start)\n",
|
||||||
"day_stop = zoneNY.localize(day_stop)\n",
|
"day_stop = zoneNY.localize(day_stop)\n",
|
||||||
"#filename of trades_df parquet, dates are in ISO format but without the time zone part\n",
|
"#filename of trades_df parquet, dates are in ISO format but without the time zone part\n",
|
||||||
@ -56,13 +85,15 @@
|
|||||||
"#parquet interval cache contains exclude conditions and minsize filtering\n",
|
"#parquet interval cache contains exclude conditions and minsize filtering\n",
|
||||||
"file_trades = dir + f\"trades_df-{symbol}-{day_start.strftime('%Y-%m-%dT%H:%M:%S')}-{day_stop.strftime('%Y-%m-%dT%H:%M:%S')}-{exclude_conditions}-{minsize}.parquet\"\n",
|
"file_trades = dir + f\"trades_df-{symbol}-{day_start.strftime('%Y-%m-%dT%H:%M:%S')}-{day_stop.strftime('%Y-%m-%dT%H:%M:%S')}-{exclude_conditions}-{minsize}.parquet\"\n",
|
||||||
"#file_trades = dir + f\"trades_df-{symbol}-{day_start.strftime('%Y-%m-%dT%H:%M:%S')}-{day_stop.strftime('%Y-%m-%dT%H:%M:%S')}.parquet\"\n",
|
"#file_trades = dir + f\"trades_df-{symbol}-{day_start.strftime('%Y-%m-%dT%H:%M:%S')}-{day_stop.strftime('%Y-%m-%dT%H:%M:%S')}.parquet\"\n",
|
||||||
"file_ohlcv = dir + f\"ohlcv_df-{symbol}-{day_start.strftime('%Y-%m-%dT%H:%M:%S')}-{day_stop.strftime('%Y-%m-%dT%H:%M:%S')}-{exclude_conditions}-{minsize}.parquet\"\n",
|
"file_ohlcv = dir + f\"ohlcv_df-{symbol}-{day_start.strftime('%Y-%m-%dT%H:%M:%S')}-{day_stop.strftime('%Y-%m-%dT%H:%M:%S')}-{str(exclude_conditions)}-{minsize}.parquet\"\n",
|
||||||
"\n",
|
"\n",
|
||||||
"#PRINT all parquet in directory\n",
|
"#PRINT all parquet in directory\n",
|
||||||
"import os\n",
|
"import os\n",
|
||||||
"files = [f for f in os.listdir(dir) if f.endswith(\".parquet\")]\n",
|
"files = [f for f in os.listdir(dir) if f.endswith(\".parquet\")]\n",
|
||||||
"for f in files:\n",
|
"for f in files:\n",
|
||||||
" print(f)"
|
" print(f)\n",
|
||||||
|
"\n",
|
||||||
|
"exclude_conditions"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -71,13 +102,26 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"trades_df = fetch_daily_stock_trades(symbol, day_start, day_stop, exclude_conditions=exclude_conditions, minsize=minsize, force_remote=False, max_retries=5, backoff_factor=1)\n",
|
"#fetch trades in one go\n",
|
||||||
"trades_df"
|
"#trades_df = fetch_daily_stock_trades(symbol, day_start, day_stop, exclude_conditions=exclude_conditions, minsize=minsize, force_remote=False, max_retries=5, backoff_factor=1)\n",
|
||||||
|
"#fetch trades in parallel - for longer intervals\n",
|
||||||
|
"#trades_df = fetch_trades_parallel(symbol, day_start, day_stop, exclude_conditions=exclude_conditions, minsize=minsize, force_remote=False, max_workers=None)\n",
|
||||||
|
" \n",
|
||||||
|
"##trades_df.info()"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": 7,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"#trades_df.to_parquet(file_trades, engine='pyarrow', compression='gzip')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 3,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
|
|||||||
1662
research/indicators/eval_robustness.ipynb
Normal file
1662
research/indicators/eval_robustness.ipynb
Normal file
File diff suppressed because one or more lines are too long
@ -11,7 +11,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 1,
|
"execution_count": 2,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
@ -68,7 +68,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 2,
|
"execution_count": 3,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
@ -80,7 +80,9 @@
|
|||||||
"trades_df-SPY-2024-01-01T09:30:00-2024-05-14T16:00:00.parquet\n",
|
"trades_df-SPY-2024-01-01T09:30:00-2024-05-14T16:00:00.parquet\n",
|
||||||
"trades_df-BAC-2023-01-01T09_30_00-2024-05-25T16_00_00-47BCFOPUVWZ-100.parquet\n",
|
"trades_df-BAC-2023-01-01T09_30_00-2024-05-25T16_00_00-47BCFOPUVWZ-100.parquet\n",
|
||||||
"ohlcv_df-BAC-2024-01-11T09:30:00-2024-01-12T16:00:00.parquet\n",
|
"ohlcv_df-BAC-2024-01-11T09:30:00-2024-01-12T16:00:00.parquet\n",
|
||||||
|
"trades_df-BAC-2023-01-01T09:30:00-2024-10-02T16:00:00-['4', '7', 'B', 'C', 'F', 'O', 'P', 'U', 'V', 'W', 'Z']-100.parquet\n",
|
||||||
"trades_df-BAC-2024-05-15T09_30_00-2024-05-25T16_00_00-47BCFOPUVWZ-100.parquet\n",
|
"trades_df-BAC-2024-05-15T09_30_00-2024-05-25T16_00_00-47BCFOPUVWZ-100.parquet\n",
|
||||||
|
"ohlcv_df-BAC-2023-01-01T09:30:00-2024-10-02T16:00:00-['4', '7', 'B', 'C', 'F', 'O', 'P', 'U', 'V', 'W', 'Z']-100.parquet\n",
|
||||||
"ohlcv_df-BAC-2024-01-01T09_30_00-2024-05-25T16_00_00-47BCFOPUVWZ-100.parquet\n",
|
"ohlcv_df-BAC-2024-01-01T09_30_00-2024-05-25T16_00_00-47BCFOPUVWZ-100.parquet\n",
|
||||||
"ohlcv_df-SPY-2024-01-01T09:30:00-2024-05-14T16:00:00.parquet\n",
|
"ohlcv_df-SPY-2024-01-01T09:30:00-2024-05-14T16:00:00.parquet\n",
|
||||||
"ohlcv_df-BAC-2024-01-01T09_30_00-2024-05-14T16_00_00-CO4B7VPWUZF-100.parquet\n",
|
"ohlcv_df-BAC-2024-01-01T09_30_00-2024-05-14T16_00_00-CO4B7VPWUZF-100.parquet\n",
|
||||||
@ -94,7 +96,7 @@
|
|||||||
"5"
|
"5"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"execution_count": 2,
|
"execution_count": 3,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"output_type": "execute_result"
|
"output_type": "execute_result"
|
||||||
}
|
}
|
||||||
@ -129,36 +131,22 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 3,
|
"execution_count": 1,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"name": "stdout",
|
"ename": "NameError",
|
||||||
"output_type": "stream",
|
"evalue": "name 'basic_data' is not defined",
|
||||||
"text": [
|
"output_type": "error",
|
||||||
"<class 'pandas.core.frame.DataFrame'>\n",
|
"traceback": [
|
||||||
"DatetimeIndex: 57966 entries, 2024-02-12 09:30:00-05:00 to 2024-02-16 15:59:59-05:00\n",
|
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
||||||
"Data columns (total 10 columns):\n",
|
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
|
||||||
" # Column Non-Null Count Dtype \n",
|
"Cell \u001b[0;32mIn[1], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mbasic_data\u001b[49m\u001b[38;5;241m.\u001b[39mdata[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mBAC\u001b[39m\u001b[38;5;124m\"\u001b[39m]\u001b[38;5;241m.\u001b[39minfo()\n\u001b[1;32m 3\u001b[0m df \u001b[38;5;241m=\u001b[39m basic_data\u001b[38;5;241m.\u001b[39mdata[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mBAC\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[1;32m 5\u001b[0m nan_rows \u001b[38;5;241m=\u001b[39m df[df\u001b[38;5;241m.\u001b[39misna()\u001b[38;5;241m.\u001b[39many(axis\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1\u001b[39m)]\n",
|
||||||
"--- ------ -------------- ----- \n",
|
"\u001b[0;31mNameError\u001b[0m: name 'basic_data' is not defined"
|
||||||
" 0 open 57966 non-null float64 \n",
|
|
||||||
" 1 high 57966 non-null float64 \n",
|
|
||||||
" 2 low 57966 non-null float64 \n",
|
|
||||||
" 3 close 57966 non-null float64 \n",
|
|
||||||
" 4 volume 57966 non-null float64 \n",
|
|
||||||
" 5 trades 57966 non-null float64 \n",
|
|
||||||
" 6 updated 57966 non-null datetime64[ns, US/Eastern]\n",
|
|
||||||
" 7 vwap 57966 non-null float64 \n",
|
|
||||||
" 8 buyvolume 57966 non-null float64 \n",
|
|
||||||
" 9 sellvolume 57966 non-null float64 \n",
|
|
||||||
"dtypes: datetime64[ns, US/Eastern](1), float64(9)\n",
|
|
||||||
"memory usage: 4.9 MB\n"
|
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": []
|
||||||
"basic_data.data[\"BAC\"].info()"
|
|
||||||
]
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
|
|||||||
1840
research/tests/database_saver.ipynb
Normal file
1840
research/tests/database_saver.ipynb
Normal file
File diff suppressed because one or more lines are too long
126
research/tests/ray_test.ipynb
Normal file
126
research/tests/ray_test.ipynb
Normal file
@ -0,0 +1,126 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 1,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stderr",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"2024-10-03 09:43:41,741\tINFO worker.py:1786 -- Started a local Ray instance.\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"waiting for ray\n",
|
||||||
|
"ray returned all\n",
|
||||||
|
"ray finsihed 0.4031927839969285\n",
|
||||||
|
"worker 0 started\n",
|
||||||
|
"worker 0 finsihed\n",
|
||||||
|
"worker 1 started\n",
|
||||||
|
"worker 1 finsihed\n",
|
||||||
|
"worker 2 started\n",
|
||||||
|
"worker 2 finsihed\n",
|
||||||
|
"worker 3 started\n",
|
||||||
|
"worker 3 finsihed\n",
|
||||||
|
"serial function finsihed 0.21200023603159934\n",
|
||||||
|
"Ray with 4 parts: 0.4031927839969285 seconds\n",
|
||||||
|
"Serial: 0.21200023603159934 seconds\n",
|
||||||
|
"Serial computation is faster than Ray with 4 parts\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"import numpy as np\n",
|
||||||
|
"import timeit\n",
|
||||||
|
"import ray\n",
|
||||||
|
"\n",
|
||||||
|
"# Define the expensive function\n",
|
||||||
|
"@ray.remote\n",
|
||||||
|
"def expensive_function(n):\n",
|
||||||
|
" # Generate a large random matrix\n",
|
||||||
|
" A = np.random.rand(1000, 1000)\n",
|
||||||
|
" B = np.random.rand(1000, 1000)\n",
|
||||||
|
"\n",
|
||||||
|
" # Perform the matrix multiplication\n",
|
||||||
|
" C = np.dot(A, B)\n",
|
||||||
|
" # Return the result\n",
|
||||||
|
" return C\n",
|
||||||
|
"\n",
|
||||||
|
"def expensive_function_serial(n):\n",
|
||||||
|
" print(f\"worker {n} started\")\n",
|
||||||
|
" # Generate a large random matrix\n",
|
||||||
|
" A = np.random.rand(1000, 1000)\n",
|
||||||
|
" B = np.random.rand(1000, 1000)\n",
|
||||||
|
"\n",
|
||||||
|
" # Perform the matrix multiplication\n",
|
||||||
|
" C = np.dot(A, B)\n",
|
||||||
|
"\n",
|
||||||
|
" # Return the result\n",
|
||||||
|
" print(f\"worker {n} finsihed\")\n",
|
||||||
|
" return C\n",
|
||||||
|
"\n",
|
||||||
|
"# Initialize Ray\n",
|
||||||
|
"ray.init()\n",
|
||||||
|
"\n",
|
||||||
|
"# Launch 4 remote tasks to distribute the work\n",
|
||||||
|
"futures = [expensive_function.remote(_) for _ in range(4)]\n",
|
||||||
|
"\n",
|
||||||
|
"# Time how long it takes to collect the 4 Ray results (the tasks were already submitted above)\n",
|
||||||
|
"start_time = timeit.default_timer()\n",
|
||||||
|
"print(\"waiting for ray\")\n",
|
||||||
|
"results = ray.get(futures)\n",
|
||||||
|
"print(\"ray returned all\")\n",
|
||||||
|
"end_time = timeit.default_timer()\n",
|
||||||
|
"ray_time = end_time - start_time\n",
|
||||||
|
"print(\"ray finsihed\", ray_time)\n",
|
||||||
|
"\n",
|
||||||
|
"# Time the function serially\n",
|
||||||
|
"start_time = timeit.default_timer()\n",
|
||||||
|
"results = [expensive_function_serial(_) for _ in range(4)]\n",
|
||||||
|
"end_time = timeit.default_timer()\n",
|
||||||
|
"serial_time = end_time - start_time\n",
|
||||||
|
"print(\"serial function finsihed\", serial_time)\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"# Print the results\n",
|
||||||
|
"print(f\"Ray with 4 parts: {ray_time} seconds\")\n",
|
||||||
|
"print(f\"Serial: {serial_time} seconds\")\n",
|
||||||
|
"\n",
|
||||||
|
"# Compare the results\n",
|
||||||
|
"if ray_time < serial_time:\n",
|
||||||
|
" print(\"Ray with 4 parts is faster than serial computation\")\n",
|
||||||
|
"else:\n",
|
||||||
|
" print(\"Serial computation is faster than Ray with 4 parts\")\n",
|
||||||
|
"\n",
|
||||||
|
"# Shutdown Ray\n",
|
||||||
|
"ray.shutdown()\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": ".venv",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.10.11"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
||||||
1012
to_explore/1_principal_components.ipynb
Normal file
1012
to_explore/1_principal_components.ipynb
Normal file
File diff suppressed because it is too large
Load Diff
874
to_explore/markov_variance_switching.ipynb
Normal file
874
to_explore/markov_variance_switching.ipynb
Normal file
@ -0,0 +1,874 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Markov Variance Switching\n",
|
||||||
|
"\n",
|
||||||
|
"Kim, C., Nelson, C., and Startz, R. (1998). Testing for mean reversion in heteroskedastic data based on Gibbs-sampling-augmented randomization. Journal of Empirical Finance, 5(2), pp. 131-154.\n",
|
||||||
|
"\n",
|
||||||
|
"**Author:** shittles\n",
|
||||||
|
"\n",
|
||||||
|
"**Created:** 2024-09-18\n",
|
||||||
|
"\n",
|
||||||
|
"**Modified:** 2024-09-19\n",
|
||||||
|
"\n",
|
||||||
|
"## Changelog\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 1,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import ray\n",
|
||||||
|
"import numpy as np\n",
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"import plotly.express as px\n",
|
||||||
|
"import plotly.graph_objects as go\n",
|
||||||
|
"import statsmodels.api as sm\n",
|
||||||
|
"\n",
|
||||||
|
"from sklearn.compose import make_column_transformer\n",
|
||||||
|
"from sklearn.preprocessing import RobustScaler\n",
|
||||||
|
"\n",
|
||||||
|
"from vectorbtpro import *"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"ray.init()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 3,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"vbt.settings.set_theme(\"dark\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Ingestion\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 4,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"symbol = \"^GSPC\" # the S&P 500 ticker"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"data = vbt.YFData.pull(\n",
|
||||||
|
" symbol, start=\"50 years ago\", end=\"today\", timeframe=\"daily\", tz=\"UTC\"\n",
|
||||||
|
") # 50 years of data\n",
|
||||||
|
"\n",
|
||||||
|
"data.stats()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"data.data[symbol]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"data.data[symbol].index"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# The opens are corrupt...\n",
|
||||||
|
"data.plot(yaxis=dict(type=\"log\")).show()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# but the closes are fine.\n",
|
||||||
|
"data.data[\"^GSPC\"][\"Close\"].vbt.plot(yaxis=dict(type=\"log\")).show()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Cleaning\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"data.data[symbol][\"Dividends\"].any()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"data.data[symbol][\"Stock Splits\"].any()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 12,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"data = data.remove_features([\"Dividends\", \"Stock Splits\"])"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 13,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# data = data.transform(lambda df: df.loc[\"April 19th 1982\" < df.index])"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 14,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# data.plot(yaxis=dict(type=\"log\")).show()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 15,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# len(data.index) / 365.25 # 30 years of data remaining"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Modelling\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 16,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# sr_open = data.get(\"Open\")\n",
|
||||||
|
"# sr_high = data.get(\"High\")\n",
|
||||||
|
"# sr_low = data.get(\"Low\")\n",
|
||||||
|
"sr_close = data.get(\"Close\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 17,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# sr_log_open = np.log(sr_open)\n",
|
||||||
|
"# sr_log_high = np.log(sr_high)\n",
|
||||||
|
"# sr_log_low = np.log(sr_low)\n",
|
||||||
|
"sr_log_close = np.log(sr_close)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 18,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"sr_log_returns = data.log_returns"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"sr_log_returns.vbt.plot().show()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 20,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"column_transformer = make_column_transformer(\n",
|
||||||
|
" (RobustScaler(), [symbol]),\n",
|
||||||
|
")\n",
|
||||||
|
"\n",
|
||||||
|
"sr_log_returns_scaled = pd.Series(\n",
|
||||||
|
" data=column_transformer.fit_transform(pd.DataFrame(sr_log_returns)).ravel(),\n",
|
||||||
|
" index=sr_log_returns.index,\n",
|
||||||
|
")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"sr_log_returns_scaled.vbt.plot().show()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 22,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"k_regimes_kns = 3"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"kns = sm.tsa.MarkovRegression(\n",
|
||||||
|
" sr_log_returns_scaled, k_regimes=k_regimes_kns, trend=\"n\", switching_variance=True\n",
|
||||||
|
")\n",
|
||||||
|
"results_kns = kns.fit()\n",
|
||||||
|
"\n",
|
||||||
|
"results_kns.summary()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"results_kns.filtered_marginal_probabilities # using data until time t (excluding time t+1, ..., T)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"results_kns.smoothed_marginal_probabilities # using data until time T"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"fig = vbt.make_subplots(\n",
|
||||||
|
" rows=k_regimes_kns,\n",
|
||||||
|
" cols=1,\n",
|
||||||
|
" y_title=\"Smoothed Marginal Variance Regime Probabilities\",\n",
|
||||||
|
" shared_xaxes=True,\n",
|
||||||
|
" subplot_titles=[\n",
|
||||||
|
" \"Medium-variance\",\n",
|
||||||
|
" \"Low-variance\",\n",
|
||||||
|
" \"High-variance\",\n",
|
||||||
|
" ], # NOTE: regime order depends on the fit; re-check these titles after refitting\n",
|
||||||
|
")\n",
|
||||||
|
"\n",
|
||||||
|
"for i in range(k_regimes_kns):\n",
|
||||||
|
" fig = results_kns.smoothed_marginal_probabilities[i].vbt.plot(\n",
|
||||||
|
" add_trace_kwargs=dict(row=i + 1, col=1), fig=fig\n",
|
||||||
|
" )\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"fig.update_layout(showlegend=False)\n",
|
||||||
|
"fig.show()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 27,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"def plot_annotated_line(\n",
|
||||||
|
" fig: go.Figure,\n",
|
||||||
|
" x: pd.Series,\n",
|
||||||
|
" y: pd.Series,\n",
|
||||||
|
" classes: pd.Series,\n",
|
||||||
|
" dict_class_colours: dict,\n",
|
||||||
|
" dict_class_labels: dict,\n",
|
||||||
|
") -> go.Figure:\n",
|
||||||
|
" \"\"\"Plot a line chart where each trace is coloured based on its class.\n",
|
||||||
|
"\n",
|
||||||
|
" Yes, plotly really doesn't support this out of the box.\n",
|
||||||
|
"\n",
|
||||||
|
" Args:\n",
|
||||||
|
" fig: Figure.\n",
|
||||||
|
" x: Indices.\n",
|
||||||
|
" y: Close prices.\n",
|
||||||
|
" classes: Regimes.\n",
|
||||||
|
" dict_class_colours: In the format {class: colour}\n",
|
||||||
|
" dict_class_labels: In the format {class: label}\n",
|
||||||
|
"\n",
|
||||||
|
" Returns:\n",
|
||||||
|
" fig: The figure.\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" # Plot each segment in its corresponding color.\n",
|
||||||
|
" for i in range(len(x) - 1):\n",
|
||||||
|
" fig.add_trace(\n",
|
||||||
|
" go.Scatter(\n",
|
||||||
|
" x=x[i : i + 2],\n",
|
||||||
|
" y=y[i : i + 2],\n",
|
||||||
|
" mode=\"lines\",\n",
|
||||||
|
" line=dict(color=dict_class_colours[classes[i]], width=2),\n",
|
||||||
|
" showlegend=False, # added manually\n",
|
||||||
|
" )\n",
|
||||||
|
" )\n",
|
||||||
|
"\n",
|
||||||
|
" # Label each regime.\n",
|
||||||
|
" for regime, colour in dict_class_colours.items():\n",
|
||||||
|
" fig.add_trace(\n",
|
||||||
|
" go.Scatter(\n",
|
||||||
|
" x=[None],\n",
|
||||||
|
" y=[None],\n",
|
||||||
|
" mode=\"lines\",\n",
|
||||||
|
" line=dict(color=colour, width=2),\n",
|
||||||
|
" name=dict_class_labels[regime],\n",
|
||||||
|
" )\n",
|
||||||
|
" )\n",
|
||||||
|
"\n",
|
||||||
|
" return fig"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 28,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"sr_variance_regime_forecasts = results_kns.filtered_marginal_probabilities.idxmax(\n",
|
||||||
|
" axis=1\n",
|
||||||
|
")\n",
|
||||||
|
"\n",
|
||||||
|
"sr_variance_regime_predictions = results_kns.smoothed_marginal_probabilities.idxmax(\n",
|
||||||
|
" axis=1\n",
|
||||||
|
")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# sr_variance_regime_forecasts.vbt.plot().show()\n",
|
||||||
|
"sr_variance_regime_predictions.vbt.plot().show()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 30,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# NOTE: regime order depends on the fit; re-check this mapping after refitting\n",
|
||||||
|
"dict_variance_regime_labels = {\n",
|
||||||
|
" 0: \"Medium\",\n",
|
||||||
|
" 1: \"Low\",\n",
|
||||||
|
" 2: \"High\",\n",
|
||||||
|
"}\n",
|
||||||
|
"\n",
|
||||||
|
"dict_variance_regime_colours = {\n",
|
||||||
|
" 0: \"orange\",\n",
|
||||||
|
" 1: \"green\",\n",
|
||||||
|
" 2: \"red\",\n",
|
||||||
|
"}"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"fig = vbt.make_figure()\n",
|
||||||
|
"\n",
|
||||||
|
"fig = plot_annotated_line(\n",
|
||||||
|
" fig,\n",
|
||||||
|
" data.index,\n",
|
||||||
|
" sr_log_close,\n",
|
||||||
|
" # sr_variance_regime_forecasts.rolling(5).mean().fillna(0).round(0),\n",
|
||||||
|
" sr_variance_regime_forecasts,\n",
|
||||||
|
" # sr_variance_regime_predictions,\n",
|
||||||
|
" dict_variance_regime_colours,\n",
|
||||||
|
" dict_variance_regime_labels,\n",
|
||||||
|
")\n",
|
||||||
|
"\n",
|
||||||
|
"fig.update_layout(\n",
|
||||||
|
" title=\"Filtered Variance Regime Labels\",\n",
|
||||||
|
" # title=\"Smoothed Variance Regime Labels\",\n",
|
||||||
|
" xaxis_title=\"Date\",\n",
|
||||||
|
" yaxis_title=\"Log Close\",\n",
|
||||||
|
" showlegend=True,\n",
|
||||||
|
")\n",
|
||||||
|
"\n",
|
||||||
|
"fig.show()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Backtest\n",
|
||||||
|
"### Filtered marginal probabilities\n",
|
||||||
|
"A backtest using filtered marginal probabilities.\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 32,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# TODO - Double check me!\n",
|
||||||
|
"# Assuming that you sell today if yesterday was in the high-variance regime.\n",
|
||||||
|
"# entries = (sr_variance_regime_forecasts != 2).vbt.signals.fshift()\n",
|
||||||
|
"# exits = (sr_variance_regime_forecasts == 2).vbt.signals.fshift()\n",
|
||||||
|
"\n",
|
||||||
|
"# Assuming that you sell today (at the close) if today was in the high-variance regime.\n",
|
||||||
|
"entries = (sr_variance_regime_forecasts != 2)\n",
|
||||||
|
"exits = (sr_variance_regime_forecasts == 2)\n",
|
||||||
|
"\n",
|
||||||
|
"# I haven't tested any additional logic.\n",
|
||||||
|
"# entries = (sr_variance_regime_forecasts.rolling(5).mean().fillna(0).round(0) != 2)\n",
|
||||||
|
"\n",
|
||||||
|
"clean_entries, clean_exits = entries.vbt.signals.clean(exits)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"fig = sr_variance_regime_forecasts.vbt.plot()\n",
|
||||||
|
"\n",
|
||||||
|
"clean_entries.vbt.signals.plot_as_entries(sr_variance_regime_forecasts, fig=fig)\n",
|
||||||
|
"clean_exits.vbt.signals.plot_as_exits(sr_variance_regime_forecasts, fig=fig)\n",
|
||||||
|
"\n",
|
||||||
|
"fig.show()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"pf = vbt.Portfolio.from_signals(\n",
|
||||||
|
" close=sr_close,\n",
|
||||||
|
" entries=clean_entries,\n",
|
||||||
|
" exits=clean_exits,\n",
|
||||||
|
" direction=\"both\",\n",
|
||||||
|
" fees=0.001,\n",
|
||||||
|
" size=1.0,\n",
|
||||||
|
" size_type=vbt.pf_enums.SizeType.ValuePercent,\n",
|
||||||
|
")\n",
|
||||||
|
"\n",
|
||||||
|
"pf.stats()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"pf.plot(yaxis=dict(type=\"log\")).show()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"pf.drawdowns.plot(yaxis=dict(type=\"log\")).show()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"pf.plot_underwater(pct_scale=True).show()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Re-fitting Every Day\n",
|
||||||
|
"A backtest with a single training set and validation set, in which the model is re-fit on every day of the validation set."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 38,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"n_days_validation = int(365.25 * 2) # 2 years of data held back for the validation set"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 39,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# slices_sr = vbt.Splitter.split_range(slice(None), new_split=-n_days_validation, index=data.index)\n",
|
||||||
|
"splitter_fr = vbt.Splitter.from_rolling(\n",
|
||||||
|
" data.index, length=len(data.index), split=len(data.index) - n_days_validation\n",
|
||||||
|
")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"splitter_fr.plot().show()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 41,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Define a Ray remote function for parallelization.\n",
|
||||||
|
"@ray.remote\n",
|
||||||
|
"def compute_smoothed_marginal_probabilities(sr):\n",
|
||||||
|
" kns = sm.tsa.MarkovRegression(\n",
|
||||||
|
" sr,\n",
|
||||||
|
" k_regimes=k_regimes_kns,\n",
|
||||||
|
" trend=\"n\",\n",
|
||||||
|
" switching_variance=True,\n",
|
||||||
|
" )\n",
|
||||||
|
" results_kns = kns.fit()\n",
|
||||||
|
"\n",
|
||||||
|
" # the smoothing might not work properly out of sample\n",
|
||||||
|
" return results_kns.smoothed_marginal_probabilities.iloc[-1]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 42,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# The predict method doesn't support out of sample forecasts...\n",
|
||||||
|
"# kns = sm.tsa.MarkovRegression(\n",
|
||||||
|
"# sr_log_returns[splitter_fr.get_mask()[\"set_0\"]],\n",
|
||||||
|
"# k_regimes=k_regimes_kns,\n",
|
||||||
|
"# trend=\"n\",\n",
|
||||||
|
"# switching_variance=True,\n",
|
||||||
|
"# )\n",
|
||||||
|
"# results_kns = kns.fit()\n",
|
||||||
|
"\n",
|
||||||
|
"# results_kns.summary()\n",
|
||||||
|
"\n",
|
||||||
|
"# results_kns.predict(sr_log_returns[splitter_fr.get_mask()[\"set_1\"]])"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"https://github.com/statsmodels/statsmodels/issues/7982"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 43,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"sr_train = sr_log_returns[splitter_fr.get_mask()[\"set_0\"]]\n",
|
||||||
|
"sr_validate = sr_log_returns[splitter_fr.get_mask()[\"set_1\"]]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# ...so re-fit the model every timestep.\n",
|
||||||
|
"futures = []\n",
|
||||||
|
"\n",
|
||||||
|
"sr_log_returns_to_date = sr_train.copy()\n",
|
||||||
|
"\n",
|
||||||
|
"# launch parallel tasks\n",
|
||||||
|
"for i in range(len(sr_validate)):\n",
|
||||||
|
" sr_log_returns_to_date = pd.concat(\n",
|
||||||
|
" [\n",
|
||||||
|
" sr_log_returns_to_date,\n",
|
||||||
|
" pd.Series(sr_validate.iloc[i], index=[sr_validate.index[i]]),\n",
|
||||||
|
" ]\n",
|
||||||
|
" )\n",
|
||||||
|
"\n",
|
||||||
|
" futures.append(\n",
|
||||||
|
" compute_smoothed_marginal_probabilities.remote(sr_log_returns_to_date)\n",
|
||||||
|
" )\n",
|
||||||
|
"\n",
|
||||||
|
"# collect results\n",
|
||||||
|
"smoothed_marginal_probabilities = ray.get(futures)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"smoothed_marginal_probabilities = pd.concat(smoothed_marginal_probabilities, axis=1).T\n",
|
||||||
|
"\n",
|
||||||
|
"smoothed_marginal_probabilities.index.name = \"Date\""
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 38,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"sr_variance_regime_predictions = smoothed_marginal_probabilities.idxmax(\n",
|
||||||
|
" axis=1\n",
|
||||||
|
")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"sr_close_validate = sr_close[splitter_fr.get_mask()[\"set_1\"]]\n",
|
||||||
|
"\n",
|
||||||
|
"sr_log_close_validate = sr_log_close[splitter_fr.get_mask()[\"set_1\"]]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"fig = vbt.make_figure()\n",
|
||||||
|
"\n",
|
||||||
|
"fig = plot_annotated_line(\n",
|
||||||
|
" fig,\n",
|
||||||
|
" data.index[splitter_fr.get_mask()[\"set_1\"]],\n",
|
||||||
|
" sr_log_close_validate,\n",
|
||||||
|
" sr_variance_regime_predictions,\n",
|
||||||
|
" dict_variance_regime_colours,\n",
|
||||||
|
" dict_variance_regime_labels,\n",
|
||||||
|
")\n",
|
||||||
|
"\n",
|
||||||
|
"fig.update_layout(\n",
|
||||||
|
" title=\"Variance Regime Forecasts\",\n",
|
||||||
|
" xaxis_title=\"Date\",\n",
|
||||||
|
" yaxis_title=\"Log Close\",\n",
|
||||||
|
" showlegend=True,\n",
|
||||||
|
")\n",
|
||||||
|
"\n",
|
||||||
|
"fig.show()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 49,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# TODO - Double check me!\n",
|
||||||
|
"# Assuming that you sell today if yesterday was in the high-variance regime.\n",
|
||||||
|
"# entries = (sr_variance_regime_forecasts != 2).vbt.signals.fshift()\n",
|
||||||
|
"# exits = (sr_variance_regime_forecasts == 2).vbt.signals.fshift()\n",
|
||||||
|
"\n",
|
||||||
|
"# Assuming that you sell today (at the close) if today was in the high-variance regime.\n",
|
||||||
|
"entries = (sr_variance_regime_forecasts != 2)\n",
|
||||||
|
"exits = (sr_variance_regime_forecasts == 2)\n",
|
||||||
|
"\n",
|
||||||
|
"# I haven't tested any additional logic.\n",
|
||||||
|
"# entries = (sr_variance_regime_forecasts.rolling(5).mean().fillna(0).round(0) != 2)\n",
|
||||||
|
"\n",
|
||||||
|
"clean_entries, clean_exits = entries.vbt.signals.clean(exits)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"fig = sr_variance_regime_forecasts.vbt.plot()\n",
|
||||||
|
"\n",
|
||||||
|
"clean_entries.vbt.signals.plot_as_entries(sr_variance_regime_forecasts, fig=fig)\n",
|
||||||
|
"clean_exits.vbt.signals.plot_as_exits(sr_variance_regime_forecasts, fig=fig)\n",
|
||||||
|
"\n",
|
||||||
|
"fig.show()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 51,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"pf = vbt.Portfolio.from_signals(\n",
|
||||||
|
" close=sr_close_validate,\n",
|
||||||
|
" entries=clean_entries,\n",
|
||||||
|
" exits=clean_exits,\n",
|
||||||
|
" direction=\"both\",\n",
|
||||||
|
" fees=0.001,\n",
|
||||||
|
" size=1.0,\n",
|
||||||
|
" size_type=vbt.pf_enums.SizeType.ValuePercent,\n",
|
||||||
|
")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"pf.stats()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"pf.plot(yaxis=dict(type=\"log\")).show()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"pf.drawdowns.plot(yaxis=dict(type=\"log\")).show()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"pf.plot_underwater(pct_scale=True).show()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Observations\n",
|
||||||
|
"- For this implementation, you have to use the filtered probabilities to avoid introducing look-ahead bias.\n",
|
||||||
|
"- If you backtest the smoothed probabilities (which are smoothed using all of the data), the strategy only performs well after the Great Financial Crisis. Why? No clue.\n",
|
||||||
|
"- It looks like it's better for labelling than it is as a strategy in its current state.\n",
|
||||||
|
"- After slow recessions like the dot-com bubble, there can be a medium-variance decline which this simple strategy doesn't capture.\n",
|
||||||
|
"- After fast recessions like COVID-19, there can be a high-variance rebound which this simple strategy doesn't capture.\n",
|
||||||
|
"- **It looks like you can safely leverage up during low-variance regimes.**\n",
|
||||||
|
"- Maybe you could combine this strategy with other recession-leading indicators (e.g. manufacturing/services PMI, federal funds rate, 10y-2y yield curve, 1y-3mo yield curve) to help time the tops?\n",
|
||||||
|
"- Maybe you could combine this strategy with another trend-following strategy (e.g. VWAP, EMA, BBANDS, ADX) to help time the bottoms?\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 46,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"ray.shutdown()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": []
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": ".venv",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.12.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user