From 0916b029ed9613456feca21954f0e38d10797a61 Mon Sep 17 00:00:00 2001 From: David Brazda Date: Fri, 4 Oct 2024 21:59:36 +0200 Subject: [PATCH] nightly update --- research/robustness/eval_robustness.ipynb | 2550 ++++++++++++++++++++- 1 file changed, 2515 insertions(+), 35 deletions(-) diff --git a/research/robustness/eval_robustness.ipynb b/research/robustness/eval_robustness.ipynb index f5d1f65..80cee01 100644 --- a/research/robustness/eval_robustness.ipynb +++ b/research/robustness/eval_robustness.ipynb @@ -7,16 +7,23 @@ "# Robustness evaluation\n", "\n", "Input is backtest results in the format:\n", - "\n", "- Parameter combination (multiindex)\n", "- Profitability metrics (columns)\n", + "Lets explore various way to evaluate robustness.\n", "\n", - "Lets explore various way to evaluate robustness." + "These are various areas to explore\n", + "\n", + "- [1](https://chatgpt.com/share/66fc06c0-edc4-8013-b228-16ee51dacff8)\n", + "- [2](https://chatgpt.com/share/66fc0ab7-2004-8013-86ca-299af96feb57)\n", + "- [3](https://chatgpt.com/share/66ffc5b3-6f10-8013-8ffa-38b09e778e0f)\n", + "- [4](https://chatgpt.com/share/66ffc5c4-6a28-8013-92ad-ea5bef243b39)\n", + "- [5](https://chatgpt.com/share/66ffc5d4-1100-8013-9cfb-3de2aa3cb136)\n", + "- [6](https://chatgpt.com/share/66ffc5e7-0550-8013-9f88-f5069c78f7f7)" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -42,9 +49,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "0aa468b6ecdb4dc283eae910cef9b72f", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "100%|##########| 1/1 [00:03<00:00, 3.21s/it, symbol=BAC]" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "#fetching US-STOCKS ohlcv_1s\n", "from lib.db import Connection\n", @@ -55,35 +77,1071 @@ "con = Connection(db_name=DB, default_schema=SCHEMA, create_db=True)\n", "basic_data = con.pull(symbols=[SYMBOL], schema=SCHEMA,start=\"2024-08-01\", end=\"2024-08-05\", tz_convert='America/New_York')\n", "\n", - "basic_data.data[SYMBOL].info()\n", + "#basic_data.data[SYMBOL].info()\n", "\n", "#1month 1s data - 15s - 24MB\n" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Error fetching main session\n" + ] + }, + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "#basic_data.ohlcv.data[SYMBOL].lw.plot()\n", - "basic_data.data[SYMBOL].lw.plot(size=\"s\")" + "basic_data.ohlcv.data[SYMBOL].lw.plot(size=\"xs\")" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "basic_data.data[SYMBOL].vwap.lw.plot()\n" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "basic_data.data[SYMBOL].vwap.lw.plot(histogram=(basic_data.data[SYMBOL].trades, \"trades\"))\n", "\n", @@ -92,9 +1150,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "2" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Define the market open and close times\n", "market_open = datetime.time(9, 30)\n", @@ -118,7 +1187,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -154,7 +1223,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -171,42 +1240,841 @@ "# t30data = t30data.transform(lambda df: df.between_time('09:30', '16:00').dropna())\n", "# # t30data.data[\"BAC\"].info()\n", "\n", - "s1close = s1data.close\n", - "t1close = t1data.close\n", + "s1close = s1data.data[\"BAC\"].close\n", + "t1close = t1data.data[\"BAC\"].close\n", "\n", - "t1data.data[\"BAC\"].close.lw.plot()\n" + "#realign t1data to s1data realign_closing\n", + "\n", + "\n", + "\n", + "#t1data.data[\"BAC\"].close.lw.plot()\n" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "time\n", + "2024-08-01 09:30:00-04:00 40.3950\n", + "2024-08-01 09:30:01-04:00 40.3600\n", + "2024-08-01 09:30:02-04:00 40.3700\n", + "2024-08-01 09:30:03-04:00 40.3700\n", + "2024-08-01 09:30:04-04:00 40.3300\n", + " ... \n", + "2024-08-02 15:59:55-04:00 37.5799\n", + "2024-08-02 15:59:56-04:00 37.5700\n", + "2024-08-02 15:59:57-04:00 37.5800\n", + "2024-08-02 15:59:58-04:00 37.5800\n", + "2024-08-02 15:59:59-04:00 37.5800\n", + "Name: close, Length: 29909, dtype: float64" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "s1close" + ] + }, + { + "cell_type": "code", + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ - "from lightweight_charts import JupyterChart, chart, Panel, PlotAccessor\n", - "s5data.close.lw.plot()\n", - "\n", - "# pane1 = Panel(\n", - "# ohlcv=(s5data.ohlcv.get(),))\n", - "\n", - "# # Create the chart with the panel\n", - "# ch = chart([pane1], title=\"Chart\", sync=True, session=None, size=\"s\")" + "resampler_s = vbt.Resampler(t1data.index, s1data.index, source_freq=\"1T\", target_freq=\"1s\")\n", + "t1close_realigned = t1close.vbt.realign_closing(resampler_s)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ - "s1data.data[\"BAC\"].head()" + "\n", + "#display 1s close with 1min close along with 1min realigned\n", + "s1close.lw.plot(name=\"1s\", right=[(t1close, \"1min\"),(t1close_realigned, \"1min_realigned\")])\n" ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 12, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "#display 1min ohlcv along with 1s close with 1min close realigned\n", + "t1data.ohlcv.data[\"BAC\"].lw.plot(right=[(s1close, \"1ms\"),(t1close_realigned, \"1min_realigned\")])" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "ename": "NameError", + "evalue": "name 't30data' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[13], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m#resample on specific index \u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m resampler \u001b[38;5;241m=\u001b[39m vbt\u001b[38;5;241m.\u001b[39mResampler(\u001b[43mt30data\u001b[49m\u001b[38;5;241m.\u001b[39mindex, s1data\u001b[38;5;241m.\u001b[39mindex, source_freq\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m30T\u001b[39m\u001b[38;5;124m\"\u001b[39m, target_freq\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m1s\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 3\u001b[0m t30close_realigned \u001b[38;5;241m=\u001b[39m t30close\u001b[38;5;241m.\u001b[39mvbt\u001b[38;5;241m.\u001b[39mrealign_closing(resampler)\n\u001b[1;32m 5\u001b[0m \u001b[38;5;66;03m#resample 1min to s\u001b[39;00m\n", + "\u001b[0;31mNameError\u001b[0m: name 't30data' is not defined" + ] + } + ], "source": [ "#resample on specific index \n", "resampler = vbt.Resampler(t30data.index, s1data.index, source_freq=\"30T\", target_freq=\"1s\")\n", @@ -217,13 +2085,625 @@ "t1close_realigned = t1close.vbt.realign_closing(resampler_s)" ] }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['talib:MOM']" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\n", + "#vbt.IF.list_indicators(\"*vwap\")\n", + "vbt.IF.list_indicators(\"*mom\")" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MOM.run(\n", + " close,\n", + " timeperiod=Default(value=10),\n", + " timeframe=Default(value=None),\n", + " short_name='mom',\n", + " hide_params=None,\n", + " hide_default=True,\n", + " **kwargs\n", + "):\n", + " Run `MOM` indicator.\n", + " \n", + " * Inputs: `close`\n", + " * Parameters: `timeperiod`, `timeframe`\n", + " * Outputs: `real`\n", + " \n", + " Pass a list of parameter names as `hide_params` to hide their column levels, or True to hide all.\n", + " Set `hide_default` to False to show the column levels of the parameters with a default value.\n", + " \n", + " Other keyword arguments are passed to `MOM.run_pipeline`.\n" + ] + } + ], + "source": [ + "vbt.phelp(vbt.indicator(\"talib:MOM\").run)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "time\n", + "2024-08-01 09:30:00-04:00 NaN\n", + "2024-08-01 09:31:00-04:00 NaN\n", + "2024-08-01 09:32:00-04:00 NaN\n", + "2024-08-01 09:33:00-04:00 NaN\n", + "2024-08-01 09:34:00-04:00 NaN\n", + " ... \n", + "2024-08-02 15:55:00-04:00 0.1200\n", + "2024-08-02 15:56:00-04:00 0.0850\n", + "2024-08-02 15:57:00-04:00 0.1300\n", + "2024-08-02 15:58:00-04:00 0.1151\n", + "2024-08-02 15:59:00-04:00 0.2050\n", + "Name: (20, 1T), Length: 780, dtype: float64" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "t1mom_tt = vbt.indicator(\"talib:MOM\").run(t1data.data[\"BAC\"].close, timeperiod=20, timeframe=[\"1T\",\"5T\"])\n", + "t1mom = vbt.indicator(\"talib:MOM\").run(t1data.data[\"BAC\"].close, timeperiod=20, timeframe=\"1T\")\n", + "t1data.ohlcv.data[\"BAC\"].lw.plot(left=[(t1mom,\"mom\"),(t1mom_tt.mom.loc[:, (20,\"1T\")],),(t1mom_tt.mom.loc[:, (20,\"5T\")],)]) #right=[(s1close, \"1ms\"),(t1close_realigned, \"1min_realigned\")]\n", + "\n", + "t1mom.mom[t1mom.mom.notna()]\n", + "t1mom_tt.mom.loc[:, (20,\"1T\")]\n", + "\n", + "\n", + "#t1data.ohlcv.data[\"BAC\"].lw.plot(left=[(t1mom,\"mom\"),(t1mom_tt.mom,),(t1mom_tt.mom,)]) #right=[(s1close, \"1ms\"),(t1close_realigned, \"1min_realigned\")]\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
mom_timeperiod20
mom_timeframe1T5T
time
2024-08-01 09:30:00-04:00NaNNaN
2024-08-01 09:31:00-04:00NaNNaN
2024-08-01 09:32:00-04:00NaNNaN
2024-08-01 09:33:00-04:00NaNNaN
2024-08-01 09:34:00-04:00NaNNaN
.........
2024-08-02 15:55:00-04:000.12000.1749
2024-08-02 15:56:00-04:000.08500.1749
2024-08-02 15:57:00-04:000.13000.1749
2024-08-02 15:58:00-04:000.11510.1749
2024-08-02 15:59:00-04:000.20500.3250
\n", + "

780 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + "mom_timeperiod 20 \n", + "mom_timeframe 1T 5T\n", + "time \n", + "2024-08-01 09:30:00-04:00 NaN NaN\n", + "2024-08-01 09:31:00-04:00 NaN NaN\n", + "2024-08-01 09:32:00-04:00 NaN NaN\n", + "2024-08-01 09:33:00-04:00 NaN NaN\n", + "2024-08-01 09:34:00-04:00 NaN NaN\n", + "... ... ...\n", + "2024-08-02 15:55:00-04:00 0.1200 0.1749\n", + "2024-08-02 15:56:00-04:00 0.0850 0.1749\n", + "2024-08-02 15:57:00-04:00 0.1300 0.1749\n", + "2024-08-02 15:58:00-04:00 0.1151 0.1749\n", + "2024-08-02 15:59:00-04:00 0.2050 0.3250\n", + "\n", + "[780 rows x 2 columns]" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "t1mom_tt.mom" + ] + }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "vbt.IF.list_indicators(\"*vwap\")\n", "vbt.phelp(vbt.VWAP.run)" ] },