From c11ed9d4748c842494530da4630b02d658e54818 Mon Sep 17 00:00:00 2001 From: David Brazda Date: Fri, 30 Aug 2024 20:49:53 +0200 Subject: [PATCH] Initial commit after copying files from flawed repository --- README.md | 29 + requirements.txt | 24 + research/basic.ipynb | 385 ++ research/chartMultipleMarkers.ipynb | 95 + research/get_trades_at_once.ipynb | 348 ++ research/indcross_parametrized.ipynb | 208 ++ research/ohlc_persistance_test.ipynb | 252 ++ research/prepare_aggregatied_data.ipynb | 458 +++ research/rsi_alpaca.ipynb | 467 +++ research/strat1/strat1_v1_MULTI.ipynb | 949 +++++ research/strat1/strat1_v1_SINGLE.ipynb | 265 ++ .../v2_SINGLE-checkpoint.ipynb | 1118 ++++++ .../CANDLEGAPS_v1_MULTI.ipynb | 932 +++++ .../CANDLEGAPS_v1_SINGLE.ipynb | 842 +++++ research/strat_LINREG_MULTI/v1_MULTI.ipynb | 949 +++++ research/strat_LINREG_MULTI/v1_SINGLE.ipynb | 584 +++ .../v2_SINGLE-checkpoint.ipynb | 1118 ++++++ research/strat_ORDER_IMBALANCE/v1_MULTI.ipynb | 932 +++++ .../strat_ORDER_IMBALANCE/v1_SINGLE.ipynb | 964 +++++ .../strat_ORDER_IMBALANCE/v2_SINGLE.ipynb | 1411 +++++++ .../v2_SINGLE-checkpoint.ipynb | 1118 ++++++ .../SUPERTREND_v1_MULTI.ipynb | 932 +++++ .../SUPERTREND_v1_SINGLE.ipynb | 679 ++++ .../strat_TIME_ENTRIES copy/v1_MULTI.ipynb | 932 +++++ .../strat_TIME_ENTRIES copy/v1_SINGLE.ipynb | 595 +++ research/test.ipynb | 161 + research/test1.ipynb | 82 + research/test1sbars.ipynb | 421 +++ research/test1sbars_roc.ipynb | 935 +++++ setup.py | 16 + to_explore/PQN_Patterns.ipynb | 499 +++ to_explore/PQN_Projections.ipynb | 266 ++ to_explore/notebooks/BasicRSI.ipynb | 700 ++++ to_explore/notebooks/CrossValidation.ipynb | 3245 +++++++++++++++++ to_explore/notebooks/MTFAnalysis.ipynb | 1525 ++++++++ to_explore/notebooks/PQN_MTF.ipynb | 372 ++ to_explore/notebooks/PQN_ParamCV.ipynb | 316 ++ to_explore/notebooks/PQN_Patterns.ipynb | 499 +++ to_explore/notebooks/PQN_Projections.ipynb | 266 ++ to_explore/notebooks/PairsTrading.ipynb | 1519 ++++++++ 
.../notebooks/PatternsProjections.ipynb | 2742 ++++++++++++++ .../notebooks/PortfolioOptimization.ipynb | 3178 ++++++++++++++++ to_explore/notebooks/QQ_TelegramSignals.ipynb | 1107 ++++++ to_explore/notebooks/SignalDevelopment.ipynb | 2557 +++++++++++++ to_explore/notebooks/StopSignals.ipynb | 771 ++++ to_explore/notebooks/SuperTrend.ipynb | 2472 +++++++++++++ to_explore/notebooks/TelegramSignals.ipynb | 285 ++ 47 files changed, 40520 insertions(+) create mode 100644 README.md create mode 100644 requirements.txt create mode 100644 research/basic.ipynb create mode 100644 research/chartMultipleMarkers.ipynb create mode 100644 research/get_trades_at_once.ipynb create mode 100644 research/indcross_parametrized.ipynb create mode 100644 research/ohlc_persistance_test.ipynb create mode 100644 research/prepare_aggregatied_data.ipynb create mode 100644 research/rsi_alpaca.ipynb create mode 100644 research/strat1/strat1_v1_MULTI.ipynb create mode 100644 research/strat1/strat1_v1_SINGLE.ipynb create mode 100644 research/strat_CANDLEGAPS/.ipynb_checkpoints/v2_SINGLE-checkpoint.ipynb create mode 100644 research/strat_CANDLEGAPS/CANDLEGAPS_v1_MULTI.ipynb create mode 100644 research/strat_CANDLEGAPS/CANDLEGAPS_v1_SINGLE.ipynb create mode 100644 research/strat_LINREG_MULTI/v1_MULTI.ipynb create mode 100644 research/strat_LINREG_MULTI/v1_SINGLE.ipynb create mode 100644 research/strat_ORDER_IMBALANCE/.ipynb_checkpoints/v2_SINGLE-checkpoint.ipynb create mode 100644 research/strat_ORDER_IMBALANCE/v1_MULTI.ipynb create mode 100644 research/strat_ORDER_IMBALANCE/v1_SINGLE.ipynb create mode 100644 research/strat_ORDER_IMBALANCE/v2_SINGLE.ipynb create mode 100644 research/strat_SUPERTREND/.ipynb_checkpoints/v2_SINGLE-checkpoint.ipynb create mode 100644 research/strat_SUPERTREND/SUPERTREND_v1_MULTI.ipynb create mode 100644 research/strat_SUPERTREND/SUPERTREND_v1_SINGLE.ipynb create mode 100644 research/strat_TIME_ENTRIES copy/v1_MULTI.ipynb create mode 100644 research/strat_TIME_ENTRIES 
copy/v1_SINGLE.ipynb create mode 100644 research/test.ipynb create mode 100644 research/test1.ipynb create mode 100644 research/test1sbars.ipynb create mode 100644 research/test1sbars_roc.ipynb create mode 100644 setup.py create mode 100644 to_explore/PQN_Patterns.ipynb create mode 100644 to_explore/PQN_Projections.ipynb create mode 100644 to_explore/notebooks/BasicRSI.ipynb create mode 100644 to_explore/notebooks/CrossValidation.ipynb create mode 100644 to_explore/notebooks/MTFAnalysis.ipynb create mode 100644 to_explore/notebooks/PQN_MTF.ipynb create mode 100644 to_explore/notebooks/PQN_ParamCV.ipynb create mode 100644 to_explore/notebooks/PQN_Patterns.ipynb create mode 100644 to_explore/notebooks/PQN_Projections.ipynb create mode 100644 to_explore/notebooks/PairsTrading.ipynb create mode 100644 to_explore/notebooks/PatternsProjections.ipynb create mode 100644 to_explore/notebooks/PortfolioOptimization.ipynb create mode 100644 to_explore/notebooks/QQ_TelegramSignals.ipynb create mode 100644 to_explore/notebooks/SignalDevelopment.ipynb create mode 100644 to_explore/notebooks/StopSignals.ipynb create mode 100644 to_explore/notebooks/SuperTrend.ipynb create mode 100644 to_explore/notebooks/TelegramSignals.ipynb diff --git a/README.md b/README.md new file mode 100644 index 0000000..acc47a2 --- /dev/null +++ b/README.md @@ -0,0 +1,29 @@ +# Research for v2realbot + +## Overview +Strategy research and development tracker. Serves as a central hub for strategizing, idea generation, and performance tracking of the strategies. + +## Purpose +This repository is established as an issue tracker to: +- Facilitate the proposal and discussion of new trading strategies. +- Track the progress and refinement of existing strategies. +- Document and share insights, research findings, and performance analyses. + +## Getting Started +1. **Proposal of New Strategies**: To propose a new strategy, create a new issue with a clear and descriptive title. 
Outline your strategy, including its rationale, intended market conditions, and expected outcomes. + +2. **Discussion and Feedback**: Use the Issues section for ongoing discussions, feedback, and collaborative refinement of strategies. + +3. **Strategy Documentation**: Each strategy should be documented in detail on the Wiki pages, including its parameters, implementation guidelines, and any relevant backtesting results. (Note: documentation either here or on [trading.mujdenik.eu](trading.mujdenik.eu) - we will see what's better in practice) + +4. **Problem Reporting and Optimization**: Report any issues or suggest optimizations for existing strategies through the Issues section, providing relevant data and analysis to support your observations. + +5. **Contribution Guidelines**: Please refer to `CONTRIBUTING.md` for detailed guidelines on how to contribute effectively to this repository. + +## Collaboration Tools +- **Issues**: For proposing, discussing, and tracking strategies. +- **Wiki**: For detailed documentation and resource sharing, please see [trading.mujdenik.eu](trading.mujdenik.eu) +- **Research folder**: Notebooks dedicated to each strategy. + +## Integration with v2realbot +This repository operates in tandem with the `v2realbot` repository. Ensure all strategies are compatible and follow the guidelines for integration into the v2realbot Trading Platform. 
diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..b7359ef --- /dev/null +++ b/requirements.txt @@ -0,0 +1,24 @@ +pandas +pywebview>=5.0.5 +orjson +v2realbot @ git+https://github.com/drew2323/v2trading.git@master#egg=v2trading +lightweight-charts @ git+https://github.com/drew2323/lightweight-charts-python.git@main#egg=lightweight-charts +pyarrow +matplotlib +seaborn +alpaca-py +rich +tomli +appdirs +python-dotenv +filelock +dill +plotly +dash +dash-bootstrap-components +tinydb +tulipy +ta-lib +pywavelets +/Users/davidbrazda/Desktop/vectorbtpro-2024.2.22-py3-none-any.whl +sqlalchemy \ No newline at end of file diff --git a/research/basic.ipynb b/research/basic.ipynb new file mode 100644 index 0000000..4b5652f --- /dev/null +++ b/research/basic.ipynb @@ -0,0 +1,385 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from v2realbot.tools.loadbatch import load_batch\n", + "from v2realbot.utils.utils import zoneNY, DATA_DIR\n", + "import pandas as pd\n", + "import numpy as np\n", + "import vectorbtpro as vbt\n", + "\n", + "vbt.settings.set_theme(\"dark\")\n", + "vbt.settings['plotting']['layout']['width'] = 1280\n", + "vbt.settings.plotting.auto_rangebreaks = True\n", + "# Set the option to display with pagination\n", + "pd.set_option('display.notebook_repr_html', True)\n", + "pd.set_option('display.max_rows', 10) # Number of rows per page" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "batch_id = \"e44a5075\"\n", + "# res, df = load_batch(batch_id=batch_id,\n", + "# space_resolution_evenly=False,\n", + "# indicators_columns=[\"Rsi14\"],\n", + "# main_session_only=True)\n", + "# if res < 0:\n", + "# print(\"Error\" + str(res) + str(df))\n", + "# df = df[\"bars\"]\n", + "# df.info()\n", + "# df.head()\n", + "# #df.ptable()\n", + "# df.to_pickle(DATA_DIR+\"/\"+f'{batch_id}.pickle')" + ] + }, + { + 
"cell_type": "markdown", + "metadata": {}, + "source": [ + "## FILTERING" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_pickle(DATA_DIR+\"/\"+f'{batch_id}.pickle')\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "#naloadujeme do vbt symbol as column\n", + "basic_data = vbt.Data.from_data({\"BAC\": df}, tz_convert=zoneNY)\n", + "start_date = pd.Timestamp('2024-03-12 09:30', tz=zoneNY)\n", + "end_date = pd.Timestamp('2024-03-13 16:00', tz=zoneNY)\n", + "\n", + "#filter date\n", + "#basic_data = basic_data.transform(lambda df: df[df.index.date == start_date.date()])\n", + "#filter range\n", + "basic_data = basic_data.transform(lambda df: df[(df.index >= start_date) & (df.index <= end_date)])\n", + "#filtered_data = basic_data.transform(lambda df: df[(df.index >= start_date) & (df.index <= end_date)])\n", + "# #range filtered_data = data[(data.index >= start_date) & (data.index <= end_date)\n", + "#df.between_time('09:30', '16:00')\n", + "#(df.index.time >= pd.Timestamp('09:30').time()) & (df.index.time <= pd.Timestamp('16:00').time())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# basic_data.data[\"BAC\"]\n", + "rsi14" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#b = filtered_data.get().iloc[100:200] #b[[\"Open\",\"High\"]]\n", + "rsi14 = basic_data.data[\"BAC\"][\"Rsi14\"].rename(\"Rsi14\")\n", + "#create subploit\n", + "fig = vbt.make_subplots(rows=2, cols=1, shared_xaxes=True, specs=[[{\"secondary_y\": True}], [{\"secondary_y\": False}]])\n", + "rsi14.vbt.plot(add_trace_kwargs=dict(row=1, col=1, secondary_y=True),fig=fig)\n", + "basic_data.data[\"BAC\"].vbt.ohlcv.plot(add_trace_kwargs=dict(row=1, col=1, secondary_y=False), fig=fig)\n" + ] + }, + { + "cell_type": 
"code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "run_rsi = vbt.talib_func(\"rsi\")\n", + "rsi_new = run_rsi(basic_data.vwap, timeperiod=15)\n", + "rsi_new = rsi_new.fillna(0)\n", + "# print(rsi_new)\n", + "# print(dir(rsi_new))\n", + "rsi14 = basic_data.data[\"BAC\"][\"Rsi14\"]\n", + "# print(rsi14)\n", + "\n", + "#zkombinujeme do stejneho dataframe skrz sloupce (axis1)\n", + "# combined_df = pd.concat([rsi_new, rsi14], axis=1)\n", + "# combined_df\n", + "\n", + "#create subplot\n", + "fig = vbt.make_subplots(rows=1, cols=1, shared_xaxes=True)\n", + "\n", + "plot_rsi = vbt.talib_plot_func(\"rsi\")\n", + "plot_rsi(rsi_new, fig=fig)\n", + "plot_rsi(rsi14, fig=fig)\n", + "fig.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "entries = rsi_new.vbt.crossed_below(30)\n", + "entries" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "exits = rsi_new.vbt.crossed_above(70)\n", + "exits " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def plot_rsi(close, rsi, entries, exits):\n", + " fig = vbt.make_subplots(rows=2, cols=1, shared_xaxes=True, specs=[[{\"secondary_y\": True}], [{\"secondary_y\": False}]])\n", + " basic_data.data[\"BAC\"].vbt.ohlcv.plot(add_trace_kwargs=dict(row=1, col=1, secondary_y=True),fig=fig)\n", + " rsi.vbt.plot(fig=fig, add_trace_kwargs=dict(row=1, col=1, secondary_y=False), trace_kwargs=dict(line=dict(color='grey', width=1.5)))\n", + " #close.vbt.plot(fig=fig, add_trace_kwargs=dict(row=1, col=1, secondary_y=True))\n", + " entries.vbt.signals.plot_as_entries(y=close, fig=fig, add_trace_kwargs=dict(row=1, col=1, secondary_y=True))\n", + " exits.vbt.signals.plot_as_exits(y=close, fig=fig, add_trace_kwargs=dict(row=1, col=1, secondary_y=True))\n", + " return fig\n", + "\n", + "close = basic_data.get(\"Close\")\n", + 
"\n", + "print(entries)\n", + "\n", + "plot_rsi(close, rsi_new, entries, exits).show()\n", + "\n", + "clean_entries, clean_exits = entries.vbt.signals.clean(exits) \n", + "\n", + "plot_rsi(close, rsi_new, clean_entries, clean_exits).show()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "clean_entries.vbt.signals.total() \n", + "clean_exits.vbt.signals.total() " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "index = basic_data.wrapper.index\n", + "\n", + "#minutes from market open\n", + "market_open_time = pd.to_timedelta('09:30:00')\n", + "\n", + "# Calculate the market open datetime for each day\n", + "market_opens = index.normalize() + market_open_time\n", + "\n", + "minutes_from_open = (index - market_opens).total_seconds() / 60\n", + "# Ensuring the result is a Series\n", + "minutes_from_open = pd.Series(minutes_from_open, index=index)\n", + "\n", + "#minutes_from_open.values\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "symbol_wrapper = basic_data.get_symbol_wrapper()\n", + "\n", + "@vbt.njit\n", + "def elapsed_minutes_from_open_nb(time_in_ns):\n", + " market_opens_in_minute = 570 # 9 hours * 60 minutes + 30 minutes\n", + " current_minute = vbt.dt_nb.hour_nb(time_in_ns) * 60 + vbt.dt_nb.minute_nb(time_in_ns)\n", + " #print(\"current_minutes\", current_minutes)\n", + " # Calculate elapsed minutes since market open at 9:30 AM\n", + " minutes_from_open = current_minute - market_opens_in_minute\n", + " print( \"elapsed_from_open\", minutes_from_open)\n", + " return minutes_from_open if minutes_from_open >= 0 else 0\n", + "\n", + "@vbt.njit\n", + "def entry_place_func_nb(c, low, close, time_in_ns, rsi14, window_open, window_close):\n", + " # if c.from_i == 0: # (1)!\n", + " # c.out[0] = 
True\n", + " # return -1\n", + " # print(\"ted\")\n", + " # print(c.from_i)\n", + " #exit_i = c.from_i - c.wait # (2)!\n", + " #exit_price = close[exit_i, c.col] # (3)!\n", + " #hit_price = exit_price * (1 - th)\n", + " market_open_minutes = 570 # 9 hours * 60 minutes + 30 minutes\n", + "\n", + " for out_i in range(len(c.out)):\n", + " i = c.from_i + out_i\n", + "\n", + " current_minutes = vbt.dt_nb.hour_nb(time_in_ns[i]) * 60 + vbt.dt_nb.minute_nb(time_in_ns[i])\n", + " #print(\"current_minutes\", current_minutes)\n", + " # Calculate elapsed minutes since market open at 9:30 AM\n", + " elapsed_from_open = current_minutes - market_open_minutes\n", + " elapsed_from_open = elapsed_from_open if elapsed_from_open >= 0 else 0\n", + " #print( \"elapsed_from_open\", elapsed_from_open)\n", + "\n", + " #elapsed_from_open = elapsed_minutes_from_open_nb(time_in_ns) \n", + " in_window = elapsed_from_open > window_open and elapsed_from_open < window_close\n", + " #print(\"in_window\", in_window)\n", + " # if in_window:\n", + " # print(\"in window\")\n", + "\n", + " if in_window and rsi14[i] > 60: # and low[i, c.col] <= hit_price: # and hour == 9: # (4)!\n", + " return out_i\n", + " return -1\n", + "\n", + "#whether the date changed\n", + "# day_changed_nb(\n", + "# ts1,\n", + "# ts2\n", + "# )\n", + "\n", + "# h_ns(ts1) int\n", + "\n", + "\n", + "@vbt.njit\n", + "def exit_place_func_nb(c, high, close, time_index, tp, sl): # (5)!\n", + " entry_i = c.from_i - c.wait\n", + " entry_price = close[entry_i, c.col]\n", + " hit_price = entry_price * (1 + tp)\n", + " stop_price = entry_price * (1 - sl)\n", + " for out_i in range(len(c.out)):\n", + " i = c.from_i + out_i\n", + "\n", + " \n", + "\n", + "\n", + " last_bar_of_day = vbt.dt_nb.day_changed_nb(time_index[i], time_index[i + 1])\n", + "\n", + " #print(next_day)\n", + " if last_bar_of_day: #pokud je dalsi next day, tak zavirame posledni\n", + " print(\"ted\",out_i)\n", + " return out_i\n", + " if high[i, c.col] >= hit_price or 
close[i, c.col] <= stop_price :\n", + " return out_i\n", + " return -1\n", + "\n", + "index = basic_data.wrapper.index\n", + "\n", + "#minutes from market open\n", + "market_open_time = pd.to_timedelta('09:30:00')\n", + "\n", + "# Calculate the market open datetime for each day\n", + "market_opens = index.normalize() + market_open_time\n", + "\n", + "minutes_from_open = (index - market_opens).total_seconds() / 60\n", + "\n", + "print(minutes_from_open)\n", + "\n", + "\n", + "#index 9:30 az 10:00\n", + "time_entry_window = ((index.time >= pd.Timestamp(\"09:30:00\").time())&\n", + " (index.time <= pd.Timestamp(\"14:00:00\").time()))\n", + "\n", + "\n", + "print(time_entry_window)\n", + "print(rsi_new)\n", + "rsi_entries = rsi_new.vbt.crossed_below(64)\n", + "rsi_entries = rsi_entries < 40\n", + "rsi_entries_array = rsi_entries.vbt.to_1d_array()\n", + "print(rsi_entries_array)\n", + "\n", + "\n", + "entries, exits = vbt.pd_acc.signals.generate_both( # (6)!\n", + " symbol_wrapper.shape,\n", + " entry_place_func_nb=entry_place_func_nb,\n", + " #timeindex to ns to numba\n", + " entry_place_args=(vbt.Rep(\"low\"), vbt.Rep(\"close\"), vbt.dt.to_ns(basic_data.wrapper.index), vbt.Rep(\"rsi14\"), 0, 380), # (7)!\n", + " exit_place_func_nb=exit_place_func_nb,\n", + " exit_place_args=(vbt.Rep(\"high\"), vbt.Rep(\"close\"), vbt.dt.to_ns(basic_data.wrapper.index), 0.001, 0.001),\n", + " wrapper=symbol_wrapper,\n", + " broadcast_named_args=dict( # (8)!\n", + " high=basic_data.get(\"High\"),\n", + " low=basic_data.get(\"Low\"),\n", + " close=basic_data.get(\"Close\"),\n", + " rsi14=basic_data.get(\"Rsi14\"),\n", + " window_open=10,\n", + " window_close=60\n", + " ),\n", + " broadcast_kwargs=dict(post_func=np.asarray) # (9)!\n", + ")\n", + "\n", + "\n", + "plot_rsi(close, rsi_new, entries, exits).show()\n", + "\n", + "# fig = basic_data.plot(\n", + "# symbol=\"BAC\", \n", + "# ohlc_trace_kwargs=dict(opacity=0.5), \n", + "# plot_volume=False\n", + "# )\n", + "\n", + "# 
#rsi_entries.vbt.plot(fig=fig)\n", + "# entries.vbt.signals.plot_as_entries(\n", + "# y=close, fig=fig)\n", + "# exits.vbt.signals.plot_as_exits(\n", + "# y=close, fig=fig)\n", + "# fig.show() # (10)!" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/research/chartMultipleMarkers.ipynb b/research/chartMultipleMarkers.ipynb new file mode 100644 index 0000000..497b1e8 --- /dev/null +++ b/research/chartMultipleMarkers.ipynb @@ -0,0 +1,95 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import vectorbtpro as vbt\n", + "from lightweight_charts import chart, Panel\n", + "\n", + "# Pulling ETH-USD data\n", + "data = vbt.YFData.pull(\"ETH-USD\")\n", + "close = data.close\n", + "high = data.high\n", + "low = data.low\n", + "\n", + "# Define a simple moving average crossover strategy using EWM\n", + "short_ma = vbt.MA.run(close, window=6, wtype=\"exp\").ma\n", + "long_ma = vbt.MA.run(close, window=73, wtype=\"exp\").ma\n", + "\n", + "# Generate signals\n", + "long_entries = short_ma > long_ma\n", + "long_exits = short_ma < long_ma\n", + "short_entries = short_ma < long_ma\n", + "short_exits = short_ma > long_ma\n", + "\n", + "clean_long_entries, clean_long_exits = long_entries.vbt.signals.clean(long_exits)\n", + "clean_short_entries, clean_short_exits = short_entries.vbt.signals.clean(short_exits)\n", + "\n", + "# ohlcv_df = data.ohlcv.get()\n", + "\n", + "#assume i want to display simple entries or exits on series or ohlcv 
\n", + "#based on tuple positions it determines entries or exits (and set colors and shape accordingly)\n", + "pane1 = Panel(\n", + " ohlcv=(data.ohlcv.get(), clean_long_entries, clean_short_entries)\n", + ")\n", + "ch = chart([pane1], title=\"Chart with Entry/Exit Markers\", session=None, size=\"s\")\n", + "\n", + "#if you want to display more entries or exits, use tuples with their colors\n", + "pane1 = Panel(\n", + " ohlcv=(data.ohlcv.get(),\n", + " [(clean_long_entries, \"yellow\"), (clean_short_entries, \"pink\")], #list of entries tuples with color\n", + " [(clean_long_exits, \"yellow\"), (clean_short_exits, \"pink\")] #list of exits tuples with color\n", + " ), \n", + ")\n", + "\n", + "# # Create the chart with the panel\n", + "ch = chart([pane1], title=\"Chart with EntryShort/ExitShort (yellow) and EntryLong/ExitLong markers (pink)\", sync=True, session=None, size=\"s\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "\n", + "\n", + "\n", + "# # Add the markers to the chart using markers_set method\n", + "# entry_signals = pd.DataFrame({\n", + "# 'time': clean_long_entries.index.astype(str),\n", + "# 'value': clean_long_entries.values\n", + "# }).dropna()\n", + "# entry_signals['value'] = entry_signals['value'].astype(bool)\n", + "\n", + "# ch.markers_set(entry_signals, type='entries')" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/research/get_trades_at_once.ipynb b/research/get_trades_at_once.ipynb new file mode 100644 index 0000000..220dddd --- /dev/null +++ 
b/research/get_trades_at_once.ipynb @@ -0,0 +1,348 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Loading trades and vectorized aggregation\n", + "Describes how to fetch trades (remote/cached) and use the new vectorized aggregation to build bars of a given type (time, volume, dollar) and resolution.\n", + "\n", + "`fetch_trades_parallel` fetches trades for a given symbol and interval and can also filter by trade conditions and minimum size. It returns `trades_df`.\n", + "`aggregate_trades` accepts `trades_df`, a resolution, and a bar type (VOLUME, TIME, DOLLAR) and returns the aggregated OHLCV dataframe `ohlcv_df`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "from numba import jit\n", + "from alpaca.data.historical import StockHistoricalDataClient\n", + "from v2realbot.config import ACCOUNT1_PAPER_API_KEY, ACCOUNT1_PAPER_SECRET_KEY, DATA_DIR\n", + "from alpaca.data.requests import StockTradesRequest\n", + "from v2realbot.enums.enums import BarType\n", + "import time\n", + "from datetime import datetime\n", + "from v2realbot.utils.utils import parse_alpaca_timestamp, ltp, zoneNY, send_to_telegram, fetch_calendar_data\n", + "import pyarrow\n", + "from v2realbot.loader.aggregator_vectorized import fetch_daily_stock_trades, fetch_trades_parallel, generate_time_bars_nb, aggregate_trades\n", + "import vectorbtpro as vbt\n", + "import v2realbot.utils.config_handler as cfh\n", + "\n", + "vbt.settings.set_theme(\"dark\")\n", + "vbt.settings['plotting']['layout']['width'] = 1280\n", + "vbt.settings.plotting.auto_rangebreaks = True\n", + "# Set the option to display with pagination\n", + "pd.set_option('display.notebook_repr_html', True)\n", + "pd.set_option('display.max_rows', 20) # Number of rows per page\n", + "# pd.set_option('display.float_format', '{:.9f}'.format)\n", + "\n", + "\n", + "#trade filtering\n", + 
"exclude_conditions = cfh.config_handler.get_val('AGG_EXCLUDED_TRADES') #standard ['C','O','4','B','7','V','P','W','U','Z','F']\n", + "minsize = 100\n", + "\n", + "symbol = \"SPY\"\n", + "#datetime in zoneNY \n", + "day_start = datetime(2024, 1, 1, 9, 30, 0)\n", + "day_stop = datetime(2024, 1, 14, 16, 00, 0)\n", + "day_start = zoneNY.localize(day_start)\n", + "day_stop = zoneNY.localize(day_stop)\n", + "#filename of trades_df parquet, date are in isoformat but without time zone part\n", + "dir = DATA_DIR + \"/notebooks/\"\n", + "#parquet interval cache contains exclude conditions and minsize filtering\n", + "file_trades = dir + f\"trades_df-{symbol}-{day_start.strftime('%Y-%m-%dT%H:%M:%S')}-{day_stop.strftime('%Y-%m-%dT%H:%M:%S')}-{exclude_conditions}-{minsize}.parquet\"\n", + "#file_trades = dir + f\"trades_df-{symbol}-{day_start.strftime('%Y-%m-%dT%H:%M:%S')}-{day_stop.strftime('%Y-%m-%dT%H:%M:%S')}.parquet\"\n", + "file_ohlcv = dir + f\"ohlcv_df-{symbol}-{day_start.strftime('%Y-%m-%dT%H:%M:%S')}-{day_stop.strftime('%Y-%m-%dT%H:%M:%S')}-{exclude_conditions}-{minsize}.parquet\"\n", + "\n", + "#PRINT all parquet in directory\n", + "import os\n", + "files = [f for f in os.listdir(dir) if f.endswith(\".parquet\")]\n", + "for f in files:\n", + " print(f)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "trades_df = fetch_daily_stock_trades(symbol, day_start, day_stop, exclude_conditions=exclude_conditions, minsize=minsize, force_remote=False, max_retries=5, backoff_factor=1)\n", + "trades_df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#Either load trades or ohlcv from parquet if exists\n", + "\n", + "#trades_df = fetch_trades_parallel(symbol, day_start, day_stop, exclude_conditions=exclude_conditions, minsize=50, max_workers=20) #exclude_conditions=['C','O','4','B','7','V','P','W','U','Z','F'])\n", + "# 
trades_df.to_parquet(file_trades, engine='pyarrow', compression='gzip')\n", + "\n", + "trades_df = pd.read_parquet(file_trades,engine='pyarrow')\n", + "ohlcv_df = aggregate_trades(symbol=symbol, trades_df=trades_df, resolution=1, type=BarType.TIME)\n", + "ohlcv_df.to_parquet(file_ohlcv, engine='pyarrow', compression='gzip')\n", + "\n", + "# ohlcv_df = pd.read_parquet(file_ohlcv,engine='pyarrow')\n", + "# trades_df = pd.read_parquet(file_trades,engine='pyarrow')\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#list all files is dir directory with parquet extension\n", + "dir = DATA_DIR + \"/notebooks/\"\n", + "import os\n", + "files = [f for f in os.listdir(dir) if f.endswith(\".parquet\")]\n", + "file_name = \"\"\n", + "ohlcv_df = pd.read_parquet(file_ohlcv,engine='pyarrow')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ohlcv_df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "# Calculate daily returns\n", + "ohlcv_df['returns'] = ohlcv_df['close'].pct_change().dropna()\n", + "#same as above but pct_change is from 3 datapoints back, but only if it is the same date, else na\n", + "\n", + "\n", + "# Plot the probability distribution curve\n", + "plt.figure(figsize=(10, 6))\n", + "sns.histplot(df['returns'].dropna(), kde=True, stat='probability', bins=30)\n", + "plt.title('Probability Distribution of Daily Returns')\n", + "plt.xlabel('Daily Returns')\n", + "plt.ylabel('Probability')\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.preprocessing import StandardScaler\n", + "from 
sklearn.linear_model import LogisticRegression\n", + "from sklearn.metrics import accuracy_score\n", + "\n", + "# Define the intervals from 5 to 20 s, returns for each interval\n", + "#maybe use rolling window?\n", + "intervals = range(5, 21, 5)\n", + "\n", + "# Create columns for percentage returns\n", + "rolling_window = 50\n", + "\n", + "# Normalize the returns using rolling mean and std\n", + "for N in intervals:\n", + " column_name = f'returns_{N}'\n", + " rolling_mean = ohlcv_df[column_name].rolling(window=rolling_window).mean()\n", + " rolling_std = ohlcv_df[column_name].rolling(window=rolling_window).std()\n", + " ohlcv_df[f'norm_{column_name}'] = (ohlcv_df[column_name] - rolling_mean) / rolling_std\n", + "\n", + "# Display the dataframe with normalized return columns\n", + "ohlcv_df\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Calculate the sum of the normalized return columns for each row\n", + "ohlcv_df['sum_norm_returns'] = ohlcv_df[[f'norm_returns_{N}' for N in intervals]].sum(axis=1)\n", + "\n", + "# Sort the DataFrame based on the sum of normalized returns in descending order\n", + "df_sorted = ohlcv_df.sort_values(by='sum_norm_returns', ascending=False)\n", + "\n", + "# Display the top rows with the highest sum of normalized returns\n", + "df_sorted\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Drop initial rows with NaN values due to pct_change\n", + "ohlcv_df.dropna(inplace=True)\n", + "\n", + "# Plotting the probability distribution curves\n", + "plt.figure(figsize=(14, 8))\n", + "for N in intervals:\n", + " sns.kdeplot(ohlcv_df[f'returns_{N}'].dropna(), label=f'Returns {N}', fill=True)\n", + "\n", + "plt.title('Probability Distribution of Percentage Returns')\n", + "plt.xlabel('Percentage Return')\n", + "plt.ylabel('Density')\n", + "plt.legend()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", 
+ "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "# Plot the probability distribution curve\n", + "plt.figure(figsize=(10, 6))\n", + "sns.histplot(ohlcv_df['returns'].dropna(), kde=True, stat='probability', bins=30)\n", + "plt.title('Probability Distribution of Daily Returns')\n", + "plt.xlabel('Daily Returns')\n", + "plt.ylabel('Probability')\n", + "plt.show()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#show only rows from ohlcv_df where returns > 0.005\n", + "ohlcv_df[ohlcv_df['returns'] > 0.0005]\n", + "\n", + "#ohlcv_df[ohlcv_df['returns'] < -0.005]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#ohlcv where index = date 2024-03-13 and between hour 12\n", + "\n", + "a = ohlcv_df.loc['2024-03-13 12:00:00':'2024-03-13 13:00:00']\n", + "a" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ohlcv_df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "trades_df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ohlcv_df.info()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "trades_df.to_parquet(\"trades_df-spy-0111-0111.parquett\", engine='pyarrow', compression='gzip')\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "trades_df.to_parquet(\"trades_df-spy-111-0516.parquett\", engine='pyarrow', compression='gzip', allow_truncated_timestamps=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + 
"ohlcv_df.to_parquet(\"ohlcv_df-spy-111-0516.parquett\", engine='pyarrow', compression='gzip')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "basic_data = vbt.Data.from_data(vbt.symbol_dict({symbol: ohlcv_df}), tz_convert=zoneNY)\n", + "vbt.settings['plotting']['auto_rangebreaks'] = True\n", + "basic_data.ohlcv.plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#access just BCA\n", + "#df_filtered = df.loc[\"BAC\"]" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/research/indcross_parametrized.ipynb b/research/indcross_parametrized.ipynb new file mode 100644 index 0000000..b834019 --- /dev/null +++ b/research/indcross_parametrized.ipynb @@ -0,0 +1,208 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "celkovy optimalizacni backtest na vetsim oknu 1 - 300\n", + "a možná take to udělat jako parametr\n", + "zkusit CV\n", + "zobrazit nejak robustnost parametru" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from v2realbot.tools.loadbatch import load_batch\n", + "from v2realbot.utils.utils import zoneNY\n", + "import pandas as pd\n", + "import numpy as np\n", + "import vectorbtpro as vbt\n", + "from itables import init_notebook_mode, show\n", + "import datetime\n", + "from itertools import product\n", + "\n", + "init_notebook_mode(all_interactive=True)\n", + "\n", + "vbt.settings.set_theme(\"dark\")\n", + 
"vbt.settings['plotting']['layout']['width'] = 1280\n", + "vbt.settings.plotting.auto_rangebreaks = True\n", + "# Set the option to display with pagination\n", + "pd.set_option('display.notebook_repr_html', True)\n", + "pd.set_option('display.max_rows', 10) # Number of rows per page\n", + "\n", + "# Define the market open and close times\n", + "market_open = datetime.time(9, 30)\n", + "market_close = datetime.time(16, 0)\n", + "entry_window_opens = 1\n", + "entry_window_closes = 370\n", + "\n", + "forced_exit_start = 380\n", + "forced_exit_end = 390\n", + "\n", + "res, df = load_batch(batch_id=\"f1ac6651\", #138170bc 0fb5043a bde6d0be f1ac6651\n", + " space_resolution_evenly=False,\n", + " indicators_columns=[\"Rsi14\"],\n", + " main_session_only=True,\n", + " verbose = False)\n", + "if res < 0:\n", + " print(\"Error\" + str(res) + str(df))\n", + "df = df[\"bars\"]\n", + "\n", + "df\n", + "\n", + "basic_data = vbt.Data.from_data(vbt.symbol_dict({\"BAC\": df}), tz_convert=zoneNY)\n", + "#m1_data = basic_data[['Open', 'High', 'Low', 'Close', 'Volume']]\n", + "basic_data = basic_data.transform(lambda df: df.between_time('09:30', '16:00'))\n", + "#basic_data.info()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vbt.open_api_ref(vbt.base)\n", + "\n", + "vbt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "##na toto udelat crosssvalidationu nebo alespon na testovacim ci jinem obdobi\n", + "#take udelat long leg - tato je shortovaci\n", + "\n", + "#8\t-0.06\t-0.2\t0.0028\t0.0048\t4.156254\n", + "\n", + "#short combination ok for train(4)/test(0.1) (window 1-90, fe 95-100)\n", + "#2,\t-0.02,\t-0.25,\t0.0018,\t0.0068\n", + "\n", + "#dalsi ok hodnota shortu for train/test 4/1\n", + "#70,\t8,\t-0.06,\t-0.2,\t0.0013,\t0.0053\t\n", + "\n", + "\n", + "#kombinace bez roc_th, train/test 7/-1.5\n", + "#70\t7\t-0.07\t0.0033\t0.0063\n", + "\n", + 
"#opet bez roc_th, train(5.77)/test 0.9 - spolus tsl_stop + tsl_th\n", + "#29\t7\t-0.09\t0.0033\t0.0068\n", + "\n", + "#bez roc_th a s trailing sl train/test 8.1/-0.8 \n", + "#70\t2\t-0.05\t0.0018\t0.0068\n", + "\n", + "\n", + "# TODO:\n", + "#- vyzkouset zda nejvyhodnejsi kombinace krom train/testu funguje i na nasledujicich dnech po trainu\n", + "# -zkusit najit v short datasetu neco vyhodneho co funguji i na testu\n", + "# - dodelat kombinace pro long signaly\n", + "# - zkusit walk forward\n", + "# - vytvorit vysledkove totoznou na v2realbot\n", + "# - podivat se jak detailne funguji tsl_stop a tsl_th\n", + "\n", + "#70,\t4,\t-0.07,\t0.0048,\t0.0068\t\n", + "\n", + "\n", + "entry_window_closes, mom_timeperiod, mom_th, sl_stop, tp_stop = 8,\t3,\t0.07,\t0.0028,\t0.0033\t\n", + "roc_th = 0\n", + "momshort = vbt.indicator(\"talib:MOM\").run(basic_data.close, timeperiod=mom_timeperiod, short_name = \"slope_short\")\n", + "rocp = vbt.indicator(\"talib:ROC\").run(basic_data.close, short_name = \"rocp\")\n", + "#rate of change + momentum\n", + "\n", + "#momshort.plot rocp.real_crossed_below(roc_th) & \n", + "short_signal = momshort.real_crossed_below(mom_th)\n", + "\n", + "long_signal = momshort.real_crossed_above(mom_th)\n", + "\n", + "# print(\"short signal\")\n", + "# print(short_signal.value_counts())\n", + "\n", + "#forced_exit = pd.Series(False, index=close.index)\n", + "forced_exit = basic_data.symbol_wrapper.fill(False)\n", + "#entry_window_open = pd.Series(False, index=close.index)\n", + "entry_window_open= basic_data.symbol_wrapper.fill(False)\n", + "\n", + "# Calculate the time difference in minutes from market open for each timestamp\n", + "elapsed_min_from_open = (forced_exit.index.hour - market_open.hour) * 60 + (forced_exit.index.minute - market_open.minute)\n", + "\n", + "entry_window_open[(elapsed_min_from_open >= entry_window_opens) & (elapsed_min_from_open < entry_window_closes)] = True\n", + "\n", + "#print(entry_window_open.value_counts())\n", + 
"\n", + "forced_exit[(elapsed_min_from_open >= forced_exit_start) & (elapsed_min_from_open < forced_exit_end)] = True\n", + "short_entries = (short_signal & entry_window_open)\n", + "short_exits = forced_exit\n", + "\n", + "entries = (long_signal & entry_window_open)\n", + "exits = forced_exit\n", + "#long_entries.info()\n", + "#number of trues and falses in long_entries\n", + "# print(short_exits.value_counts())\n", + "# print(short_entries.value_counts())\n", + "\n", + "#fig = plot_2y_close([],[momshort, rocp], close)\n", + "#short_signal.vbt.signals.plot_as_entries(close, fig=fig, add_trace_kwargs=dict(secondary_y=False))\n", + "#print(sl_stop)\n", + "#short_entries=short_entries, short_exits=short_exits,\n", + "pf = vbt.Portfolio.from_signals(close=basic_data, entries=entries, exits=exits, tsl_stop=sl_stop, tp_stop = tp_stop, fees=0.0167/100, freq=\"1s\") #sl_stop=sl_stop, tp_stop = sl_stop,\n", + "\n", + "pf.stats()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf.plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf.get_drawdowns().records_readable" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf.orders.records_readable" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/research/ohlc_persistance_test.ipynb b/research/ohlc_persistance_test.ipynb new file mode 100644 index 0000000..2d6e48b --- /dev/null +++ b/research/ohlc_persistance_test.ipynb @@ -0,0 +1,252 @@ +{ + 
"cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Test persistance\n", + "\n", + "ohlcv and trades persistence with bar type and trade filtering and minsize support\n", + "\n", + "```\n", + "/OHLCV/\n", + " ├── {bar_type}/ (1s)\n", + " │ ├── {resolution}/\n", + " │ │ ├── {filtered_trades}-{min_trade_size}/\n", + " │ │ │ ├── {day}/\n", + " │ │ │ │ └── hashedname.parquet\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from v2realbot.tools.loadbatch import load_batch\n", + "from v2realbot.utils.utils import zoneNY\n", + "import pandas as pd\n", + "import numpy as np\n", + "import vectorbtpro as vbt\n", + "from itables import init_notebook_mode, show\n", + "import datetime\n", + "from itertools import product\n", + "from v2realbot.config import ACCOUNT1_PAPER_API_KEY, ACCOUNT1_PAPER_SECRET_KEY, DATA_DIR\n", + "import v2realbot.utils.config_handler as cfh\n", + "init_notebook_mode(all_interactive=True)\n", + "from v2realbot.enums.enums import BarType\n", + "\n", + "vbt.settings.set_theme(\"dark\")\n", + "vbt.settings['plotting']['layout']['width'] = 1280\n", + "vbt.settings.plotting.auto_rangebreaks = True\n", + "# Set the option to display with pagination\n", + "pd.set_option('display.notebook_repr_html', True)\n", + "pd.set_option('display.max_rows', 10) # Number of rows per page\n", + "\n", + "# Define the market open and close times\n", + "market_open = datetime.time(9, 30)\n", + "market_close = datetime.time(16, 0)\n", + "entry_window_opens = 1\n", + "entry_window_closes = 370\n", + "\n", + "forced_exit_start = 380\n", + "forced_exit_end = 390\n", + "\n", + "#LOAD FROM BATCH\n", + "# res, df = load_batch(batch_id=\"f1ac6651\", #138170bc 0fb5043a bde6d0be f1ac6651\n", + "# space_resolution_evenly=False,\n", + "# indicators_columns=[\"Rsi14\"],\n", + "# main_session_only=True,\n", + "# verbose = False)\n", + "# if res < 0:\n", + "# print(\"Error\" + str(res) + 
str(df))\n", + "# df = df[\"bars\"]\n", + "\n", + "# basic_data = vbt.Data.from_data(vbt.symbol_dict({\"BAC\": df}), tz_convert=zoneNY)\n", + "# #m1_data = basic_data[['Open', 'High', 'Low', 'Close', 'Volume']]\n", + "# basic_data = basic_data.transform(lambda df: df.between_time('09:30', '16:00'))\n", + "# #basic_data.info()\n", + "\n", + "#LOAD FROM PARQUET\n", + "#list all files is dir directory with parquet extension\n", + "dir = DATA_DIR + \"/notebooks/\"\n", + "import os\n", + "files = [f for f in os.listdir(dir) if f.endswith(\".parquet\")]\n", + "print('\\n'.join(map(str, files)))\n", + "file_name = \"ohlcv_df-BAC-2023-01-01T09_30_00-2024-05-25T16_00_00-47BCFOPUVWZ-100.parquet\"\n", + "ohlcv_df = pd.read_parquet(dir+file_name,engine='pyarrow')\n", + "\n", + "#filter ohlcv_df to certain date range (assuming datetime index)\n", + "#ohlcv_df = ohlcv_df.loc[\"2024-05-14 09:30\":\"2024-05-15 09:35\"]\n", + "\n", + "#add vwap column to ohlcv_df\n", + "#ohlcv_df[\"hlcc4\"] = (ohlcv_df[\"close\"] + ohlcv_df[\"high\"] + ohlcv_df[\"low\"] + ohlcv_df[\"close\"]) / 4\n", + "\n", + "basic_data = vbt.Data.from_data(vbt.symbol_dict({\"BAC\": ohlcv_df}), tz_convert=zoneNY)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#basic_data.data[\"BAC\"].info()\n", + "#ohlcv_df group by week number of rows\n", + "# ohlcv_df['close'].groupby(pd.Grouper(freq='ME')).mean()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#trade filtering\n", + "exclude_conditions = cfh.config_handler.get_val('AGG_EXCLUDED_TRADES') #standard ['C','O','4','B','7','V','P','W','U','Z','F']\n", + "minsize = 100\n", + "exclude_conditions_str = ''.join(exclude_conditions)\n", + "exclude_conditions_str" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "basic_data.data[\"BAC\"].info()" + ] + }, + { + "cell_type": 
"markdown", + "metadata": {}, + "source": [ + "Aim is to store\n", + "OHLCV grouped by symbol, day, resolution\n", + "and \n", + "bar type\n", + "excluded_conditions\n", + "minsize\n", + "main session" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "bartype= BarType.TIME\n", + "resolution = \"1s\"\n", + "trade_filter = exclude_conditions_str+\"-\"+str(minsize)\n", + "dir = \"/OHLCV/\"+bartype+\"/\"+resolution+\"/\"+trade_filter+\"/\"\n", + "#dir = DATA_DIR + dir\n", + "basic_data.to_parquet(partition_by=\"day\", keep_groupby_names=False, path_or_buf=dir, mkdir_kwargs=dict(mkdir=True)) \n", + "#partition_by=\"day\",\n", + "\n", + "#naloaduje partitionvana 1s data skrz 90 dni za 2s\n", + "#day_data = vbt.ParquetData.pull(\"BAC\", paths=dir, filters=[(\"group\", \">\", \"2024-01-02\"),(\"group\", \"<=\", \"2024-01-09\")]) #, \n", + "# day_data[\"2024-05-01\":\"2024-05-14\"].get()\n", + "\n", + "# day_data.data[\"BAC\"].info()\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#naloaduje partitionvana 1s data skrz 90 dni za 2s\n", + "day_data = vbt.ParquetData.pull(\"BAC\", paths=dir, filters=[(\"group\", \">=\", \"2024-01-02\"),(\"group\", \"<=\", \"2024-01-09\")]) #, \n", + "# day_data[\"2024-05-01\":\"2024-05-14\"].get()\n", + "\n", + "day_data.data[\"BAC\"].info()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "close = basic_data.close\n", + "#group by close by day, using pandas grouper\n", + "#close.groupby(pd.Grouper(freq='ME')).mean()\n", + "\n", + "#using Grouper of vectorbtpro\n", + "#close.vbt.group_by(pd.Grouper(freq='ME')).mean()\n", + "\n", + "#basic_data.wrapper.get_columns()\n", + "basic_data.wrapper.get_freq()\n", + "# vbt.pdir(basic_data.wrapper)\n", + "# basic_data.wrapper\n", + "basic_data.wrapper.grouper.is_grouped()\n", + "\n", + 
"vbt.pdir(basic_data.wrapper.grouper)\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "grouper = basic_data.wrapper.index.vbt.get_grouper(\"ME\")\n", + "\n", + "for group, group_idx in grouper:\n", + " print(group, group_idx)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#prevede 1milion dat (6mes 1s) na dict za 10ss\n", + "df = day_data.data[\"BAC\"]\n", + "df_dict = df.to_dict(orient='list')\n", + "\n", + "# Convert the index (which is the time) to a list of float timestamps\n", + "df_dict['time'] = [timestamp.timestamp() for timestamp in df.index]\n", + "\n", + "df_dict" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/research/prepare_aggregatied_data.ipynb b/research/prepare_aggregatied_data.ipynb new file mode 100644 index 0000000..4a20f11 --- /dev/null +++ b/research/prepare_aggregatied_data.ipynb @@ -0,0 +1,458 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Loading trades and vectorized aggregation\n", + "This notebook fetches the trades from remote or local cache and aggregates them to bars of given type (time, volume, dollar) and resolution\n", + "\n", + "`fetch_trades_parallel` enables to fetch trades of given symbol and interval, also can filter conditions and minimum size. 
return `trades_df`\n", + "`aggregate_trades` acceptss `trades_df` and ressolution and type of bars (VOLUME, TIME, DOLLAR) and return aggregated ohlcv dataframe `ohlcv_df`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from dotenv import load_dotenv\n", + "\n", + "#as V2realbot is client , load env variables here\n", + "env_file = \"/Users/davidbrazda/Documents/Development/python/.env\"\n", + "# Load the .env file\n", + "load_dotenv(env_file)\n", + "\n", + "import pandas as pd\n", + "import numpy as np\n", + "from numba import jit\n", + "from alpaca.data.historical import StockHistoricalDataClient\n", + "from v2realbot.config import ACCOUNT1_PAPER_API_KEY, ACCOUNT1_PAPER_SECRET_KEY, DATA_DIR\n", + "from alpaca.data.requests import StockTradesRequest\n", + "from v2realbot.enums.enums import BarType\n", + "import time\n", + "from datetime import datetime\n", + "from v2realbot.utils.utils import parse_alpaca_timestamp, ltp, zoneNY, send_to_telegram, fetch_calendar_data\n", + "import pyarrow\n", + "from v2realbot.loader.aggregator_vectorized import fetch_daily_stock_trades, fetch_trades_parallel, generate_time_bars_nb, aggregate_trades\n", + "import vectorbtpro as vbt\n", + "import v2realbot.utils.config_handler as cfh\n", + "from appdirs import user_data_dir\n", + "from pathlib import Path\n", + "\n", + "vbt.settings.set_theme(\"dark\")\n", + "vbt.settings['plotting']['layout']['width'] = 1280\n", + "vbt.settings.plotting.auto_rangebreaks = True\n", + "# Set the option to display with pagination\n", + "pd.set_option('display.notebook_repr_html', True)\n", + "pd.set_option('display.max_rows', 20) # Number of rows per page\n", + "# pd.set_option('display.float_format', '{:.9f}'.format)\n", + "\n", + "\n", + "#trade filtering\n", + "exclude_conditions = cfh.config_handler.get_val('AGG_EXCLUDED_TRADES') #standard ['C','O','4','B','7','V','P','W','U','Z','F']\n", + "minsize = 100\n", + "\n", + "symbol = 
\"BAC\"\n", + "#datetime in zoneNY \n", + "day_start = datetime(2023, 1, 1, 9, 30, 0)\n", + "day_stop = datetime(2024, 5, 25, 15, 30, 0)\n", + "day_start = zoneNY.localize(day_start)\n", + "day_stop = zoneNY.localize(day_stop)\n", + "#filename of trades_df parquet, date are in isoformat but without time zone part\n", + "dir = DATA_DIR + \"/notebooks/\"\n", + "#parquet interval cache contains exclude conditions and minsize filtering\n", + "file_trades = dir + f\"trades_df-{symbol}-{day_start.strftime('%Y-%m-%dT%H_%M_%S')}-{day_stop.strftime('%Y-%m-%dT%H_%M_%S')}-{''.join(exclude_conditions)}-{minsize}.parquet\"\n", + "#file_trades = dir + f\"trades_df-{symbol}-{day_start.strftime('%Y-%m-%dT%H:%M:%S')}-{day_stop.strftime('%Y-%m-%dT%H:%M:%S')}.parquet\"\n", + "file_ohlcv = dir + f\"ohlcv_df-{symbol}-{day_start.strftime('%Y-%m-%dT%H_%M_%S')}-{day_stop.strftime('%Y-%m-%dT%H_%M_%S')}-{''.join(exclude_conditions)}-{minsize}.parquet\"\n", + "print(file_trades)\n", + "print(file_ohlcv)\n", + "#PRINT all parquet in directory\n", + "import os\n", + "files = [f for f in os.listdir(dir) if f.endswith(\".parquet\")]\n", + "for f in files:\n", + " print(f)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#Either load trades or ohlcv from parquet if exists\n", + "#trades_df = fetch_trades_parallel(symbol, day_start, day_stop, exclude_conditions=exclude_conditions, minsize=minsize, max_workers=30) #exclude_conditions=['C','O','4','B','7','V','P','W','U','Z','F'])\n", + "#trades_df.to_parquet(file_trades, engine='pyarrow', compression='gzip')\n", + "#trades_df.to_parquet(file_trades, engine='pyarrow', compression='gzip')\n", + "#filenames = [dir+\"trades_df-BAC-2024-01-01T09_30_00-2024-05-14T16_00_00-CO4B7VPWUZF-100.parquet\",dir+\"trades_df-BAC-2024-05-15T09_30_00-2024-05-25T16_00_00-47BCFOPUVWZ-100.parquet\"]\n", + "trades_df = 
pd.read_parquet(dir+\"trades_df-BAC-2023-01-01T09_30_00-2024-05-25T16_00_00-47BCFOPUVWZ-100.parquet\",engine='pyarrow')\n", + "#focused = trades_df.loc[\"2024-02-16 11:23:11\":\"2024-02-16 11:24:26\"]\n", + "#focused\n", + "ohlcv_df = aggregate_trades(symbol=symbol, trades_df=trades_df, resolution=1, type=BarType.TIME)\n", + "ohlcv_df.to_parquet(file_ohlcv, engine='pyarrow', compression='gzip')\n", + "\n", + "#ohlcv_df = pd.read_parquet(file_ohlcv,engine='pyarrow')\n", + "# trades_df = pd.read_parquet(file_trades,engine='pyarrow')\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "trades_df = None\n", + "ohlcv_df = None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#ohlcv_df.info()\n", + "#trades_df.info()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "a = trades_df.loc[(\"BAC\", \"2024-02-16 09:30\"):(\"BAC\",\"2024-02-16 09:32:11\")]\n", + "a" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ohlcv_df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#trades_df.info()\n", + "focused = trades_df.loc[(\"BAC\", \"2024-02-16 09:30:00\"):(\"BAC\", \"2024-02-16 10:24:26\")]\n", + "focused" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "trades_df.loc[\"2024-02-16 09:30:00\":\"2024-02-16 10:24:26\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "focohlc = ohlcv_df.loc[\"2024-02-16 09:30:00\":\"2024-02-16 10:24:26\"]\n", + "focohlc\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "focohlc.info()" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#trades_df.to_parquet(dir + \"trades_df-BAC-2024-01-01T09:30:00-2024-05-14T16:00:00-CO4B7VPWUZF-100.parquet\", engine='pyarrow', compression='gzip')\n", + "#trades_df = pd.read_parquet(dir + \"trades_df-BAC-2024-01-01T09:30:00-2024-05-14T16:00:00-CO4B7VPWUZF-100.parquet\",engine='pyarrow')\n", + "\n", + "#trades_df.to_parquet(file_trades, engine='pyarrow', compression='gzip')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "trades_df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "file_trades" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#list all files is dir directory with parquet extension\n", + "dir = DATA_DIR + \"/notebooks/\"\n", + "import os\n", + "files = [f for f in os.listdir(dir) if f.endswith(\".parquet\")]\n", + "file_name = \"\"\n", + "ohlcv_df = pd.read_parquet(file_ohlcv,engine='pyarrow')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ohlcv_df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "# Calculate daily returns\n", + "ohlcv_df['returns'] = ohlcv_df['close'].pct_change().dropna()\n", + "#same as above but pct_change is from 3 datapoints back, but only if it is the same date, else na\n", + "\n", + "\n", + "# Plot the probability distribution curve\n", + "plt.figure(figsize=(10, 6))\n", + "sns.histplot(df['returns'].dropna(), kde=True, stat='probability', bins=30)\n", + "plt.title('Probability Distribution of Daily Returns')\n", + "plt.xlabel('Daily Returns')\n", + "plt.ylabel('Probability')\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 
null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.preprocessing import StandardScaler\n", + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.metrics import accuracy_score\n", + "\n", + "# Define the intervals from 5 to 20 s, returns for each interval\n", + "#maybe use rolling window?\n", + "intervals = range(5, 21, 5)\n", + "\n", + "# Create columns for percentage returns\n", + "rolling_window = 50\n", + "\n", + "# Normalize the returns using rolling mean and std\n", + "for N in intervals:\n", + " column_name = f'returns_{N}'\n", + " rolling_mean = ohlcv_df[column_name].rolling(window=rolling_window).mean()\n", + " rolling_std = ohlcv_df[column_name].rolling(window=rolling_window).std()\n", + " ohlcv_df[f'norm_{column_name}'] = (ohlcv_df[column_name] - rolling_mean) / rolling_std\n", + "\n", + "# Display the dataframe with normalized return columns\n", + "ohlcv_df\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Calculate the sum of the normalized return columns for each row\n", + "ohlcv_df['sum_norm_returns'] = ohlcv_df[[f'norm_returns_{N}' for N in intervals]].sum(axis=1)\n", + "\n", + "# Sort the DataFrame based on the sum of normalized returns in descending order\n", + "df_sorted = ohlcv_df.sort_values(by='sum_norm_returns', ascending=False)\n", + "\n", + "# Display the top rows with the highest sum of normalized returns\n", + "df_sorted\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Drop initial rows with NaN values due to pct_change\n", + "ohlcv_df.dropna(inplace=True)\n", + "\n", + "# Plotting the probability distribution curves\n", + "plt.figure(figsize=(14, 8))\n", + "for N in intervals:\n", + " sns.kdeplot(ohlcv_df[f'returns_{N}'].dropna(), label=f'Returns 
{N}', fill=True)\n", + "\n", + "plt.title('Probability Distribution of Percentage Returns')\n", + "plt.xlabel('Percentage Return')\n", + "plt.ylabel('Density')\n", + "plt.legend()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "# Plot the probability distribution curve\n", + "plt.figure(figsize=(10, 6))\n", + "sns.histplot(ohlcv_df['returns'].dropna(), kde=True, stat='probability', bins=30)\n", + "plt.title('Probability Distribution of Daily Returns')\n", + "plt.xlabel('Daily Returns')\n", + "plt.ylabel('Probability')\n", + "plt.show()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#show only rows from ohlcv_df where returns > 0.005\n", + "ohlcv_df[ohlcv_df['returns'] > 0.0005]\n", + "\n", + "#ohlcv_df[ohlcv_df['returns'] < -0.005]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#ohlcv where index = date 2024-03-13 and between hour 12\n", + "\n", + "a = ohlcv_df.loc['2024-03-13 12:00:00':'2024-03-13 13:00:00']\n", + "a" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ohlcv_df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "trades_df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ohlcv_df.info()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "trades_df.to_parquet(\"trades_df-spy-0111-0111.parquett\", engine='pyarrow', compression='gzip')\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "trades_df.to_parquet(\"trades_df-spy-111-0516.parquett\", 
engine='pyarrow', compression='gzip', allow_truncated_timestamps=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ohlcv_df.to_parquet(\"ohlcv_df-spy-111-0516.parquett\", engine='pyarrow', compression='gzip')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "basic_data = vbt.Data.from_data(vbt.symbol_dict({symbol: ohlcv_df}), tz_convert=zoneNY)\n", + "vbt.settings['plotting']['auto_rangebreaks'] = True\n", + "basic_data.ohlcv.plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#access just BCA\n", + "#df_filtered = df.loc[\"BAC\"]" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/research/rsi_alpaca.ipynb b/research/rsi_alpaca.ipynb new file mode 100644 index 0000000..87d1dfe --- /dev/null +++ b/research/rsi_alpaca.ipynb @@ -0,0 +1,467 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from v2realbot.tools.loadbatch import load_batch\n", + "from v2realbot.utils.utils import zoneNY\n", + "import pandas as pd\n", + "import numpy as np\n", + "from datetime import timedelta, datetime\n", + "import vectorbtpro as vbt\n", + "import os\n", + "from itables import init_notebook_mode, show\n", + "\n", + "init_notebook_mode(all_interactive=True)\n", + "\n", + "vbt.settings.set_theme(\"dark\")\n", + "vbt.settings['plotting']['layout']['width'] = 1280\n", + "vbt.settings.plotting.auto_rangebreaks = True\n", + "# Set 
the option to display with pagination\n", + "pd.set_option('display.notebook_repr_html', True)\n", + "pd.set_option('display.max_rows', 10) # Number of rows per page\n", + "\n", + "# Alpaca API credentials\n", + "ALPACA_API_KEY = os.environ.get('ACCOUNT1_PAPER_API_KEY')\n", + "ALPACA_API_SECRET = os.environ.get('ACCOUNT1_PAPER_SECRET_KEY')\n", + "\n", + "# Initialize Alpaca data client\n", + "alpaca_data = vbt.AlpacaData.set_custom_settings(client_config=dict(\n", + " api_key=ALPACA_API_KEY,\n", + " secret_key=ALPACA_API_SECRET\n", + " )\n", + ")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Fetch Data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Define the symbol, start, and end dates for your data\n", + "symbol = 'BAC'\n", + "start_date = datetime.now() - timedelta(days=10) # Last 30 days\n", + "end_date = datetime.now() - timedelta(days=1) # yesterday\n", + "time_interval = '1T' # 1-minute intervals '1 minute'\n", + "\n", + "basic_data = vbt.AlpacaData.pull([\"BAC\"], start=start_date, end=end_date, timeframe=time_interval, tz=\"America/New_York\")\n", + "basic_data = basic_data.transform(lambda x: x.between_time(\"9:30\",\"16:00\"))\n", + "#basic_data.data[\"BAC\"].vbt.ohlcv.plot()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# filter dates" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#filter na dny\n", + "# dates_of_interest = pd.to_datetime(['2024-04-22', '2024-04-23']).tz_localize('US/Eastern')\n", + "# filtered_df = df.loc[df.index.normalize().isin(dates_of_interest)]\n", + "\n", + "# df = filtered_df\n", + "# df.info()\n", + "\n", + "basic_data.data[\"BAC\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "m1_data = basic_data[['Open', 'High', 'Low', 'Close', 'Volume']]\n", + "\n", + 
"m1_data.data[\"BAC\"]\n", + "#m5_data = m1_data.resample(\"5T\")\n", + "\n", + "#m5_data.data[\"BAC\"].head(10)\n", + "\n", + "m15_data = m1_data.resample(\"15T\")\n", + "\n", + "m15_data = m15_data.transform(lambda x: x.between_time(\"9:30\",\"15:59\"))\n", + "\n", + "m15 = m15_data.data[\"BAC\"]\n", + "\n", + "m15.vbt.ohlcv.plot()\n", + "\n", + "m15\n", + "\n", + "# m1_data.wrapper.index\n", + "\n", + "# m1_resampler = m1_data.wrapper.get_resampler(\"1T\")\n", + "# m1_resampler.index_difference(reverse=True)\n", + "\n", + "\n", + "# m5_resampler.prettify()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Calculate VWAP" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#WWAP\n", + "vbt.phelp(vbt.VWAP.run)\n", + "close = m1_data.close\n", + "high = m1_data.high\n", + "low = m1_data.low\n", + "volume = m1_data.volume\n", + "vwapD = vbt.VWAP.run(high, low, close, volume, anchor=\"D\")\n", + "# vwapT = vbt.VWAP.run(high, low, close, volume, anchor=\"T\")\n", + "\n", + "#vwap.vwap\n", + "\n", + "fig = m1_data.data[\"BAC\"].vbt.ohlcv.plot()\n", + "vwapD.vwap.vbt.plot(fig=fig)\n", + "#vwapT.vwap.vbt.plot(fig=fig)\n", + "fig.show()\n", + "\n", + "vwapD.vwap\n", + "\n", + "#vwap = vbt.VWAP.run()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "m1_data.data[\"BAC\"]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# defining ENTRY WINDOW and forced EXIT window" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#m1_data.data[\"BAC\"].info()\n", + "import datetime\n", + "# Define the market open and close times\n", + "market_open = datetime.time(9, 30)\n", + "market_close = datetime.time(16, 0)\n", + "entry_window_opens = 1\n", + "entry_window_closes = 350\n", + "\n", + "forced_exit_start = 360\n", + "forced_exit_end = 
390\n", + "\n", + "forced_exit = m1_data.symbol_wrapper.fill(False)\n", + "entry_window_open= m1_data.symbol_wrapper.fill(False)\n", + "\n", + "# Calculate the time difference in minutes from market open for each timestamp\n", + "elapsed_min_from_open = (forced_exit.index.hour - market_open.hour) * 60 + (forced_exit.index.minute - market_open.minute)\n", + "\n", + "entry_window_open[(elapsed_min_from_open >= entry_window_opens) & (elapsed_min_from_open < entry_window_closes)] = True\n", + "forced_exit[(elapsed_min_from_open >= forced_exit_start) & (elapsed_min_from_open < forced_exit_end)] = True\n", + "\n", + "#entry_window_open.info()\n", + "# forced_exit.tail(100)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "close = m1_data.close\n", + "\n", + "rsi = vbt.RSI.run(close, window=14)\n", + "\n", + "long_entries = (rsi.rsi.vbt.crossed_below(28) & entry_window_open)\n", + "long_exits = (rsi.rsi.vbt.crossed_above(70) | forced_exit)\n", + "#long_entries.info()\n", + "#number of trues and falses in long_entries\n", + "#long_entries.value_counts()\n", + "long_exits.value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "close" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def plot_rsi(rsi, close, entries, exits):\n", + " fig = vbt.make_subplots(rows=1, cols=1, shared_xaxes=True, specs=[[{\"secondary_y\": True}]], vertical_spacing=0.02, subplot_titles=(\"RSI\", \"Price\" ))\n", + " close.vbt.plot(fig=fig, add_trace_kwargs=dict(secondary_y=True))\n", + " rsi.plot(fig=fig, add_trace_kwargs=dict(secondary_y=False))\n", + " entries.vbt.signals.plot_as_entries(close, fig=fig, add_trace_kwargs=dict(secondary_y=True)) \n", + " exits.vbt.signals.plot_as_exits(close, fig=fig, add_trace_kwargs=dict(secondary_y=True)) \n", + " return fig\n", + "\n", + "plot_rsi(rsi, close, 
long_entries, long_exits)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "rsi.rsi" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vbt.phelp(vbt.Portfolio.from_signals)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sl_stop = np.arange(0.03/100, 0.4/100, 0.05/100).tolist()\n", + "# Using the round function\n", + "sl_stop = [round(val, 4) for val in sl_stop]\n", + "print(sl_stop)\n", + "sl_stop = vbt.Param(sl_stop) #np.nan mean s no stoploss\n", + "\n", + "pf = vbt.Portfolio.from_signals(close=close, entries=long_entries, exits=long_exits, sl_stop=sl_stop, tp_stop = sl_stop, fees=0.0167/100, freq=\"1s\") #sl_stop=sl_stop, tp_stop = sl_stop,\n", + "\n", + "pf.stats()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf[(0.0003,0.0018)].plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf[0.03].plot_trade_signals()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# pristup k pf jako multi index" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#pf[0.03].plot()\n", + "#pf.order_records\n", + "pf[(0.03)].stats()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#zgrupovane statistiky\n", + "stats_df = pf.stats([\n", + " 'total_return',\n", + " 'total_trades',\n", + " 'win_rate',\n", + " 'expectancy'\n", + "], agg_func=None)\n", + "stats_df\n", + "\n", + "\n", + "stats_df.nlargest(10, 'Total Return [%]')\n", + "#stats_df.info()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + 
"pf[(0.0011,0.0013000000000000002)].plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pandas.tseries.offsets import DateOffset\n", + "\n", + "temp_data = basic_data['2024-4-22']\n", + "temp_data\n", + "res1m = temp_data[[\"Open\", \"High\", \"Low\", \"Close\", \"Volume\"]]\n", + "\n", + "# Define a custom date offset that starts at 9:30 AM and spans 4 hours\n", + "custom_offset = DateOffset(hours=4, minutes=30)\n", + "\n", + "# res1m = res1m.get().resample(\"4H\").agg({ \n", + "# \"Open\": \"first\",\n", + "# \"High\": \"max\",\n", + "# \"Low\": \"min\",\n", + "# \"Close\": \"last\",\n", + "# \"Volume\": \"sum\"\n", + "# })\n", + "\n", + "res4h = res1m.resample(\"1h\", resample_kwargs=dict(origin=\"start\"))\n", + "\n", + "res4h.data\n", + "\n", + "res15m = res1m.resample(\"15T\", resample_kwargs=dict(origin=\"start\"))\n", + "\n", + "res15m.data[\"BAC\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "@vbt.njit\n", + "def long_entry_place_func_nb(c, low, close, time_in_ns, rsi14, window_open, window_close):\n", + " market_open_minutes = 570 # 9 hours * 60 minutes + 30 minutes\n", + "\n", + " for out_i in range(len(c.out)):\n", + " i = c.from_i + out_i\n", + "\n", + " current_minutes = vbt.dt_nb.hour_nb(time_in_ns[i]) * 60 + vbt.dt_nb.minute_nb(time_in_ns[i])\n", + " #print(\"current_minutes\", current_minutes)\n", + " # Calculate elapsed minutes since market open at 9:30 AM\n", + " elapsed_from_open = current_minutes - market_open_minutes\n", + " elapsed_from_open = elapsed_from_open if elapsed_from_open >= 0 else 0\n", + " #print( \"elapsed_from_open\", elapsed_from_open)\n", + "\n", + " #elapsed_from_open = elapsed_minutes_from_open_nb(time_in_ns) \n", + " in_window = elapsed_from_open > window_open and elapsed_from_open < window_close\n", + " #print(\"in_window\", in_window)\n", + " # if in_window:\n", + " # 
print(\"in window\")\n", + "\n", + " if in_window and rsi14[i] > 60: # and low[i, c.col] <= hit_price: # and hour == 9: # (4)!\n", + " return out_i\n", + " return -1\n", + "\n", + "@vbt.njit\n", + "def long_exit_place_func_nb(c, high, close, time_index, tp, sl): # (5)!\n", + " entry_i = c.from_i - c.wait\n", + " entry_price = close[entry_i, c.col]\n", + " hit_price = entry_price * (1 + tp)\n", + " stop_price = entry_price * (1 - sl)\n", + " for out_i in range(len(c.out)):\n", + " i = c.from_i + out_i\n", + " last_bar_of_day = vbt.dt_nb.day_changed_nb(time_index[i], time_index[i + 1])\n", + "\n", + " #print(next_day)\n", + " if last_bar_of_day: #pokud je dalsi next day, tak zavirame posledni\n", + " print(\"ted\",out_i)\n", + " return out_i\n", + " if close[i, c.col] >= hit_price or close[i, c.col] <= stop_price :\n", + " return out_i\n", + " return -1\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.DataFrame(np.random.random(size=(5, 10)), columns=list('abcdefghij'))\n", + "\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df.sum()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/research/strat1/strat1_v1_MULTI.ipynb b/research/strat1/strat1_v1_MULTI.ipynb new file mode 100644 index 0000000..d1cc970 --- /dev/null +++ b/research/strat1/strat1_v1_MULTI.ipynb @@ -0,0 +1,949 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from 
v2realbot.tools.loadbatch import load_batch\n", + "from v2realbot.utils.utils import zoneNY\n", + "import pandas as pd\n", + "import numpy as np\n", + "import vectorbtpro as vbt\n", + "from itables import init_notebook_mode, show\n", + "import datetime\n", + "from itertools import product\n", + "from v2realbot.config import ACCOUNT1_PAPER_API_KEY, ACCOUNT1_PAPER_SECRET_KEY, DATA_DIR\n", + "\n", + "init_notebook_mode(all_interactive=True)\n", + "\n", + "vbt.settings.set_theme(\"dark\")\n", + "vbt.settings['plotting']['layout']['width'] = 1280\n", + "vbt.settings.plotting.auto_rangebreaks = True\n", + "# Set the option to display with pagination\n", + "pd.set_option('display.notebook_repr_html', True)\n", + "pd.set_option('display.max_rows', 10) # Number of rows per page\n", + "\n", + "# Define the market open and close times\n", + "market_open = datetime.time(9, 30)\n", + "market_close = datetime.time(16, 0)\n", + "entry_window_opens = 1\n", + "entry_window_closes = 370\n", + "\n", + "forced_exit_start = 380\n", + "forced_exit_end = 390\n", + "\n", + "#LOAD FROM BATCH\n", + "# res, df = load_batch(batch_id=\"0fb5043a\", #0fb5043a bde6d0be\n", + "# space_resolution_evenly=False,\n", + "# indicators_columns=[\"Rsi14\"],\n", + "# main_session_only=True,\n", + "# verbose = False)\n", + "# if res < 0:\n", + "# print(\"Error\" + str(res) + str(df))\n", + "# df = df[\"bars\"]\n", + "\n", + "# #df\n", + "\n", + "# basic_data = vbt.Data.from_data(vbt.symbol_dict({\"BAC\": df}), tz_convert=zoneNY)\n", + "# #m1_data = basic_data[['Open', 'High', 'Low', 'Close', 'Volume']]\n", + "# basic_data = basic_data.transform(lambda df: df.between_time('09:30', '16:00'))\n", + "\n", + "#LOAD FROM PARQUET\n", + "#LOAD FROM PARQUET\n", + "#list all files is dir directory with parquet extension\n", + "dir = DATA_DIR + \"/notebooks/\"\n", + "import os\n", + "files = [f for f in os.listdir(dir) if f.endswith(\".parquet\")]\n", + "#print('\\n'.join(map(str, files)))\n", + "file_name = 
\"ohlcv_df-SPY-2024-01-01T09:30:00-2024-05-14T16:00:00.parquet\"\n", + "ohlcv_df = pd.read_parquet(dir+file_name,engine='pyarrow')\n", + "basic_data = vbt.Data.from_data(vbt.symbol_dict({\"SPY\": ohlcv_df}), tz_convert=zoneNY)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#parameters (primary y line, secondary y line, close)\n", + "def plot_2y_close(priminds, secinds, close):\n", + " fig = vbt.make_subplots(rows=1, cols=1, shared_xaxes=True, specs=[[{\"secondary_y\": True}]], vertical_spacing=0.02, subplot_titles=(\"MOM\", \"Price\" ))\n", + " close.vbt.plot(fig=fig, add_trace_kwargs=dict(secondary_y=False), trace_kwargs=dict(line=dict(color=\"blue\")))\n", + " for ind in priminds:\n", + " ind.plot(fig=fig, add_trace_kwargs=dict(secondary_y=False))\n", + " for ind in secinds:\n", + " ind.plot(fig=fig, add_trace_kwargs=dict(secondary_y=True))\n", + " return fig\n", + "\n", + "# close = basic_data.xloc[\"09:30\":\"10:00\"].close" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#PIPELINE - FOR - LOOP\n", + "\n", + "#indicator parameters\n", + "mom_timeperiod = list(range(2, 12))\n", + "\n", + "#uzavreni okna od 1 do 200\n", + "#entry_window_closes = list(range(2, 50, 3))\n", + "entry_window_closes = [5, 10, 30, 45]\n", + "#entry_window_closes = 30\n", + "#threshold entries parameters\n", + "#long\n", + "mom_th = np.round(np.arange(0.01, 0.5 + 0.02, 0.02),4).tolist()#-0.02\n", + "# short\n", + "#mom_th = np.round(np.arange(-0.01, -0.3 - 0.02, -0.02),4).tolist()#-0.02\n", + "roc_th = np.round(np.arange(-0.2, -0.8 - 0.05, -0.05),4).tolist()#-0.2\n", + "#print(mom_th, roc_th)\n", + "\n", + "#portfolio simulation parameters\n", + "sl_stop =np.round(np.arange(0.02/100, 0.7/100, 0.05/100),4).tolist()\n", + "tp_stop = np.round(np.arange(0.02/100, 0.7/100, 0.05/100),4).tolist()\n", + "\n", + "combs = list(product(mom_timeperiod, mom_th, 
roc_th, sl_stop, tp_stop))\n", + "\n", + "@vbt.parameterized(merge_func = \"concat\", random_subset = 2000, show_progress=True) \n", + "def test_strat(entry_window_closes=60,\n", + " mom_timeperiod=2,\n", + " mom_th=-0.04,\n", + " #roc_th=-0.2,\n", + " sl_stop=0.19/100,\n", + " tp_stop=0.19/100):\n", + " # mom_timeperiod=2\n", + " # mom_th=-0.06\n", + " # roc_th=-0.2\n", + " # sl_stop=0.04/100\n", + " # tp_stop=0.04/100\n", + "\n", + " momshort = vbt.indicator(\"talib:MOM\").run(basic_data.close, timeperiod=mom_timeperiod, short_name = \"slope_short\")\n", + " rocp = vbt.indicator(\"talib:ROC\").run(basic_data.close, short_name = \"rocp\")\n", + " #rate of change + momentum\n", + "\n", + " #momshort.plot rocp.real_crossed_below(roc_th) & \n", + " #short_signal = momshort.real_crossed_below(mom_th)\n", + " long_signal = momshort.real_crossed_above(mom_th)\n", + " # print(\"short signal\")\n", + " # print(short_signal.value_counts())\n", + "\n", + " #forced_exit = pd.Series(False, index=close.index)\n", + " forced_exit = basic_data.symbol_wrapper.fill(False)\n", + " #entry_window_open = pd.Series(False, index=close.index)\n", + " entry_window_open= basic_data.symbol_wrapper.fill(False)\n", + "\n", + " #print(entry_window_closes, \"entry window closes\")\n", + " # Calculate the time difference in minutes from market open for each timestamp\n", + " elapsed_min_from_open = (forced_exit.index.hour - market_open.hour) * 60 + (forced_exit.index.minute - market_open.minute)\n", + "\n", + " entry_window_open[(elapsed_min_from_open >= entry_window_opens) & (elapsed_min_from_open < entry_window_closes)] = True\n", + "\n", + " #print(entry_window_open.value_counts())\n", + "\n", + " forced_exit[(elapsed_min_from_open >= forced_exit_start) & (elapsed_min_from_open < forced_exit_end)] = True\n", + " #short_entries = (short_signal & entry_window_open)\n", + " #short_exits = forced_exit\n", + " entries = (long_signal & entry_window_open)\n", + " exits = forced_exit\n", + " 
#long_entries.info()\n", + " #number of trues and falses in long_entries\n", + " #print(short_exits.value_counts())\n", + " #print(short_entries.value_counts())\n", + "\n", + " #fig = plot_2y_close([],[momshort, rocp], close)\n", + " #short_signal.vbt.signals.plot_as_entries(close, fig=fig, add_trace_kwargs=dict(secondary_y=False))\n", + " #print(sl_stop)\n", + " #tsl_th=sl_stop, \n", + " #short_entries=short_entries, short_exits=short_exits,\n", + " pf = vbt.Portfolio.from_signals(close=basic_data.close, entries=entries, exits=exits, tsl_stop=sl_stop, tp_stop = tp_stop, fees=0.0167/100, freq=\"1s\", price=\"close\") #sl_stop=sl_stop, tp_stop = sl_stop,\n", + " \n", + " return pf.stats([\n", + " 'total_return',\n", + " 'max_dd', \n", + " 'total_trades', \n", + " 'win_rate', \n", + " 'expectancy'\n", + " ])\n", + "\n", + "pf_results = test_strat(vbt.Param(entry_window_closes),\n", + " vbt.Param(mom_timeperiod),\n", + " vbt.Param(mom_th),\n", + " #vbt.Param(roc_th)\n", + " vbt.Param(sl_stop),\n", + " vbt.Param(tp_stop, condition=\"tp_stop > sl_stop\"))\n", + "pf_results = pf_results.unstack(level=-1)\n", + "pf_results.sort_values(by=[\"Total Return [%]\", \"Max Drawdown [%]\"], ascending=[False, True])\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#pf_results.load(\"10tiscomb.pickle\")\n", + "#pf_results.info()\n", + "\n", + "vbt.save(pf_results, \"8tiscomb_tsl.pickle\")\n", + "\n", + "# pf_results = vbt.load(\"8tiscomb_tsl.pickle\")\n", + "# pf_results\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# parallel_coordinates method¶\n", + "\n", + "# attach_px_methods..plot_func(\n", + "# *args,\n", + "# layout=None,\n", + "# **kwargs\n", + "# )\n", + "\n", + "# pf_results.vbt.px.parallel_coordinates() #ocdf\n", + "\n", + "res = pf_results.reset_index()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": 
{}, + "outputs": [], + "source": [ + "pf_results" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "from sklearn.decomposition import PCA\n", + "from sklearn.preprocessing import StandardScaler\n", + "import matplotlib.pyplot as plt\n", + "\n", + "# Assuming pf_results is your DataFrame\n", + "# Convert columns to numeric, assuming NaNs where conversion fails\n", + "metrics = ['Total Return [%]', 'Max Drawdown [%]', 'Total Trades']\n", + "for metric in metrics:\n", + " pf_results[metric] = pd.to_numeric(pf_results[metric], errors='coerce')\n", + "\n", + "# Handle missing values, for example filling with the median\n", + "pf_results['Max Drawdown [%]'].fillna(pf_results['Max Drawdown [%]'].median(), inplace=True)\n", + "\n", + "# Extract the metrics into a new DataFrame\n", + "data_for_pca = pf_results[metrics]\n", + "\n", + "# Standardize the data before applying PCA\n", + "scaler = StandardScaler()\n", + "data_scaled = scaler.fit_transform(data_for_pca)\n", + "\n", + "# Apply PCA\n", + "pca = PCA(n_components=2) # Adjust components as needed\n", + "principal_components = pca.fit_transform(data_scaled)\n", + "\n", + "# Create a DataFrame with the principal components\n", + "pca_results = pd.DataFrame(data=principal_components, columns=['PC1', 'PC2'])\n", + "\n", + "# Visualize the results\n", + "plt.figure(figsize=(8,6))\n", + "plt.scatter(pca_results['PC1'], pca_results['PC2'], alpha=0.5)\n", + "plt.xlabel('Principal Component 1')\n", + "plt.ylabel('Principal Component 2')\n", + "plt.title('PCA of Strategy Optimization Results')\n", + "plt.grid(True)\n", + "plt.savefig(\"ddd.png\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Check if there is any unnamed level and rename it\n", + "if None in 
df.index.names:\n", + " # Generate new names list replacing None with 'stat'\n", + " new_names = ['stat' if name is None else name for name in df.index.names]\n", + " df.index.set_names(new_names, inplace=True)\n", + "\n", + "rs= df\n", + "\n", + "rs.info()\n", + "\n", + "\n", + "# # Now, 'stat' is the name of the previously unnamed level\n", + "\n", + "# # Filter for 'Total Return' assuming it is a correct identifier in the 'stat' level\n", + "# total_return_series = df.xs('Total Return [%]', level='stat')\n", + "\n", + "# # Sort the Series to get the largest 'Total Return' values\n", + "# sorted_series = total_return_series.sort_values(ascending=False)\n", + "\n", + "# # Print the sorted filtered data\n", + "# sorted_series.head(20)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sorted_series.vbt.save()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#df.info()\n", + "total_return_series = df.xs('Total Return [%]')\n", + "sorted_series = total_return_series.sort_values(ascending=False)\n", + "\n", + "# Display the top N entries, e.g., top 5\n", + "sorted_series.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "comb_stats_df.nlargest(10, 'Total Return [%]')\n", + "#stats_df.info()\n", + "\n", + "\n", + "8\t-0.06\t-0.2\t0.0028\t0.0048\t4.156254\n", + "4 -0.02 -0.25 0.0028 0.0048 0.84433\n", + "3 -0.02 -0.25 0.0033 0.0023 Total Return [%] 0.846753\n", + "#2\t-0.04\t-0.2\t0.0019\t0.0019\n", + "# 2\t-0.04\t-0.2\t0.0019\t0.0019\t0.556919\t91\t60.43956\t0.00612\n", + "# 2\t-0.04\t-0.25\t0.0019\t0.0019\t0.556919\t91\t60.43956\t0.00612\n", + "# 2\t-0.04\t-0.3\t0.0019\t0.0019\t0.556919\t91\t60.43956\t0.00612\n", + "# 
2\t-0.04\t-0.35\t0.0019\t0.0019\t0.556919\t91\t60.43956\t0.00612\n", + "# 2\t-0.04\t-0.4\t0.0019\t0.0019\t0.556919\t91\t60.43956\t0.00612\n", + "# 2\t-0.04\t-0.2\t0.0019\t0.0017\t0.451338\t93\t63.44086\t0.004853\n", + "# 2\t-0.04\t-0.25\t0.0019\t0.0017\t0.451338\t93\t63.44086\t0.004853\n", + "# 2\t-0.04\t-0.3\t0.0019\t0.0017\t0.451338\t93\t63.44086\t0.004853\n", + "# 2\t-0.04\t-0.35\t0.0019\t0.0017\t0.451338\t93\t63.44086\t0.004853\n", + "# 2\t-0.04\t-0.4\t0.0019\t0.0017\t0.451338\t93\t63.44086\t0.004853" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf.plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "basic_data.symbols" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + ">>> def apply_func(ts, entries, exits, fastw, sloww, minp=None):\n", + "... fast_ma = vbt.nb.rolling_mean_nb(ts, fastw, minp=minp)\n", + "... slow_ma = vbt.nb.rolling_mean_nb(ts, sloww, minp=minp)\n", + "... entries[:] = vbt.nb.crossed_above_nb(fast_ma, slow_ma) \n", + "... exits[:] = vbt.nb.crossed_above_nb(slow_ma, fast_ma)\n", + "... return (fast_ma, slow_ma) \n", + "\n", + ">>> CrossSig = vbt.IF(\n", + "... class_name=\"CrossSig\",\n", + "... input_names=['ts'],\n", + "... in_output_names=['entries', 'exits'],\n", + "... param_names=['fastw', 'sloww'],\n", + "... output_names=['fast_ma', 'slow_ma']\n", + "... ).with_apply_func(\n", + "... apply_func,\n", + "... in_output_settings=dict(\n", + "... entries=dict(dtype=np.bool_), #initialize output with bool\n", + "... exits=dict(dtype=np.bool_)\n", + "... )\n", + "... 
)\n", + ">>> cross_sig = CrossSig.run(ts2, 2, 4)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#PIPELINE - parameters in one go\n", + "\n", + "\n", + "#TOTO prepsat do FOR-LOOPu\n", + "\n", + "\n", + "#indicator parameters\n", + "mom_timeperiod = list(range(2, 6))\n", + "\n", + "#threshold entries parameters\n", + "mom_th = np.round(np.arange(-0.02, -0.1 - 0.02, -0.02),4).tolist()#-0.02\n", + "roc_th = np.round(np.arange(-0.2, -0.4 - 0.05, -0.05),4).tolist()#-0.2\n", + "#print(mom_th, roc_th)\n", + "#jejich product\n", + "# mom_th_prod, roc_th_prod = zip(*product(mom_th, roc_th))\n", + "\n", + "# #convert threshold to vbt param\n", + "# mom_th_index = vbt.Param(mom_th_prod, name='mom_th_th') \n", + "# roc_th_index = vbt.Param(roc_th_prod, name='roc_th_th')\n", + "\n", + "mom_th = vbt.Param(mom_th, name='mom_th')\n", + "roc_th = vbt.Param(roc_th, name='roc_th')\n", + "\n", + "#portfolio simulation parameters\n", + "sl_stop = np.arange(0.03/100, 0.2/100, 0.02/100).tolist()\n", + "# Using the round function\n", + "sl_stop = [round(val, 4) for val in sl_stop]\n", + "tp_stop = np.arange(0.03/100, 0.2/100, 0.02/100).tolist()\n", + "# Using the round function\n", + "tp_stop = [round(val, 4) for val in tp_stop]\n", + "sl_stop = vbt.Param(sl_stop) #np.nan mean s no stoploss\n", + "tp_stop = vbt.Param(tp_stop) #np.nan mean s no stoploss\n", + "\n", + "\n", + "#def test_mom(window=14, mom_th=0.2, roc_th=0.2, sl_stop=0.03/100, tp_stop=0.03/100):\n", + "#close = basic_data.xloc[\"09:30\":\"10:00\"].close\n", + "momshort = vbt.indicator(\"talib:MOM\").run(basic_data.get(\"Close\"), timeperiod=mom_timeperiod, short_name = \"slope_short\")\n", + "\n", + "#ht_trendline = vbt.indicator(\"talib:HT_TRENDLINE\").run(close, short_name = \"httrendline\")\n", + "rocp = vbt.indicator(\"talib:ROC\").run(basic_data.get(\"Close\"), short_name = \"rocp\")\n", + "#rate of change + momentum\n", + "\n", + "rocp_signal = 
rocp.real_crossed_below(mom_th)\n", + "mom_signal = momshort.real_crossed_below(roc_th)\n", + "\n", + "#mom_signal\n", + "print(rocp_signal.info())\n", + "print(mom_signal.info())\n", + "#print(rocp.real)\n", + "\n", + "\n", + "short_signal = (mom_signal.vbt & rocp_signal)\n", + "\n", + "# #short_signal = (rocp.real_crossed_below(roc_th_index) & momshort.real_crossed_below(mom_th_index))\n", + "# forced_exit = m1_data.symbol_wrapper.fill(False)\n", + "# entry_window_open= m1_data.symbol_wrapper.fill(False)\n", + "\n", + "\n", + "# # Calculate the time difference in minutes from market open for each timestamp\n", + "# elapsed_min_from_open = (forced_exit.index.hour - market_open.hour) * 60 + (forced_exit.index.minute - market_open.minute)\n", + "\n", + "# entry_window_open[(elapsed_min_from_open >= entry_window_opens) & (elapsed_min_from_open < entry_window_closes)] = True\n", + "# forced_exit[(elapsed_min_from_open >= forced_exit_start) & (elapsed_min_from_open < forced_exit_end)] = True\n", + "# short_entries = (short_signal & entry_window_open)\n", + "# short_exits = forced_exit\n", + "# #long_entries.info()\n", + "# #number of trues and falses in long_entries\n", + "# #short_exits.value_counts()\n", + "# #short_entries.value_counts()\n", + "\n", + "\n", + "# pf = vbt.Portfolio.from_signals(close=close, short_entries=short_entries, short_exits=short_exits, sl_stop=sl_stop, tp_stop = tp_stop, fees=0.0167/100, freq=\"1s\") #sl_stop=sl_stop, tp_stop = sl_stop,\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# filter dates" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#filter na dny\n", + "dates_of_interest = pd.to_datetime(['2024-04-22']).tz_localize('US/Eastern')\n", + "filtered_df = df.loc[df.index.normalize().isin(dates_of_interest)]\n", + "\n", + "df = filtered_df\n", + "df.info()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + 
"metadata": {}, + "outputs": [], + "source": [ + "# import plotly.io as pio\n", + "# pio.renderers.default = 'notebook'\n", + "\n", + "#naloadujeme do vbt symbol as column\n", + "basic_data = vbt.Data.from_data({\"BAC\": df}, tz_convert=zoneNY)\n", + "\n", + "vbt.settings.plotting.auto_rangebreaks = True\n", + "#basic_data.data[\"BAC\"].vbt.ohlcv.plot()\n", + "\n", + "#basic_data.data[\"BAC\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "m1_data = basic_data[['Open', 'High', 'Low', 'Close', 'Volume']]\n", + "\n", + "m1_data.data[\"BAC\"]\n", + "#m5_data = m1_data.resample(\"5T\")\n", + "\n", + "#m5_data.data[\"BAC\"].head(10)\n", + "\n", + "# m15_data = m1_data.resample(\"15T\")\n", + "\n", + "# m15 = m15_data.data[\"BAC\"]\n", + "\n", + "# m15.vbt.ohlcv.plot()\n", + "\n", + "# m1_data.wrapper.index\n", + "\n", + "# m1_resampler = m1_data.wrapper.get_resampler(\"1T\")\n", + "# m1_resampler.index_difference(reverse=True)\n", + "\n", + "\n", + "# m5_resampler.prettify()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# MOM indicator" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vbt.phelp(vbt.indicator(\"talib:ROCP\").run)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "vyuzití rychleho klesani na sekundove urovni behem open rush\n", + "- MOM + ROC during open rush\n", + "- short signal\n", + "- pipeline kombinace thresholdu pro vstup mom_th, roc_th + hodnota sl_stop a tp_stop (pripadne trailing) - nalezeni optimalni kombinace atributu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# fig = plot_2y_close([ht_trendline],[momshort, rocp], close)\n", + "# short_signal.vbt.signals.plot_as_entries(close, fig=fig, add_trace_kwargs=dict(secondary_y=False)) \n", + "\n", + "#parameters (primary y line, secondary y 
line, close)\n", + "def plot_2y_close(priminds, secinds, close):\n", + " fig = vbt.make_subplots(rows=1, cols=1, shared_xaxes=True, specs=[[{\"secondary_y\": True}]], vertical_spacing=0.02, subplot_titles=(\"MOM\", \"Price\" ))\n", + " close.vbt.plot(fig=fig, add_trace_kwargs=dict(secondary_y=False), trace_kwargs=dict(line=dict(color=\"blue\")))\n", + " for ind in priminds:\n", + " ind.plot(fig=fig, add_trace_kwargs=dict(secondary_y=False))\n", + " for ind in secinds:\n", + " ind.plot(fig=fig, add_trace_kwargs=dict(secondary_y=True))\n", + " return fig\n", + "\n", + "close = m1_data.xloc[\"09:30\":\"10:00\"].close\n", + "momshort = vbt.indicator(\"talib:MOM\").run(close, timeperiod=3, short_name = \"slope_short\")\n", + "ht_trendline = vbt.indicator(\"talib:HT_TRENDLINE\").run(close, short_name = \"httrendline\")\n", + "rocp = vbt.indicator(\"talib:ROC\").run(close, short_name = \"rocp\")\n", + "#rate of change + momentum\n", + "short_signal = (rocp.real_crossed_below(-0.2) & momshort.real_crossed_below(-0.02))\n", + "#indlong = vbt.indicator(\"talib:MOM\").run(close, timeperiod=10, short_name = \"slope_long\")\n", + "fig = plot_2y_close([ht_trendline],[momshort, rocp], close)\n", + "short_signal.vbt.signals.plot_as_entries(close, fig=fig, add_trace_kwargs=dict(secondary_y=False)) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "close = m1_data.close\n", + "#vbt.phelp(vbt.OLS.run)\n", + "\n", + "#oer steepmnes of regression line\n", + "#talib.LINEARREG_SLOPE(close, timeperiod=timeperiod)\n", + "#a také ON BALANCE VOLUME - http://5.161.179.223:8000/static/js/vbt/api/indicators/custom/obv/index.html\n", + "\n", + "\n", + "\n", + "mom_ind = vbt.indicator(\"talib:MOM\") \n", + "#vbt.phelp(mom_ind.run)\n", + "\n", + "mom = mom_ind.run(close, timeperiod=10)\n", + "\n", + "plot_2y_close(mom, close)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# defining ENTRY WINDOW and 
forced EXIT window" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#m1_data.data[\"BAC\"].info()\n", + "import datetime\n", + "# Define the market open and close times\n", + "market_open = datetime.time(9, 30)\n", + "market_close = datetime.time(16, 0)\n", + "entry_window_opens = 2\n", + "entry_window_closes = 30\n", + "\n", + "forced_exit_start = 380\n", + "forced_exit_end = 390\n", + "\n", + "forced_exit = m1_data.symbol_wrapper.fill(False)\n", + "entry_window_open= m1_data.symbol_wrapper.fill(False)\n", + "\n", + "# Calculate the time difference in minutes from market open for each timestamp\n", + "elapsed_min_from_open = (forced_exit.index.hour - market_open.hour) * 60 + (forced_exit.index.minute - market_open.minute)\n", + "\n", + "entry_window_open[(elapsed_min_from_open >= entry_window_opens) & (elapsed_min_from_open < entry_window_closes)] = True\n", + "forced_exit[(elapsed_min_from_open >= forced_exit_start) & (elapsed_min_from_open < forced_exit_end)] = True\n", + "\n", + "#entry_window_open.info()\n", + "# forced_exit.tail(100)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "close = m1_data.close\n", + "\n", + "#rsi = vbt.RSI.run(close, window=14)\n", + "\n", + "short_entries = (short_signal & entry_window_open)\n", + "short_exits = forced_exit\n", + "#long_entries.info()\n", + "#number of trues and falses in long_entries\n", + "#short_exits.value_counts()\n", + "short_entries.value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def plot_rsi(close, entries, exits):\n", + " fig = vbt.make_subplots(rows=1, cols=1, shared_xaxes=True, specs=[[{\"secondary_y\": True}]], vertical_spacing=0.02, subplot_titles=(\"RSI\", \"Price\" ))\n", + " close.vbt.plot(fig=fig, add_trace_kwargs=dict(secondary_y=True))\n", + " #rsi.plot(fig=fig, 
add_trace_kwargs=dict(secondary_y=False))\n", + " entries.vbt.signals.plot_as_entries(close, fig=fig, add_trace_kwargs=dict(secondary_y=False)) \n", + " exits.vbt.signals.plot_as_exits(close, fig=fig, add_trace_kwargs=dict(secondary_y=False)) \n", + " return fig\n", + "\n", + "plot_rsi(close, short_entries, short_exits)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vbt.phelp(vbt.Portfolio.from_signals)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sl_stop = np.arange(0.03/100, 0.2/100, 0.02/100).tolist()\n", + "# Using the round function\n", + "sl_stop = [round(val, 4) for val in sl_stop]\n", + "print(sl_stop)\n", + "sl_stop = vbt.Param(sl_stop) #np.nan mean s no stoploss\n", + "\n", + "pf = vbt.Portfolio.from_signals(close=close, short_entries=short_entries, short_exits=short_exits, sl_stop=0.03/100, tp_stop = 0.03/100, fees=0.0167/100, freq=\"1s\") #sl_stop=sl_stop, tp_stop = sl_stop,\n", + "\n", + "#pf.stats()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#list of orders\n", + "#pf.orders.records_readable\n", + "#pf.orders.plots()\n", + "#pf.stats()\n", + "pf.stats()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf.plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf[(0.0015,0.0013)].plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf[0.03].plot_trade_signals()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# pristup k pf jako multi index" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#pf[0.03].plot()\n", + "#pf.order_records\n", + 
"pf[(0.03)].stats()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#zgrupovane statistiky\n", + "stats_df = pf.stats([\n", + " 'total_return',\n", + " 'total_trades',\n", + " 'win_rate',\n", + " 'expectancy'\n", + "], agg_func=None)\n", + "stats_df\n", + "\n", + "\n", + "stats_df.nlargest(10, 'Total Return [%]')\n", + "#stats_df.info()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf[(0.0011,0.0013000000000000002)].plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pandas.tseries.offsets import DateOffset\n", + "\n", + "temp_data = basic_data['2024-4-22']\n", + "temp_data\n", + "res1m = temp_data[[\"Open\", \"High\", \"Low\", \"Close\", \"Volume\"]]\n", + "\n", + "# Define a custom date offset that starts at 9:30 AM and spans 4 hours\n", + "custom_offset = DateOffset(hours=4, minutes=30)\n", + "\n", + "# res1m = res1m.get().resample(\"4H\").agg({ \n", + "# \"Open\": \"first\",\n", + "# \"High\": \"max\",\n", + "# \"Low\": \"min\",\n", + "# \"Close\": \"last\",\n", + "# \"Volume\": \"sum\"\n", + "# })\n", + "\n", + "res4h = res1m.resample(\"1h\", resample_kwargs=dict(origin=\"start\"))\n", + "\n", + "res4h.data\n", + "\n", + "res15m = res1m.resample(\"15T\", resample_kwargs=dict(origin=\"start\"))\n", + "\n", + "res15m.data[\"BAC\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "@vbt.njit\n", + "def long_entry_place_func_nb(c, low, close, time_in_ns, rsi14, window_open, window_close):\n", + " market_open_minutes = 570 # 9 hours * 60 minutes + 30 minutes\n", + "\n", + " for out_i in range(len(c.out)):\n", + " i = c.from_i + out_i\n", + "\n", + " current_minutes = vbt.dt_nb.hour_nb(time_in_ns[i]) * 60 + vbt.dt_nb.minute_nb(time_in_ns[i])\n", + " #print(\"current_minutes\", 
current_minutes)\n", + " # Calculate elapsed minutes since market open at 9:30 AM\n", + " elapsed_from_open = current_minutes - market_open_minutes\n", + " elapsed_from_open = elapsed_from_open if elapsed_from_open >= 0 else 0\n", + " #print( \"elapsed_from_open\", elapsed_from_open)\n", + "\n", + " #elapsed_from_open = elapsed_minutes_from_open_nb(time_in_ns) \n", + " in_window = elapsed_from_open > window_open and elapsed_from_open < window_close\n", + " #print(\"in_window\", in_window)\n", + " # if in_window:\n", + " # print(\"in window\")\n", + "\n", + " if in_window and rsi14[i] > 60: # and low[i, c.col] <= hit_price: # and hour == 9: # (4)!\n", + " return out_i\n", + " return -1\n", + "\n", + "@vbt.njit\n", + "def long_exit_place_func_nb(c, high, close, time_index, tp, sl): # (5)!\n", + " entry_i = c.from_i - c.wait\n", + " entry_price = close[entry_i, c.col]\n", + " hit_price = entry_price * (1 + tp)\n", + " stop_price = entry_price * (1 - sl)\n", + " for out_i in range(len(c.out)):\n", + " i = c.from_i + out_i\n", + " last_bar_of_day = vbt.dt_nb.day_changed_nb(time_index[i], time_index[i + 1])\n", + "\n", + " #print(next_day)\n", + " if last_bar_of_day: #pokud je dalsi next day, tak zavirame posledni\n", + " print(\"ted\",out_i)\n", + " return out_i\n", + " if close[i, c.col] >= hit_price or close[i, c.col] <= stop_price :\n", + " return out_i\n", + " return -1\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.DataFrame(np.random.random(size=(5, 10)), columns=list('abcdefghij'))\n", + "\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df.sum()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": 
"text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/research/strat1/strat1_v1_SINGLE.ipynb b/research/strat1/strat1_v1_SINGLE.ipynb new file mode 100644 index 0000000..10d6033 --- /dev/null +++ b/research/strat1/strat1_v1_SINGLE.ipynb @@ -0,0 +1,265 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "celkovy optimalizacni backtest na vetsim oknu 1 - 300\n", + "a možná take to udělat jako parametr\n", + "zkusit CV\n", + "zobrazit nejak robustnost parametru" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from v2realbot.tools.loadbatch import load_batch\n", + "from v2realbot.utils.utils import zoneNY\n", + "import pandas as pd\n", + "import numpy as np\n", + "import vectorbtpro as vbt\n", + "from itables import init_notebook_mode, show\n", + "import datetime\n", + "from itertools import product\n", + "from v2realbot.config import ACCOUNT1_PAPER_API_KEY, ACCOUNT1_PAPER_SECRET_KEY, DATA_DIR\n", + "\n", + "init_notebook_mode(all_interactive=True)\n", + "\n", + "vbt.settings.set_theme(\"dark\")\n", + "vbt.settings['plotting']['layout']['width'] = 1280\n", + "vbt.settings.plotting.auto_rangebreaks = True\n", + "vbt.settings.returns.year_freq = \"252 days\" \n", + "# Set the option to display with pagination\n", + "pd.set_option('display.notebook_repr_html', True)\n", + "pd.set_option('display.max_rows', 10) # Number of rows per page\n", + "\n", + "# Define the market open and close times\n", + "market_open = datetime.time(9, 30)\n", + "market_close = datetime.time(16, 0)\n", + "entry_window_opens = 1\n", + "entry_window_closes = 370\n", + "\n", + "forced_exit_start = 380\n", + "forced_exit_end = 390\n", + "\n", + "#LOAD FROM BATCH\n", + "# res, df = load_batch(batch_id=\"f1ac6651\", #138170bc 0fb5043a bde6d0be f1ac6651\n", + "# 
space_resolution_evenly=False,\n", + "# indicators_columns=[\"Rsi14\"],\n", + "# main_session_only=True,\n", + "# verbose = False)\n", + "# if res < 0:\n", + "# print(\"Error\" + str(res) + str(df))\n", + "# df = df[\"bars\"]\n", + "\n", + "# basic_data = vbt.Data.from_data(vbt.symbol_dict({\"BAC\": df}), tz_convert=zoneNY)\n", + "# #m1_data = basic_data[['Open', 'High', 'Low', 'Close', 'Volume']]\n", + "# basic_data = basic_data.transform(lambda df: df.between_time('09:30', '16:00'))\n", + "# #basic_data.info()\n", + "\n", + "#LOAD FROM PARQUET\n", + "#list all files is dir directory with parquet extension\n", + "dir = DATA_DIR + \"/notebooks/\"\n", + "import os\n", + "files = [f for f in os.listdir(dir) if f.endswith(\".parquet\")]\n", + "print('\\n'.join(map(str, files)))\n", + "file_name = \"ohlcv_df-SPY-2024-01-01T09:30:00-2024-05-14T16:00:00.parquet\"\n", + "ohlcv_df = pd.read_parquet(dir+file_name,engine='pyarrow')\n", + "basic_data = vbt.Data.from_data(vbt.symbol_dict({\"SPY\": ohlcv_df}), tz_convert=zoneNY)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# basic_data.stats()\n", + "\n", + "basic_data.data[\"SPY\"].info()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vbt.open_api_ref(vbt.base)\n", + "\n", + "vbt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "##na toto udelat crosssvalidationu nebo alespon na testovacim ci jinem obdobi\n", + "#take udelat long leg - tato je shortovaci\n", + "\n", + "#8\t-0.06\t-0.2\t0.0028\t0.0048\t4.156254\n", + "\n", + "#short combination ok for train(4)/test(0.1) (window 1-90, fe 95-100)\n", + "#2,\t-0.02,\t-0.25,\t0.0018,\t0.0068\n", + "\n", + "#dalsi ok hodnota shortu for train/test 4/1\n", + "#70,\t8,\t-0.06,\t-0.2,\t0.0013,\t0.0053\t\n", + "\n", + "\n", + "#kombinace bez roc_th, train/test 7/-1.5\n", + 
"#70\t7\t-0.07\t0.0033\t0.0063\n", + "\n", + "#opet bez roc_th, train(5.77)/test 0.9 - spolus tsl_stop + tsl_th\n", + "#29\t7\t-0.09\t0.0033\t0.0068\n", + "\n", + "#bez roc_th a s trailing sl train/test 8.1/-0.8 \n", + "#70\t2\t-0.05\t0.0018\t0.0068\n", + "\n", + "\n", + "#SPY - short\n", + "# entry_window_closes\tmom_timeperiod\tmom_th\tsl_stop\ttp_stop\t\t\t\t\t\n", + "# 10\t6\t-0.13\t0.0022\t0.0057\n", + "\n", + "#5\t7\t-0.19\t0.0037\t0.0042\n", + "\n", + "#SPY - long\n", + "#45\t4\t0.27\t0.0047\t0.0067\n", + "\n", + "# TODO:\n", + "#- vyzkouset zda nejvyhodnejsi kombinace krom train/testu funguje i na nasledujicich dnech po trainu\n", + "# -zkusit najit v short datasetu neco vyhodneho co funguji i na testu\n", + "# - dodelat kombinace pro long signaly\n", + "# - zkusit walk forward\n", + "# - vytvorit vysledkove totoznou na v2realbot\n", + "# - podivat se jak detailne funguji tsl_stop a tsl_th\n", + "\n", + "#70,\t4,\t-0.07,\t0.0048,\t0.0068\t\n", + "\n", + "\n", + "entry_window_closes, mom_timeperiod, mom_th, sl_stop, tp_stop = 8,\t3,\t0.07,\t0.0028,\t0.0033\t\n", + "roc_th = 0\n", + "momshort = vbt.indicator(\"talib:MOM\").run(basic_data.close, timeperiod=mom_timeperiod, short_name = \"slope_short\")\n", + "rocp = vbt.indicator(\"talib:ROC\").run(basic_data.close, short_name = \"rocp\")\n", + "#rate of change + momentum\n", + "\n", + "#momshort.plot rocp.real_crossed_below(roc_th) & \n", + "short_signal = momshort.real_crossed_below(mom_th)\n", + "\n", + "long_signal = momshort.real_crossed_above(mom_th)\n", + "\n", + "# print(\"short signal\")\n", + "# print(short_signal.value_counts())\n", + "\n", + "#forced_exit = pd.Series(False, index=close.index)\n", + "forced_exit = basic_data.symbol_wrapper.fill(False)\n", + "#entry_window_open = pd.Series(False, index=close.index)\n", + "entry_window_open= basic_data.symbol_wrapper.fill(False)\n", + "\n", + "# Calculate the time difference in minutes from market open for each timestamp\n", + "elapsed_min_from_open = 
(forced_exit.index.hour - market_open.hour) * 60 + (forced_exit.index.minute - market_open.minute)\n", + "\n", + "entry_window_open[(elapsed_min_from_open >= entry_window_opens) & (elapsed_min_from_open < entry_window_closes)] = True\n", + "\n", + "#print(entry_window_open.value_counts())\n", + "\n", + "forced_exit[(elapsed_min_from_open >= forced_exit_start) & (elapsed_min_from_open < forced_exit_end)] = True\n", + "short_entries = (short_signal & entry_window_open)\n", + "short_exits = forced_exit\n", + "\n", + "entries = (long_signal & entry_window_open)\n", + "exits = forced_exit\n", + "#long_entries.info()\n", + "#number of trues and falses in long_entries\n", + "# print(short_exits.value_counts())\n", + "# print(short_entries.value_counts())\n", + "\n", + "#fig = plot_2y_close([],[momshort, rocp], close)\n", + "#short_signal.vbt.signals.plot_as_entries(close, fig=fig, add_trace_kwargs=dict(secondary_y=False))\n", + "#print(sl_stop)\n", + "#short_entries=short_entries, short_exits=short_exits,\n", + "pf = vbt.Portfolio.from_signals(close=basic_data, entries=entries, exits=exits, tsl_stop=sl_stop, tp_stop = tp_stop, fees=0.0167/100, freq=\"1s\") #sl_stop=sl_stop, tp_stop = sl_stop,\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf_B = pf.resample(\"30T\")\n", + "\n", + "pf_B.stats()\n", + "pf_B.orders.records_readable\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf.plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf.get_drawdowns().records_readable" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf.orders.records_readable" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf.value.plot().show()" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/research/strat_CANDLEGAPS/.ipynb_checkpoints/v2_SINGLE-checkpoint.ipynb b/research/strat_CANDLEGAPS/.ipynb_checkpoints/v2_SINGLE-checkpoint.ipynb new file mode 100644 index 0000000..58a878c --- /dev/null +++ b/research/strat_CANDLEGAPS/.ipynb_checkpoints/v2_SINGLE-checkpoint.ipynb @@ -0,0 +1,1118 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# ORDER Imbalance\n", + "\n", + "* introduced buyvolume and sellvolume on bar level.\n", + "* calculated order imbalance ratio ((buyvolume - sellvolume) / totalvolume)\n", + "* calculated on multiple timeframes\n", + "* entry based on confluence of imbalances\n", + "\n", + "## Note\n", + "\n", + "Order disbalance nepodminuje zmenu ceny (tzn. muze byt order disbalance na buy stranu, ale cena nemusi jit nahoru a naopak)\n", + "Nicmene pokud je disbalance delsi a nedochazi ke zmene ceny - může to něco indikovat. \n", + "Vytvořit si kumulativní disbalance - kumulátory, které se budou načítat, když se budou silné disbalance, bez změny ceny. Tento akumulátor se bude nabíjet disbalancí a vybíjet příslušnou změnou ceny."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from dotenv import load_dotenv\n", + "\n", + "#as V2realbot is client , load env variables here\n", + "env_file = \"/Users/davidbrazda/Documents/Development/python/.env\"\n", + "# Load the .env file\n", + "load_dotenv(env_file)\n", + "\n", + "from v2realbot.utils.utils import zoneNY\n", + "import pandas as pd\n", + "import numpy as np\n", + "import vectorbtpro as vbt\n", + "# from itables import init_notebook_mode, show\n", + "import datetime\n", + "from itertools import product\n", + "from v2realbot.config import DATA_DIR\n", + "from lightweight_charts import JupyterChart, chart, Panel\n", + "from IPython.display import display\n", + "\n", + "# init_notebook_mode(all_interactive=True)\n", + "\n", + "vbt.settings.set_theme(\"dark\")\n", + "vbt.settings['plotting']['layout']['width'] = 1280\n", + "vbt.settings.plotting.auto_rangebreaks = True\n", + "# Set the option to display with pagination\n", + "pd.set_option('display.notebook_repr_html', True)\n", + "pd.set_option('display.max_rows', 10) # Number of rows per page\n", + "\n", + "# Define the market open and close times\n", + "market_open = datetime.time(9, 30)\n", + "market_close = datetime.time(16, 0)\n", + "entry_window_opens = 1\n", + "entry_window_closes = 370\n", + "\n", + "forced_exit_start = 380\n", + "forced_exit_end = 390\n", + "\n", + "#LOAD FROM PARQUET\n", + "#list all files is dir directory with parquet extension\n", + "dir = DATA_DIR + \"/notebooks/\"\n", + "import os\n", + "files = [f for f in os.listdir(dir) if f.endswith(\".parquet\")]\n", + "print('\\n'.join(map(str, files)))\n", + "file_name = \"ohlcv_df-BAC-2023-01-01T09_30_00-2024-05-25T15_30_00-47BCFOPUVWZ-100.parquet\"\n", + "ohlcv_df = pd.read_parquet(dir+file_name,engine='pyarrow')\n", + "#filter ohlcv_df to certain date range (assuming datetime index)\n", + "#ohlcv_df = ohlcv_df.loc[\"2024-02-12 9:30\":\"2024-02-14 
16:00\"]\n", + "\n", + "#add vwap column to ohlcv_df\n", + "#ohlcv_df[\"hlcc4\"] = (ohlcv_df[\"close\"] + ohlcv_df[\"high\"] + ohlcv_df[\"low\"] + ohlcv_df[\"close\"]) / 4\n", + "\n", + "basic_data = vbt.Data.from_data(vbt.symbol_dict({\"BAC\": ohlcv_df}), tz_convert=zoneNY)\n", + "ohlcv_df= None\n", + "basic_data.wrapper.index.normalize().nunique()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Add resample function to custom columns" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from vectorbtpro.utils.config import merge_dicts, Config, HybridConfig\n", + "from vectorbtpro import _typing as tp\n", + "from vectorbtpro.generic import nb as generic_nb\n", + "\n", + "_feature_config: tp.ClassVar[Config] = HybridConfig(\n", + " {\n", + " \"buyvolume\": dict(\n", + " resample_func=lambda self, obj, resampler: obj.vbt.resample_apply(\n", + " resampler,\n", + " generic_nb.sum_reduce_nb,\n", + " )\n", + " ),\n", + " \"sellvolume\": dict(\n", + " resample_func=lambda self, obj, resampler: obj.vbt.resample_apply(\n", + " resampler,\n", + " generic_nb.sum_reduce_nb,\n", + " )\n", + " ),\n", + " \"trades\": dict(\n", + " resample_func=lambda self, obj, resampler: obj.vbt.resample_apply(\n", + " resampler,\n", + " generic_nb.sum_reduce_nb,\n", + " )\n", + " )\n", + " }\n", + ")\n", + "\n", + "basic_data._feature_config = _feature_config" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "basic_data.data[\"BAC\"].info()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "t1data = basic_data[['open', 'high', 'low', 'close', 'volume','vwap','buyvolume','trades','sellvolume']].resample(\"1T\")\n", + "t1data = t1data.transform(lambda df: 
df.between_time('09:30', '16:00').dropna())\n", + "t1data.data[\"BAC\"].info()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "bbands = vbt.talib(\"BBANDS\").run(\n", + " t1data.get(\"Close\"))\n", + "\n", + "\n", + "supertrend = vbt.SUPERTREND.run(t1data.high, t1data.low, t1data.close, period=14, multiplier=3)\n", + "#supertrend.output_names" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "serka = supertrend.direction\n", + "\n", + "#a = serka.vbt.xloc[\"2024-02-12 09:30\":\"2024-02-12 09:32\"].obj\n", + "\n", + "a = serka.vbt.xloc[slice(\"2024-02-12 09:30\",\"2024-02-12 09:32\")].obj\n", + " \n", + "a\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df = t1data.data[\"BAC\"]\n", + "\n", + "df.vbt.xloc[\"2024-02-12 09:30\":\"2024-02-12 09:32\"].obj" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "supertrend.trend" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pane1 = Panel(\n", + " ohlcv=(t1data.data[\"BAC\"],), #(series, entries, exits, other_markers)\n", + " histogram=[], # [(series, name, \"rgba(53, 94, 59, 0.6), opacity\")]\n", + " right=[#(bbands,), #[(series, name, entries, exits, other_markers)]\n", + " (supertrend.trend,\"STtrend\"),\n", + " (supertrend.long,\"STlong\"),\n", + " (supertrend.short,\"STshort\")\n", + " ],\n", + " left = [(supertrend.direction,\"STdirection\")],\n", + " # right=[(bbands.upperband, \"upperband\",),\n", + " # (bbands.lowerband, \"lowerband\",),\n", + " # (bbands.middleband, 
\"middleband\",)\n", + " # ], #[(series, name, entries, exits, other_markers)]\n", + " middle1=[],\n", + " middle2=[],\n", + ")\n", + "\n", + "pane2 = Panel(\n", + " ohlcv=(t1data.data[\"BAC\"],), #(series, entries, exits, other_markers)\n", + " histogram=[], # [(series, name, \"rgba(53, 94, 59, 0.6), opacity\")]\n", + " right=[(bbands,), #[(series, name, entries, exits, other_markers)]\n", + " # (supertrend.trend,\"STtrend\"),\n", + " # (supertrend.long,\"STlong\"),\n", + " # (supertrend.short,\"STshort\")\n", + " ],\n", + " # left = [(supertrend.direction,\"STdirection\")],\n", + " # right=[(bbands.upperband, \"upperband\",),\n", + " # (bbands.lowerband, \"lowerband\",),\n", + " # (bbands.middleband, \"middleband\",)\n", + " # ], #[(series, name, entries, exits, other_markers)]\n", + " middle1=[],\n", + " middle2=[],\n", + ")\n", + "\n", + "ch = chart([pane1, pane2], sync=True, size=\"s\", xloc=slice(\"2024-02-12 09:30\",\"2024-03-12\"))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "buyvolume = t1data.data[\"BAC\"].buyvolume\n", + "sellvolume = t1data.data[\"BAC\"].sellvolume\n", + "totalvolume = buyvolume + sellvolume\n", + "\n", + "#adjust to minimal value to avoid division by zero\n", + "sellvolume_adjusted = sellvolume.replace(0, 1e-10)\n", + "oibratio = buyvolume / sellvolume\n", + "\n", + "#cumulative order flow (net difference)\n", + "cof = buyvolume - sellvolume\n", + "\n", + "# Calculate the order imbalance (net differene) normalize the order imbalance by calculating the difference between buy and sell volumes and then scaling it by the total volume.\n", + "order_imbalance = cof / totalvolume\n", + "order_imbalance = order_imbalance.fillna(0) #nan nahradime 0\n", + "\n", + "order_imbalance_allvolume = cof / t1data.data[\"BAC\"].volume\n", + "\n", + "order_imbalance_sma = vbt.indicator(\"talib:EMA\").run(order_imbalance, timeperiod=5)\n", + "short_signals = order_imbalance.vbt < 
-0.5\n", + "#short_entries = oibratio.vbt < 0.01\n", + "short_signals.value_counts()\n", + "short_signals.name = \"short_entries\"\n", + "#.fillna(False)\n", + "short_exits = short_signals.shift(-2).fillna(False).astype(bool)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pane1 = Panel(\n", + " ohlcv=(t1data.data[\"BAC\"],), #(series, entries, exits, other_markers)\n", + " histogram=[(order_imbalance_allvolume, \"oib_allvolume\", \"rgba(53, 94, 59, 0.6)\",0.5),\n", + " (t1data.data[\"BAC\"].trades, \"trades\",None,0.4),\n", + " ], # [(series, name, \"rgba(53, 94, 59, 0.6)\", opacity)]\n", + " # right=[\n", + " # (supertrend.trend,\"STtrend\"),\n", + " # (supertrend.long,\"STlong\"),\n", + " # (supertrend.short,\"STshort\")\n", + " # ],\n", + " # left = [(supertrend.direction,\"STdirection\")],\n", + " # right=[(bbands.upperband, \"upperband\",),\n", + " # (bbands.lowerband, \"lowerband\",),\n", + " # (bbands.middleband, \"middleband\",)\n", + " # ], #[(series, name, entries, exits, other_markers)]\n", + " middle1=[],\n", + " middle2=[],\n", + ")\n", + "\n", + "pane2 = Panel(\n", + " ohlcv=(basic_data.data[\"BAC\"],), #(series, entries, exits, other_markers)\n", + " left=[(basic_data.data[\"BAC\"].trades, \"trades\")],\n", + " histogram=[(basic_data.data[\"BAC\"].trades, \"trades_hist\", \"white\", 0.5)], #\"rgba(53, 94, 59, 0.6)\"\n", + " # ], # [(series, name, \"rgba(53, 94, 59, 0.6)\")]\n", + " # right=[\n", + " # (supertrend.trend,\"STtrend\"),\n", + " # (supertrend.long,\"STlong\"),\n", + " # (supertrend.short,\"STshort\")\n", + " # ],\n", + " # left = [(supertrend.direction,\"STdirection\")],\n", + " # right=[(bbands.upperband, \"upperband\",),\n", + " # (bbands.lowerband, \"lowerband\",),\n", + " # (bbands.middleband, \"middleband\",)\n", + " # ], #[(series, name, entries, exits, other_markers)]\n", + " middle1=[],\n", + " middle2=[],\n", + ")\n", + "\n", + "\n", + "ch = chart([pane1, 
pane2], size=\"m\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "##z tohoto si udelat plot funkci (i pro entries,exits)\n", + "#t1data = t1data[[\"open\", \"high\", \"low\", \"close\", \"volume\"]]\n", + "chart = JupyterChart(width=1000, height=600, inner_width=1, inner_height=0.5, leftScale=True)\n", + "#set resolution\n", + "t1data = basic_data[['open', 'high', 'low', 'close', 'volume','vwap','buyvolume','sellvolume']].resample(\"1T\")\n", + "t1data = t1data.transform(lambda df: df.between_time('09:30', '16:00').dropna())\n", + "chart.set(t1data.data[\"BAC\"])\n", + "line_vwap = chart.create_line(name=\"vwap\")#, color=\"blue\")\n", + "line_vwap.set(t1data.vwap)\n", + "\n", + "\n", + "chart.topbar.textbox(\"title\",\"Nadpis\")\n", + "chart2 = chart.create_subchart(position='right', width=1, height=0.5, sync=True, leftScale=True)\n", + "t2data = basic_data[['open', 'high', 'low', 'close', 'volume','vwap','buyvolume','sellvolume']].resample(\"5T\")\n", + "t2data = t2data.transform(lambda df: df.between_time('09:30', '16:00').dropna())\n", + "\n", + "\n", + "#5min close realigned to 1T\n", + "close_realigned = t2data.close.vbt.realign_closing(\"1T\").between_time('09:30', '16:00').dropna()\n", + "close_realigned = close_realigned[close_realigned.index.dayofweek < 5]\n", + "#close_realigned = close_realigned[close_realigned.index.dayofweek < 5]\n", + "line1 = chart.create_line(name=\"5minclose\")#, color=\"green\")\n", + "line1.set(close_realigned)\n", + "\n", + "#sma z realigned dat\n", + "sma_tp = 20\n", + "sma_t2 = vbt.indicator(\"talib:EMA\").run(close_realigned, timeperiod=sma_tp)\n", + "smaline = chart.create_line(name=f\"sma{sma_tp}\")#, color=\"blue\")\n", + "smaline.set(sma_t2)\n", + "\n", + "\n", + "#sma z puvodnich resamplovanych dat plus navic realign, melo by byt stejne \n", + "sma_real = vbt.indicator(\"talib:EMA\").run(t2data.close, timeperiod=sma_tp)\n", + "sma_real_value = 
sma_real.real.vbt.realign_closing(\"1T\").between_time('09:30', '16:00').dropna()\n", + "sma_real_value = sma_real_value[sma_real_value.index.dayofweek < 5]\n", + "smaline_real = chart.create_line(name=f\"smareal{sma_tp}\", color=\"yellow\")\n", + "smaline_real.set(sma_real_value)\n", + "\n", + "#resample 15T\n", + "t15data = basic_data[['open', 'high', 'low', 'close', 'volume','vwap','buyvolume','sellvolume']].resample(\"15T\")\n", + "t15data = t15data.transform(lambda df: df.between_time('09:30', '16:00').dropna())\n", + "\n", + "#5min close realigned to 1T\n", + "close_15realigned = t15data.close.vbt.realign_closing(\"1T\").between_time('09:30', '16:00').dropna()\n", + "close_15realigned = close_15realigned[close_15realigned.index.dayofweek < 5]\n", + "#close_realigned = close_realigned[close_realigned.index.dayofweek < 5]\n", + "line2 = chart.create_line(name=\"15minclose\")#, color=\"pink\")\n", + "line2.set(close_15realigned)\n", + "\n", + "\n", + "chart.legend(True)\n", + "hst = chart2.create_histogram(name=\"buyvolume\", color=\"rgba(53, 94, 59, 0.6)\") #green transparent\n", + "hst1 = chart2.create_histogram(name=\"sellvolume\", color=\"rgba(165, 42, 42, 0.6)\") #red transparent\n", + "hst.set(t1data.data[\"BAC\"])\n", + "hst1.set(t1data.data[\"BAC\"])\n", + "line2 = chart2.create_line(name=\"5minclose\")#, color=\"green\")\n", + "line2.set(close_realigned)\n", + "\n", + "lineoib = chart2.create_line(name=\"oib\", priceScaleId=\"left\") #color=\"violet\", \n", + "#lineoib.scale(0.7,0)\n", + "lineoib.set(order_imbalance_allvolume)\n", + "\n", + "lineoib_sma = chart2.create_line(name=\"oibsma5\", priceScaleId=\"left\") #, color=\"blue\", \n", + "lineoib_sma.set(order_imbalance_sma)\n", + "\n", + "chart.fit()\n", + "chart2.legend(True)\n", + "#\n", + "line2.markers_set(short_signals, \"entries\")\n", + "# TODO jelikoz se davaji do jednoho pole je treba zajistit spravne sortovani\n", + "# domyslet jak to pojmout iterativni doplnovani markeru\n", + 
"line2.markers_set(short_exits, \"exits\")\n", + "\n", + "\n", + "chart2.fit()\n", + "chart.load()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "short_signals.info()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#vbt.IF.list_indicators(\"*ma\")\n", + "vbt.phelp(vbt.indicator(\"talib:EMA\").run)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sma = vbt.indicator(\"talib:EMA\").run(t1data.close, timeperiod=20)\n", + "sma.real.info()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "rr = vbt.RSI.run(t1data.close)\n", + "type(rr)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "buyvolume = t1data.data[\"BAC\"].buyvolume\n", + "sellvolume = t1data.data[\"BAC\"].sellvolume\n", + "totalvolume = buyvolume + sellvolume\n", + "\n", + "#adjust to minimal value to avoid division by zero\n", + "sellvolume_adjusted = sellvolume.replace(0, 1e-10)\n", + "oibratio = buyvolume / sellvolume\n", + "\n", + "#cumulative order flow (net difference)\n", + "cof = buyvolume - sellvolume\n", + "\n", + "# Calculate the order imbalance (net differene) normalize the order imbalance by calculating the difference between buy and sell volumes and then scaling it by the total volume.\n", + "order_imbalance = cof / totalvolume\n", + "order_imbalance = order_imbalance.fillna(0) #nan nahradime 0\n", + "\n", + "order_imbalance_allvolume = cof / t1data.data[\"BAC\"].volume" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "order_imbalance_sma = vbt.indicator(\"talib:EMA\").run(order_imbalance, 
timeperiod=5)\n", + "short_signals = order_imbalance.vbt < -0.5\n", + "#short_entries = oibratio.vbt < 0.01\n", + "short_signals.value_counts()\n", + "short_signals.name = \"short_entries\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "short_signals.info()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "short_signals" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "order_imbalance.fillna(0)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "order_imbalance.vbt.plot()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "order_imbalance" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "chartN = JupyterChart(width=500, height=300, inner_width=1, inner_height=0.3, leftScale=True)\n", + "t1data = basic_data[['open', 'high', 'low', 'close', 'volume','vwap','buyvolume','sellvolume']].resample(\"1T\")\n", + "t1data = t1data.transform(lambda df: df.between_time('09:30', '16:00').dropna())\n", + "chartN.set(t1data.data[\"BAC\"])\n", + "line_sma = chartN.create_line(name=\"sma\", priceScaleId=\"right\")#, color=\"blue\")\n", + "line_sma.set(sma)\n", + "# line_sma.markers_set(short_signals, \"entries\")\n", + "# line_sma.markers_set(short_exits, \"exits\")\n", + "# hst = chartN.create_histogram(name=\"oivol\")\n", + "# hst.set(order_imbalance_allvolume)\n", + "# chartN.legend(True)\n", + "# chartN.fit()\n", + "\n", + "# subchart = chartN.create_subchart(position='right', width=1, height=0.5, sync=False, leftScale=True)\n", + "# # subchart.set(t1data.data[\"BAC\"])\n", + "# 
line_sma1 = subchart.create_line(name=\"smao\", priceScaleId=\"left\")#, color=\"blue\")\n", + "# line_sma1.set(sma)\n", + "# # line_sma1.markers_set(short_signals, \"entries\")\n", + "# # line_sma1.markers_set(short_exits, \"exits\")\n", + "# hsto = subchart.create_histogram(name=\"oivolo\")\n", + "# hsto.set(order_imbalance_sma)\n", + "\n", + "chart2 = chartN.create_subchart(position='left', width=1, height=0.5, sync=True, leftScale=True, toolbox=True)\n", + "# hst = chart2.create_histogram(name=\"buyvolume\", color=\"rgba(53, 94, 59, 0.6)\") #green transparent\n", + "# hst1 = chart2.create_histogram(name=\"sellvolume\", color=\"rgba(165, 42, 42, 0.6)\") #red transparent\n", + "# hst.set(t1data.data[\"BAC\"])\n", + "# hst1.set(t1data.data[\"BAC\"])\n", + "line2 = chart2.create_line(name=\"sma\")#, color=\"green\")\n", + "line2.set(sma)\n", + "chart2.topbar.textbox(\"title\",\"Nadpis\")\n", + "# chartN.topbar.textbox(\"title\",\"NadpisT\")\n", + "\n", + "# subchart.legend(True)\n", + "# subchart.fit()\n", + "chartN.load()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "##z tohoto si udelat plot funkci (i pro entries,exits)\n", + "#t1data = t1data[[\"open\", \"high\", \"low\", \"close\", \"volume\"]]\n", + "chart = JupyterChart(width=1000, height=600, inner_width=1, inner_height=0.5, leftScale=True)\n", + "#set resolution\n", + "t1data = basic_data[['open', 'high', 'low', 'close', 'volume','vwap','buyvolume','sellvolume']].resample(\"1T\")\n", + "t1data = t1data.transform(lambda df: df.between_time('09:30', '16:00').dropna())\n", + "chart.set(t1data.data[\"BAC\"])\n", + "line_vwap = chart.create_line(name=\"vwap\")#, color=\"blue\")\n", + "line_vwap.set(t1data.vwap)\n", + "\n", + "\n", + "chart.topbar.textbox(\"title\",\"Nadpis\")\n", + "chart2 = chart.create_subchart(position='right', width=1, height=0.5, sync=True, leftScale=True)\n", + "t2data = basic_data[['open', 'high', 'low', 'close', 
'volume','vwap','buyvolume','sellvolume']].resample(\"5T\")\n", + "t2data = t2data.transform(lambda df: df.between_time('09:30', '16:00').dropna())\n", + "\n", + "\n", + "#5min close realigned to 1T\n", + "close_realigned = t2data.close.vbt.realign_closing(\"1T\").between_time('09:30', '16:00').dropna()\n", + "close_realigned = close_realigned[close_realigned.index.dayofweek < 5]\n", + "#close_realigned = close_realigned[close_realigned.index.dayofweek < 5]\n", + "line1 = chart.create_line(name=\"5minclose\")#, color=\"green\")\n", + "line1.set(close_realigned)\n", + "\n", + "#sma z realigned dat\n", + "sma_tp = 20\n", + "sma_t2 = vbt.indicator(\"talib:EMA\").run(close_realigned, timeperiod=sma_tp)\n", + "smaline = chart.create_line(name=f\"sma{sma_tp}\")#, color=\"blue\")\n", + "smaline.set(sma_t2)\n", + "\n", + "\n", + "#sma z puvodnich resamplovanych dat plus navic realign, melo by byt stejne \n", + "sma_real = vbt.indicator(\"talib:EMA\").run(t2data.close, timeperiod=sma_tp)\n", + "sma_real_value = sma_real.real.vbt.realign_closing(\"1T\").between_time('09:30', '16:00').dropna()\n", + "sma_real_value = sma_real_value[sma_real_value.index.dayofweek < 5]\n", + "smaline_real = chart.create_line(name=f\"smareal{sma_tp}\", color=\"yellow\")\n", + "smaline_real.set(sma_real_value)\n", + "\n", + "#resample 15T\n", + "t15data = basic_data[['open', 'high', 'low', 'close', 'volume','vwap','buyvolume','sellvolume']].resample(\"15T\")\n", + "t15data = t15data.transform(lambda df: df.between_time('09:30', '16:00').dropna())\n", + "\n", + "#5min close realigned to 1T\n", + "close_15realigned = t15data.close.vbt.realign_closing(\"1T\").between_time('09:30', '16:00').dropna()\n", + "close_15realigned = close_15realigned[close_15realigned.index.dayofweek < 5]\n", + "#close_realigned = close_realigned[close_realigned.index.dayofweek < 5]\n", + "line2 = chart.create_line(name=\"15minclose\")#, color=\"pink\")\n", + "line2.set(close_15realigned)\n", + "\n", + "\n", + 
"chart.legend(True)\n", + "hst = chart2.create_histogram(name=\"buyvolume\", color=\"rgba(53, 94, 59, 0.6)\") #green transparent\n", + "hst1 = chart2.create_histogram(name=\"sellvolume\", color=\"rgba(165, 42, 42, 0.6)\") #red transparent\n", + "hst.set(t1data.data[\"BAC\"])\n", + "hst1.set(t1data.data[\"BAC\"])\n", + "line2 = chart2.create_line(name=\"5minclose\")#, color=\"green\")\n", + "line2.set(close_realigned)\n", + "\n", + "lineoib = chart2.create_line(name=\"oib\", priceScaleId=\"left\") #color=\"violet\", \n", + "#lineoib.scale(0.7,0)\n", + "lineoib.set(order_imbalance_allvolume)\n", + "\n", + "lineoib_sma = chart2.create_line(name=\"oibsma5\", priceScaleId=\"left\") #, color=\"blue\", \n", + "lineoib_sma.set(order_imbalance_sma)\n", + "\n", + "chart.fit()\n", + "chart2.legend(True)\n", + "#\n", + "line2.markers_set(short_signals, \"entries\")\n", + "# TODO jelikoz se davaji do jednoho pole je treba zajistit spravne sortovani\n", + "# domyslet jak to pojmout iterativni doplnovani markeru\n", + "line2.markers_set(short_exits, \"exits\")\n", + "\n", + "\n", + "chart2.fit()\n", + "chart.load()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sma.info()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#priminds list (same Y as price), secinds list (secondary Y napr. 
rsi), close, voluminds (volume based) list\n", + "def plot_2y_close(priminds, secinds, close, volumeinds, ohlcv=None):\n", + " fig = vbt.make_subplots(rows=2, cols=1, shared_xaxes=True, \n", + " specs=[[{\"secondary_y\": True}], [{\"secondary_y\": False}]], \n", + " vertical_spacing=0.02, subplot_titles=(\"Price and Indicators\", \"Volume\"))\n", + "\n", + " if ohlcv is not None:\n", + " ohlcv.vbt.ohlcv.plot(fig=fig, add_trace_kwargs=dict(row=1, col=1))\n", + "\n", + " # Plotting the close price\n", + " close.vbt.plot(fig=fig, add_trace_kwargs=dict(secondary_y=False,row=1, col=1), trace_kwargs=dict(line=dict(color=\"blue\")))\n", + " \n", + " # Plotting primary indicators on the first row\n", + " for ind in priminds:\n", + " if isinstance(ind, pd.Series):\n", + " #if series has no name, make the name same as the variable name\n", + " \n", + " ind = ind.vbt\n", + " ind.plot(fig=fig, add_trace_kwargs=dict(secondary_y=False, row=1, col=1))\n", + " \n", + " # Plotting secondary indicators on the first row\n", + " for ind in secinds:\n", + " #ind = ind.rename(str(ind.name))\n", + " if isinstance(ind, pd.Series):\n", + " ind = ind.vbt\n", + " ind.plot(fig=fig, add_trace_kwargs=dict(secondary_y=True, row=1, col=1), trace_kwargs=dict(line=dict(color=\"rgba(255, 0, 0, 0.4)\")))\n", + " \n", + " for indvolume in volumeinds:\n", + " # Plotting the volume on the second row\n", + " indvolume.rename(str(indvolume.name)).vbt.barplot(fig=fig, add_trace_kwargs=dict(secondary_y=False, row=2, col=1))\n", + " #vbt.Bar(indvolume, fig=fig, add_trace_kwargs=dict(secondary_y=False, row=2, col=1))\n", + " \n", + " return fig\n", + "\n", + "fig = plot_2y_close([sma], [order_imbalance.rename(\"order_imbalance_norm\"),order_imbalance_sma.real.rename(\"oib_sma\")], t1data.close, [t1data.data[\"BAC\"].buyvolume, t1data.data[\"BAC\"].sellvolume, t1data.volume], t1data.data[\"BAC\"])\n", + "fig.update_yaxes(range=[33,34], secondary_y=False, row=1, col=1) #update y axis range\n", + 
"fig.update_yaxes(range=[-1,1], secondary_y=True, row=1, col=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib inline\n", + "t0data = basic_data\n", + "t1data = basic_data[['open', 'high', 'low', 'close', 'volume','vwap']].resample(\"1T\")\n", + "t2data = basic_data[['open', 'high', 'low', 'close', 'volume','vwap']].resample(\"15T\")\n", + "t3data = basic_data[['open', 'high', 'low', 'close', 'volume','vwap']].resample(\"30T\")\n", + "t4data = basic_data[['open', 'high', 'low', 'close', 'volume', 'vwap']].resample(\"D\").dropna()\n", + "\n", + "t1data = t1data.transform(lambda df: df.between_time('09:30', '16:00').dropna())\n", + "t2data = t2data.transform(lambda df: df.between_time('09:30', '16:00').dropna())\n", + "t3data = t3data.transform(lambda df: df.between_time('09:30', '16:00').dropna())\n", + "\n", + "#30min data to daily\n", + "# t4data = t3data.resample(\"D\").dropna()\n", + "\n", + "#t4data = t4data.transform(lambda df: df.between_time('09:30', '16:00').dropna())\n", + "#m1data.data[\"SPY\"].info()\n", + "\n", + "#m1data.data[\"SPY\"].vbt.ohlcv.plot()\n", + "#h2data.data[\"SPY\"].vbt.ohlcv.plot()\n", + "#ddata.data[\"SPY\"]\n", + "t2data.data[\"BAC\"].vbt.ohlcv.plot().show()\n", + "\n", + "\n", + "#t4data.data[\"BAC\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "t2data.close\n", + "\n", + "#in df remove rows with nan\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#realign na 1T = t1data + oriznout main session\n", + "t2data_vwap = t2data.vwap.vbt.realign_closing(\"1T\").between_time('09:30', '16:00').dropna()\n", + "t3data_vwap = t3data.vwap.vbt.realign_closing(\"1T\").between_time('09:30', '16:00').dropna()\n", + "t4data_vwap = t4data.vwap.vbt.realign_closing(\"1T\").dropna()" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "t2data_vwap" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def plot_2y_close(priminds, secinds, close):\n", + " fig = vbt.make_subplots(rows=1, cols=1, shared_xaxes=True, specs=[[{\"secondary_y\": True}]], vertical_spacing=0.02, subplot_titles=(\"MOM\", \"Price\" ))\n", + " close.vbt.plot(fig=fig, add_trace_kwargs=dict(secondary_y=False), trace_kwargs=dict(line=dict(color=\"blue\")))\n", + " for ind in priminds:\n", + " if isinstance(ind, pd.Series):\n", + " ind = ind.vbt\n", + " ind.plot(fig=fig, add_trace_kwargs=dict(secondary_y=False))\n", + " for ind in secinds:\n", + " if isinstance(ind, pd.Series):\n", + " ind = ind.vbt\n", + " ind.plot(fig=fig, add_trace_kwargs=dict(secondary_y=True))\n", + " return fig" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "t4data.clos.vbt \n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "obvind = vbt.indicator.obv.run(t1data.close, t1data.volume)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "t1_lengtgh = 15\n", + "t2_length = 15\n", + "t3_length = 15\n", + "t4_length = 5\n", + "t1_th = 0.1\n", + "t2_th = 0.1\n", + "t3_th = 0.1\n", + "t4_th = 0.1\n", + "\n", + "\n", + "\n", + "#minute\n", + "t1slope = vbt.indicator(\"talib:LINEARREG_SLOPE \").run(t1data.close, timeperiod=t1_lengtgh) # -0.09, 0.09\n", + "t2slope = vbt.indicator(\"talib:LINEARREG_SLOPE \").run(t2data.vwap, timeperiod=t2_length) # -0.08 , 0.079\n", + "t3slope = vbt.indicator(\"talib:LINEARREG_SLOPE \").run(t3data.vwap, timeperiod=t3_length) # -0.08, 0.08\n", + "#daily\n", + "t4slope = vbt.indicator(\"talib:LINEARREG_SLOPE \").run(t4data.vwap, timeperiod=t4_length) # -0.1, 0.09\n", + "\n", + 
"plot_2y_close(priminds=[], secinds=[t1slope, t2slope, t3slope, t4slope], close=t1data.close).show()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#thirtymin_slope = thirtymin_slope.real.rename(\"30min\") #timto se prejmenuje real na 30min\n", + "t3slope = t3slope.real.vbt.realign_closing(\"1T\").between_time('09:30', '16:00').dropna()\n", + "##filter daily_slope_to_compare to only monday to friday\n", + "t3slope = t3slope[t3slope.index.dayofweek < 5]\n", + "\n", + "#t3slope.info()\n", + "\n", + "t2slope = t2slope.real.vbt.realign_closing(\"1T\").between_time('09:30', '16:00').dropna()\n", + "##filter daily_slope_to_compare to only monday to friday\n", + "t2slope = t2slope[t2slope.index.dayofweek < 5]\n", + "\n", + "t2slope.info()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "oibratio" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "#\n", + "short_signals = order_imbalance.vbt < -0.3\n", + "#short_entries = oibratio.vbt < 0.01\n", + "short_signals.value_counts()\n", + "\n", + "long_signals = order_imbalance.vbt > 0.3\n", + "#entries = oibratio.vbt > 10\n", + "long_signals.value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig = vbt.make_subplots(rows=3, cols=1, shared_xaxes=True, \n", + " specs=[[{\"secondary_y\": True}], [{\"secondary_y\": True}], [{\"secondary_y\": False}]], \n", + " vertical_spacing=0.02, subplot_titles=(\"Price and Indicators\", \"Volume\"))\n", + "t1data.data[\"BAC\"].vbt.ohlcv.plot(fig=fig, add_trace_kwargs=dict(secondary_y=False, row=1, col=1))\n", + "#oibratio.vbt.plot(fig=fig, add_trace_kwargs=dict(secondary_y=True, row=1, col=1))\n", + "order_imbalance.vbt.plot(fig=fig, add_trace_kwargs=dict(secondary_y=True, row=1, col=1))\n", + 
"long_signals.vbt.signals.plot_as_entries(t1data.close, fig=fig, trace_kwargs=dict(name=\"LONGS\",\n", + " line=dict(color=\"#ffe476\"),\n", + " marker=dict(color=\"limegreen\"),\n", + " fill=None,\n", + " connectgaps=True,\n", + " ), add_trace_kwargs=dict(secondary_y=False, row=1, col=1))\n", + "\n", + "short_signals.vbt.signals.plot_as_entries(t1data.close, fig=fig, trace_kwargs=dict(name=\"SHORTS\",\n", + " line=dict(color=\"#ffe476\"),\n", + " marker=dict(color=\"red\"),\n", + " fill=None,\n", + " connectgaps=True,\n", + " ), add_trace_kwargs=dict(secondary_y=False, row=1, col=1))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# thirtymin_slope_to_compare.vbt.xloc[\"04-16-2024\"].get()\n", + "thirty_down_signal.vbt.xloc[\"04-16-2024\"].get()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#short_signal = t1slope.real_below(t1_th) & t2slope.real_below(t2_th) & t3slope.real_below(t3_th) & t4slope.real_below(t4_th)\n", + "#long_signal = t1slope.real_above(t1_th) & t2slope.real_above(t2_th) & t3slope.real_above(t3_th) & t4slope.real_above(t4_th)\n", + "\n", + "#test na daily s reversem crossed 0\n", + "short_signal = t2slope.vbt < -0.01 & t3slope.vbt < -0.01 #min value of threshold\n", + "long_signal = t2slope.vbt > 0.01 & t3slope.vbt > 0.01 #min\n", + "\n", + "# thirty_up_signal = t3slope.vbt.crossed_above(0.01)\n", + "# thirty_down_signal = t3slope.vbt.crossed_below(-0.01)\n", + "\n", + "fig = plot_2y_close(priminds=[], secinds=[t3slope], close=t1data.close)\n", + "#short_signal.vbt.signals.plot_as_entries(basic_data.close, fig=fig)\n", + "\n", + "short_signal.vbt.signals.plot_as_entries(t1data.close, fig=fig, trace_kwargs=dict(name=\"SHORTS\",\n", + " line=dict(color=\"#ffe476\"),\n", + " marker=dict(color=\"red\", symbol=\"triangle-down\"),\n", + " fill=None,\n", + " connectgaps=True,\n", + " ))\n", + 
"long_signal.vbt.signals.plot_as_entries(t1data.close, fig=fig, trace_kwargs=dict(name=\"LONGS\",\n", + " line=dict(color=\"#ffe476\"),\n", + " marker=dict(color=\"limegreen\"),\n", + " fill=None,\n", + " connectgaps=True,\n", + " ))\n", + "\n", + "# thirty_down_signal.vbt.signals.plot_as_entries(t1data.close, fig=fig, trace_kwargs=dict(name=\"DOWN30\",\n", + "# line=dict(color=\"#ffe476\"),\n", + "# marker=dict(color=\"yellow\", symbol=\"triangle-down\"),\n", + "# fill=None,\n", + "# connectgaps=True,\n", + "# ))\n", + "# thirty_up_signal.vbt.signals.plot_as_entries(t1data.close, fig=fig, trace_kwargs=dict(name=\"UP30\",\n", + "# line=dict(color=\"#ffe476\"),\n", + "# marker=dict(color=\"grey\"),\n", + "# fill=None,\n", + "# connectgaps=True,\n", + "# ))\n", + "\n", + "# thirtymin_slope_to_compare.vbt.plot(fig=fig, add_trace_kwargs=dict(secondary_y=True), trace_kwargs=dict(name=\"30min slope\",\n", + "# line=dict(color=\"yellow\"), \n", + "# fill=None,\n", + "# connectgaps=True,\n", + "# ))\n", + "\n", + "fig.show()\n", + "# print(\"short signal\")\n", + "# print(short_signal.value_counts())\n", + "\n", + "#forced_exit = pd.Series(False, index=close.index)\n", + "forced_exit = basic_data.symbol_wrapper.fill(False)\n", + "#entry_window_open = pd.Series(False, index=close.index)\n", + "entry_window_open= basic_data.symbol_wrapper.fill(False)\n", + "\n", + "# Calculate the time difference in minutes from market open for each timestamp\n", + "elapsed_min_from_open = (forced_exit.index.hour - market_open.hour) * 60 + (forced_exit.index.minute - market_open.minute)\n", + "\n", + "entry_window_open[(elapsed_min_from_open >= entry_window_opens) & (elapsed_min_from_open < entry_window_closes)] = True\n", + "\n", + "#print(entry_window_open.value_counts())\n", + "\n", + "forced_exit[(elapsed_min_from_open >= forced_exit_start) & (elapsed_min_from_open < forced_exit_end)] = True\n", + "short_entries = (short_signal & entry_window_open)\n", + "short_exits = forced_exit\n", + 
"\n", + "entries = (long_signal & entry_window_open)\n", + "exits = forced_exit\n", + "#long_entries.info()\n", + "#number of trues and falses in long_entries\n", + "# print(short_exits.value_counts())\n", + "# print(short_entries.value_counts())\n", + "\n", + "#fig = plot_2y_close([],[momshort, rocp], close)\n", + "#short_signal.vbt.signals.plot_as_entries(close, fig=fig, add_trace_kwargs=dict(secondary_y=False))\n", + "#print(sl_stop)\n", + "#short_entries=short_entries, short_exits=short_exits,\n", + "# pf = vbt.Portfolio.from_signals(close=basic_data, entries=short_entries, exits=exits, tsl_stop=0.005, tp_stop = 0.05, fees=0.0167/100, freq=\"1s\") #sl_stop=sl_stop, tp_stop = sl_stop,\n", + "\n", + "# pf.stats()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "forced_exit = t1data.symbol_wrapper.fill(False)\n", + "#entry_window_open = pd.Series(False, index=close.index)\n", + "entry_window_open= t1data.symbol_wrapper.fill(False)\n", + "\n", + "# Calculate the time difference in minutes from market open for each timestamp\n", + "elapsed_min_from_open = (forced_exit.index.hour - market_open.hour) * 60 + (forced_exit.index.minute - market_open.minute)\n", + "\n", + "entry_window_open[(elapsed_min_from_open >= entry_window_opens) & (elapsed_min_from_open < entry_window_closes)] = True\n", + "\n", + "#print(entry_window_open.value_counts())\n", + "\n", + "forced_exit[(elapsed_min_from_open >= forced_exit_start) & (elapsed_min_from_open < forced_exit_end)] = True\n", + "short_entries = (short_signals & entry_window_open)\n", + "short_exits = forced_exit\n", + "\n", + "entries = (long_signals & entry_window_open)\n", + "exits = forced_exit\n", + "\n", + "pf = vbt.Portfolio.from_signals(close=t1data, entries=entries, exits=exits, short_entries=short_entries, short_exits=exits,\n", + "td_stop=2, time_delta_format=\"rows\",\n", + "tsl_stop=0.005, tp_stop = 0.005, fees=0.0167/100)#, freq=\"1s\") 
#sl_stop=sl_stop, tp_stop = sl_stop,\n", + "\n", + "pf.stats()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf.plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf.get_drawdowns().records_readable" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf.orders.records_readable" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/research/strat_CANDLEGAPS/CANDLEGAPS_v1_MULTI.ipynb b/research/strat_CANDLEGAPS/CANDLEGAPS_v1_MULTI.ipynb new file mode 100644 index 0000000..99c2982 --- /dev/null +++ b/research/strat_CANDLEGAPS/CANDLEGAPS_v1_MULTI.ipynb @@ -0,0 +1,932 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from v2realbot.tools.loadbatch import load_batch\n", + "from v2realbot.utils.utils import zoneNY\n", + "import pandas as pd\n", + "import numpy as np\n", + "import vectorbtpro as vbt\n", + "from itables import init_notebook_mode, show\n", + "import datetime\n", + "from itertools import product\n", + "from v2realbot.config import ACCOUNT1_PAPER_API_KEY, ACCOUNT1_PAPER_SECRET_KEY, DATA_DIR\n", + "\n", + "init_notebook_mode(all_interactive=True)\n", + "\n", + "vbt.settings.set_theme(\"dark\")\n", + "vbt.settings['plotting']['layout']['width'] = 1280\n", + "vbt.settings.plotting.auto_rangebreaks = True\n", + "# Set the option to display with pagination\n", + "pd.set_option('display.notebook_repr_html', 
True)\n", + "pd.set_option('display.max_rows', 10) # Number of rows per page\n", + "\n", + "# Define the market open and close times\n", + "market_open = datetime.time(9, 30)\n", + "market_close = datetime.time(16, 0)\n", + "entry_window_opens = 1\n", + "entry_window_closes = 370\n", + "\n", + "forced_exit_start = 380\n", + "forced_exit_end = 390\n", + "\n", + "#LOAD FROM PARQUET\n", + "#list all files is dir directory with parquet extension\n", + "dir = DATA_DIR + \"/notebooks/\"\n", + "import os\n", + "files = [f for f in os.listdir(dir) if f.endswith(\".parquet\")]\n", + "#print('\\n'.join(map(str, files)))\n", + "file_name = \"ohlcv_df-SPY-2024-01-01T09:30:00-2024-05-14T16:00:00.parquet\"\n", + "ohlcv_df = pd.read_parquet(dir+file_name,engine='pyarrow')\n", + "basic_data = vbt.Data.from_data(vbt.symbol_dict({\"SPY\": ohlcv_df}), tz_convert=zoneNY)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#parameters (primary y line, secondary y line, close)\n", + "def plot_2y_close(priminds, secinds, close):\n", + " fig = vbt.make_subplots(rows=1, cols=1, shared_xaxes=True, specs=[[{\"secondary_y\": True}]], vertical_spacing=0.02, subplot_titles=(\"MOM\", \"Price\" ))\n", + " close.vbt.plot(fig=fig, add_trace_kwargs=dict(secondary_y=False), trace_kwargs=dict(line=dict(color=\"blue\")))\n", + " for ind in priminds:\n", + " ind.plot(fig=fig, add_trace_kwargs=dict(secondary_y=False))\n", + " for ind in secinds:\n", + " ind.plot(fig=fig, add_trace_kwargs=dict(secondary_y=True))\n", + " return fig\n", + "\n", + "# close = basic_data.xloc[\"09:30\":\"10:00\"].close" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#PIPELINE - FOR - LOOP\n", + "\n", + "#indicator parameters\n", + "mom_timeperiod = list(range(2, 12))\n", + "\n", + "#uzavreni okna od 1 do 200\n", + "#entry_window_closes = list(range(2, 50, 3))\n", + "entry_window_closes = [5, 10, 
30, 45]\n", + "#entry_window_closes = 30\n", + "#threshold entries parameters\n", + "#long\n", + "mom_th = np.round(np.arange(0.01, 0.5 + 0.02, 0.02),4).tolist()#-0.02\n", + "# short\n", + "#mom_th = np.round(np.arange(-0.01, -0.3 - 0.02, -0.02),4).tolist()#-0.02\n", + "roc_th = np.round(np.arange(-0.2, -0.8 - 0.05, -0.05),4).tolist()#-0.2\n", + "#print(mom_th, roc_th)\n", + "\n", + "#portfolio simulation parameters\n", + "sl_stop =np.round(np.arange(0.02/100, 0.7/100, 0.05/100),4).tolist()\n", + "tp_stop = np.round(np.arange(0.02/100, 0.7/100, 0.05/100),4).tolist()\n", + "\n", + "combs = list(product(mom_timeperiod, mom_th, roc_th, sl_stop, tp_stop))\n", + "\n", + "@vbt.parameterized(merge_func = \"concat\", random_subset = 2000, show_progress=True) \n", + "def test_strat(entry_window_closes=60,\n", + " mom_timeperiod=2,\n", + " mom_th=-0.04,\n", + " #roc_th=-0.2,\n", + " sl_stop=0.19/100,\n", + " tp_stop=0.19/100):\n", + " # mom_timeperiod=2\n", + " # mom_th=-0.06\n", + " # roc_th=-0.2\n", + " # sl_stop=0.04/100\n", + " # tp_stop=0.04/100\n", + "\n", + " momshort = vbt.indicator(\"talib:MOM\").run(basic_data.close, timeperiod=mom_timeperiod, short_name = \"slope_short\")\n", + " rocp = vbt.indicator(\"talib:ROC\").run(basic_data.close, short_name = \"rocp\")\n", + " #rate of change + momentum\n", + "\n", + " #momshort.plot rocp.real_crossed_below(roc_th) & \n", + " #short_signal = momshort.real_crossed_below(mom_th)\n", + " long_signal = momshort.real_crossed_above(mom_th)\n", + " # print(\"short signal\")\n", + " # print(short_signal.value_counts())\n", + "\n", + " #forced_exit = pd.Series(False, index=close.index)\n", + " forced_exit = basic_data.symbol_wrapper.fill(False)\n", + " #entry_window_open = pd.Series(False, index=close.index)\n", + " entry_window_open= basic_data.symbol_wrapper.fill(False)\n", + "\n", + " #print(entry_window_closes, \"entry window closes\")\n", + " # Calculate the time difference in minutes from market open for each timestamp\n", + " 
elapsed_min_from_open = (forced_exit.index.hour - market_open.hour) * 60 + (forced_exit.index.minute - market_open.minute)\n", + "\n", + " entry_window_open[(elapsed_min_from_open >= entry_window_opens) & (elapsed_min_from_open < entry_window_closes)] = True\n", + "\n", + " #print(entry_window_open.value_counts())\n", + "\n", + " forced_exit[(elapsed_min_from_open >= forced_exit_start) & (elapsed_min_from_open < forced_exit_end)] = True\n", + " #short_entries = (short_signal & entry_window_open)\n", + " #short_exits = forced_exit\n", + " entries = (long_signal & entry_window_open)\n", + " exits = forced_exit\n", + " #long_entries.info()\n", + " #number of trues and falses in long_entries\n", + " #print(short_exits.value_counts())\n", + " #print(short_entries.value_counts())\n", + "\n", + " #fig = plot_2y_close([],[momshort, rocp], close)\n", + " #short_signal.vbt.signals.plot_as_entries(close, fig=fig, add_trace_kwargs=dict(secondary_y=False))\n", + " #print(sl_stop)\n", + " #tsl_th=sl_stop, \n", + " #short_entries=short_entries, short_exits=short_exits,\n", + " pf = vbt.Portfolio.from_signals(close=basic_data.close, entries=entries, exits=exits, tsl_stop=sl_stop, tp_stop = tp_stop, fees=0.0167/100, freq=\"1s\", price=\"close\") #sl_stop=sl_stop, tp_stop = sl_stop,\n", + " \n", + " return pf.stats([\n", + " 'total_return',\n", + " 'max_dd', \n", + " 'total_trades', \n", + " 'win_rate', \n", + " 'expectancy'\n", + " ])\n", + "\n", + "pf_results = test_strat(vbt.Param(entry_window_closes),\n", + " vbt.Param(mom_timeperiod),\n", + " vbt.Param(mom_th),\n", + " #vbt.Param(roc_th)\n", + " vbt.Param(sl_stop),\n", + " vbt.Param(tp_stop, condition=\"tp_stop > sl_stop\"))\n", + "pf_results = pf_results.unstack(level=-1)\n", + "pf_results.sort_values(by=[\"Total Return [%]\", \"Max Drawdown [%]\"], ascending=[False, True])\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + 
"#pf_results.load(\"10tiscomb.pickle\")\n", + "#pf_results.info()\n", + "\n", + "vbt.save(pf_results, \"8tiscomb_tsl.pickle\")\n", + "\n", + "# pf_results = vbt.load(\"8tiscomb_tsl.pickle\")\n", + "# pf_results\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# parallel_coordinates method¶\n", + "\n", + "# attach_px_methods..plot_func(\n", + "# *args,\n", + "# layout=None,\n", + "# **kwargs\n", + "# )\n", + "\n", + "# pf_results.vbt.px.parallel_coordinates() #ocdf\n", + "\n", + "res = pf_results.reset_index()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf_results" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "from sklearn.decomposition import PCA\n", + "from sklearn.preprocessing import StandardScaler\n", + "import matplotlib.pyplot as plt\n", + "\n", + "# Assuming pf_results is your DataFrame\n", + "# Convert columns to numeric, assuming NaNs where conversion fails\n", + "metrics = ['Total Return [%]', 'Max Drawdown [%]', 'Total Trades']\n", + "for metric in metrics:\n", + " pf_results[metric] = pd.to_numeric(pf_results[metric], errors='coerce')\n", + "\n", + "# Handle missing values, for example filling with the median\n", + "pf_results['Max Drawdown [%]'].fillna(pf_results['Max Drawdown [%]'].median(), inplace=True)\n", + "\n", + "# Extract the metrics into a new DataFrame\n", + "data_for_pca = pf_results[metrics]\n", + "\n", + "# Standardize the data before applying PCA\n", + "scaler = StandardScaler()\n", + "data_scaled = scaler.fit_transform(data_for_pca)\n", + "\n", + "# Apply PCA\n", + "pca = PCA(n_components=2) # Adjust components as needed\n", + "principal_components = pca.fit_transform(data_scaled)\n", + "\n", + "# 
Create a DataFrame with the principal components\n", + "pca_results = pd.DataFrame(data=principal_components, columns=['PC1', 'PC2'])\n", + "\n", + "# Visualize the results\n", + "plt.figure(figsize=(8,6))\n", + "plt.scatter(pca_results['PC1'], pca_results['PC2'], alpha=0.5)\n", + "plt.xlabel('Principal Component 1')\n", + "plt.ylabel('Principal Component 2')\n", + "plt.title('PCA of Strategy Optimization Results')\n", + "plt.grid(True)\n", + "plt.savefig(\"ddd.png\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Check if there is any unnamed level and rename it\n", + "if None in df.index.names:\n", + " # Generate new names list replacing None with 'stat'\n", + " new_names = ['stat' if name is None else name for name in df.index.names]\n", + " df.index.set_names(new_names, inplace=True)\n", + "\n", + "rs= df\n", + "\n", + "rs.info()\n", + "\n", + "\n", + "# # Now, 'stat' is the name of the previously unnamed level\n", + "\n", + "# # Filter for 'Total Return' assuming it is a correct identifier in the 'stat' level\n", + "# total_return_series = df.xs('Total Return [%]', level='stat')\n", + "\n", + "# # Sort the Series to get the largest 'Total Return' values\n", + "# sorted_series = total_return_series.sort_values(ascending=False)\n", + "\n", + "# # Print the sorted filtered data\n", + "# sorted_series.head(20)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sorted_series.vbt.save()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#df.info()\n", + "total_return_series = df.xs('Total Return [%]')\n", + "sorted_series = total_return_series.sort_values(ascending=False)\n", + "\n", + "# Display the top N entries, e.g., top 5\n", + "sorted_series.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ 
+ "df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "comb_stats_df.nlargest(10, 'Total Return [%]')\n", + "#stats_df.info()\n", + "\n", + "\n", + "8\t-0.06\t-0.2\t0.0028\t0.0048\t4.156254\n", + "4 -0.02 -0.25 0.0028 0.0048 0.84433\n", + "3 -0.02 -0.25 0.0033 0.0023 Total Return [%] 0.846753\n", + "#2\t-0.04\t-0.2\t0.0019\t0.0019\n", + "# 2\t-0.04\t-0.2\t0.0019\t0.0019\t0.556919\t91\t60.43956\t0.00612\n", + "# 2\t-0.04\t-0.25\t0.0019\t0.0019\t0.556919\t91\t60.43956\t0.00612\n", + "# 2\t-0.04\t-0.3\t0.0019\t0.0019\t0.556919\t91\t60.43956\t0.00612\n", + "# 2\t-0.04\t-0.35\t0.0019\t0.0019\t0.556919\t91\t60.43956\t0.00612\n", + "# 2\t-0.04\t-0.4\t0.0019\t0.0019\t0.556919\t91\t60.43956\t0.00612\n", + "# 2\t-0.04\t-0.2\t0.0019\t0.0017\t0.451338\t93\t63.44086\t0.004853\n", + "# 2\t-0.04\t-0.25\t0.0019\t0.0017\t0.451338\t93\t63.44086\t0.004853\n", + "# 2\t-0.04\t-0.3\t0.0019\t0.0017\t0.451338\t93\t63.44086\t0.004853\n", + "# 2\t-0.04\t-0.35\t0.0019\t0.0017\t0.451338\t93\t63.44086\t0.004853\n", + "# 2\t-0.04\t-0.4\t0.0019\t0.0017\t0.451338\t93\t63.44086\t0.004853" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf.plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "basic_data.symbols" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + ">>> def apply_func(ts, entries, exits, fastw, sloww, minp=None):\n", + "... fast_ma = vbt.nb.rolling_mean_nb(ts, fastw, minp=minp)\n", + "... slow_ma = vbt.nb.rolling_mean_nb(ts, sloww, minp=minp)\n", + "... entries[:] = vbt.nb.crossed_above_nb(fast_ma, slow_ma) \n", + "... exits[:] = vbt.nb.crossed_above_nb(slow_ma, fast_ma)\n", + "... return (fast_ma, slow_ma) \n", + "\n", + ">>> CrossSig = vbt.IF(\n", + "... class_name=\"CrossSig\",\n", + "... input_names=['ts'],\n", + "... 
in_output_names=['entries', 'exits'],\n", + "... param_names=['fastw', 'sloww'],\n", + "... output_names=['fast_ma', 'slow_ma']\n", + "... ).with_apply_func(\n", + "... apply_func,\n", + "... in_output_settings=dict(\n", + "... entries=dict(dtype=np.bool_), #initialize output with bool\n", + "... exits=dict(dtype=np.bool_)\n", + "... )\n", + "... )\n", + ">>> cross_sig = CrossSig.run(ts2, 2, 4)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#PIPELINE - parameters in one go\n", + "\n", + "\n", + "#TOTO prepsat do FOR-LOOPu\n", + "\n", + "\n", + "#indicator parameters\n", + "mom_timeperiod = list(range(2, 6))\n", + "\n", + "#threshold entries parameters\n", + "mom_th = np.round(np.arange(-0.02, -0.1 - 0.02, -0.02),4).tolist()#-0.02\n", + "roc_th = np.round(np.arange(-0.2, -0.4 - 0.05, -0.05),4).tolist()#-0.2\n", + "#print(mom_th, roc_th)\n", + "#jejich product\n", + "# mom_th_prod, roc_th_prod = zip(*product(mom_th, roc_th))\n", + "\n", + "# #convert threshold to vbt param\n", + "# mom_th_index = vbt.Param(mom_th_prod, name='mom_th_th') \n", + "# roc_th_index = vbt.Param(roc_th_prod, name='roc_th_th')\n", + "\n", + "mom_th = vbt.Param(mom_th, name='mom_th')\n", + "roc_th = vbt.Param(roc_th, name='roc_th')\n", + "\n", + "#portfolio simulation parameters\n", + "sl_stop = np.arange(0.03/100, 0.2/100, 0.02/100).tolist()\n", + "# Using the round function\n", + "sl_stop = [round(val, 4) for val in sl_stop]\n", + "tp_stop = np.arange(0.03/100, 0.2/100, 0.02/100).tolist()\n", + "# Using the round function\n", + "tp_stop = [round(val, 4) for val in tp_stop]\n", + "sl_stop = vbt.Param(sl_stop) #np.nan mean s no stoploss\n", + "tp_stop = vbt.Param(tp_stop) #np.nan mean s no stoploss\n", + "\n", + "\n", + "#def test_mom(window=14, mom_th=0.2, roc_th=0.2, sl_stop=0.03/100, tp_stop=0.03/100):\n", + "#close = basic_data.xloc[\"09:30\":\"10:00\"].close\n", + "momshort = 
vbt.indicator(\"talib:MOM\").run(basic_data.get(\"Close\"), timeperiod=mom_timeperiod, short_name = \"slope_short\")\n", + "\n", + "#ht_trendline = vbt.indicator(\"talib:HT_TRENDLINE\").run(close, short_name = \"httrendline\")\n", + "rocp = vbt.indicator(\"talib:ROC\").run(basic_data.get(\"Close\"), short_name = \"rocp\")\n", + "#rate of change + momentum\n", + "\n", + "rocp_signal = rocp.real_crossed_below(mom_th)\n", + "mom_signal = momshort.real_crossed_below(roc_th)\n", + "\n", + "#mom_signal\n", + "print(rocp_signal.info())\n", + "print(mom_signal.info())\n", + "#print(rocp.real)\n", + "\n", + "\n", + "short_signal = (mom_signal.vbt & rocp_signal)\n", + "\n", + "# #short_signal = (rocp.real_crossed_below(roc_th_index) & momshort.real_crossed_below(mom_th_index))\n", + "# forced_exit = m1_data.symbol_wrapper.fill(False)\n", + "# entry_window_open= m1_data.symbol_wrapper.fill(False)\n", + "\n", + "\n", + "# # Calculate the time difference in minutes from market open for each timestamp\n", + "# elapsed_min_from_open = (forced_exit.index.hour - market_open.hour) * 60 + (forced_exit.index.minute - market_open.minute)\n", + "\n", + "# entry_window_open[(elapsed_min_from_open >= entry_window_opens) & (elapsed_min_from_open < entry_window_closes)] = True\n", + "# forced_exit[(elapsed_min_from_open >= forced_exit_start) & (elapsed_min_from_open < forced_exit_end)] = True\n", + "# short_entries = (short_signal & entry_window_open)\n", + "# short_exits = forced_exit\n", + "# #long_entries.info()\n", + "# #number of trues and falses in long_entries\n", + "# #short_exits.value_counts()\n", + "# #short_entries.value_counts()\n", + "\n", + "\n", + "# pf = vbt.Portfolio.from_signals(close=close, short_entries=short_entries, short_exits=short_exits, sl_stop=sl_stop, tp_stop = tp_stop, fees=0.0167/100, freq=\"1s\") #sl_stop=sl_stop, tp_stop = sl_stop,\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# filter dates" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#filter na dny\n", + "dates_of_interest = pd.to_datetime(['2024-04-22']).tz_localize('US/Eastern')\n", + "filtered_df = df.loc[df.index.normalize().isin(dates_of_interest)]\n", + "\n", + "df = filtered_df\n", + "df.info()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# import plotly.io as pio\n", + "# pio.renderers.default = 'notebook'\n", + "\n", + "#naloadujeme do vbt symbol as column\n", + "basic_data = vbt.Data.from_data({\"BAC\": df}, tz_convert=zoneNY)\n", + "\n", + "vbt.settings.plotting.auto_rangebreaks = True\n", + "#basic_data.data[\"BAC\"].vbt.ohlcv.plot()\n", + "\n", + "#basic_data.data[\"BAC\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "m1_data = basic_data[['Open', 'High', 'Low', 'Close', 'Volume']]\n", + "\n", + "m1_data.data[\"BAC\"]\n", + "#m5_data = m1_data.resample(\"5T\")\n", + "\n", + "#m5_data.data[\"BAC\"].head(10)\n", + "\n", + "# m15_data = m1_data.resample(\"15T\")\n", + "\n", + "# m15 = m15_data.data[\"BAC\"]\n", + "\n", + "# m15.vbt.ohlcv.plot()\n", + "\n", + "# m1_data.wrapper.index\n", + "\n", + "# m1_resampler = m1_data.wrapper.get_resampler(\"1T\")\n", + "# m1_resampler.index_difference(reverse=True)\n", + "\n", + "\n", + "# m5_resampler.prettify()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# MOM indicator" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vbt.phelp(vbt.indicator(\"talib:ROCP\").run)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "vyuzití rychleho klesani na sekundove urovni behem open rush\n", + "- MOM + ROC during open rush\n", + "- short signal\n", + "- pipeline kombinace thresholdu pro vstup mom_th, roc_th + hodnota sl_stop a tp_stop (pripadne 
trailing) - nalezeni optimalni kombinace atributu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# fig = plot_2y_close([ht_trendline],[momshort, rocp], close)\n", + "# short_signal.vbt.signals.plot_as_entries(close, fig=fig, add_trace_kwargs=dict(secondary_y=False)) \n", + "\n", + "#parameters (primary y line, secondary y line, close)\n", + "def plot_2y_close(priminds, secinds, close):\n", + " fig = vbt.make_subplots(rows=1, cols=1, shared_xaxes=True, specs=[[{\"secondary_y\": True}]], vertical_spacing=0.02, subplot_titles=(\"MOM\", \"Price\" ))\n", + " close.vbt.plot(fig=fig, add_trace_kwargs=dict(secondary_y=False), trace_kwargs=dict(line=dict(color=\"blue\")))\n", + " for ind in priminds:\n", + " ind.plot(fig=fig, add_trace_kwargs=dict(secondary_y=False))\n", + " for ind in secinds:\n", + " ind.plot(fig=fig, add_trace_kwargs=dict(secondary_y=True))\n", + " return fig\n", + "\n", + "close = m1_data.xloc[\"09:30\":\"10:00\"].close\n", + "momshort = vbt.indicator(\"talib:MOM\").run(close, timeperiod=3, short_name = \"slope_short\")\n", + "ht_trendline = vbt.indicator(\"talib:HT_TRENDLINE\").run(close, short_name = \"httrendline\")\n", + "rocp = vbt.indicator(\"talib:ROC\").run(close, short_name = \"rocp\")\n", + "#rate of change + momentum\n", + "short_signal = (rocp.real_crossed_below(-0.2) & momshort.real_crossed_below(-0.02))\n", + "#indlong = vbt.indicator(\"talib:MOM\").run(close, timeperiod=10, short_name = \"slope_long\")\n", + "fig = plot_2y_close([ht_trendline],[momshort, rocp], close)\n", + "short_signal.vbt.signals.plot_as_entries(close, fig=fig, add_trace_kwargs=dict(secondary_y=False)) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "close = m1_data.close\n", + "#vbt.phelp(vbt.OLS.run)\n", + "\n", + "#oer steepmnes of regression line\n", + "#talib.LINEARREG_SLOPE(close, timeperiod=timeperiod)\n", + "#a také ON BALANCE 
VOLUME - http://5.161.179.223:8000/static/js/vbt/api/indicators/custom/obv/index.html\n", + "\n", + "\n", + "\n", + "mom_ind = vbt.indicator(\"talib:MOM\") \n", + "#vbt.phelp(mom_ind.run)\n", + "\n", + "mom = mom_ind.run(close, timeperiod=10)\n", + "\n", + "plot_2y_close(mom, close)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# defining ENTRY WINDOW and forced EXIT window" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#m1_data.data[\"BAC\"].info()\n", + "import datetime\n", + "# Define the market open and close times\n", + "market_open = datetime.time(9, 30)\n", + "market_close = datetime.time(16, 0)\n", + "entry_window_opens = 2\n", + "entry_window_closes = 30\n", + "\n", + "forced_exit_start = 380\n", + "forced_exit_end = 390\n", + "\n", + "forced_exit = m1_data.symbol_wrapper.fill(False)\n", + "entry_window_open= m1_data.symbol_wrapper.fill(False)\n", + "\n", + "# Calculate the time difference in minutes from market open for each timestamp\n", + "elapsed_min_from_open = (forced_exit.index.hour - market_open.hour) * 60 + (forced_exit.index.minute - market_open.minute)\n", + "\n", + "entry_window_open[(elapsed_min_from_open >= entry_window_opens) & (elapsed_min_from_open < entry_window_closes)] = True\n", + "forced_exit[(elapsed_min_from_open >= forced_exit_start) & (elapsed_min_from_open < forced_exit_end)] = True\n", + "\n", + "#entry_window_open.info()\n", + "# forced_exit.tail(100)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "close = m1_data.close\n", + "\n", + "#rsi = vbt.RSI.run(close, window=14)\n", + "\n", + "short_entries = (short_signal & entry_window_open)\n", + "short_exits = forced_exit\n", + "#long_entries.info()\n", + "#number of trues and falses in long_entries\n", + "#short_exits.value_counts()\n", + "short_entries.value_counts()" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def plot_rsi(close, entries, exits):\n", + " fig = vbt.make_subplots(rows=1, cols=1, shared_xaxes=True, specs=[[{\"secondary_y\": True}]], vertical_spacing=0.02, subplot_titles=(\"RSI\", \"Price\" ))\n", + " close.vbt.plot(fig=fig, add_trace_kwargs=dict(secondary_y=True))\n", + " #rsi.plot(fig=fig, add_trace_kwargs=dict(secondary_y=False))\n", + " entries.vbt.signals.plot_as_entries(close, fig=fig, add_trace_kwargs=dict(secondary_y=False)) \n", + " exits.vbt.signals.plot_as_exits(close, fig=fig, add_trace_kwargs=dict(secondary_y=False)) \n", + " return fig\n", + "\n", + "plot_rsi(close, short_entries, short_exits)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vbt.phelp(vbt.Portfolio.from_signals)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sl_stop = np.arange(0.03/100, 0.2/100, 0.02/100).tolist()\n", + "# Using the round function\n", + "sl_stop = [round(val, 4) for val in sl_stop]\n", + "print(sl_stop)\n", + "sl_stop = vbt.Param(sl_stop) #np.nan mean s no stoploss\n", + "\n", + "pf = vbt.Portfolio.from_signals(close=close, short_entries=short_entries, short_exits=short_exits, sl_stop=0.03/100, tp_stop = 0.03/100, fees=0.0167/100, freq=\"1s\") #sl_stop=sl_stop, tp_stop = sl_stop,\n", + "\n", + "#pf.stats()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#list of orders\n", + "#pf.orders.records_readable\n", + "#pf.orders.plots()\n", + "#pf.stats()\n", + "pf.stats()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf.plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf[(0.0015,0.0013)].plot()" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf[0.03].plot_trade_signals()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# pristup k pf jako multi index" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#pf[0.03].plot()\n", + "#pf.order_records\n", + "pf[(0.03)].stats()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#zgrupovane statistiky\n", + "stats_df = pf.stats([\n", + " 'total_return',\n", + " 'total_trades',\n", + " 'win_rate',\n", + " 'expectancy'\n", + "], agg_func=None)\n", + "stats_df\n", + "\n", + "\n", + "stats_df.nlargest(10, 'Total Return [%]')\n", + "#stats_df.info()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf[(0.0011,0.0013000000000000002)].plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pandas.tseries.offsets import DateOffset\n", + "\n", + "temp_data = basic_data['2024-4-22']\n", + "temp_data\n", + "res1m = temp_data[[\"Open\", \"High\", \"Low\", \"Close\", \"Volume\"]]\n", + "\n", + "# Define a custom date offset that starts at 9:30 AM and spans 4 hours\n", + "custom_offset = DateOffset(hours=4, minutes=30)\n", + "\n", + "# res1m = res1m.get().resample(\"4H\").agg({ \n", + "# \"Open\": \"first\",\n", + "# \"High\": \"max\",\n", + "# \"Low\": \"min\",\n", + "# \"Close\": \"last\",\n", + "# \"Volume\": \"sum\"\n", + "# })\n", + "\n", + "res4h = res1m.resample(\"1h\", resample_kwargs=dict(origin=\"start\"))\n", + "\n", + "res4h.data\n", + "\n", + "res15m = res1m.resample(\"15T\", resample_kwargs=dict(origin=\"start\"))\n", + "\n", + "res15m.data[\"BAC\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "@vbt.njit\n", + "def 
long_entry_place_func_nb(c, low, close, time_in_ns, rsi14, window_open, window_close):\n", + " market_open_minutes = 570 # 9 hours * 60 minutes + 30 minutes\n", + "\n", + " for out_i in range(len(c.out)):\n", + " i = c.from_i + out_i\n", + "\n", + " current_minutes = vbt.dt_nb.hour_nb(time_in_ns[i]) * 60 + vbt.dt_nb.minute_nb(time_in_ns[i])\n", + " #print(\"current_minutes\", current_minutes)\n", + " # Calculate elapsed minutes since market open at 9:30 AM\n", + " elapsed_from_open = current_minutes - market_open_minutes\n", + " elapsed_from_open = elapsed_from_open if elapsed_from_open >= 0 else 0\n", + " #print( \"elapsed_from_open\", elapsed_from_open)\n", + "\n", + " #elapsed_from_open = elapsed_minutes_from_open_nb(time_in_ns) \n", + " in_window = elapsed_from_open > window_open and elapsed_from_open < window_close\n", + " #print(\"in_window\", in_window)\n", + " # if in_window:\n", + " # print(\"in window\")\n", + "\n", + " if in_window and rsi14[i] > 60: # and low[i, c.col] <= hit_price: # and hour == 9: # (4)!\n", + " return out_i\n", + " return -1\n", + "\n", + "@vbt.njit\n", + "def long_exit_place_func_nb(c, high, close, time_index, tp, sl): # (5)!\n", + " entry_i = c.from_i - c.wait\n", + " entry_price = close[entry_i, c.col]\n", + " hit_price = entry_price * (1 + tp)\n", + " stop_price = entry_price * (1 - sl)\n", + " for out_i in range(len(c.out)):\n", + " i = c.from_i + out_i\n", + " last_bar_of_day = vbt.dt_nb.day_changed_nb(time_index[i], time_index[i + 1])\n", + "\n", + " #print(next_day)\n", + " if last_bar_of_day: #pokud je dalsi next day, tak zavirame posledni\n", + " print(\"ted\",out_i)\n", + " return out_i\n", + " if close[i, c.col] >= hit_price or close[i, c.col] <= stop_price :\n", + " return out_i\n", + " return -1\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.DataFrame(np.random.random(size=(5, 10)), columns=list('abcdefghij'))\n", + "\n", + 
"df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df.sum()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/research/strat_CANDLEGAPS/CANDLEGAPS_v1_SINGLE.ipynb b/research/strat_CANDLEGAPS/CANDLEGAPS_v1_SINGLE.ipynb new file mode 100644 index 0000000..c4f1b53 --- /dev/null +++ b/research/strat_CANDLEGAPS/CANDLEGAPS_v1_SINGLE.ipynb @@ -0,0 +1,842 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# CANDLEGAPS\n", + "\n", + "* gaps on second based bars indicates short-term up/down move\n", + "\n", + "TODO:\n", + "* dodělat shorty\n", + "* přidat kombinace angle nebo nějaké podobné krátkodobé momentum jako doplňkový indikátor\n", + "* vyzkouset ruzne timeframe (sec a min) + hodnotu gapu a dalsi podminky" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from dotenv import load_dotenv\n", + "\n", + "#as V2realbot is client , load env variables here\n", + "env_file = \"/Users/davidbrazda/Documents/Development/python/.env\"\n", + "# Load the .env file\n", + "load_dotenv(env_file)\n", + "\n", + "from v2realbot.utils.utils import zoneNY\n", + "import pandas as pd\n", + "import numpy as np\n", + "import vectorbtpro as vbt\n", + "# from itables import init_notebook_mode, show\n", + "import datetime\n", + "from itertools import product\n", + "from v2realbot.config import DATA_DIR\n", + "from lightweight_charts import JupyterChart, chart, Panel, PlotAccessor\n", + "from IPython.display import display\n", + "\n", + "# 
init_notebook_mode(all_interactive=True)\n", + "\n", + "vbt.settings.set_theme(\"dark\")\n", + "vbt.settings['plotting']['layout']['width'] = 1280\n", + "vbt.settings.plotting.auto_rangebreaks = True\n", + "# Set the option to display with pagination\n", + "pd.set_option('display.notebook_repr_html', True)\n", + "pd.set_option('display.max_rows', 10) # Number of rows per page" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Define the market open and close times\n", + "market_open = datetime.time(9, 30)\n", + "market_close = datetime.time(16, 0)\n", + "entry_window_opens = 1\n", + "entry_window_closes = 370\n", + "forced_exit_start = 380\n", + "forced_exit_end = 390\n", + "\n", + "#LOAD FROM PARQUET\n", + "#list all files in the dir directory with the .parquet extension\n", + "dir = DATA_DIR + \"/notebooks/\"\n", + "import os\n", + "files = [f for f in os.listdir(dir) if f.endswith(\".parquet\")]\n", + "print('\\n'.join(map(str, files)))\n", + "file_name = \"ohlcv_df-BAC-2023-01-01T09_30_00-2024-05-25T15_30_00-47BCFOPUVWZ-100.parquet\"\n", + "ohlcv_df = pd.read_parquet(dir+file_name,engine='pyarrow')\n", + "#filter ohlcv_df to certain date range (assuming datetime index)\n", + "#ohlcv_df = ohlcv_df.loc[\"2024-02-12 9:30\":\"2024-02-14 16:00\"]\n", + "\n", + "#add vwap column to ohlcv_df\n", + "#ohlcv_df[\"hlcc4\"] = (ohlcv_df[\"close\"] + ohlcv_df[\"high\"] + ohlcv_df[\"low\"] + ohlcv_df[\"close\"]) / 4\n", + "\n", + "basic_data = vbt.Data.from_data(vbt.symbol_dict({\"BAC\": ohlcv_df}), tz_convert=zoneNY)\n", + "ohlcv_df= None\n", + "basic_data.wrapper.index.normalize().nunique()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "basic_data.data[\"BAC\"].info()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Add resample function to custom columns" + ] + }, + { + "cell_type": "code", + "execution_count": null, + 
"metadata": {}, + "outputs": [], + "source": [ + "from vectorbtpro.utils.config import merge_dicts, Config, HybridConfig\n", + "from vectorbtpro import _typing as tp\n", + "from vectorbtpro.generic import nb as generic_nb\n", + "\n", + "_feature_config: tp.ClassVar[Config] = HybridConfig(\n", + " {\n", + " \"buyvolume\": dict(\n", + " resample_func=lambda self, obj, resampler: obj.vbt.resample_apply(\n", + " resampler,\n", + " generic_nb.sum_reduce_nb,\n", + " )\n", + " ),\n", + " \"sellvolume\": dict(\n", + " resample_func=lambda self, obj, resampler: obj.vbt.resample_apply(\n", + " resampler,\n", + " generic_nb.sum_reduce_nb,\n", + " )\n", + " ),\n", + " \"trades\": dict(\n", + " resample_func=lambda self, obj, resampler: obj.vbt.resample_apply(\n", + " resampler,\n", + " generic_nb.sum_reduce_nb,\n", + " )\n", + " )\n", + " }\n", + ")\n", + "\n", + "basic_data._feature_config = _feature_config" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "s1data = basic_data[['open', 'high', 'low', 'close', 'volume','vwap','buyvolume','trades','sellvolume']]\n", + "\n", + "s2data = s1data.resample(\"2s\")\n", + "s2data = s2data.transform(lambda df: df.between_time('09:30', '16:00').dropna())\n", + "\n", + "s5data = s1data.resample(\"5s\")\n", + "s5data = s5data.transform(lambda df: df.between_time('09:30', '16:00').dropna())\n", + "\n", + "t1data = basic_data[['open', 'high', 'low', 'close', 'volume','vwap','buyvolume','trades','sellvolume']].resample(\"1T\")\n", + "t1data = t1data.transform(lambda df: df.between_time('09:30', '16:00').dropna())\n", + "# t1data.data[\"BAC\"].info()\n", + "\n", + "t30data = basic_data[['open', 'high', 'low', 'close', 'volume','vwap','buyvolume','trades','sellvolume']].resample(\"30T\")\n", + "t30data = t30data.transform(lambda df: df.between_time('09:30', '16:00').dropna())\n", + "# t30data.data[\"BAC\"].info()\n", + "\n", + "s2close = s2data.close\n", + "s1close = 
s1data.close\n", + "t1close = t1data.close\n", + "t30close = t30data.close\n", + "t30volume = t30data.volume\n", + "\n", + "#resample on specific index \n", + "resampler = vbt.Resampler(t30data.index, s1data.index, source_freq=\"30T\", target_freq=\"1s\")\n", + "t30close_realigned = t30close.vbt.realign_closing(resampler)\n", + "\n", + "#resample 1min to s\n", + "resampler_s = vbt.Resampler(t1data.index, s1data.index, source_freq=\"1T\", target_freq=\"1s\")\n", + "t1close_realigned = t1close.vbt.realign_closing(resampler_s)\n", + "\n", + "resampler_s = vbt.Resampler(s2data.index, s1data.index, source_freq=\"2s\", target_freq=\"1s\")\n", + "s2close_realigned = s2close.vbt.realign_closing(resampler_s)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vbt.IF.list_indicators(\"*vwap\")\n", + "vbt.phelp(vbt.VWAP.run)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# VWAP" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "t1vwap_h = vbt.VWAP.run(t1data.high, t1data.low, t1data.close, t1data.volume, anchor=\"H\")\n", + "t1vwap_d = vbt.VWAP.run(t1data.high, t1data.low, t1data.close, t1data.volume, anchor=\"D\")\n", + "t1vwap_t = vbt.VWAP.run(t1data.high, t1data.low, t1data.close, t1data.volume, anchor=\"T\")\n", + "\n", + "t1vwap_h_real = t1vwap_h.vwap.vbt.realign_closing(resampler_s)\n", + "t1vwap_d_real = t1vwap_d.vwap.vbt.realign_closing(resampler_s)\n", + "t1vwap_t_real = t1vwap_t.vwap.vbt.realign_closing(resampler_s)\n", + "\n", + "#t1vwap_5t.xloc[\"2024-01-3 09:30:00\":\"2024-01-03 16:00:00\"].plot()\n", + "\n", + "div_rel = (s1data.close.vbt - t1vwap_h_real) - 1\n", + "\n", + "div_rel" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#m30data.close.lw.plot()\n", + "#quick few liner\n", + "pane1 = Panel(\n", + " histogram=[\n", + " 
#(s1data.volume, \"volume\",None, 0.8),\n", + " #(m30volume, \"m30volume\",None, 1)\n", + " ], # [(series, name, \"rgba(53, 94, 59, 0.6)\", opacity)]\n", + " right=[\n", + " (s1data.close, \"1s close\"),\n", + " (s2data.close, \"1s close\"),\n", + " (t1data.close, \"1min close\"),\n", + " (t1vwap_t, \"1mvwap_t\"),\n", + " (t1vwap_h, \"1mvwap_h\"),\n", + " (t1vwap_d, \"1mvwap_d\"),\n", + " (t1vwap_t_real, \"1mvwap_t_real\"),\n", + " (t1vwap_h_real, \"1mvwap_h_real\"),\n", + " (t1vwap_d_real, \"1mvwap_d_real\")\n", + " # (t1close_realigned, \"1min close realigned\"),\n", + " # (m30data.close, \"30min-close\"),\n", + " # (m30close_realigned, \"30min close realigned\"),\n", + " ],\n", + " left = [\n", + " (div_rel, \"reldiv1s_1Hvwap\",)\n", + " ]\n", + ")\n", + "ch = chart([pane1], size=\"s\", xloc=slice(\"2024-05-1 09:30:00\",\"2024-05-5 16:00:00\"))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# SUPERTREND" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "supertrend_s1 = vbt.SUPERTREND.run(s1data.high, s1data.low, s1data.close, period=5, multiplier=3)\n", + "direction_series_s1 = supertrend_s1.direction\n", + "supertrend_t1 = vbt.SUPERTREND.run(t1data.high, t1data.low, t1data.close, period=14, multiplier=3)\n", + "direction_series_t1 = supertrend_t1.direction\n", + "supertrend_t30 = vbt.SUPERTREND.run(t30data.high, t30data.low, t30data.close, period=14, multiplier=3)\n", + "direction_series_t30 = supertrend_t30.direction\n", + "\n", + "resampler_1t_sec = vbt.Resampler(direction_series_t1.index, direction_series_s1.index, source_freq=\"1T\", target_freq=\"1s\")\n", + "resampler_30t_sec = vbt.Resampler(direction_series_t30.index, direction_series_s1.index, source_freq=\"30T\", target_freq=\"1s\")\n", + "direction_series_t1_realigned = direction_series_t1.vbt.realign_closing(resampler_1t_sec)\n", + "direction_series_t30_realigned = 
direction_series_t30.vbt.realign_closing(resampler_30t_sec)\n", + "\n", + "#supertrend_s1.xloc[\"2024-01-3 09:30:00\":\"2024-01-03 16:00:00\"].plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# aligned_ups= pd.Series(False, index=direction_real.index)\n", + "# aligned_downs= pd.Series(False, index=direction_real.index)\n", + "\n", + "# aligned_ups = direction_real == 1 & supertrend.direction == 1\n", + "# aligned_ups" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "s5close = s5data.data[\"BAC\"].close\n", + "s5open = s5data.data[\"BAC\"].open\n", + "s5high = s5data.data[\"BAC\"].high\n", + "s5low = s5data.data[\"BAC\"].low\n", + "s5close_prev = s5close.shift(1)\n", + "s5open_prev = s5open.shift(1)\n", + "s5high_prev = s5high.shift(1)\n", + "s5low_prev = s5low.shift(1)\n", + "#gap nahoru od byci svicky a nevraci se zpet na jeji uroven\n", + "entry_ups = (s5close_prev > s5open_prev) & (s5open > s5high_prev + 0.010) & (s5close > s5close_prev) & (s5close > s5open)\n", + "\n", + "print(entry_ups.value_counts())\n", + "\n", + "entry_downs = (s5close_prev < s5open_prev) & (s5open < s5low_prev - 0.012) & (s5close < s5close_prev)\n", + "\n", + "print(entry_downs.value_counts())\n", + "\n", + "#entry_ups.info()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Entry window" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "market_open = datetime.time(9, 30)\n", + "market_close = datetime.time(16, 0)\n", + "entry_window_opens = 10\n", + "entry_window_closes = 370\n", + "forced_exit_start = 380\n", + "forced_exit_end = 390" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#entry_ups = pd.Series(False, index=s5data.index)\n", + "\n", + "entry_window_open= pd.Series(False, 
index=entry_ups.index)\n", + "# Calculate the time difference in minutes from market open for each timestamp\n", + "elapsed_min_from_open = (entry_ups.index.hour - market_open.hour) * 60 + (entry_ups.index.minute - market_open.minute)\n", + "entry_window_open[(elapsed_min_from_open >= entry_window_opens) & (elapsed_min_from_open < entry_window_closes)] = True\n", + "entry_ups = entry_ups & entry_window_open\n", + "# entry_ups\n", + "\n", + "entry_down_window_open= pd.Series(False, index=entry_downs.index)\n", + "entry_down_window_open[(elapsed_min_from_open >= entry_window_opens) & (elapsed_min_from_open < entry_window_closes)] = True\n", + "entry_downs = entry_downs & entry_down_window_open\n", + "\n", + "forced_exits = pd.Series(False, index=s5data.index)\n", + "forced_exits[(elapsed_min_from_open >= forced_exit_start) & (elapsed_min_from_open < forced_exit_end)] = True\n", + "\n", + "# forced_exits\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "entry_ups.value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "s5vwap_h = vbt.VWAP.run(s5data.high, s5data.low, s5data.close, s5data.volume, anchor=\"H\")\n", + "s5vwap_d = vbt.VWAP.run(s5data.high, s5data.low, s5data.close, s5data.volume, anchor=\"D\")\n", + "\n", + "# s5vwap_h_real = s5vwap_h.vwap.vbt.realign_closing(resampler_s)\n", + "# s5vwap_d_real = s5vwap_d.vwap.vbt.realign_closing(resampler_s)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pane1 = Panel(\n", + " ohlcv=(s5data.data[\"BAC\"],), #(series, entries, exits, other_markers)\n", + " histogram=[], # [(series, name, \"rgba(53, 94, 59, 0.6), opacity\")]\n", + " right=[#(bbands,), #[(series, name, entries, exits, other_markers)]\n", + " (s5data.data[\"BAC\"].close, \"close\", entry_ups, entry_downs),\n", + " (s5data.data[\"BAC\"].open, 
\"open\"),\n", + " (s5vwap_h, \"vwap5s_H\",),\n", + " (s5vwap_d, \"vwap5s_D\",)\n", + " # (t1data.data[\"BAC\"].vwap, \"vwap\"),\n", + " # (t1data.close, \"1min close\"),\n", + " # (supertrend_s1.trend,\"STtrend\"),\n", + " # (supertrend_s1.long,\"STlong\"),\n", + " # (supertrend_s1.short,\"STshort\")\n", + " ],\n", + " left = [\n", + " #(direction_series_s1,\"direction_s1\"),\n", + " # (direction_series_t1,\"direction_t1\"),\n", + " # (direction_series_t30,\"direction_t30\")\n", + " \n", + " ],\n", + " # right=[(bbands.upperband, \"upperband\",),\n", + " # (bbands.lowerband, \"lowerband\",),\n", + " # (bbands.middleband, \"middleband\",)\n", + " # ], #[(series, name, entries, exits, other_markers)]\n", + " middle1=[],\n", + " middle2=[],\n", + ")\n", + "\n", + "# pane2 = Panel(\n", + "# ohlcv=(t1data.data[\"BAC\"],uptrend_m30, downtrend_m30), #(series, entries, exits, other_markers)\n", + "# histogram=[], # [(series, name, \"rgba(53, 94, 59, 0.6), opacity\")]\n", + "# left=[#(bbands,), #[(series, name, entries, exits, other_markers)]\n", + "# (direction_real,\"direction30min_real\"),\n", + "# ],\n", + "# # left = [(supertrendm30.direction,\"STdirection30\")],\n", + "# # # right=[(bbands.upperband, \"upperband\",),\n", + "# # # (bbands.lowerband, \"lowerband\",),\n", + "# # # (bbands.middleband, \"middleband\",)\n", + "# # # ], #[(series, name, entries, exits, other_markers)]\n", + "# middle1=[],\n", + "# middle2=[],\n", + "# title = \"1m\")\n", + "\n", + "ch = chart([pane1], sync=True, size=\"s\", xloc=slice(\"2024-05-20 09:30:00\",\"2024-05-25 16:00:00\"), precision=6)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pd.set_option('display.max_rows', None)\n", + "# data = s5data.xloc[\"2024-01-03 09:30:00\":\"2024-03-10 16:00:00\"]\n", + "# entry = entry_ups.vbt.xloc[\"2024-01-03 09:30:00\":\"2024-03-10 16:00:00\"].obj\n", + "\n", + "pf = vbt.Portfolio.from_signals(close=s5data, 
entries=entry_ups, exits=forced_exits, direction=\"longonly\", sl_stop=0.05/100, tp_stop = 0.05/100, fees=0.0167/100, freq=\"5s\")\n", + "pf.stats()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf.xloc[\"2024-02-20 09:30:00\":\"2024-05-25 16:00:00\"].plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vbt.pdir(pf)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf.xloc[\"2024-05-20 09:30:00\":\"2024-05-25 16:00:00\"].asset_value" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "hourly_returns = pf.returns.resample(\"h\").get()\n", + "hourly_returns.plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf.returns" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf.value" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf.value.vbt.lineplot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf5 = pf.xloc[\"2024-05-20 09:30:00\":\"2024-05-25 16:00:00\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "monthly_returns = pf5.returns_acc.resample(\"5T\").get()\n", + "monthly_returns = monthly_returns[monthly_returns!=0]\n", + "\n", + "monthly_returns\n", + "#monthly_returns.vbt.heatmap() \n", + "# fig = monthly_returns.vbt.heatmap() \n", + "# fig = monthly_returns.vbt.ts_heatmap() " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pd.set_option('display.max_rows', None)\n", + "pf.stats()" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf.plot().save_png()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf.trades.records_readable.sort_values(by=\"PnL\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf.xloc[\"2024-03-13 09:30:00\":\"2024-03-20 16:00:00\"].plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf.xloc[\"2024-01-26 09:30:00\":\"2024-01-28 16:00:00\"].plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pd.set_option('display.max_rows', None)\n", + "pf.stats()\n", + "# pf.xloc[\"monday\"].stats()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "buyvolume = t1data.data[\"BAC\"].buyvolume\n", + "sellvolume = t1data.data[\"BAC\"].sellvolume\n", + "totalvolume = buyvolume + sellvolume\n", + "\n", + "#adjust to minimal value to avoid division by zero\n", + "sellvolume_adjusted = sellvolume.replace(0, 1e-10)\n", + "oibratio = buyvolume / sellvolume\n", + "\n", + "#cumulative order flow (net difference)\n", + "cof = buyvolume - sellvolume\n", + "\n", + "# Calculate the order imbalance (net differene) normalize the order imbalance by calculating the difference between buy and sell volumes and then scaling it by the total volume.\n", + "order_imbalance = cof / totalvolume\n", + "order_imbalance = order_imbalance.fillna(0) #nan nahradime 0\n", + "\n", + "order_imbalance_allvolume = cof / t1data.data[\"BAC\"].volume\n", + "\n", + "order_imbalance_sma = vbt.indicator(\"talib:EMA\").run(order_imbalance, timeperiod=5)\n", + "short_signals = order_imbalance.vbt < -0.5\n", + "#short_entries = oibratio.vbt < 0.01\n", + "short_signals.value_counts()\n", + 
"short_signals.name = \"short_entries\"\n", + "#.fillna(False)\n", + "short_exits = short_signals.shift(-2).fillna(False).astype(bool)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pane1 = Panel(\n", + " ohlcv=(t1data.data[\"BAC\"],), #(series, entries, exits, other_markers)\n", + " histogram=[(order_imbalance_allvolume, \"oib_allvolume\", \"rgba(53, 94, 59, 0.6)\",0.5),\n", + " (t1data.data[\"BAC\"].trades, \"trades\",None,0.4),\n", + " ], # [(series, name, \"rgba(53, 94, 59, 0.6)\", opacity)]\n", + " # right=[\n", + " # (supertrend.trend,\"STtrend\"),\n", + " # (supertrend.long,\"STlong\"),\n", + " # (supertrend.short,\"STshort\")\n", + " # ],\n", + " # left = [(supertrend.direction,\"STdirection\")],\n", + " # right=[(bbands.upperband, \"upperband\",),\n", + " # (bbands.lowerband, \"lowerband\",),\n", + " # (bbands.middleband, \"middleband\",)\n", + " # ], #[(series, name, entries, exits, other_markers)]\n", + " middle1=[],\n", + " middle2=[],\n", + ")\n", + "\n", + "pane2 = Panel(\n", + " ohlcv=(basic_data.data[\"BAC\"],), #(series, entries, exits, other_markers)\n", + " left=[(basic_data.data[\"BAC\"].trades, \"trades\")],\n", + " histogram=[(basic_data.data[\"BAC\"].trades, \"trades_hist\", \"white\", 0.5)], #\"rgba(53, 94, 59, 0.6)\"\n", + " # ], # [(series, name, \"rgba(53, 94, 59, 0.6)\")]\n", + " # right=[\n", + " # (supertrend.trend,\"STtrend\"),\n", + " # (supertrend.long,\"STlong\"),\n", + " # (supertrend.short,\"STshort\")\n", + " # ],\n", + " # left = [(supertrend.direction,\"STdirection\")],\n", + " # right=[(bbands.upperband, \"upperband\",),\n", + " # (bbands.lowerband, \"lowerband\",),\n", + " # (bbands.middleband, \"middleband\",)\n", + " # ], #[(series, name, entries, exits, other_markers)]\n", + " middle1=[],\n", + " middle2=[],\n", + ")\n", + "\n", + "\n", + "ch = chart([pane1, pane2], size=\"m\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": 
{}, + "outputs": [], + "source": [ + "#short_signal = t1slope.real_below(t1_th) & t2slope.real_below(t2_th) & t3slope.real_below(t3_th) & t4slope.real_below(t4_th)\n", + "#long_signal = t1slope.real_above(t1_th) & t2slope.real_above(t2_th) & t3slope.real_above(t3_th) & t4slope.real_above(t4_th)\n", + "\n", + "#test na daily s reversem crossed 0\n", + "short_signal = t2slope.vbt < -0.01 & t3slope.vbt < -0.01 #min value of threshold\n", + "long_signal = t2slope.vbt > 0.01 & t3slope.vbt > 0.01 #min\n", + "\n", + "# thirty_up_signal = t3slope.vbt.crossed_above(0.01)\n", + "# thirty_down_signal = t3slope.vbt.crossed_below(-0.01)\n", + "\n", + "fig = plot_2y_close(priminds=[], secinds=[t3slope], close=t1data.close)\n", + "#short_signal.vbt.signals.plot_as_entries(basic_data.close, fig=fig)\n", + "\n", + "short_signal.vbt.signals.plot_as_entries(t1data.close, fig=fig, trace_kwargs=dict(name=\"SHORTS\",\n", + " line=dict(color=\"#ffe476\"),\n", + " marker=dict(color=\"red\", symbol=\"triangle-down\"),\n", + " fill=None,\n", + " connectgaps=True,\n", + " ))\n", + "long_signal.vbt.signals.plot_as_entries(t1data.close, fig=fig, trace_kwargs=dict(name=\"LONGS\",\n", + " line=dict(color=\"#ffe476\"),\n", + " marker=dict(color=\"limegreen\"),\n", + " fill=None,\n", + " connectgaps=True,\n", + " ))\n", + "\n", + "# thirty_down_signal.vbt.signals.plot_as_entries(t1data.close, fig=fig, trace_kwargs=dict(name=\"DOWN30\",\n", + "# line=dict(color=\"#ffe476\"),\n", + "# marker=dict(color=\"yellow\", symbol=\"triangle-down\"),\n", + "# fill=None,\n", + "# connectgaps=True,\n", + "# ))\n", + "# thirty_up_signal.vbt.signals.plot_as_entries(t1data.close, fig=fig, trace_kwargs=dict(name=\"UP30\",\n", + "# line=dict(color=\"#ffe476\"),\n", + "# marker=dict(color=\"grey\"),\n", + "# fill=None,\n", + "# connectgaps=True,\n", + "# ))\n", + "\n", + "# thirtymin_slope_to_compare.vbt.plot(fig=fig, add_trace_kwargs=dict(secondary_y=True), trace_kwargs=dict(name=\"30min slope\",\n", + "# 
line=dict(color=\"yellow\"), \n", + "# fill=None,\n", + "# connectgaps=True,\n", + "# ))\n", + "\n", + "fig.show()\n", + "# print(\"short signal\")\n", + "# print(short_signal.value_counts())\n", + "\n", + "#forced_exit = pd.Series(False, index=close.index)\n", + "forced_exit = basic_data.symbol_wrapper.fill(False)\n", + "#entry_window_open = pd.Series(False, index=close.index)\n", + "entry_window_open= basic_data.symbol_wrapper.fill(False)\n", + "\n", + "# Calculate the time difference in minutes from market open for each timestamp\n", + "elapsed_min_from_open = (forced_exit.index.hour - market_open.hour) * 60 + (forced_exit.index.minute - market_open.minute)\n", + "\n", + "entry_window_open[(elapsed_min_from_open >= entry_window_opens) & (elapsed_min_from_open < entry_window_closes)] = True\n", + "\n", + "#print(entry_window_open.value_counts())\n", + "\n", + "forced_exit[(elapsed_min_from_open >= forced_exit_start) & (elapsed_min_from_open < forced_exit_end)] = True\n", + "short_entries = (short_signal & entry_window_open)\n", + "short_exits = forced_exit\n", + "\n", + "entries = (long_signal & entry_window_open)\n", + "exits = forced_exit\n", + "#long_entries.info()\n", + "#number of trues and falses in long_entries\n", + "# print(short_exits.value_counts())\n", + "# print(short_entries.value_counts())\n", + "\n", + "#fig = plot_2y_close([],[momshort, rocp], close)\n", + "#short_signal.vbt.signals.plot_as_entries(close, fig=fig, add_trace_kwargs=dict(secondary_y=False))\n", + "#print(sl_stop)\n", + "#short_entries=short_entries, short_exits=short_exits,\n", + "# pf = vbt.Portfolio.from_signals(close=basic_data, entries=short_entries, exits=exits, tsl_stop=0.005, tp_stop = 0.05, fees=0.0167/100, freq=\"1s\") #sl_stop=sl_stop, tp_stop = sl_stop,\n", + "\n", + "# pf.stats()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "forced_exit = t1data.symbol_wrapper.fill(False)\n", + "#entry_window_open = 
pd.Series(False, index=close.index)\n", + "entry_window_open= t1data.symbol_wrapper.fill(False)\n", + "\n", + "# Calculate the time difference in minutes from market open for each timestamp\n", + "elapsed_min_from_open = (forced_exit.index.hour - market_open.hour) * 60 + (forced_exit.index.minute - market_open.minute)\n", + "\n", + "entry_window_open[(elapsed_min_from_open >= entry_window_opens) & (elapsed_min_from_open < entry_window_closes)] = True\n", + "\n", + "#print(entry_window_open.value_counts())\n", + "\n", + "forced_exit[(elapsed_min_from_open >= forced_exit_start) & (elapsed_min_from_open < forced_exit_end)] = True\n", + "short_entries = (short_signals & entry_window_open)\n", + "short_exits = forced_exit\n", + "\n", + "entries = (long_signals & entry_window_open)\n", + "exits = forced_exit\n", + "\n", + "pf = vbt.Portfolio.from_signals(close=t1data, entries=entries, exits=exits, short_entries=short_entries, short_exits=exits,\n", + "td_stop=2, time_delta_format=\"rows\",\n", + "tsl_stop=0.005, tp_stop = 0.005, fees=0.0167/100)#, freq=\"1s\") #sl_stop=sl_stop, tp_stop = sl_stop,\n", + "\n", + "pf.stats()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf.plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf.get_drawdowns().records_readable" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf.orders.records_readable" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git 
a/research/strat_LINREG_MULTI/v1_MULTI.ipynb b/research/strat_LINREG_MULTI/v1_MULTI.ipynb new file mode 100644 index 0000000..d1cc970 --- /dev/null +++ b/research/strat_LINREG_MULTI/v1_MULTI.ipynb @@ -0,0 +1,949 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from v2realbot.tools.loadbatch import load_batch\n", + "from v2realbot.utils.utils import zoneNY\n", + "import pandas as pd\n", + "import numpy as np\n", + "import vectorbtpro as vbt\n", + "from itables import init_notebook_mode, show\n", + "import datetime\n", + "from itertools import product\n", + "from v2realbot.config import ACCOUNT1_PAPER_API_KEY, ACCOUNT1_PAPER_SECRET_KEY, DATA_DIR\n", + "\n", + "init_notebook_mode(all_interactive=True)\n", + "\n", + "vbt.settings.set_theme(\"dark\")\n", + "vbt.settings['plotting']['layout']['width'] = 1280\n", + "vbt.settings.plotting.auto_rangebreaks = True\n", + "# Set the option to display with pagination\n", + "pd.set_option('display.notebook_repr_html', True)\n", + "pd.set_option('display.max_rows', 10) # Number of rows per page\n", + "\n", + "# Define the market open and close times\n", + "market_open = datetime.time(9, 30)\n", + "market_close = datetime.time(16, 0)\n", + "entry_window_opens = 1\n", + "entry_window_closes = 370\n", + "\n", + "forced_exit_start = 380\n", + "forced_exit_end = 390\n", + "\n", + "#LOAD FROM BATCH\n", + "# res, df = load_batch(batch_id=\"0fb5043a\", #0fb5043a bde6d0be\n", + "# space_resolution_evenly=False,\n", + "# indicators_columns=[\"Rsi14\"],\n", + "# main_session_only=True,\n", + "# verbose = False)\n", + "# if res < 0:\n", + "# print(\"Error\" + str(res) + str(df))\n", + "# df = df[\"bars\"]\n", + "\n", + "# #df\n", + "\n", + "# basic_data = vbt.Data.from_data(vbt.symbol_dict({\"BAC\": df}), tz_convert=zoneNY)\n", + "# #m1_data = basic_data[['Open', 'High', 'Low', 'Close', 'Volume']]\n", + "# basic_data = basic_data.transform(lambda df: 
df.between_time('09:30', '16:00'))\n", + "\n", + "#LOAD FROM PARQUET\n", + "#LOAD FROM PARQUET\n", + "#list all files is dir directory with parquet extension\n", + "dir = DATA_DIR + \"/notebooks/\"\n", + "import os\n", + "files = [f for f in os.listdir(dir) if f.endswith(\".parquet\")]\n", + "#print('\\n'.join(map(str, files)))\n", + "file_name = \"ohlcv_df-SPY-2024-01-01T09:30:00-2024-05-14T16:00:00.parquet\"\n", + "ohlcv_df = pd.read_parquet(dir+file_name,engine='pyarrow')\n", + "basic_data = vbt.Data.from_data(vbt.symbol_dict({\"SPY\": ohlcv_df}), tz_convert=zoneNY)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#parameters (primary y line, secondary y line, close)\n", + "def plot_2y_close(priminds, secinds, close):\n", + " fig = vbt.make_subplots(rows=1, cols=1, shared_xaxes=True, specs=[[{\"secondary_y\": True}]], vertical_spacing=0.02, subplot_titles=(\"MOM\", \"Price\" ))\n", + " close.vbt.plot(fig=fig, add_trace_kwargs=dict(secondary_y=False), trace_kwargs=dict(line=dict(color=\"blue\")))\n", + " for ind in priminds:\n", + " ind.plot(fig=fig, add_trace_kwargs=dict(secondary_y=False))\n", + " for ind in secinds:\n", + " ind.plot(fig=fig, add_trace_kwargs=dict(secondary_y=True))\n", + " return fig\n", + "\n", + "# close = basic_data.xloc[\"09:30\":\"10:00\"].close" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#PIPELINE - FOR - LOOP\n", + "\n", + "#indicator parameters\n", + "mom_timeperiod = list(range(2, 12))\n", + "\n", + "#uzavreni okna od 1 do 200\n", + "#entry_window_closes = list(range(2, 50, 3))\n", + "entry_window_closes = [5, 10, 30, 45]\n", + "#entry_window_closes = 30\n", + "#threshold entries parameters\n", + "#long\n", + "mom_th = np.round(np.arange(0.01, 0.5 + 0.02, 0.02),4).tolist()#-0.02\n", + "# short\n", + "#mom_th = np.round(np.arange(-0.01, -0.3 - 0.02, -0.02),4).tolist()#-0.02\n", + "roc_th = 
np.round(np.arange(-0.2, -0.8 - 0.05, -0.05),4).tolist()#-0.2\n", + "#print(mom_th, roc_th)\n", + "\n", + "#portfolio simulation parameters\n", + "sl_stop =np.round(np.arange(0.02/100, 0.7/100, 0.05/100),4).tolist()\n", + "tp_stop = np.round(np.arange(0.02/100, 0.7/100, 0.05/100),4).tolist()\n", + "\n", + "combs = list(product(mom_timeperiod, mom_th, roc_th, sl_stop, tp_stop))\n", + "\n", + "@vbt.parameterized(merge_func = \"concat\", random_subset = 2000, show_progress=True) \n", + "def test_strat(entry_window_closes=60,\n", + " mom_timeperiod=2,\n", + " mom_th=-0.04,\n", + " #roc_th=-0.2,\n", + " sl_stop=0.19/100,\n", + " tp_stop=0.19/100):\n", + " # mom_timeperiod=2\n", + " # mom_th=-0.06\n", + " # roc_th=-0.2\n", + " # sl_stop=0.04/100\n", + " # tp_stop=0.04/100\n", + "\n", + " momshort = vbt.indicator(\"talib:MOM\").run(basic_data.close, timeperiod=mom_timeperiod, short_name = \"slope_short\")\n", + " rocp = vbt.indicator(\"talib:ROC\").run(basic_data.close, short_name = \"rocp\")\n", + " #rate of change + momentum\n", + "\n", + " #momshort.plot rocp.real_crossed_below(roc_th) & \n", + " #short_signal = momshort.real_crossed_below(mom_th)\n", + " long_signal = momshort.real_crossed_above(mom_th)\n", + " # print(\"short signal\")\n", + " # print(short_signal.value_counts())\n", + "\n", + " #forced_exit = pd.Series(False, index=close.index)\n", + " forced_exit = basic_data.symbol_wrapper.fill(False)\n", + " #entry_window_open = pd.Series(False, index=close.index)\n", + " entry_window_open= basic_data.symbol_wrapper.fill(False)\n", + "\n", + " #print(entry_window_closes, \"entry window closes\")\n", + " # Calculate the time difference in minutes from market open for each timestamp\n", + " elapsed_min_from_open = (forced_exit.index.hour - market_open.hour) * 60 + (forced_exit.index.minute - market_open.minute)\n", + "\n", + " entry_window_open[(elapsed_min_from_open >= entry_window_opens) & (elapsed_min_from_open < entry_window_closes)] = True\n", + "\n", + " 
#print(entry_window_open.value_counts())\n", + "\n", + " forced_exit[(elapsed_min_from_open >= forced_exit_start) & (elapsed_min_from_open < forced_exit_end)] = True\n", + " #short_entries = (short_signal & entry_window_open)\n", + " #short_exits = forced_exit\n", + " entries = (long_signal & entry_window_open)\n", + " exits = forced_exit\n", + " #long_entries.info()\n", + " #number of trues and falses in long_entries\n", + " #print(short_exits.value_counts())\n", + " #print(short_entries.value_counts())\n", + "\n", + " #fig = plot_2y_close([],[momshort, rocp], close)\n", + " #short_signal.vbt.signals.plot_as_entries(close, fig=fig, add_trace_kwargs=dict(secondary_y=False))\n", + " #print(sl_stop)\n", + " #tsl_th=sl_stop, \n", + " #short_entries=short_entries, short_exits=short_exits,\n", + " pf = vbt.Portfolio.from_signals(close=basic_data.close, entries=entries, exits=exits, tsl_stop=sl_stop, tp_stop = tp_stop, fees=0.0167/100, freq=\"1s\", price=\"close\") #sl_stop=sl_stop, tp_stop = sl_stop,\n", + " \n", + " return pf.stats([\n", + " 'total_return',\n", + " 'max_dd', \n", + " 'total_trades', \n", + " 'win_rate', \n", + " 'expectancy'\n", + " ])\n", + "\n", + "pf_results = test_strat(vbt.Param(entry_window_closes),\n", + " vbt.Param(mom_timeperiod),\n", + " vbt.Param(mom_th),\n", + " #vbt.Param(roc_th)\n", + " vbt.Param(sl_stop),\n", + " vbt.Param(tp_stop, condition=\"tp_stop > sl_stop\"))\n", + "pf_results = pf_results.unstack(level=-1)\n", + "pf_results.sort_values(by=[\"Total Return [%]\", \"Max Drawdown [%]\"], ascending=[False, True])\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#pf_results.load(\"10tiscomb.pickle\")\n", + "#pf_results.info()\n", + "\n", + "vbt.save(pf_results, \"8tiscomb_tsl.pickle\")\n", + "\n", + "# pf_results = vbt.load(\"8tiscomb_tsl.pickle\")\n", + "# pf_results\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + 
"source": [ + "# parallel_coordinates method¶\n", + "\n", + "# attach_px_methods..plot_func(\n", + "# *args,\n", + "# layout=None,\n", + "# **kwargs\n", + "# )\n", + "\n", + "# pf_results.vbt.px.parallel_coordinates() #ocdf\n", + "\n", + "res = pf_results.reset_index()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf_results" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "from sklearn.decomposition import PCA\n", + "from sklearn.preprocessing import StandardScaler\n", + "import matplotlib.pyplot as plt\n", + "\n", + "# Assuming pf_results is your DataFrame\n", + "# Convert columns to numeric, assuming NaNs where conversion fails\n", + "metrics = ['Total Return [%]', 'Max Drawdown [%]', 'Total Trades']\n", + "for metric in metrics:\n", + " pf_results[metric] = pd.to_numeric(pf_results[metric], errors='coerce')\n", + "\n", + "# Handle missing values, for example filling with the median\n", + "pf_results['Max Drawdown [%]'].fillna(pf_results['Max Drawdown [%]'].median(), inplace=True)\n", + "\n", + "# Extract the metrics into a new DataFrame\n", + "data_for_pca = pf_results[metrics]\n", + "\n", + "# Standardize the data before applying PCA\n", + "scaler = StandardScaler()\n", + "data_scaled = scaler.fit_transform(data_for_pca)\n", + "\n", + "# Apply PCA\n", + "pca = PCA(n_components=2) # Adjust components as needed\n", + "principal_components = pca.fit_transform(data_scaled)\n", + "\n", + "# Create a DataFrame with the principal components\n", + "pca_results = pd.DataFrame(data=principal_components, columns=['PC1', 'PC2'])\n", + "\n", + "# Visualize the results\n", + "plt.figure(figsize=(8,6))\n", + "plt.scatter(pca_results['PC1'], pca_results['PC2'], alpha=0.5)\n", + "plt.xlabel('Principal 
Component 1')\n", + "plt.ylabel('Principal Component 2')\n", + "plt.title('PCA of Strategy Optimization Results')\n", + "plt.grid(True)\n", + "plt.savefig(\"ddd.png\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Check if there is any unnamed level and rename it\n", + "if None in df.index.names:\n", + " # Generate new names list replacing None with 'stat'\n", + " new_names = ['stat' if name is None else name for name in df.index.names]\n", + " df.index.set_names(new_names, inplace=True)\n", + "\n", + "rs= df\n", + "\n", + "rs.info()\n", + "\n", + "\n", + "# # Now, 'stat' is the name of the previously unnamed level\n", + "\n", + "# # Filter for 'Total Return' assuming it is a correct identifier in the 'stat' level\n", + "# total_return_series = df.xs('Total Return [%]', level='stat')\n", + "\n", + "# # Sort the Series to get the largest 'Total Return' values\n", + "# sorted_series = total_return_series.sort_values(ascending=False)\n", + "\n", + "# # Print the sorted filtered data\n", + "# sorted_series.head(20)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sorted_series.vbt.save()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#df.info()\n", + "total_return_series = df.xs('Total Return [%]')\n", + "sorted_series = total_return_series.sort_values(ascending=False)\n", + "\n", + "# Display the top N entries, e.g., top 5\n", + "sorted_series.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "comb_stats_df.nlargest(10, 'Total Return [%]')\n", + "#stats_df.info()\n", + "\n", + "\n", + "8\t-0.06\t-0.2\t0.0028\t0.0048\t4.156254\n", + "4 -0.02 -0.25 0.0028 0.0048 0.84433\n", + 
"3 -0.02 -0.25 0.0033 0.0023 Total Return [%] 0.846753\n", + "#2\t-0.04\t-0.2\t0.0019\t0.0019\n", + "# 2\t-0.04\t-0.2\t0.0019\t0.0019\t0.556919\t91\t60.43956\t0.00612\n", + "# 2\t-0.04\t-0.25\t0.0019\t0.0019\t0.556919\t91\t60.43956\t0.00612\n", + "# 2\t-0.04\t-0.3\t0.0019\t0.0019\t0.556919\t91\t60.43956\t0.00612\n", + "# 2\t-0.04\t-0.35\t0.0019\t0.0019\t0.556919\t91\t60.43956\t0.00612\n", + "# 2\t-0.04\t-0.4\t0.0019\t0.0019\t0.556919\t91\t60.43956\t0.00612\n", + "# 2\t-0.04\t-0.2\t0.0019\t0.0017\t0.451338\t93\t63.44086\t0.004853\n", + "# 2\t-0.04\t-0.25\t0.0019\t0.0017\t0.451338\t93\t63.44086\t0.004853\n", + "# 2\t-0.04\t-0.3\t0.0019\t0.0017\t0.451338\t93\t63.44086\t0.004853\n", + "# 2\t-0.04\t-0.35\t0.0019\t0.0017\t0.451338\t93\t63.44086\t0.004853\n", + "# 2\t-0.04\t-0.4\t0.0019\t0.0017\t0.451338\t93\t63.44086\t0.004853" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf.plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "basic_data.symbols" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + ">>> def apply_func(ts, entries, exits, fastw, sloww, minp=None):\n", + "... fast_ma = vbt.nb.rolling_mean_nb(ts, fastw, minp=minp)\n", + "... slow_ma = vbt.nb.rolling_mean_nb(ts, sloww, minp=minp)\n", + "... entries[:] = vbt.nb.crossed_above_nb(fast_ma, slow_ma) \n", + "... exits[:] = vbt.nb.crossed_above_nb(slow_ma, fast_ma)\n", + "... return (fast_ma, slow_ma) \n", + "\n", + ">>> CrossSig = vbt.IF(\n", + "... class_name=\"CrossSig\",\n", + "... input_names=['ts'],\n", + "... in_output_names=['entries', 'exits'],\n", + "... param_names=['fastw', 'sloww'],\n", + "... output_names=['fast_ma', 'slow_ma']\n", + "... ).with_apply_func(\n", + "... apply_func,\n", + "... in_output_settings=dict(\n", + "... entries=dict(dtype=np.bool_), #initialize output with bool\n", + "... 
exits=dict(dtype=np.bool_)\n", + "... )\n", + "... )\n", + ">>> cross_sig = CrossSig.run(ts2, 2, 4)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#PIPELINE - parameters in one go\n", + "\n", + "\n", + "#TOTO prepsat do FOR-LOOPu\n", + "\n", + "\n", + "#indicator parameters\n", + "mom_timeperiod = list(range(2, 6))\n", + "\n", + "#threshold entries parameters\n", + "mom_th = np.round(np.arange(-0.02, -0.1 - 0.02, -0.02),4).tolist()#-0.02\n", + "roc_th = np.round(np.arange(-0.2, -0.4 - 0.05, -0.05),4).tolist()#-0.2\n", + "#print(mom_th, roc_th)\n", + "#jejich product\n", + "# mom_th_prod, roc_th_prod = zip(*product(mom_th, roc_th))\n", + "\n", + "# #convert threshold to vbt param\n", + "# mom_th_index = vbt.Param(mom_th_prod, name='mom_th_th') \n", + "# roc_th_index = vbt.Param(roc_th_prod, name='roc_th_th')\n", + "\n", + "mom_th = vbt.Param(mom_th, name='mom_th')\n", + "roc_th = vbt.Param(roc_th, name='roc_th')\n", + "\n", + "#portfolio simulation parameters\n", + "sl_stop = np.arange(0.03/100, 0.2/100, 0.02/100).tolist()\n", + "# Using the round function\n", + "sl_stop = [round(val, 4) for val in sl_stop]\n", + "tp_stop = np.arange(0.03/100, 0.2/100, 0.02/100).tolist()\n", + "# Using the round function\n", + "tp_stop = [round(val, 4) for val in tp_stop]\n", + "sl_stop = vbt.Param(sl_stop) #np.nan mean s no stoploss\n", + "tp_stop = vbt.Param(tp_stop) #np.nan mean s no stoploss\n", + "\n", + "\n", + "#def test_mom(window=14, mom_th=0.2, roc_th=0.2, sl_stop=0.03/100, tp_stop=0.03/100):\n", + "#close = basic_data.xloc[\"09:30\":\"10:00\"].close\n", + "momshort = vbt.indicator(\"talib:MOM\").run(basic_data.get(\"Close\"), timeperiod=mom_timeperiod, short_name = \"slope_short\")\n", + "\n", + "#ht_trendline = vbt.indicator(\"talib:HT_TRENDLINE\").run(close, short_name = \"httrendline\")\n", + "rocp = vbt.indicator(\"talib:ROC\").run(basic_data.get(\"Close\"), short_name = \"rocp\")\n", + "#rate of 
change + momentum\n", + "\n", + "rocp_signal = rocp.real_crossed_below(mom_th)\n", + "mom_signal = momshort.real_crossed_below(roc_th)\n", + "\n", + "#mom_signal\n", + "print(rocp_signal.info())\n", + "print(mom_signal.info())\n", + "#print(rocp.real)\n", + "\n", + "\n", + "short_signal = (mom_signal.vbt & rocp_signal)\n", + "\n", + "# #short_signal = (rocp.real_crossed_below(roc_th_index) & momshort.real_crossed_below(mom_th_index))\n", + "# forced_exit = m1_data.symbol_wrapper.fill(False)\n", + "# entry_window_open= m1_data.symbol_wrapper.fill(False)\n", + "\n", + "\n", + "# # Calculate the time difference in minutes from market open for each timestamp\n", + "# elapsed_min_from_open = (forced_exit.index.hour - market_open.hour) * 60 + (forced_exit.index.minute - market_open.minute)\n", + "\n", + "# entry_window_open[(elapsed_min_from_open >= entry_window_opens) & (elapsed_min_from_open < entry_window_closes)] = True\n", + "# forced_exit[(elapsed_min_from_open >= forced_exit_start) & (elapsed_min_from_open < forced_exit_end)] = True\n", + "# short_entries = (short_signal & entry_window_open)\n", + "# short_exits = forced_exit\n", + "# #long_entries.info()\n", + "# #number of trues and falses in long_entries\n", + "# #short_exits.value_counts()\n", + "# #short_entries.value_counts()\n", + "\n", + "\n", + "# pf = vbt.Portfolio.from_signals(close=close, short_entries=short_entries, short_exits=short_exits, sl_stop=sl_stop, tp_stop = tp_stop, fees=0.0167/100, freq=\"1s\") #sl_stop=sl_stop, tp_stop = sl_stop,\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# filter dates" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#filter na dny\n", + "dates_of_interest = pd.to_datetime(['2024-04-22']).tz_localize('US/Eastern')\n", + "filtered_df = df.loc[df.index.normalize().isin(dates_of_interest)]\n", + "\n", + "df = filtered_df\n", + "df.info()" + ] + }, + { + "cell_type": 
"code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# import plotly.io as pio\n", + "# pio.renderers.default = 'notebook'\n", + "\n", + "#naloadujeme do vbt symbol as column\n", + "basic_data = vbt.Data.from_data({\"BAC\": df}, tz_convert=zoneNY)\n", + "\n", + "vbt.settings.plotting.auto_rangebreaks = True\n", + "#basic_data.data[\"BAC\"].vbt.ohlcv.plot()\n", + "\n", + "#basic_data.data[\"BAC\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "m1_data = basic_data[['Open', 'High', 'Low', 'Close', 'Volume']]\n", + "\n", + "m1_data.data[\"BAC\"]\n", + "#m5_data = m1_data.resample(\"5T\")\n", + "\n", + "#m5_data.data[\"BAC\"].head(10)\n", + "\n", + "# m15_data = m1_data.resample(\"15T\")\n", + "\n", + "# m15 = m15_data.data[\"BAC\"]\n", + "\n", + "# m15.vbt.ohlcv.plot()\n", + "\n", + "# m1_data.wrapper.index\n", + "\n", + "# m1_resampler = m1_data.wrapper.get_resampler(\"1T\")\n", + "# m1_resampler.index_difference(reverse=True)\n", + "\n", + "\n", + "# m5_resampler.prettify()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# MOM indicator" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vbt.phelp(vbt.indicator(\"talib:ROCP\").run)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "vyuzití rychleho klesani na sekundove urovni behem open rush\n", + "- MOM + ROC during open rush\n", + "- short signal\n", + "- pipeline kombinace thresholdu pro vstup mom_th, roc_th + hodnota sl_stop a tp_stop (pripadne trailing) - nalezeni optimalni kombinace atributu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# fig = plot_2y_close([ht_trendline],[momshort, rocp], close)\n", + "# short_signal.vbt.signals.plot_as_entries(close, fig=fig, add_trace_kwargs=dict(secondary_y=False)) \n", + "\n", + 
"#parameters (primary y line, secondary y line, close)\n", + "def plot_2y_close(priminds, secinds, close):\n", + " fig = vbt.make_subplots(rows=1, cols=1, shared_xaxes=True, specs=[[{\"secondary_y\": True}]], vertical_spacing=0.02, subplot_titles=(\"MOM\", \"Price\" ))\n", + " close.vbt.plot(fig=fig, add_trace_kwargs=dict(secondary_y=False), trace_kwargs=dict(line=dict(color=\"blue\")))\n", + " for ind in priminds:\n", + " ind.plot(fig=fig, add_trace_kwargs=dict(secondary_y=False))\n", + " for ind in secinds:\n", + " ind.plot(fig=fig, add_trace_kwargs=dict(secondary_y=True))\n", + " return fig\n", + "\n", + "close = m1_data.xloc[\"09:30\":\"10:00\"].close\n", + "momshort = vbt.indicator(\"talib:MOM\").run(close, timeperiod=3, short_name = \"slope_short\")\n", + "ht_trendline = vbt.indicator(\"talib:HT_TRENDLINE\").run(close, short_name = \"httrendline\")\n", + "rocp = vbt.indicator(\"talib:ROC\").run(close, short_name = \"rocp\")\n", + "#rate of change + momentum\n", + "short_signal = (rocp.real_crossed_below(-0.2) & momshort.real_crossed_below(-0.02))\n", + "#indlong = vbt.indicator(\"talib:MOM\").run(close, timeperiod=10, short_name = \"slope_long\")\n", + "fig = plot_2y_close([ht_trendline],[momshort, rocp], close)\n", + "short_signal.vbt.signals.plot_as_entries(close, fig=fig, add_trace_kwargs=dict(secondary_y=False)) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "close = m1_data.close\n", + "#vbt.phelp(vbt.OLS.run)\n", + "\n", + "#oer steepmnes of regression line\n", + "#talib.LINEARREG_SLOPE(close, timeperiod=timeperiod)\n", + "#a také ON BALANCE VOLUME - http://5.161.179.223:8000/static/js/vbt/api/indicators/custom/obv/index.html\n", + "\n", + "\n", + "\n", + "mom_ind = vbt.indicator(\"talib:MOM\") \n", + "#vbt.phelp(mom_ind.run)\n", + "\n", + "mom = mom_ind.run(close, timeperiod=10)\n", + "\n", + "plot_2y_close(mom, close)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + 
"source": [ + "# defining ENTRY WINDOW and forced EXIT window" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#m1_data.data[\"BAC\"].info()\n", + "import datetime\n", + "# Define the market open and close times\n", + "market_open = datetime.time(9, 30)\n", + "market_close = datetime.time(16, 0)\n", + "entry_window_opens = 2\n", + "entry_window_closes = 30\n", + "\n", + "forced_exit_start = 380\n", + "forced_exit_end = 390\n", + "\n", + "forced_exit = m1_data.symbol_wrapper.fill(False)\n", + "entry_window_open= m1_data.symbol_wrapper.fill(False)\n", + "\n", + "# Calculate the time difference in minutes from market open for each timestamp\n", + "elapsed_min_from_open = (forced_exit.index.hour - market_open.hour) * 60 + (forced_exit.index.minute - market_open.minute)\n", + "\n", + "entry_window_open[(elapsed_min_from_open >= entry_window_opens) & (elapsed_min_from_open < entry_window_closes)] = True\n", + "forced_exit[(elapsed_min_from_open >= forced_exit_start) & (elapsed_min_from_open < forced_exit_end)] = True\n", + "\n", + "#entry_window_open.info()\n", + "# forced_exit.tail(100)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "close = m1_data.close\n", + "\n", + "#rsi = vbt.RSI.run(close, window=14)\n", + "\n", + "short_entries = (short_signal & entry_window_open)\n", + "short_exits = forced_exit\n", + "#long_entries.info()\n", + "#number of trues and falses in long_entries\n", + "#short_exits.value_counts()\n", + "short_entries.value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def plot_rsi(close, entries, exits):\n", + " fig = vbt.make_subplots(rows=1, cols=1, shared_xaxes=True, specs=[[{\"secondary_y\": True}]], vertical_spacing=0.02, subplot_titles=(\"RSI\", \"Price\" ))\n", + " close.vbt.plot(fig=fig, add_trace_kwargs=dict(secondary_y=True))\n", + 
" #rsi.plot(fig=fig, add_trace_kwargs=dict(secondary_y=False))\n", + " entries.vbt.signals.plot_as_entries(close, fig=fig, add_trace_kwargs=dict(secondary_y=False)) \n", + " exits.vbt.signals.plot_as_exits(close, fig=fig, add_trace_kwargs=dict(secondary_y=False)) \n", + " return fig\n", + "\n", + "plot_rsi(close, short_entries, short_exits)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vbt.phelp(vbt.Portfolio.from_signals)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sl_stop = np.arange(0.03/100, 0.2/100, 0.02/100).tolist()\n", + "# Using the round function\n", + "sl_stop = [round(val, 4) for val in sl_stop]\n", + "print(sl_stop)\n", + "sl_stop = vbt.Param(sl_stop) #np.nan mean s no stoploss\n", + "\n", + "pf = vbt.Portfolio.from_signals(close=close, short_entries=short_entries, short_exits=short_exits, sl_stop=0.03/100, tp_stop = 0.03/100, fees=0.0167/100, freq=\"1s\") #sl_stop=sl_stop, tp_stop = sl_stop,\n", + "\n", + "#pf.stats()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#list of orders\n", + "#pf.orders.records_readable\n", + "#pf.orders.plots()\n", + "#pf.stats()\n", + "pf.stats()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf.plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf[(0.0015,0.0013)].plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf[0.03].plot_trade_signals()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# pristup k pf jako multi index" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#pf[0.03].plot()\n", + 
"#pf.order_records\n", + "pf[(0.03)].stats()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#zgrupovane statistiky\n", + "stats_df = pf.stats([\n", + " 'total_return',\n", + " 'total_trades',\n", + " 'win_rate',\n", + " 'expectancy'\n", + "], agg_func=None)\n", + "stats_df\n", + "\n", + "\n", + "stats_df.nlargest(10, 'Total Return [%]')\n", + "#stats_df.info()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf[(0.0011,0.0013000000000000002)].plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pandas.tseries.offsets import DateOffset\n", + "\n", + "temp_data = basic_data['2024-4-22']\n", + "temp_data\n", + "res1m = temp_data[[\"Open\", \"High\", \"Low\", \"Close\", \"Volume\"]]\n", + "\n", + "# Define a custom date offset that starts at 9:30 AM and spans 4 hours\n", + "custom_offset = DateOffset(hours=4, minutes=30)\n", + "\n", + "# res1m = res1m.get().resample(\"4H\").agg({ \n", + "# \"Open\": \"first\",\n", + "# \"High\": \"max\",\n", + "# \"Low\": \"min\",\n", + "# \"Close\": \"last\",\n", + "# \"Volume\": \"sum\"\n", + "# })\n", + "\n", + "res4h = res1m.resample(\"1h\", resample_kwargs=dict(origin=\"start\"))\n", + "\n", + "res4h.data\n", + "\n", + "res15m = res1m.resample(\"15T\", resample_kwargs=dict(origin=\"start\"))\n", + "\n", + "res15m.data[\"BAC\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "@vbt.njit\n", + "def long_entry_place_func_nb(c, low, close, time_in_ns, rsi14, window_open, window_close):\n", + " market_open_minutes = 570 # 9 hours * 60 minutes + 30 minutes\n", + "\n", + " for out_i in range(len(c.out)):\n", + " i = c.from_i + out_i\n", + "\n", + " current_minutes = vbt.dt_nb.hour_nb(time_in_ns[i]) * 60 + vbt.dt_nb.minute_nb(time_in_ns[i])\n", + " 
#print(\"current_minutes\", current_minutes)\n", + " # Calculate elapsed minutes since market open at 9:30 AM\n", + " elapsed_from_open = current_minutes - market_open_minutes\n", + " elapsed_from_open = elapsed_from_open if elapsed_from_open >= 0 else 0\n", + " #print( \"elapsed_from_open\", elapsed_from_open)\n", + "\n", + " #elapsed_from_open = elapsed_minutes_from_open_nb(time_in_ns) \n", + " in_window = elapsed_from_open > window_open and elapsed_from_open < window_close\n", + " #print(\"in_window\", in_window)\n", + " # if in_window:\n", + " # print(\"in window\")\n", + "\n", + " if in_window and rsi14[i] > 60: # and low[i, c.col] <= hit_price: # and hour == 9: # (4)!\n", + " return out_i\n", + " return -1\n", + "\n", + "@vbt.njit\n", + "def long_exit_place_func_nb(c, high, close, time_index, tp, sl): # (5)!\n", + " entry_i = c.from_i - c.wait\n", + " entry_price = close[entry_i, c.col]\n", + " hit_price = entry_price * (1 + tp)\n", + " stop_price = entry_price * (1 - sl)\n", + " for out_i in range(len(c.out)):\n", + " i = c.from_i + out_i\n", + " last_bar_of_day = vbt.dt_nb.day_changed_nb(time_index[i], time_index[i + 1])\n", + "\n", + " #print(next_day)\n", + " if last_bar_of_day: #pokud je dalsi next day, tak zavirame posledni\n", + " print(\"ted\",out_i)\n", + " return out_i\n", + " if close[i, c.col] >= hit_price or close[i, c.col] <= stop_price :\n", + " return out_i\n", + " return -1\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.DataFrame(np.random.random(size=(5, 10)), columns=list('abcdefghij'))\n", + "\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df.sum()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + 
"file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/research/strat_LINREG_MULTI/v1_SINGLE.ipynb b/research/strat_LINREG_MULTI/v1_SINGLE.ipynb new file mode 100644 index 0000000..e1f1412 --- /dev/null +++ b/research/strat_LINREG_MULTI/v1_SINGLE.ipynb @@ -0,0 +1,584 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Multi timeframe momentum\n", + "Cílem je nalézt kombinaci trendu, kdy je velmi pravděpodobné, že trend bude o určitou hodnotu ještě pokračovat.\n", + "\n", + "jsou počítány linregression úhly pro více timeframů a délku oken\n", + "\n", + "Pro každou kombinaci je daný parametr, nad kterým musí být. Pokud je nad všemi, pak je entry (short/long).\n", + "\n", + "Zvážit i nějaké kumulativní počítadlo úhlů - něco jako trend kumulátor." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from v2realbot.tools.loadbatch import load_batch\n", + "from v2realbot.utils.utils import zoneNY\n", + "import pandas as pd\n", + "import numpy as np\n", + "import vectorbtpro as vbt\n", + "from itables import init_notebook_mode, show\n", + "import datetime\n", + "from itertools import product\n", + "from v2realbot.config import ACCOUNT1_PAPER_API_KEY, ACCOUNT1_PAPER_SECRET_KEY, DATA_DIR\n", + "\n", + "init_notebook_mode(all_interactive=True)\n", + "\n", + "vbt.settings.set_theme(\"dark\")\n", + "vbt.settings['plotting']['layout']['width'] = 1280\n", + "vbt.settings.plotting.auto_rangebreaks = True\n", + "# Set the option to display with pagination\n", + "pd.set_option('display.notebook_repr_html', True)\n", + "pd.set_option('display.max_rows', 10) # Number of rows per page\n", + "\n", + "# Define the market open and close times\n", + "market_open = datetime.time(9, 30)\n", + "market_close = 
datetime.time(16, 0)\n", + "entry_window_opens = 1\n", + "entry_window_closes = 370\n", + "\n", + "forced_exit_start = 380\n", + "forced_exit_end = 390\n", + "\n", + "#LOAD FROM BATCH\n", + "# res, df = load_batch(batch_id=\"f1ac6651\", #138170bc 0fb5043a bde6d0be f1ac6651\n", + "# space_resolution_evenly=False,\n", + "# indicators_columns=[\"Rsi14\"],\n", + "# main_session_only=True,\n", + "# verbose = False)\n", + "# if res < 0:\n", + "# print(\"Error\" + str(res) + str(df))\n", + "# df = df[\"bars\"]\n", + "\n", + "# basic_data = vbt.Data.from_data(vbt.symbol_dict({\"BAC\": df}), tz_convert=zoneNY)\n", + "# #m1_data = basic_data[['Open', 'High', 'Low', 'Close', 'Volume']]\n", + "# basic_data = basic_data.transform(lambda df: df.between_time('09:30', '16:00'))\n", + "# #basic_data.info()\n", + "\n", + "#LOAD FROM PARQUET\n", + "#list all files is dir directory with parquet extension\n", + "dir = DATA_DIR + \"/notebooks/\"\n", + "import os\n", + "files = [f for f in os.listdir(dir) if f.endswith(\".parquet\")]\n", + "print('\\n'.join(map(str, files)))\n", + "file_name = \"ohlcv_df-BAC-2023-01-01T09_30_00-2024-05-25T15_30_00-47BCFOPUVWZ-100.parquet\"\n", + "ohlcv_df = pd.read_parquet(dir+file_name,engine='pyarrow')\n", + "#filter ohlcv_df to certain date range (assuming datetime index)\n", + "ohlcv_df = ohlcv_df.loc[\"2024-02-12 10:30\":\"2024-02-14 12:00\"]\n", + "\n", + "#add vwap column to ohlcv_df\n", + "#ohlcv_df[\"hlcc4\"] = (ohlcv_df[\"close\"] + ohlcv_df[\"high\"] + ohlcv_df[\"low\"] + ohlcv_df[\"close\"]) / 4\n", + "\n", + "basic_data = vbt.Data.from_data(vbt.symbol_dict({\"BAC\": ohlcv_df}), tz_convert=zoneNY)\n", + "ohlcv_df= None" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Add resample function to custom columns" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from vectorbtpro.utils.config import merge_dicts, Config, HybridConfig\n", + "from vectorbtpro 
import _typing as tp\n", + "from vectorbtpro.generic import nb as generic_nb\n", + "\n", + "_feature_config: tp.ClassVar[Config] = HybridConfig(\n", + " {\n", + " \"buyvolume\": dict(\n", + " resample_func=lambda self, obj, resampler: obj.vbt.resample_apply(\n", + " resampler,\n", + " generic_nb.sum_reduce_nb,\n", + " )\n", + " ),\n", + " \"sellvolume\": dict(\n", + " resample_func=lambda self, obj, resampler: obj.vbt.resample_apply(\n", + " resampler,\n", + " generic_nb.sum_reduce_nb,\n", + " )\n", + " )\n", + " }\n", + ")\n", + "\n", + "basic_data._feature_config = _feature_config" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#asic_data.stats()\n", + "basic_data.wrapper.index.normalize().nunique()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "t1data = basic_data[['open', 'high', 'low', 'close', 'volume','vwap','buyvolume','sellvolume']].resample(\"1T\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "t1data.data[\"BAC\"].buyvolume" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "t1data.data[\"BAC\"].sellvolume" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "buyvolume = t1data.data[\"BAC\"].buyvolume\n", + "sellvolume = t1data.data[\"BAC\"].sellvolume\n", + "totalvolume = buyvolume + sellvolume\n", + "\n", + "#adjust to minimal value to avoid division by zero\n", + "sellvolume_adjusted = sellvolume.replace(0, 1e-10)\n", + "oibratio = buyvolume / sellvolume\n", + "\n", + "#cumulative order flow (net difference)\n", + "cof = buyvolume - sellvolume\n", + "\n", + "# Calculate the order imbalance (net differene) normalize the 
order imbalance by calculating the difference between buy and sell volumes and then scaling it by the total volume.\n", + "order_imbalance = cof / totalvolume\n", + "\n", + "order_imbalance_allvolume = cof / t1data.data[\"BAC\"].volume" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "order_imbalance" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#priminds list (cena), secinds list (napr. rsi), close, voluminds (volume based)\n", + "def plot_2y_close(priminds, secinds, close, volume):\n", + " fig = vbt.make_subplots(rows=2, cols=1, shared_xaxes=True, \n", + " specs=[[{\"secondary_y\": True}], [{\"secondary_y\": False}]], \n", + " vertical_spacing=0.02, subplot_titles=(\"Price and Indicators\", \"Volume\"))\n", + "\n", + " # Plotting the close price\n", + " close.vbt.plot(fig=fig, add_trace_kwargs=dict(secondary_y=False,row=1, col=1), trace_kwargs=dict(line=dict(color=\"blue\")))\n", + " \n", + " # Plotting primary indicators on the first row\n", + " for ind in priminds:\n", + " if isinstance(ind, pd.Series):\n", + " ind = ind.vbt\n", + " ind.plot(fig=fig, add_trace_kwargs=dict(secondary_y=False, row=1, col=1))\n", + " \n", + " # Plotting secondary indicators on the first row\n", + " for ind in secinds:\n", + " #ind = ind.rename(str(ind.name))\n", + " if isinstance(ind, pd.Series):\n", + " ind = ind.vbt\n", + " ind.plot(fig=fig, add_trace_kwargs=dict(secondary_y=True, row=1, col=1))\n", + " \n", + " for indvolume in volume:\n", + " # Plotting the volume on the second row\n", + " indvolume.rename(str(indvolume.name)).vbt.barplot(fig=fig, add_trace_kwargs=dict(secondary_y=False, row=2, col=1))\n", + " #vbt.Bar(indvolume, fig=fig, add_trace_kwargs=dict(secondary_y=False, row=2, col=1))\n", + " \n", + " return fig\n", + "\n", + "plot_2y_close([], [cof,oibratio], t1data.close, [t1data.data[\"BAC\"].buyvolume, 
t1data.data[\"BAC\"].sellvolume, t1data.volume])\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib inline\n", + "t0data = basic_data\n", + "t1data = basic_data[['open', 'high', 'low', 'close', 'volume','vwap']].resample(\"1T\")\n", + "t2data = basic_data[['open', 'high', 'low', 'close', 'volume','vwap']].resample(\"15T\")\n", + "t3data = basic_data[['open', 'high', 'low', 'close', 'volume','vwap']].resample(\"30T\")\n", + "t4data = basic_data[['open', 'high', 'low', 'close', 'volume', 'vwap']].resample(\"D\").dropna()\n", + "\n", + "t1data = t1data.transform(lambda df: df.between_time('09:30', '16:00').dropna())\n", + "t2data = t2data.transform(lambda df: df.between_time('09:30', '16:00').dropna())\n", + "t3data = t3data.transform(lambda df: df.between_time('09:30', '16:00').dropna())\n", + "\n", + "#30min data to daily\n", + "# t4data = t3data.resample(\"D\").dropna()\n", + "\n", + "#t4data = t4data.transform(lambda df: df.between_time('09:30', '16:00').dropna())\n", + "#m1data.data[\"SPY\"].info()\n", + "\n", + "#m1data.data[\"SPY\"].vbt.ohlcv.plot()\n", + "#h2data.data[\"SPY\"].vbt.ohlcv.plot()\n", + "#ddata.data[\"SPY\"]\n", + "t2data.data[\"BAC\"].vbt.ohlcv.plot().show()\n", + "\n", + "\n", + "#t4data.data[\"BAC\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "t2data.close\n", + "\n", + "#in df remove rows with nan\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#realign na 1T = t1data + oriznout main session\n", + "t2data_vwap = t2data.vwap.vbt.realign_closing(\"1T\").between_time('09:30', '16:00').dropna()\n", + "t3data_vwap = t3data.vwap.vbt.realign_closing(\"1T\").between_time('09:30', '16:00').dropna()\n", + "t4data_vwap = t4data.vwap.vbt.realign_closing(\"1T\").dropna()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + 
"metadata": {}, + "outputs": [], + "source": [ + "t2data_vwap" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def plot_2y_close(priminds, secinds, close):\n", + " fig = vbt.make_subplots(rows=1, cols=1, shared_xaxes=True, specs=[[{\"secondary_y\": True}]], vertical_spacing=0.02, subplot_titles=(\"MOM\", \"Price\" ))\n", + " close.vbt.plot(fig=fig, add_trace_kwargs=dict(secondary_y=False), trace_kwargs=dict(line=dict(color=\"blue\")))\n", + " for ind in priminds:\n", + " if isinstance(ind, pd.Series):\n", + " ind = ind.vbt\n", + " ind.plot(fig=fig, add_trace_kwargs=dict(secondary_y=False))\n", + " for ind in secinds:\n", + " if isinstance(ind, pd.Series):\n", + " ind = ind.vbt\n", + " ind.plot(fig=fig, add_trace_kwargs=dict(secondary_y=True))\n", + " return fig" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "t4data.clos.vbt \n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "obvind = vbt.indicator.obv.run(t1data.close, t1data.volume)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "t1_lengtgh = 15\n", + "t2_length = 15\n", + "t3_length = 15\n", + "t4_length = 5\n", + "t1_th = 0.1\n", + "t2_th = 0.1\n", + "t3_th = 0.1\n", + "t4_th = 0.1\n", + "\n", + "\n", + "\n", + "#minute\n", + "t1slope = vbt.indicator(\"talib:LINEARREG_SLOPE \").run(t1data.close, timeperiod=t1_lengtgh) # -0.09, 0.09\n", + "t2slope = vbt.indicator(\"talib:LINEARREG_SLOPE \").run(t2data.vwap, timeperiod=t2_length) # -0.08 , 0.079\n", + "t3slope = vbt.indicator(\"talib:LINEARREG_SLOPE \").run(t3data.vwap, timeperiod=t3_length) # -0.08, 0.08\n", + "#daily\n", + "t4slope = vbt.indicator(\"talib:LINEARREG_SLOPE \").run(t4data.vwap, timeperiod=t4_length) # -0.1, 0.09\n", + "\n", + "plot_2y_close(priminds=[], secinds=[t1slope, 
t2slope, t3slope, t4slope], close=t1data.close).show()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#thirtymin_slope = thirtymin_slope.real.rename(\"30min\") #timto se prejmenuje real na 30min\n", + "t3slope = t3slope.real.vbt.realign_closing(\"1T\").between_time('09:30', '16:00').dropna()\n", + "##filter daily_slope_to_compare to only monday to friday\n", + "t3slope = t3slope[t3slope.index.dayofweek < 5]\n", + "\n", + "#t3slope.info()\n", + "\n", + "t2slope = t2slope.real.vbt.realign_closing(\"1T\").between_time('09:30', '16:00').dropna()\n", + "##filter daily_slope_to_compare to only monday to friday\n", + "t2slope = t2slope[t2slope.index.dayofweek < 5]\n", + "\n", + "t2slope.info()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "oibratio" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "#\n", + "short_entries = order_imbalance.vbt < 0.0002\n", + "#short_entries = oibratio.vbt < 0.01\n", + "short_entries.value_counts()\n", + "\n", + "entries = order_imbalance.vbt > 0.7\n", + "#entries = oibratio.vbt > 10\n", + "entries.value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig = vbt.make_subplots(rows=3, cols=1, shared_xaxes=True, \n", + " specs=[[{\"secondary_y\": True}], [{\"secondary_y\": True}], [{\"secondary_y\": False}]], \n", + " vertical_spacing=0.02, subplot_titles=(\"Price and Indicators\", \"Volume\"))\n", + "t1data.data[\"BAC\"].vbt.ohlcv.plot(fig=fig, add_trace_kwargs=dict(secondary_y=False, row=1, col=1))\n", + "#oibratio.vbt.plot(fig=fig, add_trace_kwargs=dict(secondary_y=True, row=1, col=1))\n", + "order_imbalance.vbt.plot(fig=fig, add_trace_kwargs=dict(secondary_y=True, row=1, col=1))\n", + "entries.vbt.signals.plot_as_entries(t1data.close, fig=fig, 
trace_kwargs=dict(name=\"LONGS\",\n", + " line=dict(color=\"#ffe476\"),\n", + " marker=dict(color=\"limegreen\"),\n", + " fill=None,\n", + " connectgaps=True,\n", + " ), add_trace_kwargs=dict(secondary_y=False, row=1, col=1))\n", + "\n", + "short_entries.vbt.signals.plot_as_entries(t1data.close, fig=fig, trace_kwargs=dict(name=\"SHORTS\",\n", + " line=dict(color=\"#ffe476\"),\n", + " marker=dict(color=\"red\"),\n", + " fill=None,\n", + " connectgaps=True,\n", + " ), add_trace_kwargs=dict(secondary_y=False, row=1, col=1))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# thirtymin_slope_to_compare.vbt.xloc[\"04-16-2024\"].get()\n", + "thirty_down_signal.vbt.xloc[\"04-16-2024\"].get()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#short_signal = t1slope.real_below(t1_th) & t2slope.real_below(t2_th) & t3slope.real_below(t3_th) & t4slope.real_below(t4_th)\n", + "#long_signal = t1slope.real_above(t1_th) & t2slope.real_above(t2_th) & t3slope.real_above(t3_th) & t4slope.real_above(t4_th)\n", + "\n", + "#test na daily s reversem crossed 0\n", + "short_signal = t2slope.vbt < -0.01 & t3slope.vbt < -0.01 #min value of threshold\n", + "long_signal = t2slope.vbt > 0.01 & t3slope.vbt > 0.01 #min\n", + "\n", + "# thirty_up_signal = t3slope.vbt.crossed_above(0.01)\n", + "# thirty_down_signal = t3slope.vbt.crossed_below(-0.01)\n", + "\n", + "fig = plot_2y_close(priminds=[], secinds=[t3slope], close=t1data.close)\n", + "#short_signal.vbt.signals.plot_as_entries(basic_data.close, fig=fig)\n", + "\n", + "short_signal.vbt.signals.plot_as_entries(t1data.close, fig=fig, trace_kwargs=dict(name=\"SHORTS\",\n", + " line=dict(color=\"#ffe476\"),\n", + " marker=dict(color=\"red\", symbol=\"triangle-down\"),\n", + " fill=None,\n", + " connectgaps=True,\n", + " ))\n", + "long_signal.vbt.signals.plot_as_entries(t1data.close, fig=fig, 
trace_kwargs=dict(name=\"LONGS\",\n", + " line=dict(color=\"#ffe476\"),\n", + " marker=dict(color=\"limegreen\"),\n", + " fill=None,\n", + " connectgaps=True,\n", + " ))\n", + "\n", + "# thirty_down_signal.vbt.signals.plot_as_entries(t1data.close, fig=fig, trace_kwargs=dict(name=\"DOWN30\",\n", + "# line=dict(color=\"#ffe476\"),\n", + "# marker=dict(color=\"yellow\", symbol=\"triangle-down\"),\n", + "# fill=None,\n", + "# connectgaps=True,\n", + "# ))\n", + "# thirty_up_signal.vbt.signals.plot_as_entries(t1data.close, fig=fig, trace_kwargs=dict(name=\"UP30\",\n", + "# line=dict(color=\"#ffe476\"),\n", + "# marker=dict(color=\"grey\"),\n", + "# fill=None,\n", + "# connectgaps=True,\n", + "# ))\n", + "\n", + "# thirtymin_slope_to_compare.vbt.plot(fig=fig, add_trace_kwargs=dict(secondary_y=True), trace_kwargs=dict(name=\"30min slope\",\n", + "# line=dict(color=\"yellow\"), \n", + "# fill=None,\n", + "# connectgaps=True,\n", + "# ))\n", + "\n", + "fig.show()\n", + "# print(\"short signal\")\n", + "# print(short_signal.value_counts())\n", + "\n", + "#forced_exit = pd.Series(False, index=close.index)\n", + "forced_exit = basic_data.symbol_wrapper.fill(False)\n", + "#entry_window_open = pd.Series(False, index=close.index)\n", + "entry_window_open= basic_data.symbol_wrapper.fill(False)\n", + "\n", + "# Calculate the time difference in minutes from market open for each timestamp\n", + "elapsed_min_from_open = (forced_exit.index.hour - market_open.hour) * 60 + (forced_exit.index.minute - market_open.minute)\n", + "\n", + "entry_window_open[(elapsed_min_from_open >= entry_window_opens) & (elapsed_min_from_open < entry_window_closes)] = True\n", + "\n", + "#print(entry_window_open.value_counts())\n", + "\n", + "forced_exit[(elapsed_min_from_open >= forced_exit_start) & (elapsed_min_from_open < forced_exit_end)] = True\n", + "short_entries = (short_signal & entry_window_open)\n", + "short_exits = forced_exit\n", + "\n", + "entries = (long_signal & entry_window_open)\n", + 
"exits = forced_exit\n", + "#long_entries.info()\n", + "#number of trues and falses in long_entries\n", + "# print(short_exits.value_counts())\n", + "# print(short_entries.value_counts())\n", + "\n", + "#fig = plot_2y_close([],[momshort, rocp], close)\n", + "#short_signal.vbt.signals.plot_as_entries(close, fig=fig, add_trace_kwargs=dict(secondary_y=False))\n", + "#print(sl_stop)\n", + "#short_entries=short_entries, short_exits=short_exits,\n", + "# pf = vbt.Portfolio.from_signals(close=basic_data, entries=short_entries, exits=exits, tsl_stop=0.005, tp_stop = 0.05, fees=0.0167/100, freq=\"1s\") #sl_stop=sl_stop, tp_stop = sl_stop,\n", + "\n", + "# pf.stats()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# pf.plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf.get_drawdowns().records_readable" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf.orders.records_readable" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/research/strat_ORDER_IMBALANCE/.ipynb_checkpoints/v2_SINGLE-checkpoint.ipynb b/research/strat_ORDER_IMBALANCE/.ipynb_checkpoints/v2_SINGLE-checkpoint.ipynb new file mode 100644 index 0000000..58a878c --- /dev/null +++ b/research/strat_ORDER_IMBALANCE/.ipynb_checkpoints/v2_SINGLE-checkpoint.ipynb @@ -0,0 +1,1118 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# ORDER Imbalance\n", + "\n", + "* introduced buyvolume and sellvolume 
on bar level.\n", + "* calculated order imbalance ratio ((buyvolume - sellvolume) / totalvolume)\n", + "* calculated on multiple timeframes\n", + "* entry based on confluence of imbalances\n", + "\n", + "## Note\n", + "\n", + "Order disbalance nepodminuje zmenu ceny (tzn. muze byt order disbalance na buy stranu, ale cena nemusi jit nahoru a naopak)\n", + "Nicmene pokud je disbalance delsi a nedochazi ke zmene ceny - může to něco indikovat. \n", + "Vytvořit si kumulativní disbalance - kumulátory, které se budou načítat, když se budou silné disbalance, bez změny ceny. Tento akumulátor se bude nabíjet disbalancí a vybíjet příslušnou změnou ceny." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from dotenv import load_dotenv\n", + "\n", + "#as V2realbot is client , load env variables here\n", + "env_file = \"/Users/davidbrazda/Documents/Development/python/.env\"\n", + "# Load the .env file\n", + "load_dotenv(env_file)\n", + "\n", + "from v2realbot.utils.utils import zoneNY\n", + "import pandas as pd\n", + "import numpy as np\n", + "import vectorbtpro as vbt\n", + "# from itables import init_notebook_mode, show\n", + "import datetime\n", + "from itertools import product\n", + "from v2realbot.config import DATA_DIR\n", + "from lightweight_charts import JupyterChart, chart, Panel\n", + "from IPython.display import display\n", + "\n", + "# init_notebook_mode(all_interactive=True)\n", + "\n", + "vbt.settings.set_theme(\"dark\")\n", + "vbt.settings['plotting']['layout']['width'] = 1280\n", + "vbt.settings.plotting.auto_rangebreaks = True\n", + "# Set the option to display with pagination\n", + "pd.set_option('display.notebook_repr_html', True)\n", + "pd.set_option('display.max_rows', 10) # Number of rows per page\n", + "\n", + "# Define the market open and close times\n", + "market_open = datetime.time(9, 30)\n", + "market_close = datetime.time(16, 0)\n", + "entry_window_opens = 1\n", + "entry_window_closes = 370\n",
+ "\n", + "forced_exit_start = 380\n", + "forced_exit_end = 390\n", + "\n", + "#LOAD FROM PARQUET\n", + "#list all files is dir directory with parquet extension\n", + "dir = DATA_DIR + \"/notebooks/\"\n", + "import os\n", + "files = [f for f in os.listdir(dir) if f.endswith(\".parquet\")]\n", + "print('\\n'.join(map(str, files)))\n", + "file_name = \"ohlcv_df-BAC-2023-01-01T09_30_00-2024-05-25T15_30_00-47BCFOPUVWZ-100.parquet\"\n", + "ohlcv_df = pd.read_parquet(dir+file_name,engine='pyarrow')\n", + "#filter ohlcv_df to certain date range (assuming datetime index)\n", + "#ohlcv_df = ohlcv_df.loc[\"2024-02-12 9:30\":\"2024-02-14 16:00\"]\n", + "\n", + "#add vwap column to ohlcv_df\n", + "#ohlcv_df[\"hlcc4\"] = (ohlcv_df[\"close\"] + ohlcv_df[\"high\"] + ohlcv_df[\"low\"] + ohlcv_df[\"close\"]) / 4\n", + "\n", + "basic_data = vbt.Data.from_data(vbt.symbol_dict({\"BAC\": ohlcv_df}), tz_convert=zoneNY)\n", + "ohlcv_df= None\n", + "basic_data.wrapper.index.normalize().nunique()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Add resample function to custom columns" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from vectorbtpro.utils.config import merge_dicts, Config, HybridConfig\n", + "from vectorbtpro import _typing as tp\n", + "from vectorbtpro.generic import nb as generic_nb\n", + "\n", + "_feature_config: tp.ClassVar[Config] = HybridConfig(\n", + " {\n", + " \"buyvolume\": dict(\n", + " resample_func=lambda self, obj, resampler: obj.vbt.resample_apply(\n", + " resampler,\n", + " generic_nb.sum_reduce_nb,\n", + " )\n", + " ),\n", + " \"sellvolume\": dict(\n", + " resample_func=lambda self, obj, resampler: obj.vbt.resample_apply(\n", + " resampler,\n", + " generic_nb.sum_reduce_nb,\n", + " )\n", + " ),\n", + " \"trades\": dict(\n", + " resample_func=lambda self, 
obj, resampler: obj.vbt.resample_apply(\n", + " resampler,\n", + " generic_nb.sum_reduce_nb,\n", + " )\n", + " )\n", + " }\n", + ")\n", + "\n", + "basic_data._feature_config = _feature_config" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "basic_data.data[\"BAC\"].info()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "t1data = basic_data[['open', 'high', 'low', 'close', 'volume','vwap','buyvolume','trades','sellvolume']].resample(\"1T\")\n", + "t1data = t1data.transform(lambda df: df.between_time('09:30', '16:00').dropna())\n", + "t1data.data[\"BAC\"].info()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "bbands = vbt.talib(\"BBANDS\").run(\n", + " t1data.get(\"Close\"))\n", + "\n", + "\n", + "supertrend = vbt.SUPERTREND.run(t1data.high, t1data.low, t1data.close, period=14, multiplier=3)\n", + "#supertrend.output_names" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "serka = supertrend.direction\n", + "\n", + "#a = serka.vbt.xloc[\"2024-02-12 09:30\":\"2024-02-12 09:32\"].obj\n", + "\n", + "a = serka.vbt.xloc[slice(\"2024-02-12 09:30\",\"2024-02-12 09:32\")].obj\n", + " \n", + "a\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df = t1data.data[\"BAC\"]\n", + "\n", + "df.vbt.xloc[\"2024-02-12 09:30\":\"2024-02-12 09:32\"].obj" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "supertrend.trend" + ] + }, + { + "cell_type": "code", + "execution_count": 
null, + "metadata": {}, + "outputs": [], + "source": [ + "pane1 = Panel(\n", + " ohlcv=(t1data.data[\"BAC\"],), #(series, entries, exits, other_markers)\n", + " histogram=[], # [(series, name, \"rgba(53, 94, 59, 0.6), opacity\")]\n", + " right=[#(bbands,), #[(series, name, entries, exits, other_markers)]\n", + " (supertrend.trend,\"STtrend\"),\n", + " (supertrend.long,\"STlong\"),\n", + " (supertrend.short,\"STshort\")\n", + " ],\n", + " left = [(supertrend.direction,\"STdirection\")],\n", + " # right=[(bbands.upperband, \"upperband\",),\n", + " # (bbands.lowerband, \"lowerband\",),\n", + " # (bbands.middleband, \"middleband\",)\n", + " # ], #[(series, name, entries, exits, other_markers)]\n", + " middle1=[],\n", + " middle2=[],\n", + ")\n", + "\n", + "pane2 = Panel(\n", + " ohlcv=(t1data.data[\"BAC\"],), #(series, entries, exits, other_markers)\n", + " histogram=[], # [(series, name, \"rgba(53, 94, 59, 0.6), opacity\")]\n", + " right=[(bbands,), #[(series, name, entries, exits, other_markers)]\n", + " # (supertrend.trend,\"STtrend\"),\n", + " # (supertrend.long,\"STlong\"),\n", + " # (supertrend.short,\"STshort\")\n", + " ],\n", + " # left = [(supertrend.direction,\"STdirection\")],\n", + " # right=[(bbands.upperband, \"upperband\",),\n", + " # (bbands.lowerband, \"lowerband\",),\n", + " # (bbands.middleband, \"middleband\",)\n", + " # ], #[(series, name, entries, exits, other_markers)]\n", + " middle1=[],\n", + " middle2=[],\n", + ")\n", + "\n", + "ch = chart([pane1, pane2], sync=True, size=\"s\", xloc=slice(\"2024-02-12 09:30\",\"2024-03-12\"))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "buyvolume = t1data.data[\"BAC\"].buyvolume\n", + "sellvolume = t1data.data[\"BAC\"].sellvolume\n", + "totalvolume = buyvolume + sellvolume\n", + "\n", + "#adjust to minimal value to avoid division by zero\n", + "sellvolume_adjusted = sellvolume.replace(0, 1e-10)\n", + "oibratio = buyvolume / sellvolume\n", + 
"\n", + "#cumulative order flow (net difference)\n", + "cof = buyvolume - sellvolume\n", + "\n", + "# Calculate the order imbalance (net differene) normalize the order imbalance by calculating the difference between buy and sell volumes and then scaling it by the total volume.\n", + "order_imbalance = cof / totalvolume\n", + "order_imbalance = order_imbalance.fillna(0) #nan nahradime 0\n", + "\n", + "order_imbalance_allvolume = cof / t1data.data[\"BAC\"].volume\n", + "\n", + "order_imbalance_sma = vbt.indicator(\"talib:EMA\").run(order_imbalance, timeperiod=5)\n", + "short_signals = order_imbalance.vbt < -0.5\n", + "#short_entries = oibratio.vbt < 0.01\n", + "short_signals.value_counts()\n", + "short_signals.name = \"short_entries\"\n", + "#.fillna(False)\n", + "short_exits = short_signals.shift(-2).fillna(False).astype(bool)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pane1 = Panel(\n", + " ohlcv=(t1data.data[\"BAC\"],), #(series, entries, exits, other_markers)\n", + " histogram=[(order_imbalance_allvolume, \"oib_allvolume\", \"rgba(53, 94, 59, 0.6)\",0.5),\n", + " (t1data.data[\"BAC\"].trades, \"trades\",None,0.4),\n", + " ], # [(series, name, \"rgba(53, 94, 59, 0.6)\", opacity)]\n", + " # right=[\n", + " # (supertrend.trend,\"STtrend\"),\n", + " # (supertrend.long,\"STlong\"),\n", + " # (supertrend.short,\"STshort\")\n", + " # ],\n", + " # left = [(supertrend.direction,\"STdirection\")],\n", + " # right=[(bbands.upperband, \"upperband\",),\n", + " # (bbands.lowerband, \"lowerband\",),\n", + " # (bbands.middleband, \"middleband\",)\n", + " # ], #[(series, name, entries, exits, other_markers)]\n", + " middle1=[],\n", + " middle2=[],\n", + ")\n", + "\n", + "pane2 = Panel(\n", + " ohlcv=(basic_data.data[\"BAC\"],), #(series, entries, exits, other_markers)\n", + " left=[(basic_data.data[\"BAC\"].trades, \"trades\")],\n", + " histogram=[(basic_data.data[\"BAC\"].trades, \"trades_hist\", \"white\", 
0.5)], #\"rgba(53, 94, 59, 0.6)\"\n", + " # ], # [(series, name, \"rgba(53, 94, 59, 0.6)\")]\n", + " # right=[\n", + " # (supertrend.trend,\"STtrend\"),\n", + " # (supertrend.long,\"STlong\"),\n", + " # (supertrend.short,\"STshort\")\n", + " # ],\n", + " # left = [(supertrend.direction,\"STdirection\")],\n", + " # right=[(bbands.upperband, \"upperband\",),\n", + " # (bbands.lowerband, \"lowerband\",),\n", + " # (bbands.middleband, \"middleband\",)\n", + " # ], #[(series, name, entries, exits, other_markers)]\n", + " middle1=[],\n", + " middle2=[],\n", + ")\n", + "\n", + "\n", + "ch = chart([pane1, pane2], size=\"m\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "##z tohoto si udelat plot funkci (i pro entries,exits)\n", + "#t1data = t1data[[\"open\", \"high\", \"low\", \"close\", \"volume\"]]\n", + "chart = JupyterChart(width=1000, height=600, inner_width=1, inner_height=0.5, leftScale=True)\n", + "#set resolution\n", + "t1data = basic_data[['open', 'high', 'low', 'close', 'volume','vwap','buyvolume','sellvolume']].resample(\"1T\")\n", + "t1data = t1data.transform(lambda df: df.between_time('09:30', '16:00').dropna())\n", + "chart.set(t1data.data[\"BAC\"])\n", + "line_vwap = chart.create_line(name=\"vwap\")#, color=\"blue\")\n", + "line_vwap.set(t1data.vwap)\n", + "\n", + "\n", + "chart.topbar.textbox(\"title\",\"Nadpis\")\n", + "chart2 = chart.create_subchart(position='right', width=1, height=0.5, sync=True, leftScale=True)\n", + "t2data = basic_data[['open', 'high', 'low', 'close', 'volume','vwap','buyvolume','sellvolume']].resample(\"5T\")\n", + "t2data = t2data.transform(lambda df: df.between_time('09:30', '16:00').dropna())\n", + "\n", + "\n", + "#5min close realigned to 1T\n", + "close_realigned = t2data.close.vbt.realign_closing(\"1T\").between_time('09:30', '16:00').dropna()\n", + "close_realigned = close_realigned[close_realigned.index.dayofweek < 5]\n", + "#close_realigned = 
close_realigned[close_realigned.index.dayofweek < 5]\n", + "line1 = chart.create_line(name=\"5minclose\")#, color=\"green\")\n", + "line1.set(close_realigned)\n", + "\n", + "#sma z realigned dat\n", + "sma_tp = 20\n", + "sma_t2 = vbt.indicator(\"talib:EMA\").run(close_realigned, timeperiod=sma_tp)\n", + "smaline = chart.create_line(name=f\"sma{sma_tp}\")#, color=\"blue\")\n", + "smaline.set(sma_t2)\n", + "\n", + "\n", + "#sma z puvodnich resamplovanych dat plus navic realign, melo by byt stejne \n", + "sma_real = vbt.indicator(\"talib:EMA\").run(t2data.close, timeperiod=sma_tp)\n", + "sma_real_value = sma_real.real.vbt.realign_closing(\"1T\").between_time('09:30', '16:00').dropna()\n", + "sma_real_value = sma_real_value[sma_real_value.index.dayofweek < 5]\n", + "smaline_real = chart.create_line(name=f\"smareal{sma_tp}\", color=\"yellow\")\n", + "smaline_real.set(sma_real_value)\n", + "\n", + "#resample 15T\n", + "t15data = basic_data[['open', 'high', 'low', 'close', 'volume','vwap','buyvolume','sellvolume']].resample(\"15T\")\n", + "t15data = t15data.transform(lambda df: df.between_time('09:30', '16:00').dropna())\n", + "\n", + "#5min close realigned to 1T\n", + "close_15realigned = t15data.close.vbt.realign_closing(\"1T\").between_time('09:30', '16:00').dropna()\n", + "close_15realigned = close_15realigned[close_15realigned.index.dayofweek < 5]\n", + "#close_realigned = close_realigned[close_realigned.index.dayofweek < 5]\n", + "line2 = chart.create_line(name=\"15minclose\")#, color=\"pink\")\n", + "line2.set(close_15realigned)\n", + "\n", + "\n", + "chart.legend(True)\n", + "hst = chart2.create_histogram(name=\"buyvolume\", color=\"rgba(53, 94, 59, 0.6)\") #green transparent\n", + "hst1 = chart2.create_histogram(name=\"sellvolume\", color=\"rgba(165, 42, 42, 0.6)\") #red transparent\n", + "hst.set(t1data.data[\"BAC\"])\n", + "hst1.set(t1data.data[\"BAC\"])\n", + "line2 = chart2.create_line(name=\"5minclose\")#, color=\"green\")\n", + 
"line2.set(close_realigned)\n", + "\n", + "lineoib = chart2.create_line(name=\"oib\", priceScaleId=\"left\") #color=\"violet\", \n", + "#lineoib.scale(0.7,0)\n", + "lineoib.set(order_imbalance_allvolume)\n", + "\n", + "lineoib_sma = chart2.create_line(name=\"oibsma5\", priceScaleId=\"left\") #, color=\"blue\", \n", + "lineoib_sma.set(order_imbalance_sma)\n", + "\n", + "chart.fit()\n", + "chart2.legend(True)\n", + "#\n", + "line2.markers_set(short_signals, \"entries\")\n", + "# TODO jelikoz se davaji do jednoho pole je treba zajistit spravne sortovani\n", + "# domyslet jak to pojmout iterativni doplnovani markeru\n", + "line2.markers_set(short_exits, \"exits\")\n", + "\n", + "\n", + "chart2.fit()\n", + "chart.load()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "short_signals.info()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#vbt.IF.list_indicators(\"*ma\")\n", + "vbt.phelp(vbt.indicator(\"talib:EMA\").run)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sma = vbt.indicator(\"talib:EMA\").run(t1data.close, timeperiod=20)\n", + "sma.real.info()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "rr = vbt.RSI.run(t1data.close)\n", + "type(rr)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "buyvolume = t1data.data[\"BAC\"].buyvolume\n", + "sellvolume = t1data.data[\"BAC\"].sellvolume\n", + "totalvolume = buyvolume + sellvolume\n", + "\n", + "#adjust to minimal value to avoid division by zero\n", + "sellvolume_adjusted = sellvolume.replace(0, 1e-10)\n", + "oibratio = buyvolume / sellvolume\n", + "\n", + "#cumulative order flow 
(net difference)\n", + "cof = buyvolume - sellvolume\n", + "\n", + "# Calculate the order imbalance (net differene) normalize the order imbalance by calculating the difference between buy and sell volumes and then scaling it by the total volume.\n", + "order_imbalance = cof / totalvolume\n", + "order_imbalance = order_imbalance.fillna(0) #nan nahradime 0\n", + "\n", + "order_imbalance_allvolume = cof / t1data.data[\"BAC\"].volume" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "order_imbalance_sma = vbt.indicator(\"talib:EMA\").run(order_imbalance, timeperiod=5)\n", + "short_signals = order_imbalance.vbt < -0.5\n", + "#short_entries = oibratio.vbt < 0.01\n", + "short_signals.value_counts()\n", + "short_signals.name = \"short_entries\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "short_signals.info()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "short_signals" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "order_imbalance.fillna(0)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "order_imbalance.vbt.plot()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "order_imbalance" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "chartN = JupyterChart(width=500, height=300, inner_width=1, inner_height=0.3, leftScale=True)\n", + "t1data = basic_data[['open', 'high', 'low', 'close', 'volume','vwap','buyvolume','sellvolume']].resample(\"1T\")\n", + "t1data = t1data.transform(lambda df: 
df.between_time('09:30', '16:00').dropna())\n", + "chartN.set(t1data.data[\"BAC\"])\n", + "line_sma = chartN.create_line(name=\"sma\", priceScaleId=\"right\")#, color=\"blue\")\n", + "line_sma.set(sma)\n", + "# line_sma.markers_set(short_signals, \"entries\")\n", + "# line_sma.markers_set(short_exits, \"exits\")\n", + "# hst = chartN.create_histogram(name=\"oivol\")\n", + "# hst.set(order_imbalance_allvolume)\n", + "# chartN.legend(True)\n", + "# chartN.fit()\n", + "\n", + "# subchart = chartN.create_subchart(position='right', width=1, height=0.5, sync=False, leftScale=True)\n", + "# # subchart.set(t1data.data[\"BAC\"])\n", + "# line_sma1 = subchart.create_line(name=\"smao\", priceScaleId=\"left\")#, color=\"blue\")\n", + "# line_sma1.set(sma)\n", + "# # line_sma1.markers_set(short_signals, \"entries\")\n", + "# # line_sma1.markers_set(short_exits, \"exits\")\n", + "# hsto = subchart.create_histogram(name=\"oivolo\")\n", + "# hsto.set(order_imbalance_sma)\n", + "\n", + "chart2 = chartN.create_subchart(position='left', width=1, height=0.5, sync=True, leftScale=True, toolbox=True)\n", + "# hst = chart2.create_histogram(name=\"buyvolume\", color=\"rgba(53, 94, 59, 0.6)\") #green transparent\n", + "# hst1 = chart2.create_histogram(name=\"sellvolume\", color=\"rgba(165, 42, 42, 0.6)\") #red transparent\n", + "# hst.set(t1data.data[\"BAC\"])\n", + "# hst1.set(t1data.data[\"BAC\"])\n", + "line2 = chart2.create_line(name=\"sma\")#, color=\"green\")\n", + "line2.set(sma)\n", + "chart2.topbar.textbox(\"title\",\"Nadpis\")\n", + "# chartN.topbar.textbox(\"title\",\"NadpisT\")\n", + "\n", + "# subchart.legend(True)\n", + "# subchart.fit()\n", + "chartN.load()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "##z tohoto si udelat plot funkci (i pro entries,exits)\n", + "#t1data = t1data[[\"open\", \"high\", \"low\", \"close\", \"volume\"]]\n", + "chart = JupyterChart(width=1000, height=600, inner_width=1, 
inner_height=0.5, leftScale=True)\n", + "#set resolution\n", + "t1data = basic_data[['open', 'high', 'low', 'close', 'volume','vwap','buyvolume','sellvolume']].resample(\"1T\")\n", + "t1data = t1data.transform(lambda df: df.between_time('09:30', '16:00').dropna())\n", + "chart.set(t1data.data[\"BAC\"])\n", + "line_vwap = chart.create_line(name=\"vwap\")#, color=\"blue\")\n", + "line_vwap.set(t1data.vwap)\n", + "\n", + "\n", + "chart.topbar.textbox(\"title\",\"Nadpis\")\n", + "chart2 = chart.create_subchart(position='right', width=1, height=0.5, sync=True, leftScale=True)\n", + "t2data = basic_data[['open', 'high', 'low', 'close', 'volume','vwap','buyvolume','sellvolume']].resample(\"5T\")\n", + "t2data = t2data.transform(lambda df: df.between_time('09:30', '16:00').dropna())\n", + "\n", + "\n", + "#5min close realigned to 1T\n", + "close_realigned = t2data.close.vbt.realign_closing(\"1T\").between_time('09:30', '16:00').dropna()\n", + "close_realigned = close_realigned[close_realigned.index.dayofweek < 5]\n", + "#close_realigned = close_realigned[close_realigned.index.dayofweek < 5]\n", + "line1 = chart.create_line(name=\"5minclose\")#, color=\"green\")\n", + "line1.set(close_realigned)\n", + "\n", + "#sma z realigned dat\n", + "sma_tp = 20\n", + "sma_t2 = vbt.indicator(\"talib:EMA\").run(close_realigned, timeperiod=sma_tp)\n", + "smaline = chart.create_line(name=f\"sma{sma_tp}\")#, color=\"blue\")\n", + "smaline.set(sma_t2)\n", + "\n", + "\n", + "#sma z puvodnich resamplovanych dat plus navic realign, melo by byt stejne \n", + "sma_real = vbt.indicator(\"talib:EMA\").run(t2data.close, timeperiod=sma_tp)\n", + "sma_real_value = sma_real.real.vbt.realign_closing(\"1T\").between_time('09:30', '16:00').dropna()\n", + "sma_real_value = sma_real_value[sma_real_value.index.dayofweek < 5]\n", + "smaline_real = chart.create_line(name=f\"smareal{sma_tp}\", color=\"yellow\")\n", + "smaline_real.set(sma_real_value)\n", + "\n", + "#resample 15T\n", + "t15data = 
basic_data[['open', 'high', 'low', 'close', 'volume','vwap','buyvolume','sellvolume']].resample(\"15T\")\n", + "t15data = t15data.transform(lambda df: df.between_time('09:30', '16:00').dropna())\n", + "\n", + "#5min close realigned to 1T\n", + "close_15realigned = t15data.close.vbt.realign_closing(\"1T\").between_time('09:30', '16:00').dropna()\n", + "close_15realigned = close_15realigned[close_15realigned.index.dayofweek < 5]\n", + "#close_realigned = close_realigned[close_realigned.index.dayofweek < 5]\n", + "line2 = chart.create_line(name=\"15minclose\")#, color=\"pink\")\n", + "line2.set(close_15realigned)\n", + "\n", + "\n", + "chart.legend(True)\n", + "hst = chart2.create_histogram(name=\"buyvolume\", color=\"rgba(53, 94, 59, 0.6)\") #green transparent\n", + "hst1 = chart2.create_histogram(name=\"sellvolume\", color=\"rgba(165, 42, 42, 0.6)\") #red transparent\n", + "hst.set(t1data.data[\"BAC\"])\n", + "hst1.set(t1data.data[\"BAC\"])\n", + "line2 = chart2.create_line(name=\"5minclose\")#, color=\"green\")\n", + "line2.set(close_realigned)\n", + "\n", + "lineoib = chart2.create_line(name=\"oib\", priceScaleId=\"left\") #color=\"violet\", \n", + "#lineoib.scale(0.7,0)\n", + "lineoib.set(order_imbalance_allvolume)\n", + "\n", + "lineoib_sma = chart2.create_line(name=\"oibsma5\", priceScaleId=\"left\") #, color=\"blue\", \n", + "lineoib_sma.set(order_imbalance_sma)\n", + "\n", + "chart.fit()\n", + "chart2.legend(True)\n", + "#\n", + "line2.markers_set(short_signals, \"entries\")\n", + "# TODO jelikoz se davaji do jednoho pole je treba zajistit spravne sortovani\n", + "# domyslet jak to pojmout iterativni doplnovani markeru\n", + "line2.markers_set(short_exits, \"exits\")\n", + "\n", + "\n", + "chart2.fit()\n", + "chart.load()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sma.info()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + 
"#priminds list (same Y as price), secinds list (secondary Y napr. rsi), close, voluminds (volume based) list\n", + "def plot_2y_close(priminds, secinds, close, volumeinds, ohlcv=None):\n", + " fig = vbt.make_subplots(rows=2, cols=1, shared_xaxes=True, \n", + " specs=[[{\"secondary_y\": True}], [{\"secondary_y\": False}]], \n", + " vertical_spacing=0.02, subplot_titles=(\"Price and Indicators\", \"Volume\"))\n", + "\n", + " if ohlcv is not None:\n", + " ohlcv.vbt.ohlcv.plot(fig=fig, add_trace_kwargs=dict(row=1, col=1))\n", + "\n", + " # Plotting the close price\n", + " close.vbt.plot(fig=fig, add_trace_kwargs=dict(secondary_y=False,row=1, col=1), trace_kwargs=dict(line=dict(color=\"blue\")))\n", + " \n", + " # Plotting primary indicators on the first row\n", + " for ind in priminds:\n", + " if isinstance(ind, pd.Series):\n", + " #if series has no name, make the name same as the variable name\n", + " \n", + " ind = ind.vbt\n", + " ind.plot(fig=fig, add_trace_kwargs=dict(secondary_y=False, row=1, col=1))\n", + " \n", + " # Plotting secondary indicators on the first row\n", + " for ind in secinds:\n", + " #ind = ind.rename(str(ind.name))\n", + " if isinstance(ind, pd.Series):\n", + " ind = ind.vbt\n", + " ind.plot(fig=fig, add_trace_kwargs=dict(secondary_y=True, row=1, col=1), trace_kwargs=dict(line=dict(color=\"rgba(255, 0, 0, 0.4)\")))\n", + " \n", + " for indvolume in volumeinds:\n", + " # Plotting the volume on the second row\n", + " indvolume.rename(str(indvolume.name)).vbt.barplot(fig=fig, add_trace_kwargs=dict(secondary_y=False, row=2, col=1))\n", + " #vbt.Bar(indvolume, fig=fig, add_trace_kwargs=dict(secondary_y=False, row=2, col=1))\n", + " \n", + " return fig\n", + "\n", + "fig = plot_2y_close([sma], [order_imbalance.rename(\"order_imbalance_norm\"),order_imbalance_sma.real.rename(\"oib_sma\")], t1data.close, [t1data.data[\"BAC\"].buyvolume, t1data.data[\"BAC\"].sellvolume, t1data.volume], t1data.data[\"BAC\"])\n", + "fig.update_yaxes(range=[33,34], 
secondary_y=False, row=1, col=1) #update y axis range\n", + "fig.update_yaxes(range=[-1,1], secondary_y=True, row=1, col=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib inline\n", + "t0data = basic_data\n", + "t1data = basic_data[['open', 'high', 'low', 'close', 'volume','vwap']].resample(\"1T\")\n", + "t2data = basic_data[['open', 'high', 'low', 'close', 'volume','vwap']].resample(\"15T\")\n", + "t3data = basic_data[['open', 'high', 'low', 'close', 'volume','vwap']].resample(\"30T\")\n", + "t4data = basic_data[['open', 'high', 'low', 'close', 'volume', 'vwap']].resample(\"D\").dropna()\n", + "\n", + "t1data = t1data.transform(lambda df: df.between_time('09:30', '16:00').dropna())\n", + "t2data = t2data.transform(lambda df: df.between_time('09:30', '16:00').dropna())\n", + "t3data = t3data.transform(lambda df: df.between_time('09:30', '16:00').dropna())\n", + "\n", + "#30min data to daily\n", + "# t4data = t3data.resample(\"D\").dropna()\n", + "\n", + "#t4data = t4data.transform(lambda df: df.between_time('09:30', '16:00').dropna())\n", + "#m1data.data[\"SPY\"].info()\n", + "\n", + "#m1data.data[\"SPY\"].vbt.ohlcv.plot()\n", + "#h2data.data[\"SPY\"].vbt.ohlcv.plot()\n", + "#ddata.data[\"SPY\"]\n", + "t2data.data[\"BAC\"].vbt.ohlcv.plot().show()\n", + "\n", + "\n", + "#t4data.data[\"BAC\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "t2data.close\n", + "\n", + "#in df remove rows with nan\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#realign na 1T = t1data + oriznout main session\n", + "t2data_vwap = t2data.vwap.vbt.realign_closing(\"1T\").between_time('09:30', '16:00').dropna()\n", + "t3data_vwap = t3data.vwap.vbt.realign_closing(\"1T\").between_time('09:30', '16:00').dropna()\n", + "t4data_vwap = 
t4data.vwap.vbt.realign_closing(\"1T\").dropna()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "t2data_vwap" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def plot_2y_close(priminds, secinds, close):\n", + " fig = vbt.make_subplots(rows=1, cols=1, shared_xaxes=True, specs=[[{\"secondary_y\": True}]], vertical_spacing=0.02, subplot_titles=(\"MOM\", \"Price\" ))\n", + " close.vbt.plot(fig=fig, add_trace_kwargs=dict(secondary_y=False), trace_kwargs=dict(line=dict(color=\"blue\")))\n", + " for ind in priminds:\n", + " if isinstance(ind, pd.Series):\n", + " ind = ind.vbt\n", + " ind.plot(fig=fig, add_trace_kwargs=dict(secondary_y=False))\n", + " for ind in secinds:\n", + " if isinstance(ind, pd.Series):\n", + " ind = ind.vbt\n", + " ind.plot(fig=fig, add_trace_kwargs=dict(secondary_y=True))\n", + " return fig" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "t4data.clos.vbt \n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "obvind = vbt.indicator.obv.run(t1data.close, t1data.volume)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "t1_lengtgh = 15\n", + "t2_length = 15\n", + "t3_length = 15\n", + "t4_length = 5\n", + "t1_th = 0.1\n", + "t2_th = 0.1\n", + "t3_th = 0.1\n", + "t4_th = 0.1\n", + "\n", + "\n", + "\n", + "#minute\n", + "t1slope = vbt.indicator(\"talib:LINEARREG_SLOPE \").run(t1data.close, timeperiod=t1_lengtgh) # -0.09, 0.09\n", + "t2slope = vbt.indicator(\"talib:LINEARREG_SLOPE \").run(t2data.vwap, timeperiod=t2_length) # -0.08 , 0.079\n", + "t3slope = vbt.indicator(\"talib:LINEARREG_SLOPE \").run(t3data.vwap, timeperiod=t3_length) # -0.08, 0.08\n", + "#daily\n", + "t4slope = vbt.indicator(\"talib:LINEARREG_SLOPE 
\").run(t4data.vwap, timeperiod=t4_length) # -0.1, 0.09\n", + "\n", + "plot_2y_close(priminds=[], secinds=[t1slope, t2slope, t3slope, t4slope], close=t1data.close).show()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#thirtymin_slope = thirtymin_slope.real.rename(\"30min\") #timto se prejmenuje real na 30min\n", + "t3slope = t3slope.real.vbt.realign_closing(\"1T\").between_time('09:30', '16:00').dropna()\n", + "##filter daily_slope_to_compare to only monday to friday\n", + "t3slope = t3slope[t3slope.index.dayofweek < 5]\n", + "\n", + "#t3slope.info()\n", + "\n", + "t2slope = t2slope.real.vbt.realign_closing(\"1T\").between_time('09:30', '16:00').dropna()\n", + "##filter daily_slope_to_compare to only monday to friday\n", + "t2slope = t2slope[t2slope.index.dayofweek < 5]\n", + "\n", + "t2slope.info()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "oibratio" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "#\n", + "short_signals = order_imbalance.vbt < -0.3\n", + "#short_entries = oibratio.vbt < 0.01\n", + "short_signals.value_counts()\n", + "\n", + "long_signals = order_imbalance.vbt > 0.3\n", + "#entries = oibratio.vbt > 10\n", + "long_signals.value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig = vbt.make_subplots(rows=3, cols=1, shared_xaxes=True, \n", + " specs=[[{\"secondary_y\": True}], [{\"secondary_y\": True}], [{\"secondary_y\": False}]], \n", + " vertical_spacing=0.02, subplot_titles=(\"Price and Indicators\", \"Volume\"))\n", + "t1data.data[\"BAC\"].vbt.ohlcv.plot(fig=fig, add_trace_kwargs=dict(secondary_y=False, row=1, col=1))\n", + "#oibratio.vbt.plot(fig=fig, add_trace_kwargs=dict(secondary_y=True, row=1, col=1))\n", + "order_imbalance.vbt.plot(fig=fig, 
add_trace_kwargs=dict(secondary_y=True, row=1, col=1))\n", + "long_signals.vbt.signals.plot_as_entries(t1data.close, fig=fig, trace_kwargs=dict(name=\"LONGS\",\n", + " line=dict(color=\"#ffe476\"),\n", + " marker=dict(color=\"limegreen\"),\n", + " fill=None,\n", + " connectgaps=True,\n", + " ), add_trace_kwargs=dict(secondary_y=False, row=1, col=1))\n", + "\n", + "short_signals.vbt.signals.plot_as_entries(t1data.close, fig=fig, trace_kwargs=dict(name=\"SHORTS\",\n", + " line=dict(color=\"#ffe476\"),\n", + " marker=dict(color=\"red\"),\n", + " fill=None,\n", + " connectgaps=True,\n", + " ), add_trace_kwargs=dict(secondary_y=False, row=1, col=1))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# thirtymin_slope_to_compare.vbt.xloc[\"04-16-2024\"].get()\n", + "thirty_down_signal.vbt.xloc[\"04-16-2024\"].get()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#short_signal = t1slope.real_below(t1_th) & t2slope.real_below(t2_th) & t3slope.real_below(t3_th) & t4slope.real_below(t4_th)\n", + "#long_signal = t1slope.real_above(t1_th) & t2slope.real_above(t2_th) & t3slope.real_above(t3_th) & t4slope.real_above(t4_th)\n", + "\n", + "#test na daily s reversem crossed 0\n", + "short_signal = t2slope.vbt < -0.01 & t3slope.vbt < -0.01 #min value of threshold\n", + "long_signal = t2slope.vbt > 0.01 & t3slope.vbt > 0.01 #min\n", + "\n", + "# thirty_up_signal = t3slope.vbt.crossed_above(0.01)\n", + "# thirty_down_signal = t3slope.vbt.crossed_below(-0.01)\n", + "\n", + "fig = plot_2y_close(priminds=[], secinds=[t3slope], close=t1data.close)\n", + "#short_signal.vbt.signals.plot_as_entries(basic_data.close, fig=fig)\n", + "\n", + "short_signal.vbt.signals.plot_as_entries(t1data.close, fig=fig, trace_kwargs=dict(name=\"SHORTS\",\n", + " line=dict(color=\"#ffe476\"),\n", + " marker=dict(color=\"red\", symbol=\"triangle-down\"),\n", + " fill=None,\n", 
+ " connectgaps=True,\n", + " ))\n", + "long_signal.vbt.signals.plot_as_entries(t1data.close, fig=fig, trace_kwargs=dict(name=\"LONGS\",\n", + " line=dict(color=\"#ffe476\"),\n", + " marker=dict(color=\"limegreen\"),\n", + " fill=None,\n", + " connectgaps=True,\n", + " ))\n", + "\n", + "# thirty_down_signal.vbt.signals.plot_as_entries(t1data.close, fig=fig, trace_kwargs=dict(name=\"DOWN30\",\n", + "# line=dict(color=\"#ffe476\"),\n", + "# marker=dict(color=\"yellow\", symbol=\"triangle-down\"),\n", + "# fill=None,\n", + "# connectgaps=True,\n", + "# ))\n", + "# thirty_up_signal.vbt.signals.plot_as_entries(t1data.close, fig=fig, trace_kwargs=dict(name=\"UP30\",\n", + "# line=dict(color=\"#ffe476\"),\n", + "# marker=dict(color=\"grey\"),\n", + "# fill=None,\n", + "# connectgaps=True,\n", + "# ))\n", + "\n", + "# thirtymin_slope_to_compare.vbt.plot(fig=fig, add_trace_kwargs=dict(secondary_y=True), trace_kwargs=dict(name=\"30min slope\",\n", + "# line=dict(color=\"yellow\"), \n", + "# fill=None,\n", + "# connectgaps=True,\n", + "# ))\n", + "\n", + "fig.show()\n", + "# print(\"short signal\")\n", + "# print(short_signal.value_counts())\n", + "\n", + "#forced_exit = pd.Series(False, index=close.index)\n", + "forced_exit = basic_data.symbol_wrapper.fill(False)\n", + "#entry_window_open = pd.Series(False, index=close.index)\n", + "entry_window_open= basic_data.symbol_wrapper.fill(False)\n", + "\n", + "# Calculate the time difference in minutes from market open for each timestamp\n", + "elapsed_min_from_open = (forced_exit.index.hour - market_open.hour) * 60 + (forced_exit.index.minute - market_open.minute)\n", + "\n", + "entry_window_open[(elapsed_min_from_open >= entry_window_opens) & (elapsed_min_from_open < entry_window_closes)] = True\n", + "\n", + "#print(entry_window_open.value_counts())\n", + "\n", + "forced_exit[(elapsed_min_from_open >= forced_exit_start) & (elapsed_min_from_open < forced_exit_end)] = True\n", + "short_entries = (short_signal & 
entry_window_open)\n", + "short_exits = forced_exit\n", + "\n", + "entries = (long_signal & entry_window_open)\n", + "exits = forced_exit\n", + "#long_entries.info()\n", + "#number of trues and falses in long_entries\n", + "# print(short_exits.value_counts())\n", + "# print(short_entries.value_counts())\n", + "\n", + "#fig = plot_2y_close([],[momshort, rocp], close)\n", + "#short_signal.vbt.signals.plot_as_entries(close, fig=fig, add_trace_kwargs=dict(secondary_y=False))\n", + "#print(sl_stop)\n", + "#short_entries=short_entries, short_exits=short_exits,\n", + "# pf = vbt.Portfolio.from_signals(close=basic_data, entries=short_entries, exits=exits, tsl_stop=0.005, tp_stop = 0.05, fees=0.0167/100, freq=\"1s\") #sl_stop=sl_stop, tp_stop = sl_stop,\n", + "\n", + "# pf.stats()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "forced_exit = t1data.symbol_wrapper.fill(False)\n", + "#entry_window_open = pd.Series(False, index=close.index)\n", + "entry_window_open= t1data.symbol_wrapper.fill(False)\n", + "\n", + "# Calculate the time difference in minutes from market open for each timestamp\n", + "elapsed_min_from_open = (forced_exit.index.hour - market_open.hour) * 60 + (forced_exit.index.minute - market_open.minute)\n", + "\n", + "entry_window_open[(elapsed_min_from_open >= entry_window_opens) & (elapsed_min_from_open < entry_window_closes)] = True\n", + "\n", + "#print(entry_window_open.value_counts())\n", + "\n", + "forced_exit[(elapsed_min_from_open >= forced_exit_start) & (elapsed_min_from_open < forced_exit_end)] = True\n", + "short_entries = (short_signals & entry_window_open)\n", + "short_exits = forced_exit\n", + "\n", + "entries = (long_signals & entry_window_open)\n", + "exits = forced_exit\n", + "\n", + "pf = vbt.Portfolio.from_signals(close=t1data, entries=entries, exits=exits, short_entries=short_entries, short_exits=exits,\n", + "td_stop=2, time_delta_format=\"rows\",\n", + "tsl_stop=0.005, 
tp_stop = 0.005, fees=0.0167/100)#, freq=\"1s\") #sl_stop=sl_stop, tp_stop = sl_stop,\n", + "\n", + "pf.stats()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf.plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf.get_drawdowns().records_readable" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf.orders.records_readable" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/research/strat_ORDER_IMBALANCE/v1_MULTI.ipynb b/research/strat_ORDER_IMBALANCE/v1_MULTI.ipynb new file mode 100644 index 0000000..99c2982 --- /dev/null +++ b/research/strat_ORDER_IMBALANCE/v1_MULTI.ipynb @@ -0,0 +1,932 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from v2realbot.tools.loadbatch import load_batch\n", + "from v2realbot.utils.utils import zoneNY\n", + "import pandas as pd\n", + "import numpy as np\n", + "import vectorbtpro as vbt\n", + "from itables import init_notebook_mode, show\n", + "import datetime\n", + "from itertools import product\n", + "from v2realbot.config import ACCOUNT1_PAPER_API_KEY, ACCOUNT1_PAPER_SECRET_KEY, DATA_DIR\n", + "\n", + "init_notebook_mode(all_interactive=True)\n", + "\n", + "vbt.settings.set_theme(\"dark\")\n", + "vbt.settings['plotting']['layout']['width'] = 1280\n", + "vbt.settings.plotting.auto_rangebreaks = True\n", + "# Set the option to display with pagination\n", + 
"pd.set_option('display.notebook_repr_html', True)\n", + "pd.set_option('display.max_rows', 10) # Number of rows per page\n", + "\n", + "# Define the market open and close times\n", + "market_open = datetime.time(9, 30)\n", + "market_close = datetime.time(16, 0)\n", + "entry_window_opens = 1\n", + "entry_window_closes = 370\n", + "\n", + "forced_exit_start = 380\n", + "forced_exit_end = 390\n", + "\n", + "#LOAD FROM PARQUET\n", + "#list all files is dir directory with parquet extension\n", + "dir = DATA_DIR + \"/notebooks/\"\n", + "import os\n", + "files = [f for f in os.listdir(dir) if f.endswith(\".parquet\")]\n", + "#print('\\n'.join(map(str, files)))\n", + "file_name = \"ohlcv_df-SPY-2024-01-01T09:30:00-2024-05-14T16:00:00.parquet\"\n", + "ohlcv_df = pd.read_parquet(dir+file_name,engine='pyarrow')\n", + "basic_data = vbt.Data.from_data(vbt.symbol_dict({\"SPY\": ohlcv_df}), tz_convert=zoneNY)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#parameters (primary y line, secondary y line, close)\n", + "def plot_2y_close(priminds, secinds, close):\n", + " fig = vbt.make_subplots(rows=1, cols=1, shared_xaxes=True, specs=[[{\"secondary_y\": True}]], vertical_spacing=0.02, subplot_titles=(\"MOM\", \"Price\" ))\n", + " close.vbt.plot(fig=fig, add_trace_kwargs=dict(secondary_y=False), trace_kwargs=dict(line=dict(color=\"blue\")))\n", + " for ind in priminds:\n", + " ind.plot(fig=fig, add_trace_kwargs=dict(secondary_y=False))\n", + " for ind in secinds:\n", + " ind.plot(fig=fig, add_trace_kwargs=dict(secondary_y=True))\n", + " return fig\n", + "\n", + "# close = basic_data.xloc[\"09:30\":\"10:00\"].close" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#PIPELINE - FOR - LOOP\n", + "\n", + "#indicator parameters\n", + "mom_timeperiod = list(range(2, 12))\n", + "\n", + "#uzavreni okna od 1 do 200\n", + "#entry_window_closes = list(range(2, 
50, 3))\n", + "entry_window_closes = [5, 10, 30, 45]\n", + "#entry_window_closes = 30\n", + "#threshold entries parameters\n", + "#long\n", + "mom_th = np.round(np.arange(0.01, 0.5 + 0.02, 0.02),4).tolist()#-0.02\n", + "# short\n", + "#mom_th = np.round(np.arange(-0.01, -0.3 - 0.02, -0.02),4).tolist()#-0.02\n", + "roc_th = np.round(np.arange(-0.2, -0.8 - 0.05, -0.05),4).tolist()#-0.2\n", + "#print(mom_th, roc_th)\n", + "\n", + "#portfolio simulation parameters\n", + "sl_stop =np.round(np.arange(0.02/100, 0.7/100, 0.05/100),4).tolist()\n", + "tp_stop = np.round(np.arange(0.02/100, 0.7/100, 0.05/100),4).tolist()\n", + "\n", + "combs = list(product(mom_timeperiod, mom_th, roc_th, sl_stop, tp_stop))\n", + "\n", + "@vbt.parameterized(merge_func = \"concat\", random_subset = 2000, show_progress=True) \n", + "def test_strat(entry_window_closes=60,\n", + " mom_timeperiod=2,\n", + " mom_th=-0.04,\n", + " #roc_th=-0.2,\n", + " sl_stop=0.19/100,\n", + " tp_stop=0.19/100):\n", + " # mom_timeperiod=2\n", + " # mom_th=-0.06\n", + " # roc_th=-0.2\n", + " # sl_stop=0.04/100\n", + " # tp_stop=0.04/100\n", + "\n", + " momshort = vbt.indicator(\"talib:MOM\").run(basic_data.close, timeperiod=mom_timeperiod, short_name = \"slope_short\")\n", + " rocp = vbt.indicator(\"talib:ROC\").run(basic_data.close, short_name = \"rocp\")\n", + " #rate of change + momentum\n", + "\n", + " #momshort.plot rocp.real_crossed_below(roc_th) & \n", + " #short_signal = momshort.real_crossed_below(mom_th)\n", + " long_signal = momshort.real_crossed_above(mom_th)\n", + " # print(\"short signal\")\n", + " # print(short_signal.value_counts())\n", + "\n", + " #forced_exit = pd.Series(False, index=close.index)\n", + " forced_exit = basic_data.symbol_wrapper.fill(False)\n", + " #entry_window_open = pd.Series(False, index=close.index)\n", + " entry_window_open= basic_data.symbol_wrapper.fill(False)\n", + "\n", + " #print(entry_window_closes, \"entry window closes\")\n", + " # Calculate the time difference in 
minutes from market open for each timestamp\n", + " elapsed_min_from_open = (forced_exit.index.hour - market_open.hour) * 60 + (forced_exit.index.minute - market_open.minute)\n", + "\n", + " entry_window_open[(elapsed_min_from_open >= entry_window_opens) & (elapsed_min_from_open < entry_window_closes)] = True\n", + "\n", + " #print(entry_window_open.value_counts())\n", + "\n", + " forced_exit[(elapsed_min_from_open >= forced_exit_start) & (elapsed_min_from_open < forced_exit_end)] = True\n", + " #short_entries = (short_signal & entry_window_open)\n", + " #short_exits = forced_exit\n", + " entries = (long_signal & entry_window_open)\n", + " exits = forced_exit\n", + " #long_entries.info()\n", + " #number of trues and falses in long_entries\n", + " #print(short_exits.value_counts())\n", + " #print(short_entries.value_counts())\n", + "\n", + " #fig = plot_2y_close([],[momshort, rocp], close)\n", + " #short_signal.vbt.signals.plot_as_entries(close, fig=fig, add_trace_kwargs=dict(secondary_y=False))\n", + " #print(sl_stop)\n", + " #tsl_th=sl_stop, \n", + " #short_entries=short_entries, short_exits=short_exits,\n", + " pf = vbt.Portfolio.from_signals(close=basic_data.close, entries=entries, exits=exits, tsl_stop=sl_stop, tp_stop = tp_stop, fees=0.0167/100, freq=\"1s\", price=\"close\") #sl_stop=sl_stop, tp_stop = sl_stop,\n", + " \n", + " return pf.stats([\n", + " 'total_return',\n", + " 'max_dd', \n", + " 'total_trades', \n", + " 'win_rate', \n", + " 'expectancy'\n", + " ])\n", + "\n", + "pf_results = test_strat(vbt.Param(entry_window_closes),\n", + " vbt.Param(mom_timeperiod),\n", + " vbt.Param(mom_th),\n", + " #vbt.Param(roc_th)\n", + " vbt.Param(sl_stop),\n", + " vbt.Param(tp_stop, condition=\"tp_stop > sl_stop\"))\n", + "pf_results = pf_results.unstack(level=-1)\n", + "pf_results.sort_values(by=[\"Total Return [%]\", \"Max Drawdown [%]\"], ascending=[False, True])\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + 
"source": [ + "#pf_results.load(\"10tiscomb.pickle\")\n", + "#pf_results.info()\n", + "\n", + "vbt.save(pf_results, \"8tiscomb_tsl.pickle\")\n", + "\n", + "# pf_results = vbt.load(\"8tiscomb_tsl.pickle\")\n", + "# pf_results\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# parallel_coordinates method¶\n", + "\n", + "# attach_px_methods..plot_func(\n", + "# *args,\n", + "# layout=None,\n", + "# **kwargs\n", + "# )\n", + "\n", + "# pf_results.vbt.px.parallel_coordinates() #ocdf\n", + "\n", + "res = pf_results.reset_index()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf_results" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "from sklearn.decomposition import PCA\n", + "from sklearn.preprocessing import StandardScaler\n", + "import matplotlib.pyplot as plt\n", + "\n", + "# Assuming pf_results is your DataFrame\n", + "# Convert columns to numeric, assuming NaNs where conversion fails\n", + "metrics = ['Total Return [%]', 'Max Drawdown [%]', 'Total Trades']\n", + "for metric in metrics:\n", + " pf_results[metric] = pd.to_numeric(pf_results[metric], errors='coerce')\n", + "\n", + "# Handle missing values, for example filling with the median\n", + "pf_results['Max Drawdown [%]'].fillna(pf_results['Max Drawdown [%]'].median(), inplace=True)\n", + "\n", + "# Extract the metrics into a new DataFrame\n", + "data_for_pca = pf_results[metrics]\n", + "\n", + "# Standardize the data before applying PCA\n", + "scaler = StandardScaler()\n", + "data_scaled = scaler.fit_transform(data_for_pca)\n", + "\n", + "# Apply PCA\n", + "pca = PCA(n_components=2) # Adjust components as needed\n", + "principal_components = 
pca.fit_transform(data_scaled)\n", + "\n", + "# Create a DataFrame with the principal components\n", + "pca_results = pd.DataFrame(data=principal_components, columns=['PC1', 'PC2'])\n", + "\n", + "# Visualize the results\n", + "plt.figure(figsize=(8,6))\n", + "plt.scatter(pca_results['PC1'], pca_results['PC2'], alpha=0.5)\n", + "plt.xlabel('Principal Component 1')\n", + "plt.ylabel('Principal Component 2')\n", + "plt.title('PCA of Strategy Optimization Results')\n", + "plt.grid(True)\n", + "plt.savefig(\"ddd.png\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Check if there is any unnamed level and rename it\n", + "if None in df.index.names:\n", + " # Generate new names list replacing None with 'stat'\n", + " new_names = ['stat' if name is None else name for name in df.index.names]\n", + " df.index.set_names(new_names, inplace=True)\n", + "\n", + "rs= df\n", + "\n", + "rs.info()\n", + "\n", + "\n", + "# # Now, 'stat' is the name of the previously unnamed level\n", + "\n", + "# # Filter for 'Total Return' assuming it is a correct identifier in the 'stat' level\n", + "# total_return_series = df.xs('Total Return [%]', level='stat')\n", + "\n", + "# # Sort the Series to get the largest 'Total Return' values\n", + "# sorted_series = total_return_series.sort_values(ascending=False)\n", + "\n", + "# # Print the sorted filtered data\n", + "# sorted_series.head(20)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sorted_series.vbt.save()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#df.info()\n", + "total_return_series = df.xs('Total Return [%]')\n", + "sorted_series = total_return_series.sort_values(ascending=False)\n", + "\n", + "# Display the top N entries, e.g., top 5\n", + "sorted_series.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + 
"metadata": {}, + "outputs": [], + "source": [ + "df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "comb_stats_df.nlargest(10, 'Total Return [%]')\n", + "#stats_df.info()\n", + "\n", + "\n", + "8\t-0.06\t-0.2\t0.0028\t0.0048\t4.156254\n", + "4 -0.02 -0.25 0.0028 0.0048 0.84433\n", + "3 -0.02 -0.25 0.0033 0.0023 Total Return [%] 0.846753\n", + "#2\t-0.04\t-0.2\t0.0019\t0.0019\n", + "# 2\t-0.04\t-0.2\t0.0019\t0.0019\t0.556919\t91\t60.43956\t0.00612\n", + "# 2\t-0.04\t-0.25\t0.0019\t0.0019\t0.556919\t91\t60.43956\t0.00612\n", + "# 2\t-0.04\t-0.3\t0.0019\t0.0019\t0.556919\t91\t60.43956\t0.00612\n", + "# 2\t-0.04\t-0.35\t0.0019\t0.0019\t0.556919\t91\t60.43956\t0.00612\n", + "# 2\t-0.04\t-0.4\t0.0019\t0.0019\t0.556919\t91\t60.43956\t0.00612\n", + "# 2\t-0.04\t-0.2\t0.0019\t0.0017\t0.451338\t93\t63.44086\t0.004853\n", + "# 2\t-0.04\t-0.25\t0.0019\t0.0017\t0.451338\t93\t63.44086\t0.004853\n", + "# 2\t-0.04\t-0.3\t0.0019\t0.0017\t0.451338\t93\t63.44086\t0.004853\n", + "# 2\t-0.04\t-0.35\t0.0019\t0.0017\t0.451338\t93\t63.44086\t0.004853\n", + "# 2\t-0.04\t-0.4\t0.0019\t0.0017\t0.451338\t93\t63.44086\t0.004853" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf.plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "basic_data.symbols" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + ">>> def apply_func(ts, entries, exits, fastw, sloww, minp=None):\n", + "... fast_ma = vbt.nb.rolling_mean_nb(ts, fastw, minp=minp)\n", + "... slow_ma = vbt.nb.rolling_mean_nb(ts, sloww, minp=minp)\n", + "... entries[:] = vbt.nb.crossed_above_nb(fast_ma, slow_ma) \n", + "... exits[:] = vbt.nb.crossed_above_nb(slow_ma, fast_ma)\n", + "... return (fast_ma, slow_ma) \n", + "\n", + ">>> CrossSig = vbt.IF(\n", + "... 
class_name=\"CrossSig\",\n", + "... input_names=['ts'],\n", + "... in_output_names=['entries', 'exits'],\n", + "... param_names=['fastw', 'sloww'],\n", + "... output_names=['fast_ma', 'slow_ma']\n", + "... ).with_apply_func(\n", + "... apply_func,\n", + "... in_output_settings=dict(\n", + "... entries=dict(dtype=np.bool_), #initialize output with bool\n", + "... exits=dict(dtype=np.bool_)\n", + "... )\n", + "... )\n", + ">>> cross_sig = CrossSig.run(ts2, 2, 4)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#PIPELINE - parameters in one go\n", + "\n", + "\n", + "#TOTO prepsat do FOR-LOOPu\n", + "\n", + "\n", + "#indicator parameters\n", + "mom_timeperiod = list(range(2, 6))\n", + "\n", + "#threshold entries parameters\n", + "mom_th = np.round(np.arange(-0.02, -0.1 - 0.02, -0.02),4).tolist()#-0.02\n", + "roc_th = np.round(np.arange(-0.2, -0.4 - 0.05, -0.05),4).tolist()#-0.2\n", + "#print(mom_th, roc_th)\n", + "#jejich product\n", + "# mom_th_prod, roc_th_prod = zip(*product(mom_th, roc_th))\n", + "\n", + "# #convert threshold to vbt param\n", + "# mom_th_index = vbt.Param(mom_th_prod, name='mom_th_th') \n", + "# roc_th_index = vbt.Param(roc_th_prod, name='roc_th_th')\n", + "\n", + "mom_th = vbt.Param(mom_th, name='mom_th')\n", + "roc_th = vbt.Param(roc_th, name='roc_th')\n", + "\n", + "#portfolio simulation parameters\n", + "sl_stop = np.arange(0.03/100, 0.2/100, 0.02/100).tolist()\n", + "# Using the round function\n", + "sl_stop = [round(val, 4) for val in sl_stop]\n", + "tp_stop = np.arange(0.03/100, 0.2/100, 0.02/100).tolist()\n", + "# Using the round function\n", + "tp_stop = [round(val, 4) for val in tp_stop]\n", + "sl_stop = vbt.Param(sl_stop) #np.nan mean s no stoploss\n", + "tp_stop = vbt.Param(tp_stop) #np.nan mean s no stoploss\n", + "\n", + "\n", + "#def test_mom(window=14, mom_th=0.2, roc_th=0.2, sl_stop=0.03/100, tp_stop=0.03/100):\n", + "#close = 
basic_data.xloc[\"09:30\":\"10:00\"].close\n", + "momshort = vbt.indicator(\"talib:MOM\").run(basic_data.get(\"Close\"), timeperiod=mom_timeperiod, short_name = \"slope_short\")\n", + "\n", + "#ht_trendline = vbt.indicator(\"talib:HT_TRENDLINE\").run(close, short_name = \"httrendline\")\n", + "rocp = vbt.indicator(\"talib:ROC\").run(basic_data.get(\"Close\"), short_name = \"rocp\")\n", + "#rate of change + momentum\n", + "\n", + "rocp_signal = rocp.real_crossed_below(mom_th)\n", + "mom_signal = momshort.real_crossed_below(roc_th)\n", + "\n", + "#mom_signal\n", + "print(rocp_signal.info())\n", + "print(mom_signal.info())\n", + "#print(rocp.real)\n", + "\n", + "\n", + "short_signal = (mom_signal.vbt & rocp_signal)\n", + "\n", + "# #short_signal = (rocp.real_crossed_below(roc_th_index) & momshort.real_crossed_below(mom_th_index))\n", + "# forced_exit = m1_data.symbol_wrapper.fill(False)\n", + "# entry_window_open= m1_data.symbol_wrapper.fill(False)\n", + "\n", + "\n", + "# # Calculate the time difference in minutes from market open for each timestamp\n", + "# elapsed_min_from_open = (forced_exit.index.hour - market_open.hour) * 60 + (forced_exit.index.minute - market_open.minute)\n", + "\n", + "# entry_window_open[(elapsed_min_from_open >= entry_window_opens) & (elapsed_min_from_open < entry_window_closes)] = True\n", + "# forced_exit[(elapsed_min_from_open >= forced_exit_start) & (elapsed_min_from_open < forced_exit_end)] = True\n", + "# short_entries = (short_signal & entry_window_open)\n", + "# short_exits = forced_exit\n", + "# #long_entries.info()\n", + "# #number of trues and falses in long_entries\n", + "# #short_exits.value_counts()\n", + "# #short_entries.value_counts()\n", + "\n", + "\n", + "# pf = vbt.Portfolio.from_signals(close=close, short_entries=short_entries, short_exits=short_exits, sl_stop=sl_stop, tp_stop = tp_stop, fees=0.0167/100, freq=\"1s\") #sl_stop=sl_stop, tp_stop = sl_stop,\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": 
{}, + "source": [ + "# filter dates" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#filter na dny\n", + "dates_of_interest = pd.to_datetime(['2024-04-22']).tz_localize('US/Eastern')\n", + "filtered_df = df.loc[df.index.normalize().isin(dates_of_interest)]\n", + "\n", + "df = filtered_df\n", + "df.info()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# import plotly.io as pio\n", + "# pio.renderers.default = 'notebook'\n", + "\n", + "#naloadujeme do vbt symbol as column\n", + "basic_data = vbt.Data.from_data({\"BAC\": df}, tz_convert=zoneNY)\n", + "\n", + "vbt.settings.plotting.auto_rangebreaks = True\n", + "#basic_data.data[\"BAC\"].vbt.ohlcv.plot()\n", + "\n", + "#basic_data.data[\"BAC\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "m1_data = basic_data[['Open', 'High', 'Low', 'Close', 'Volume']]\n", + "\n", + "m1_data.data[\"BAC\"]\n", + "#m5_data = m1_data.resample(\"5T\")\n", + "\n", + "#m5_data.data[\"BAC\"].head(10)\n", + "\n", + "# m15_data = m1_data.resample(\"15T\")\n", + "\n", + "# m15 = m15_data.data[\"BAC\"]\n", + "\n", + "# m15.vbt.ohlcv.plot()\n", + "\n", + "# m1_data.wrapper.index\n", + "\n", + "# m1_resampler = m1_data.wrapper.get_resampler(\"1T\")\n", + "# m1_resampler.index_difference(reverse=True)\n", + "\n", + "\n", + "# m5_resampler.prettify()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# MOM indicator" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vbt.phelp(vbt.indicator(\"talib:ROCP\").run)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "vyuzití rychleho klesani na sekundove urovni behem open rush\n", + "- MOM + ROC during open rush\n", + "- short signal\n", + "- pipeline kombinace thresholdu pro vstup mom_th, 
roc_th + hodnota sl_stop a tp_stop (pripadne trailing) - nalezeni optimalni kombinace atributu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# fig = plot_2y_close([ht_trendline],[momshort, rocp], close)\n", + "# short_signal.vbt.signals.plot_as_entries(close, fig=fig, add_trace_kwargs=dict(secondary_y=False)) \n", + "\n", + "#parameters (primary y line, secondary y line, close)\n", + "def plot_2y_close(priminds, secinds, close):\n", + " fig = vbt.make_subplots(rows=1, cols=1, shared_xaxes=True, specs=[[{\"secondary_y\": True}]], vertical_spacing=0.02, subplot_titles=(\"MOM\", \"Price\" ))\n", + " close.vbt.plot(fig=fig, add_trace_kwargs=dict(secondary_y=False), trace_kwargs=dict(line=dict(color=\"blue\")))\n", + " for ind in priminds:\n", + " ind.plot(fig=fig, add_trace_kwargs=dict(secondary_y=False))\n", + " for ind in secinds:\n", + " ind.plot(fig=fig, add_trace_kwargs=dict(secondary_y=True))\n", + " return fig\n", + "\n", + "close = m1_data.xloc[\"09:30\":\"10:00\"].close\n", + "momshort = vbt.indicator(\"talib:MOM\").run(close, timeperiod=3, short_name = \"slope_short\")\n", + "ht_trendline = vbt.indicator(\"talib:HT_TRENDLINE\").run(close, short_name = \"httrendline\")\n", + "rocp = vbt.indicator(\"talib:ROC\").run(close, short_name = \"rocp\")\n", + "#rate of change + momentum\n", + "short_signal = (rocp.real_crossed_below(-0.2) & momshort.real_crossed_below(-0.02))\n", + "#indlong = vbt.indicator(\"talib:MOM\").run(close, timeperiod=10, short_name = \"slope_long\")\n", + "fig = plot_2y_close([ht_trendline],[momshort, rocp], close)\n", + "short_signal.vbt.signals.plot_as_entries(close, fig=fig, add_trace_kwargs=dict(secondary_y=False)) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "close = m1_data.close\n", + "#vbt.phelp(vbt.OLS.run)\n", + "\n", + "#oer steepmnes of regression line\n", + "#talib.LINEARREG_SLOPE(close, 
timeperiod=timeperiod)\n", + "#a také ON BALANCE VOLUME - http://5.161.179.223:8000/static/js/vbt/api/indicators/custom/obv/index.html\n", + "\n", + "\n", + "\n", + "mom_ind = vbt.indicator(\"talib:MOM\") \n", + "#vbt.phelp(mom_ind.run)\n", + "\n", + "mom = mom_ind.run(close, timeperiod=10)\n", + "\n", + "plot_2y_close(mom, close)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# defining ENTRY WINDOW and forced EXIT window" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#m1_data.data[\"BAC\"].info()\n", + "import datetime\n", + "# Define the market open and close times\n", + "market_open = datetime.time(9, 30)\n", + "market_close = datetime.time(16, 0)\n", + "entry_window_opens = 2\n", + "entry_window_closes = 30\n", + "\n", + "forced_exit_start = 380\n", + "forced_exit_end = 390\n", + "\n", + "forced_exit = m1_data.symbol_wrapper.fill(False)\n", + "entry_window_open= m1_data.symbol_wrapper.fill(False)\n", + "\n", + "# Calculate the time difference in minutes from market open for each timestamp\n", + "elapsed_min_from_open = (forced_exit.index.hour - market_open.hour) * 60 + (forced_exit.index.minute - market_open.minute)\n", + "\n", + "entry_window_open[(elapsed_min_from_open >= entry_window_opens) & (elapsed_min_from_open < entry_window_closes)] = True\n", + "forced_exit[(elapsed_min_from_open >= forced_exit_start) & (elapsed_min_from_open < forced_exit_end)] = True\n", + "\n", + "#entry_window_open.info()\n", + "# forced_exit.tail(100)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "close = m1_data.close\n", + "\n", + "#rsi = vbt.RSI.run(close, window=14)\n", + "\n", + "short_entries = (short_signal & entry_window_open)\n", + "short_exits = forced_exit\n", + "#long_entries.info()\n", + "#number of trues and falses in long_entries\n", + "#short_exits.value_counts()\n", + "short_entries.value_counts()" 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def plot_rsi(close, entries, exits):\n", + " fig = vbt.make_subplots(rows=1, cols=1, shared_xaxes=True, specs=[[{\"secondary_y\": True}]], vertical_spacing=0.02, subplot_titles=(\"RSI\", \"Price\" ))\n", + " close.vbt.plot(fig=fig, add_trace_kwargs=dict(secondary_y=True))\n", + " #rsi.plot(fig=fig, add_trace_kwargs=dict(secondary_y=False))\n", + " entries.vbt.signals.plot_as_entries(close, fig=fig, add_trace_kwargs=dict(secondary_y=False)) \n", + " exits.vbt.signals.plot_as_exits(close, fig=fig, add_trace_kwargs=dict(secondary_y=False)) \n", + " return fig\n", + "\n", + "plot_rsi(close, short_entries, short_exits)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vbt.phelp(vbt.Portfolio.from_signals)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sl_stop = np.arange(0.03/100, 0.2/100, 0.02/100).tolist()\n", + "# Using the round function\n", + "sl_stop = [round(val, 4) for val in sl_stop]\n", + "print(sl_stop)\n", + "sl_stop = vbt.Param(sl_stop) #np.nan mean s no stoploss\n", + "\n", + "pf = vbt.Portfolio.from_signals(close=close, short_entries=short_entries, short_exits=short_exits, sl_stop=0.03/100, tp_stop = 0.03/100, fees=0.0167/100, freq=\"1s\") #sl_stop=sl_stop, tp_stop = sl_stop,\n", + "\n", + "#pf.stats()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#list of orders\n", + "#pf.orders.records_readable\n", + "#pf.orders.plots()\n", + "#pf.stats()\n", + "pf.stats()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf.plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf[(0.0015,0.0013)].plot()" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf[0.03].plot_trade_signals()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# pristup k pf jako multi index" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#pf[0.03].plot()\n", + "#pf.order_records\n", + "pf[(0.03)].stats()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#zgrupovane statistiky\n", + "stats_df = pf.stats([\n", + " 'total_return',\n", + " 'total_trades',\n", + " 'win_rate',\n", + " 'expectancy'\n", + "], agg_func=None)\n", + "stats_df\n", + "\n", + "\n", + "stats_df.nlargest(10, 'Total Return [%]')\n", + "#stats_df.info()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf[(0.0011,0.0013000000000000002)].plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pandas.tseries.offsets import DateOffset\n", + "\n", + "temp_data = basic_data['2024-4-22']\n", + "temp_data\n", + "res1m = temp_data[[\"Open\", \"High\", \"Low\", \"Close\", \"Volume\"]]\n", + "\n", + "# Define a custom date offset that starts at 9:30 AM and spans 4 hours\n", + "custom_offset = DateOffset(hours=4, minutes=30)\n", + "\n", + "# res1m = res1m.get().resample(\"4H\").agg({ \n", + "# \"Open\": \"first\",\n", + "# \"High\": \"max\",\n", + "# \"Low\": \"min\",\n", + "# \"Close\": \"last\",\n", + "# \"Volume\": \"sum\"\n", + "# })\n", + "\n", + "res4h = res1m.resample(\"1h\", resample_kwargs=dict(origin=\"start\"))\n", + "\n", + "res4h.data\n", + "\n", + "res15m = res1m.resample(\"15T\", resample_kwargs=dict(origin=\"start\"))\n", + "\n", + "res15m.data[\"BAC\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + 
"@vbt.njit\n", + "def long_entry_place_func_nb(c, low, close, time_in_ns, rsi14, window_open, window_close):\n", + " market_open_minutes = 570 # 9 hours * 60 minutes + 30 minutes\n", + "\n", + " for out_i in range(len(c.out)):\n", + " i = c.from_i + out_i\n", + "\n", + " current_minutes = vbt.dt_nb.hour_nb(time_in_ns[i]) * 60 + vbt.dt_nb.minute_nb(time_in_ns[i])\n", + " #print(\"current_minutes\", current_minutes)\n", + " # Calculate elapsed minutes since market open at 9:30 AM\n", + " elapsed_from_open = current_minutes - market_open_minutes\n", + " elapsed_from_open = elapsed_from_open if elapsed_from_open >= 0 else 0\n", + " #print( \"elapsed_from_open\", elapsed_from_open)\n", + "\n", + " #elapsed_from_open = elapsed_minutes_from_open_nb(time_in_ns) \n", + " in_window = elapsed_from_open > window_open and elapsed_from_open < window_close\n", + " #print(\"in_window\", in_window)\n", + " # if in_window:\n", + " # print(\"in window\")\n", + "\n", + " if in_window and rsi14[i] > 60: # and low[i, c.col] <= hit_price: # and hour == 9: # (4)!\n", + " return out_i\n", + " return -1\n", + "\n", + "@vbt.njit\n", + "def long_exit_place_func_nb(c, high, close, time_index, tp, sl): # (5)!\n", + " entry_i = c.from_i - c.wait\n", + " entry_price = close[entry_i, c.col]\n", + " hit_price = entry_price * (1 + tp)\n", + " stop_price = entry_price * (1 - sl)\n", + " for out_i in range(len(c.out)):\n", + " i = c.from_i + out_i\n", + " last_bar_of_day = vbt.dt_nb.day_changed_nb(time_index[i], time_index[i + 1])\n", + "\n", + " #print(next_day)\n", + " if last_bar_of_day: #pokud je dalsi next day, tak zavirame posledni\n", + " print(\"ted\",out_i)\n", + " return out_i\n", + " if close[i, c.col] >= hit_price or close[i, c.col] <= stop_price :\n", + " return out_i\n", + " return -1\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.DataFrame(np.random.random(size=(5, 10)), 
columns=list('abcdefghij'))\n", + "\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df.sum()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/research/strat_ORDER_IMBALANCE/v1_SINGLE.ipynb b/research/strat_ORDER_IMBALANCE/v1_SINGLE.ipynb new file mode 100644 index 0000000..b53293c --- /dev/null +++ b/research/strat_ORDER_IMBALANCE/v1_SINGLE.ipynb @@ -0,0 +1,964 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# ORDER Imbalance\n", + "\n", + "* introduced buyvolume and sellvolume on bar level.\n", + "* calculated order imbalance ratio (buyvolume-sellvolume/totalvolume)\n", + "* calculated on multiple timeframes\n", + "* entry based on confluences imbalances\n", + "\n", + "## Note\n", + "\n", + "Order disbalance nepodminuje zmenu ceny (tzn. muze byt order disbalance na buy stranu, ale cena nemusi jit nahoru a naopak)\n", + "Nicmene pokud je disbalance delsi a nedochazi ke zmene ceny - může to něco indikovat. \n", + "Vytvořit si kumulativní disbalance - kumulátory, které se budou načítat, když se budou silné disbalance, bez změny ceny. Tento akumulátor se bude nabíjet disbalancí a vybíjet příslušnou změnou ceny." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from v2realbot.tools.loadbatch import load_batch\n", + "from v2realbot.utils.utils import zoneNY\n", + "import pandas as pd\n", + "import numpy as np\n", + "import vectorbtpro as vbt\n", + "# from itables import init_notebook_mode, show\n", + "import datetime\n", + "from itertools import product\n", + "from v2realbot.config import ACCOUNT1_PAPER_API_KEY, ACCOUNT1_PAPER_SECRET_KEY, DATA_DIR\n", + "from lightweight_charts import JupyterChart, chart as chartp, Panel\n", + "from IPython.display import display\n", + "\n", + "# init_notebook_mode(all_interactive=True)\n", + "\n", + "vbt.settings.set_theme(\"dark\")\n", + "vbt.settings['plotting']['layout']['width'] = 1280\n", + "vbt.settings.plotting.auto_rangebreaks = True\n", + "# Set the option to display with pagination\n", + "pd.set_option('display.notebook_repr_html', True)\n", + "pd.set_option('display.max_rows', 10) # Number of rows per page\n", + "\n", + "# Define the market open and close times\n", + "market_open = datetime.time(9, 30)\n", + "market_close = datetime.time(16, 0)\n", + "entry_window_opens = 1\n", + "entry_window_closes = 370\n", + "\n", + "forced_exit_start = 380\n", + "forced_exit_end = 390\n", + "\n", + "#LOAD FROM PARQUET\n", + "#list all files is dir directory with parquet extension\n", + "dir = DATA_DIR + \"/notebooks/\"\n", + "import os\n", + "files = [f for f in os.listdir(dir) if f.endswith(\".parquet\")]\n", + "print('\\n'.join(map(str, files)))\n", + "file_name = \"ohlcv_df-BAC-2023-01-01T09_30_00-2024-05-25T15_30_00-47BCFOPUVWZ-100.parquet\"\n", + "ohlcv_df = pd.read_parquet(dir+file_name,engine='pyarrow')\n", + "#filter ohlcv_df to certain date range (assuming datetime index)\n", + "ohlcv_df = ohlcv_df.loc[\"2024-02-12 9:30\":\"2024-03-14 16:00\"]\n", + "\n", + "#add vwap column to ohlcv_df\n", + "#ohlcv_df[\"hlcc4\"] = (ohlcv_df[\"close\"] + ohlcv_df[\"high\"] + 
ohlcv_df[\"low\"] + ohlcv_df[\"close\"]) / 4\n", + "\n", + "basic_data = vbt.Data.from_data(vbt.symbol_dict({\"BAC\": ohlcv_df}), tz_convert=zoneNY)\n", + "ohlcv_df= None" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Add resample function to custom columns" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from vectorbtpro.utils.config import merge_dicts, Config, HybridConfig\n", + "from vectorbtpro import _typing as tp\n", + "from vectorbtpro.generic import nb as generic_nb\n", + "\n", + "_feature_config: tp.ClassVar[Config] = HybridConfig(\n", + " {\n", + " \"buyvolume\": dict(\n", + " resample_func=lambda self, obj, resampler: obj.vbt.resample_apply(\n", + " resampler,\n", + " generic_nb.sum_reduce_nb,\n", + " )\n", + " ),\n", + " \"sellvolume\": dict(\n", + " resample_func=lambda self, obj, resampler: obj.vbt.resample_apply(\n", + " resampler,\n", + " generic_nb.sum_reduce_nb,\n", + " )\n", + " )\n", + " }\n", + ")\n", + "\n", + "basic_data._feature_config = _feature_config" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#asic_data.stats()\n", + "basic_data.wrapper.index.normalize().nunique()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#basic_data.ohlcv.plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "second_data = basic_data[['open', 'high', 'low', 'close', 'volume','vwap','buyvolume','sellvolume']]\n", + "t1data = basic_data[['open', 'high', 'low', 'close', 'volume','vwap','buyvolume','sellvolume']].resample(\"1T\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "t1data = t1data.transform(lambda df: df.between_time('09:30', '16:00').dropna())\n", + "#t1data = t1data.xloc[\"2024-02-12 
9:30\":\"2024-02-14 16:00\"]\n", + "basic_data = t1data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "buyvolume = t1data.data[\"BAC\"].buyvolume\n", + "sellvolume = t1data.data[\"BAC\"].sellvolume\n", + "totalvolume = buyvolume + sellvolume\n", + "\n", + "#adjust to minimal value to avoid division by zero\n", + "sellvolume_adjusted = sellvolume.replace(0, 1e-10)\n", + "oibratio = buyvolume / sellvolume\n", + "\n", + "#cumulative order flow (net difference)\n", + "cof = buyvolume - sellvolume\n", + "\n", + "# Calculate the order imbalance (net differene) normalize the order imbalance by calculating the difference between buy and sell volumes and then scaling it by the total volume.\n", + "order_imbalance = cof / totalvolume\n", + "order_imbalance = order_imbalance.fillna(0) #nan nahradime 0\n", + "\n", + "order_imbalance_allvolume = cof / t1data.data[\"BAC\"].volume\n", + "\n", + "order_imbalance_sma = vbt.indicator(\"talib:EMA\").run(order_imbalance, timeperiod=5)\n", + "short_signals = order_imbalance.vbt < -0.5\n", + "#short_entries = oibratio.vbt < 0.01\n", + "short_signals.value_counts()\n", + "short_signals.name = \"short_entries\"\n", + "#.fillna(False)\n", + "short_exits = short_signals.shift(-2).fillna(False).astype(bool)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "##z tohoto si udelat plot funkci (i pro entries,exits)\n", + "#t1data = t1data[[\"open\", \"high\", \"low\", \"close\", \"volume\"]]\n", + "chart = JupyterChart(width=1000, height=600, inner_width=1, inner_height=0.5, leftScale=True)\n", + "#set resolution\n", + "t1data = basic_data[['open', 'high', 'low', 'close', 'volume','vwap','buyvolume','sellvolume']].resample(\"1T\")\n", + "t1data = t1data.transform(lambda df: df.between_time('09:30', '16:00').dropna())\n", + "chart.set(t1data.data[\"BAC\"])\n", + "line_vwap = chart.create_line(name=\"vwap\")#, 
color=\"blue\")\n", + "line_vwap.set(t1data.vwap)\n", + "\n", + "\n", + "chart.topbar.textbox(\"title\",\"Nadpis\")\n", + "chart2 = chart.create_subchart(position='right', width=1, height=0.5, sync=True, leftScale=True)\n", + "t2data = basic_data[['open', 'high', 'low', 'close', 'volume','vwap','buyvolume','sellvolume']].resample(\"5T\")\n", + "t2data = t2data.transform(lambda df: df.between_time('09:30', '16:00').dropna())\n", + "\n", + "\n", + "#5min close realigned to 1T\n", + "close_realigned = t2data.close.vbt.realign_closing(\"1T\").between_time('09:30', '16:00').dropna()\n", + "close_realigned = close_realigned[close_realigned.index.dayofweek < 5]\n", + "#close_realigned = close_realigned[close_realigned.index.dayofweek < 5]\n", + "line1 = chart.create_line(name=\"5minclose\")#, color=\"green\")\n", + "line1.set(close_realigned)\n", + "\n", + "#sma z realigned dat\n", + "sma_tp = 20\n", + "sma_t2 = vbt.indicator(\"talib:EMA\").run(close_realigned, timeperiod=sma_tp)\n", + "smaline = chart.create_line(name=f\"sma{sma_tp}\")#, color=\"blue\")\n", + "smaline.set(sma_t2)\n", + "\n", + "\n", + "#sma z puvodnich resamplovanych dat plus navic realign, melo by byt stejne \n", + "sma_real = vbt.indicator(\"talib:EMA\").run(t2data.close, timeperiod=sma_tp)\n", + "sma_real_value = sma_real.real.vbt.realign_closing(\"1T\").between_time('09:30', '16:00').dropna()\n", + "sma_real_value = sma_real_value[sma_real_value.index.dayofweek < 5]\n", + "smaline_real = chart.create_line(name=f\"smareal{sma_tp}\", color=\"yellow\")\n", + "smaline_real.set(sma_real_value)\n", + "\n", + "#resample 15T\n", + "t15data = basic_data[['open', 'high', 'low', 'close', 'volume','vwap','buyvolume','sellvolume']].resample(\"15T\")\n", + "t15data = t15data.transform(lambda df: df.between_time('09:30', '16:00').dropna())\n", + "\n", + "#5min close realigned to 1T\n", + "close_15realigned = t15data.close.vbt.realign_closing(\"1T\").between_time('09:30', '16:00').dropna()\n", + "close_15realigned = 
close_15realigned[close_15realigned.index.dayofweek < 5]\n", + "#close_realigned = close_realigned[close_realigned.index.dayofweek < 5]\n", + "line2 = chart.create_line(name=\"15minclose\")#, color=\"pink\")\n", + "line2.set(close_15realigned)\n", + "\n", + "\n", + "chart.legend(True)\n", + "hst = chart2.create_histogram(name=\"buyvolume\", color=\"rgba(53, 94, 59, 0.6)\") #green transparent\n", + "hst1 = chart2.create_histogram(name=\"sellvolume\", color=\"rgba(165, 42, 42, 0.6)\") #red transparent\n", + "hst.set(t1data.data[\"BAC\"])\n", + "hst1.set(t1data.data[\"BAC\"])\n", + "line2 = chart2.create_line(name=\"5minclose\")#, color=\"green\")\n", + "line2.set(close_realigned)\n", + "\n", + "lineoib = chart2.create_line(name=\"oib\", priceScaleId=\"left\") #color=\"violet\", \n", + "#lineoib.scale(0.7,0)\n", + "lineoib.set(order_imbalance_allvolume)\n", + "\n", + "lineoib_sma = chart2.create_line(name=\"oibsma5\", priceScaleId=\"left\") #, color=\"blue\", \n", + "lineoib_sma.set(order_imbalance_sma)\n", + "\n", + "chart.fit()\n", + "chart2.legend(True)\n", + "#\n", + "line2.markers_set(short_signals, \"entries\")\n", + "# TODO jelikoz se davaji do jednoho pole je treba zajistit spravne sortovani\n", + "# domyslet jak to pojmout iterativni doplnovani markeru\n", + "line2.markers_set(short_exits, \"exits\")\n", + "\n", + "\n", + "chart2.fit()\n", + "chart.load()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "short_signals.info()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#vbt.IF.list_indicators(\"*ma\")\n", + "vbt.phelp(vbt.indicator(\"talib:EMA\").run)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sma = vbt.indicator(\"talib:EMA\").run(t1data.close, 
timeperiod=20)\n", + "sma.real.info()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "rr = vbt.RSI.run(t1data.close)\n", + "type(rr)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "buyvolume = t1data.data[\"BAC\"].buyvolume\n", + "sellvolume = t1data.data[\"BAC\"].sellvolume\n", + "totalvolume = buyvolume + sellvolume\n", + "\n", + "#adjust to minimal value to avoid division by zero\n", + "sellvolume_adjusted = sellvolume.replace(0, 1e-10)\n", + "oibratio = buyvolume / sellvolume\n", + "\n", + "#cumulative order flow (net difference)\n", + "cof = buyvolume - sellvolume\n", + "\n", + "# Calculate the order imbalance (net differene) normalize the order imbalance by calculating the difference between buy and sell volumes and then scaling it by the total volume.\n", + "order_imbalance = cof / totalvolume\n", + "order_imbalance = order_imbalance.fillna(0) #nan nahradime 0\n", + "\n", + "order_imbalance_allvolume = cof / t1data.data[\"BAC\"].volume" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "order_imbalance_sma = vbt.indicator(\"talib:EMA\").run(order_imbalance, timeperiod=5)\n", + "short_signals = order_imbalance.vbt < -0.5\n", + "#short_entries = oibratio.vbt < 0.01\n", + "short_signals.value_counts()\n", + "short_signals.name = \"short_entries\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "short_signals.info()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "short_signals" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "order_imbalance.fillna(0)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + 
}, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "order_imbalance.vbt.plot()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "order_imbalance" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "chartN = JupyterChart(width=500, height=300, inner_width=1, inner_height=0.3, leftScale=True)\n", + "t1data = basic_data[['open', 'high', 'low', 'close', 'volume','vwap','buyvolume','sellvolume']].resample(\"1T\")\n", + "t1data = t1data.transform(lambda df: df.between_time('09:30', '16:00').dropna())\n", + "chartN.set(t1data.data[\"BAC\"])\n", + "line_sma = chartN.create_line(name=\"sma\", priceScaleId=\"right\")#, color=\"blue\")\n", + "line_sma.set(sma)\n", + "# line_sma.markers_set(short_signals, \"entries\")\n", + "# line_sma.markers_set(short_exits, \"exits\")\n", + "# hst = chartN.create_histogram(name=\"oivol\")\n", + "# hst.set(order_imbalance_allvolume)\n", + "# chartN.legend(True)\n", + "# chartN.fit()\n", + "\n", + "# subchart = chartN.create_subchart(position='right', width=1, height=0.5, sync=False, leftScale=True)\n", + "# # subchart.set(t1data.data[\"BAC\"])\n", + "# line_sma1 = subchart.create_line(name=\"smao\", priceScaleId=\"left\")#, color=\"blue\")\n", + "# line_sma1.set(sma)\n", + "# # line_sma1.markers_set(short_signals, \"entries\")\n", + "# # line_sma1.markers_set(short_exits, \"exits\")\n", + "# hsto = subchart.create_histogram(name=\"oivolo\")\n", + "# hsto.set(order_imbalance_sma)\n", + "\n", + "chart2 = chartN.create_subchart(position='left', width=1, height=0.5, sync=True, leftScale=True, toolbox=True)\n", + "# hst = chart2.create_histogram(name=\"buyvolume\", color=\"rgba(53, 94, 59, 0.6)\") #green transparent\n", + "# hst1 = chart2.create_histogram(name=\"sellvolume\", color=\"rgba(165, 42, 42, 0.6)\") #red transparent\n", + "# 
hst.set(t1data.data[\"BAC\"])\n", + "# hst1.set(t1data.data[\"BAC\"])\n", + "line2 = chart2.create_line(name=\"sma\")#, color=\"green\")\n", + "line2.set(sma)\n", + "chart2.topbar.textbox(\"title\",\"Nadpis\")\n", + "# chartN.topbar.textbox(\"title\",\"NadpisT\")\n", + "\n", + "# subchart.legend(True)\n", + "# subchart.fit()\n", + "chartN.load()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "##z tohoto si udelat plot funkci (i pro entries,exits)\n", + "#t1data = t1data[[\"open\", \"high\", \"low\", \"close\", \"volume\"]]\n", + "chart = JupyterChart(width=1000, height=600, inner_width=1, inner_height=0.5, leftScale=True)\n", + "#set resolution\n", + "t1data = basic_data[['open', 'high', 'low', 'close', 'volume','vwap','buyvolume','sellvolume']].resample(\"1T\")\n", + "t1data = t1data.transform(lambda df: df.between_time('09:30', '16:00').dropna())\n", + "chart.set(t1data.data[\"BAC\"])\n", + "line_vwap = chart.create_line(name=\"vwap\")#, color=\"blue\")\n", + "line_vwap.set(t1data.vwap)\n", + "\n", + "\n", + "chart.topbar.textbox(\"title\",\"Nadpis\")\n", + "chart2 = chart.create_subchart(position='right', width=1, height=0.5, sync=True, leftScale=True)\n", + "t2data = basic_data[['open', 'high', 'low', 'close', 'volume','vwap','buyvolume','sellvolume']].resample(\"5T\")\n", + "t2data = t2data.transform(lambda df: df.between_time('09:30', '16:00').dropna())\n", + "\n", + "\n", + "#5min close realigned to 1T\n", + "close_realigned = t2data.close.vbt.realign_closing(\"1T\").between_time('09:30', '16:00').dropna()\n", + "close_realigned = close_realigned[close_realigned.index.dayofweek < 5]\n", + "#close_realigned = close_realigned[close_realigned.index.dayofweek < 5]\n", + "line1 = chart.create_line(name=\"5minclose\")#, color=\"green\")\n", + "line1.set(close_realigned)\n", + "\n", + "#sma z realigned dat\n", + "sma_tp = 20\n", + "sma_t2 = vbt.indicator(\"talib:EMA\").run(close_realigned, 
timeperiod=sma_tp)\n", + "smaline = chart.create_line(name=f\"sma{sma_tp}\")#, color=\"blue\")\n", + "smaline.set(sma_t2)\n", + "\n", + "\n", + "#sma z puvodnich resamplovanych dat plus navic realign, melo by byt stejne \n", + "sma_real = vbt.indicator(\"talib:EMA\").run(t2data.close, timeperiod=sma_tp)\n", + "sma_real_value = sma_real.real.vbt.realign_closing(\"1T\").between_time('09:30', '16:00').dropna()\n", + "sma_real_value = sma_real_value[sma_real_value.index.dayofweek < 5]\n", + "smaline_real = chart.create_line(name=f\"smareal{sma_tp}\", color=\"yellow\")\n", + "smaline_real.set(sma_real_value)\n", + "\n", + "#resample 15T\n", + "t15data = basic_data[['open', 'high', 'low', 'close', 'volume','vwap','buyvolume','sellvolume']].resample(\"15T\")\n", + "t15data = t15data.transform(lambda df: df.between_time('09:30', '16:00').dropna())\n", + "\n", + "#5min close realigned to 1T\n", + "close_15realigned = t15data.close.vbt.realign_closing(\"1T\").between_time('09:30', '16:00').dropna()\n", + "close_15realigned = close_15realigned[close_15realigned.index.dayofweek < 5]\n", + "#close_realigned = close_realigned[close_realigned.index.dayofweek < 5]\n", + "line2 = chart.create_line(name=\"15minclose\")#, color=\"pink\")\n", + "line2.set(close_15realigned)\n", + "\n", + "\n", + "chart.legend(True)\n", + "hst = chart2.create_histogram(name=\"buyvolume\", color=\"rgba(53, 94, 59, 0.6)\") #green transparent\n", + "hst1 = chart2.create_histogram(name=\"sellvolume\", color=\"rgba(165, 42, 42, 0.6)\") #red transparent\n", + "hst.set(t1data.data[\"BAC\"])\n", + "hst1.set(t1data.data[\"BAC\"])\n", + "line2 = chart2.create_line(name=\"5minclose\")#, color=\"green\")\n", + "line2.set(close_realigned)\n", + "\n", + "lineoib = chart2.create_line(name=\"oib\", priceScaleId=\"left\") #color=\"violet\", \n", + "#lineoib.scale(0.7,0)\n", + "lineoib.set(order_imbalance_allvolume)\n", + "\n", + "lineoib_sma = chart2.create_line(name=\"oibsma5\", priceScaleId=\"left\") #, 
color=\"blue\", \n", + "lineoib_sma.set(order_imbalance_sma)\n", + "\n", + "chart.fit()\n", + "chart2.legend(True)\n", + "#\n", + "line2.markers_set(short_signals, \"entries\")\n", + "# TODO jelikoz se davaji do jednoho pole je treba zajistit spravne sortovani\n", + "# domyslet jak to pojmout iterativni doplnovani markeru\n", + "line2.markers_set(short_exits, \"exits\")\n", + "\n", + "\n", + "chart2.fit()\n", + "chart.load()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sma.info()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#priminds list (same Y as price), secinds list (secondary Y napr. rsi), close, voluminds (volume based) list\n", + "def plot_2y_close(priminds, secinds, close, volumeinds, ohlcv=None):\n", + " fig = vbt.make_subplots(rows=2, cols=1, shared_xaxes=True, \n", + " specs=[[{\"secondary_y\": True}], [{\"secondary_y\": False}]], \n", + " vertical_spacing=0.02, subplot_titles=(\"Price and Indicators\", \"Volume\"))\n", + "\n", + " if ohlcv is not None:\n", + " ohlcv.vbt.ohlcv.plot(fig=fig, add_trace_kwargs=dict(row=1, col=1))\n", + "\n", + " # Plotting the close price\n", + " close.vbt.plot(fig=fig, add_trace_kwargs=dict(secondary_y=False,row=1, col=1), trace_kwargs=dict(line=dict(color=\"blue\")))\n", + " \n", + " # Plotting primary indicators on the first row\n", + " for ind in priminds:\n", + " if isinstance(ind, pd.Series):\n", + " #if series has no name, make the name same as the variable name\n", + " \n", + " ind = ind.vbt\n", + " ind.plot(fig=fig, add_trace_kwargs=dict(secondary_y=False, row=1, col=1))\n", + " \n", + " # Plotting secondary indicators on the first row\n", + " for ind in secinds:\n", + " #ind = ind.rename(str(ind.name))\n", + " if isinstance(ind, pd.Series):\n", + " ind = ind.vbt\n", + " ind.plot(fig=fig, add_trace_kwargs=dict(secondary_y=True, row=1, col=1), 
trace_kwargs=dict(line=dict(color=\"rgba(255, 0, 0, 0.4)\")))\n", + " \n", + " for indvolume in volumeinds:\n", + " # Plotting the volume on the second row\n", + " indvolume.rename(str(indvolume.name)).vbt.barplot(fig=fig, add_trace_kwargs=dict(secondary_y=False, row=2, col=1))\n", + " #vbt.Bar(indvolume, fig=fig, add_trace_kwargs=dict(secondary_y=False, row=2, col=1))\n", + " \n", + " return fig\n", + "\n", + "fig = plot_2y_close([sma], [order_imbalance.rename(\"order_imbalance_norm\"),order_imbalance_sma.real.rename(\"oib_sma\")], t1data.close, [t1data.data[\"BAC\"].buyvolume, t1data.data[\"BAC\"].sellvolume, t1data.volume], t1data.data[\"BAC\"])\n", + "fig.update_yaxes(range=[33,34], secondary_y=False, row=1, col=1) #update y axis range\n", + "fig.update_yaxes(range=[-1,1], secondary_y=True, row=1, col=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib inline\n", + "t0data = basic_data\n", + "t1data = basic_data[['open', 'high', 'low', 'close', 'volume','vwap']].resample(\"1T\")\n", + "t2data = basic_data[['open', 'high', 'low', 'close', 'volume','vwap']].resample(\"15T\")\n", + "t3data = basic_data[['open', 'high', 'low', 'close', 'volume','vwap']].resample(\"30T\")\n", + "t4data = basic_data[['open', 'high', 'low', 'close', 'volume', 'vwap']].resample(\"D\").dropna()\n", + "\n", + "t1data = t1data.transform(lambda df: df.between_time('09:30', '16:00').dropna())\n", + "t2data = t2data.transform(lambda df: df.between_time('09:30', '16:00').dropna())\n", + "t3data = t3data.transform(lambda df: df.between_time('09:30', '16:00').dropna())\n", + "\n", + "#30min data to daily\n", + "# t4data = t3data.resample(\"D\").dropna()\n", + "\n", + "#t4data = t4data.transform(lambda df: df.between_time('09:30', '16:00').dropna())\n", + "#m1data.data[\"SPY\"].info()\n", + "\n", + "#m1data.data[\"SPY\"].vbt.ohlcv.plot()\n", + "#h2data.data[\"SPY\"].vbt.ohlcv.plot()\n", + "#ddata.data[\"SPY\"]\n", + 
"t2data.data[\"BAC\"].vbt.ohlcv.plot().show()\n", + "\n", + "\n", + "#t4data.data[\"BAC\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "t2data.close\n", + "\n", + "#in df remove rows with nan\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#realign na 1T = t1data + oriznout main session\n", + "t2data_vwap = t2data.vwap.vbt.realign_closing(\"1T\").between_time('09:30', '16:00').dropna()\n", + "t3data_vwap = t3data.vwap.vbt.realign_closing(\"1T\").between_time('09:30', '16:00').dropna()\n", + "t4data_vwap = t4data.vwap.vbt.realign_closing(\"1T\").dropna()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "t2data_vwap" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def plot_2y_close(priminds, secinds, close):\n", + " fig = vbt.make_subplots(rows=1, cols=1, shared_xaxes=True, specs=[[{\"secondary_y\": True}]], vertical_spacing=0.02, subplot_titles=(\"MOM\", \"Price\" ))\n", + " close.vbt.plot(fig=fig, add_trace_kwargs=dict(secondary_y=False), trace_kwargs=dict(line=dict(color=\"blue\")))\n", + " for ind in priminds:\n", + " if isinstance(ind, pd.Series):\n", + " ind = ind.vbt\n", + " ind.plot(fig=fig, add_trace_kwargs=dict(secondary_y=False))\n", + " for ind in secinds:\n", + " if isinstance(ind, pd.Series):\n", + " ind = ind.vbt\n", + " ind.plot(fig=fig, add_trace_kwargs=dict(secondary_y=True))\n", + " return fig" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "t4data.clos.vbt \n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "obvind = vbt.indicator.obv.run(t1data.close, t1data.volume)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + 
"metadata": {}, + "outputs": [], + "source": [ + "t1_lengtgh = 15\n", + "t2_length = 15\n", + "t3_length = 15\n", + "t4_length = 5\n", + "t1_th = 0.1\n", + "t2_th = 0.1\n", + "t3_th = 0.1\n", + "t4_th = 0.1\n", + "\n", + "\n", + "\n", + "#minute\n", + "t1slope = vbt.indicator(\"talib:LINEARREG_SLOPE \").run(t1data.close, timeperiod=t1_lengtgh) # -0.09, 0.09\n", + "t2slope = vbt.indicator(\"talib:LINEARREG_SLOPE \").run(t2data.vwap, timeperiod=t2_length) # -0.08 , 0.079\n", + "t3slope = vbt.indicator(\"talib:LINEARREG_SLOPE \").run(t3data.vwap, timeperiod=t3_length) # -0.08, 0.08\n", + "#daily\n", + "t4slope = vbt.indicator(\"talib:LINEARREG_SLOPE \").run(t4data.vwap, timeperiod=t4_length) # -0.1, 0.09\n", + "\n", + "plot_2y_close(priminds=[], secinds=[t1slope, t2slope, t3slope, t4slope], close=t1data.close).show()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#thirtymin_slope = thirtymin_slope.real.rename(\"30min\") #timto se prejmenuje real na 30min\n", + "t3slope = t3slope.real.vbt.realign_closing(\"1T\").between_time('09:30', '16:00').dropna()\n", + "##filter daily_slope_to_compare to only monday to friday\n", + "t3slope = t3slope[t3slope.index.dayofweek < 5]\n", + "\n", + "#t3slope.info()\n", + "\n", + "t2slope = t2slope.real.vbt.realign_closing(\"1T\").between_time('09:30', '16:00').dropna()\n", + "##filter daily_slope_to_compare to only monday to friday\n", + "t2slope = t2slope[t2slope.index.dayofweek < 5]\n", + "\n", + "t2slope.info()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "oibratio" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "#\n", + "short_signals = order_imbalance.vbt < -0.3\n", + "#short_entries = oibratio.vbt < 0.01\n", + "short_signals.value_counts()\n", + "\n", + "long_signals = order_imbalance.vbt > 0.3\n", + "#entries = 
oibratio.vbt > 10\n", + "long_signals.value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig = vbt.make_subplots(rows=3, cols=1, shared_xaxes=True, \n", + " specs=[[{\"secondary_y\": True}], [{\"secondary_y\": True}], [{\"secondary_y\": False}]], \n", + " vertical_spacing=0.02, subplot_titles=(\"Price and Indicators\", \"Volume\"))\n", + "t1data.data[\"BAC\"].vbt.ohlcv.plot(fig=fig, add_trace_kwargs=dict(secondary_y=False, row=1, col=1))\n", + "#oibratio.vbt.plot(fig=fig, add_trace_kwargs=dict(secondary_y=True, row=1, col=1))\n", + "order_imbalance.vbt.plot(fig=fig, add_trace_kwargs=dict(secondary_y=True, row=1, col=1))\n", + "long_signals.vbt.signals.plot_as_entries(t1data.close, fig=fig, trace_kwargs=dict(name=\"LONGS\",\n", + " line=dict(color=\"#ffe476\"),\n", + " marker=dict(color=\"limegreen\"),\n", + " fill=None,\n", + " connectgaps=True,\n", + " ), add_trace_kwargs=dict(secondary_y=False, row=1, col=1))\n", + "\n", + "short_signals.vbt.signals.plot_as_entries(t1data.close, fig=fig, trace_kwargs=dict(name=\"SHORTS\",\n", + " line=dict(color=\"#ffe476\"),\n", + " marker=dict(color=\"red\"),\n", + " fill=None,\n", + " connectgaps=True,\n", + " ), add_trace_kwargs=dict(secondary_y=False, row=1, col=1))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# thirtymin_slope_to_compare.vbt.xloc[\"04-16-2024\"].get()\n", + "thirty_down_signal.vbt.xloc[\"04-16-2024\"].get()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#short_signal = t1slope.real_below(t1_th) & t2slope.real_below(t2_th) & t3slope.real_below(t3_th) & t4slope.real_below(t4_th)\n", + "#long_signal = t1slope.real_above(t1_th) & t2slope.real_above(t2_th) & t3slope.real_above(t3_th) & t4slope.real_above(t4_th)\n", + "\n", + "#test na daily s reversem crossed 0\n", + "short_signal = 
t2slope.vbt < -0.01 & t3slope.vbt < -0.01 #min value of threshold\n", + "long_signal = t2slope.vbt > 0.01 & t3slope.vbt > 0.01 #min\n", + "\n", + "# thirty_up_signal = t3slope.vbt.crossed_above(0.01)\n", + "# thirty_down_signal = t3slope.vbt.crossed_below(-0.01)\n", + "\n", + "fig = plot_2y_close(priminds=[], secinds=[t3slope], close=t1data.close)\n", + "#short_signal.vbt.signals.plot_as_entries(basic_data.close, fig=fig)\n", + "\n", + "short_signal.vbt.signals.plot_as_entries(t1data.close, fig=fig, trace_kwargs=dict(name=\"SHORTS\",\n", + " line=dict(color=\"#ffe476\"),\n", + " marker=dict(color=\"red\", symbol=\"triangle-down\"),\n", + " fill=None,\n", + " connectgaps=True,\n", + " ))\n", + "long_signal.vbt.signals.plot_as_entries(t1data.close, fig=fig, trace_kwargs=dict(name=\"LONGS\",\n", + " line=dict(color=\"#ffe476\"),\n", + " marker=dict(color=\"limegreen\"),\n", + " fill=None,\n", + " connectgaps=True,\n", + " ))\n", + "\n", + "# thirty_down_signal.vbt.signals.plot_as_entries(t1data.close, fig=fig, trace_kwargs=dict(name=\"DOWN30\",\n", + "# line=dict(color=\"#ffe476\"),\n", + "# marker=dict(color=\"yellow\", symbol=\"triangle-down\"),\n", + "# fill=None,\n", + "# connectgaps=True,\n", + "# ))\n", + "# thirty_up_signal.vbt.signals.plot_as_entries(t1data.close, fig=fig, trace_kwargs=dict(name=\"UP30\",\n", + "# line=dict(color=\"#ffe476\"),\n", + "# marker=dict(color=\"grey\"),\n", + "# fill=None,\n", + "# connectgaps=True,\n", + "# ))\n", + "\n", + "# thirtymin_slope_to_compare.vbt.plot(fig=fig, add_trace_kwargs=dict(secondary_y=True), trace_kwargs=dict(name=\"30min slope\",\n", + "# line=dict(color=\"yellow\"), \n", + "# fill=None,\n", + "# connectgaps=True,\n", + "# ))\n", + "\n", + "fig.show()\n", + "# print(\"short signal\")\n", + "# print(short_signal.value_counts())\n", + "\n", + "#forced_exit = pd.Series(False, index=close.index)\n", + "forced_exit = basic_data.symbol_wrapper.fill(False)\n", + "#entry_window_open = pd.Series(False, 
index=close.index)\n", + "entry_window_open= basic_data.symbol_wrapper.fill(False)\n", + "\n", + "# Calculate the time difference in minutes from market open for each timestamp\n", + "elapsed_min_from_open = (forced_exit.index.hour - market_open.hour) * 60 + (forced_exit.index.minute - market_open.minute)\n", + "\n", + "entry_window_open[(elapsed_min_from_open >= entry_window_opens) & (elapsed_min_from_open < entry_window_closes)] = True\n", + "\n", + "#print(entry_window_open.value_counts())\n", + "\n", + "forced_exit[(elapsed_min_from_open >= forced_exit_start) & (elapsed_min_from_open < forced_exit_end)] = True\n", + "short_entries = (short_signal & entry_window_open)\n", + "short_exits = forced_exit\n", + "\n", + "entries = (long_signal & entry_window_open)\n", + "exits = forced_exit\n", + "#long_entries.info()\n", + "#number of trues and falses in long_entries\n", + "# print(short_exits.value_counts())\n", + "# print(short_entries.value_counts())\n", + "\n", + "#fig = plot_2y_close([],[momshort, rocp], close)\n", + "#short_signal.vbt.signals.plot_as_entries(close, fig=fig, add_trace_kwargs=dict(secondary_y=False))\n", + "#print(sl_stop)\n", + "#short_entries=short_entries, short_exits=short_exits,\n", + "# pf = vbt.Portfolio.from_signals(close=basic_data, entries=short_entries, exits=exits, tsl_stop=0.005, tp_stop = 0.05, fees=0.0167/100, freq=\"1s\") #sl_stop=sl_stop, tp_stop = sl_stop,\n", + "\n", + "# pf.stats()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "forced_exit = t1data.symbol_wrapper.fill(False)\n", + "#entry_window_open = pd.Series(False, index=close.index)\n", + "entry_window_open= t1data.symbol_wrapper.fill(False)\n", + "\n", + "# Calculate the time difference in minutes from market open for each timestamp\n", + "elapsed_min_from_open = (forced_exit.index.hour - market_open.hour) * 60 + (forced_exit.index.minute - market_open.minute)\n", + "\n", + 
"entry_window_open[(elapsed_min_from_open >= entry_window_opens) & (elapsed_min_from_open < entry_window_closes)] = True\n", + "\n", + "#print(entry_window_open.value_counts())\n", + "\n", + "forced_exit[(elapsed_min_from_open >= forced_exit_start) & (elapsed_min_from_open < forced_exit_end)] = True\n", + "short_entries = (short_signals & entry_window_open)\n", + "short_exits = forced_exit\n", + "\n", + "entries = (long_signals & entry_window_open)\n", + "exits = forced_exit\n", + "\n", + "pf = vbt.Portfolio.from_signals(close=t1data, entries=entries, exits=exits, short_entries=short_entries, short_exits=exits,\n", + "td_stop=2, time_delta_format=\"rows\",\n", + "tsl_stop=0.005, tp_stop = 0.005, fees=0.0167/100)#, freq=\"1s\") #sl_stop=sl_stop, tp_stop = sl_stop,\n", + "\n", + "pf.stats()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf.plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf.get_drawdowns().records_readable" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf.orders.records_readable" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/research/strat_ORDER_IMBALANCE/v2_SINGLE.ipynb b/research/strat_ORDER_IMBALANCE/v2_SINGLE.ipynb new file mode 100644 index 0000000..8b2386a --- /dev/null +++ b/research/strat_ORDER_IMBALANCE/v2_SINGLE.ipynb @@ -0,0 +1,1411 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# ORDER Imbalance\n", + "\n", 
+ "* introduced buyvolume and sellvolume on bar level.\n", + "* calculated order imbalance ratio ((buyvolume - sellvolume) / totalvolume)\n", + "* calculated on multiple timeframes\n", + "* entry based on confluence of imbalances\n", + "\n", + "## Note\n", + "\n", + "Order disbalance nepodminuje zmenu ceny (tzn. muze byt order disbalance na buy stranu, ale cena nemusi jit nahoru a naopak)\n", + "Nicmene pokud je disbalance delsi a nedochazi ke zmene ceny - může to něco indikovat. \n", + "Vytvořit si kumulativní disbalance - kumulátory, které se budou načítat, když se budou silné disbalance, bez změny ceny. Tento akumulátor se bude nabíjet disbalancí a vybíjet příslušnou změnou ceny." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from dotenv import load_dotenv\n", + "\n", + "#as V2realbot is client , load env variables here\n", + "env_file = \"/Users/davidbrazda/Documents/Development/python/.env\"\n", + "# Load the .env file\n", + "load_dotenv(env_file)\n", + "\n", + "from v2realbot.utils.utils import zoneNY\n", + "import pandas as pd\n", + "import numpy as np\n", + "import vectorbtpro as vbt\n", + "# from itables import init_notebook_mode, show\n", + "import datetime\n", + "from itertools import product\n", + "from v2realbot.config import DATA_DIR\n", + "from lightweight_charts import JupyterChart, chart, Panel\n", + "from IPython.display import display\n", + "\n", + "# init_notebook_mode(all_interactive=True)\n", + "\n", + "vbt.settings.set_theme(\"dark\")\n", + "vbt.settings['plotting']['layout']['width'] = 1280\n", + "vbt.settings.plotting.auto_rangebreaks = True\n", + "# Set the option to display with pagination\n", + "pd.set_option('display.notebook_repr_html', True)\n", + "pd.set_option('display.max_rows', 10) # Number of rows per page" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Define the market open and close times\n", +
"market_open = datetime.time(9, 30)\n", + "market_close = datetime.time(16, 0)\n", + "entry_window_opens = 1\n", + "entry_window_closes = 370\n", + "forced_exit_start = 380\n", + "forced_exit_end = 390\n", + "\n", + "#LOAD FROM PARQUET\n", + "#list all files is dir directory with parquet extension\n", + "dir = DATA_DIR + \"/notebooks/\"\n", + "import os\n", + "files = [f for f in os.listdir(dir) if f.endswith(\".parquet\")]\n", + "print('\\n'.join(map(str, files)))\n", + "file_name = \"ohlcv_df-BAC-2023-01-01T09_30_00-2024-05-25T15_30_00-47BCFOPUVWZ-100.parquet\"\n", + "ohlcv_df = pd.read_parquet(dir+file_name,engine='pyarrow')\n", + "#filter ohlcv_df to certain date range (assuming datetime index)\n", + "#ohlcv_df = ohlcv_df.loc[\"2024-02-12 9:30\":\"2024-02-14 16:00\"]\n", + "\n", + "#add vwap column to ohlcv_df\n", + "#ohlcv_df[\"hlcc4\"] = (ohlcv_df[\"close\"] + ohlcv_df[\"high\"] + ohlcv_df[\"low\"] + ohlcv_df[\"close\"]) / 4\n", + "\n", + "basic_data = vbt.Data.from_data(vbt.symbol_dict({\"BAC\": ohlcv_df}), tz_convert=zoneNY)\n", + "ohlcv_df= None\n", + "basic_data.wrapper.index.normalize().nunique()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "basic_data.data[\"BAC\"].info()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Add resample function to custom columns" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from vectorbtpro.utils.config import merge_dicts, Config, HybridConfig\n", + "from vectorbtpro import _typing as tp\n", + "from vectorbtpro.generic import nb as generic_nb\n", + "\n", + "_feature_config: tp.ClassVar[Config] = HybridConfig(\n", + " {\n", + " \"buyvolume\": dict(\n", + " resample_func=lambda self, obj, resampler: obj.vbt.resample_apply(\n", + " resampler,\n", + " generic_nb.sum_reduce_nb,\n", + " )\n", + " ),\n", + " \"sellvolume\": dict(\n", + " resample_func=lambda self, 
obj, resampler: obj.vbt.resample_apply(\n", + " resampler,\n", + " generic_nb.sum_reduce_nb,\n", + " )\n", + " ),\n", + " \"trades\": dict(\n", + " resample_func=lambda self, obj, resampler: obj.vbt.resample_apply(\n", + " resampler,\n", + " generic_nb.sum_reduce_nb,\n", + " )\n", + " )\n", + " }\n", + ")\n", + "\n", + "basic_data._feature_config = _feature_config" + ] + }, + { + "cell_type": "code", +<<<<<<< HEAD + "execution_count": null, +======= + "execution_count": 5, +>>>>>>> parent of 7cc9f86... vbt pipeline edits + "metadata": {}, + "outputs": [], + "source": [ + "t1data = basic_data[['open', 'high', 'low', 'close', 'volume','vwap','buyvolume','trades','sellvolume']].resample(\"1T\")\n", + "t1data = t1data.transform(lambda df: df.between_time('09:30', '16:00').dropna())\n", + "t1data.data[\"BAC\"].info()\n", + "\n", + "m30data = basic_data[['open', 'high', 'low', 'close', 'volume','vwap','buyvolume','trades','sellvolume']].resample(\"30T\")\n", + "m30data = m30data.transform(lambda df: df.between_time('09:30', '16:00').dropna())\n", + "m30data.data[\"BAC\"].info()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], +<<<<<<< HEAD + "source": [ + "m30data.close.lw.plot()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Stationarity test (ADF)\n", + "If the p-value > 0.05, we need to find the order of differencing. 
Use returns (current price - previous price)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import time\n", + "from statsmodels.tsa.stattools import adfuller\n", + "\n", + "# Start the timer\n", + "start_time = time.time()\n", + "\n", + "# Perform the Augmented Dickey-Fuller test to check for stationarity\n", + "result = adfuller(m30data.xloc[:500].get(\"Close\"))\n", + "\n", + "# Stop the timer\n", + "end_time = time.time()\n", + "\n", + "# Print the results of the ADF test\n", + "print(result)\n", + "print('ADF Statistic: %f' % result[0])\n", + "print('p-value: %f' % result[1])\n", + "\n", + "# Print the time taken\n", + "print('Time taken: %f seconds' % (end_time - start_time))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Plot of close price vs log returns of close price vs just returns (delta)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "close = m30data.get(\"Close\")\n", + "log_returns = np.log(close) - np.log(close.shift(1))\n", + "returns = close - close.shift(1) #or close.diff()\n", + "/\n", + "\n", + "pane1 = Panel(\n", + " ohlcv=(), #(series, entries, exits, other_markers)\n", + " histogram=[], # [(series, name, \"rgba(53, 94, 59, 0.6)\", opacity)]\n", + " #[(series, name, entries, exits, other_markers)]\n", + " right=[(close, \"close\"),\n", + " ],\n", + " left = [ \n", + " (log_returns, \"log_returns\"),\n", + " (returns, \"returns\"),\n", + " # (ret_log_diff, \"ret_log_diff\"),\n", + " ],\n", + ")\n", + "ch = chart([pane1], size=\"s\", precision=6)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from statsmodels.graphics.tsaplots import plot_acf, plot_pacf\n", + "plot_acf(returns)\n", + "#no autocorrelation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ 
+ "from statsmodels.tsa.arima.model import ARIMA\n", + "# Fit an ARIMA model\n", + "model = ARIMA(returns, order=(0, 1, 0)) # ARIMA(0, 1, 0) is a simple random walk model\n", + "result = model.fit()\n", + "\n", + "# Print summary of the model\n", + "#print(result.summary())\n", + "\n", + "\n", + "result.resid.lw.plot(session=None)\n", + "\n", + "plot_acf(result.resid, title='ACF of Residuals')\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from statsmodels.graphics.tsaplots import month_plot\n", + "# import matplotlib.pyplot as plt\n", + "# plot_pacf(close, title='PACF of Close', lags=30)\n", + "# plot_acf(close, title='ACF of Close', lags=30)\n", + "month_plot(close)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "close.lw.plot()\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "returns" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "close.vbt.xloc[\"2023-01-03\":\"2024-01-03\"].obj\n", + "# [\"01-03-2023\":\"01-O3-2024\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "insample_close = close.vbt.xloc[\"2023-01-03\":\"2024-01-03\"].get()\n", + "insample_returns = returns.vbt.xloc[\"2023-01-03\":\"2024-01-03\"].get()\n", + "outsample_close = close.vbt.xloc[\"2024-01-03\":\"2025-01-03\"].obj\n", + "outsample_returns = returns.vbt.xloc[\"2024-01-03\":\"2025-01-03\"].obj\n", + "\n", + "insample_close.info()\n", + "outsample_close.info()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from statsmodels.tsa.ar_model import AutoReg\n", + "\n", + "\n", + "# Fit the AutoReg model\n", + "model = AutoReg(insample_close, lags=5, trend=\"ct\").fit()\n", + "\n", + "# 
Generate vectorized predictions\n", + "predictions = model.predict(start=1, end=len(insample_close))\n", + "predictions.index = insample_close.index\n", + "\n", + "# # Generate predictions for the differenced data\n", + "# differenced_predictions = result.predict(start=1, end=len(insample_returns))\n", + "\n", + "# differenced_predictions.index = insample_returns.index\n", + "# # Back-transform the differenced predictions to the original scale\n", + "# predictions = insample_close.shift(1) + differenced_predictions\n", + "\n", + "pane1 = Panel(\n", + " ohlcv=(), #(series, entries, exits, other_markers)\n", + " histogram=[], # [(series, name, \"rgba(53, 94, 59, 0.6)\", opacity)]\n", + " #[(series, name, entries, exits, other_markers)]\n", + " right=[(insample_close, \"insample close\"),\n", + " (predictions, \"prediction next close lag1\")\n", + " ],\n", + " # left = [ \n", + " # (differenced_predictions, \"returns_predicted\"),\n", + " # (insample_returns, \"insample returns\"),],\n", + ")\n", + "ch = chart([pane1], size=\"s\",precision=6, title=\"AutoReg prediction\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "# Fit the ARIMA model on the differenced data\n", + "model = ARIMA(insample_returns, order=(6, 1, 6)) # Note the differenced data, so d=0 here\n", + "result = model.fit()\n", + "\n", + "# Generate predictions for the differenced data\n", + "differenced_predictions = result.predict(start=1, end=len(insample_returns))\n", + "\n", + "differenced_predictions.index = insample_returns.index\n", + "# Back-transform the differenced predictions to the original scale\n", + "predictions = insample_close.shift(1) + differenced_predictions\n", + "\n", + "pane1 = Panel(\n", + " ohlcv=(), #(series, entries, exits, other_markers)\n", + " histogram=[], # [(series, name, \"rgba(53, 94, 59, 0.6)\", opacity)]\n", + " #[(series, name, entries, exits, other_markers)]\n", + " right=[(insample_close, 
\"insample close\"),\n", + " (predictions, \"prediction close\")\n", + " ],\n", + " left = [ \n", + " (differenced_predictions, \"returns_predicted\"),\n", + " (insample_returns, \"insample returns\"),],\n", + ")\n", + "ch = chart([pane1], size=\"s\",precision=6, title=\"ARIMA prediction\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Initialize an array to store the predictions\n", + "predictions = [np.nan] # No prediction for the first point\n", + "\n", + "# Rolling one-step-ahead forecasts\n", + "for t in range(10, len(close)): # Start from 2 to ensure enough data points\n", + " model = ARIMA(close[:t], order=(1, 1, 1)) # Fit ARIMA model up to time t-1\n", + " result = model.fit()\n", + " forecast = result.forecast(steps=1)\n", + " predictions.append(forecast.iloc[0]) # Store the forecast\n", + "\n", + "# Pad the predictions to align with the original series\n", + "predictions = [np.nan] * (len(close) - len(predictions)) + predictions\n", + "\n", + "# Convert predictions to a Pandas Series\n", + "predictions = pd.Series(predictions, index=close.index)\n", + "\n", + "pane1 = Panel(\n", + " ohlcv=(), #(series, entries, exits, other_markers)\n", + " histogram=[], # [(series, name, \"rgba(53, 94, 59, 0.6)\", opacity)]\n", + " #[(series, name, entries, exits, other_markers)]\n", + " right=[(close, \"real close\"),\n", + " (predictions, \"real close\")\n", + " ],\n", + " left = [ \n", + " (log_returns, \"log_returns\"),\n", + " (returns, \"returns\"),],\n", + ")\n", + "ch = chart([pane1], size=\"s\", session=None, precision=6, title=\"One step ahead ARIMA prediction\")\n", + "\n", + "\n" + ] +======= + "source": [] +>>>>>>> parent of 7cc9f86... 
vbt pipeline edits + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "bbands = vbt.talib(\"BBANDS\").run(\n", + " t1data.get(\"Close\"))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "supertrend = vbt.SUPERTREND.run(t1data.high, t1data.low, t1data.close, period=14, multiplier=3)\n", + "direction_series = supertrend.direction\n", + "uptrend= pd.Series(False, index=direction_series.index)\n", + "downtrend= pd.Series(False, index=direction_series.index)\n", + "\n", + "# -1 na 1\n", + "uptrend[1:] = (direction_series[1:] == 1) & (direction_series.shift(1)[1:] == -1)\n", + "# 1 na -1\n", + "downtrend[1:] = (direction_series[1:] == -1) & (direction_series.shift(1)[1:] == 1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "supertrendm30 = vbt.SUPERTREND.run(m30data.high, m30data.low, m30data.close, period=14, multiplier=3)\n", + "direction_series = supertrendm30.direction\n", + "uptrend_m30= pd.Series(False, index=direction_series.index)\n", + "downtrend_m30= pd.Series(False, index=direction_series.index)\n", + "# -1 na 1\n", + "uptrend_m30[1:] = (direction_series[1:] == 1) & (direction_series.shift(1)[1:] == -1)\n", + "# 1 na -1\n", + "downtrend_m30[1:] = (direction_series[1:] == -1) & (direction_series.shift(1)[1:] == 1)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "macd = vbt.talib(\"MACD\").run(m30data.close)\n", + "macd.macd" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pane1 = Panel(\n", + " ohlcv=(t1data.data[\"BAC\"],), #(series, entries, exits, other_markers)\n", + " histogram=[], # [(series, name, \"rgba(53, 94, 59, 0.6), opacity\")]\n", + " right=[#(bbands,), #[(series, name, entries, exits, other_markers)]\n", + " 
(t1data.data[\"BAC\"].vwap, \"vwap\", uptrend, downtrend),\n", + " (supertrend.trend,\"STtrend\"),\n", + " (supertrend.long,\"STlong\"),\n", + " (supertrend.short,\"STshort\")\n", + " ],\n", + " left = [(supertrend.direction,\"STdirection\")],\n", + " # right=[(bbands.upperband, \"upperband\",),\n", + " # (bbands.lowerband, \"lowerband\",),\n", + " # (bbands.middleband, \"middleband\",)\n", + " # ], #[(series, name, entries, exits, other_markers)]\n", + " middle1=[],\n", + " middle2=[],\n", + ")\n", + "\n", + "pane2 = Panel(\n", + " ohlcv=(m30data.data[\"BAC\"],uptrend_m30, downtrend_m30), #(series, entries, exits, other_markers)\n", + " histogram=[], # [(series, name, \"rgba(53, 94, 59, 0.6), opacity\")]\n", + " right=[#(bbands,), #[(series, name, entries, exits, other_markers)]\n", + " (supertrendm30.trend,\"STtrend30\"),\n", + " (supertrendm30.long,\"STlong30\"),\n", + " (supertrendm30.short,\"STshort30\")\n", + " ],\n", + " left = [(supertrendm30.direction,\"STdirection30\")],\n", + " # right=[(bbands.upperband, \"upperband\",),\n", + " # (bbands.lowerband, \"lowerband\",),\n", + " # (bbands.middleband, \"middleband\",)\n", + " # ], #[(series, name, entries, exits, other_markers)]\n", + " middle1=[],\n", + " middle2=[],\n", + " title = \"30m\")\n", + "\n", + "pane3 = Panel(\n", + " ohlcv=(m30data.data[\"BAC\"],), #(series, entries, exits, other_markers)\n", + " histogram=[(macd.macdhist,\"macdhist30\",None,0.5)], # [(series, name, \"rgba(53, 94, 59, 0.6), opacity\")]\n", + " # right=[#(bbands,), #[(series, name, entries, exits, other_markers)]\n", + " # (supertrendm30.trend,\"STtrend30\"),\n", + " # (supertrendm30.long,\"STlong30\"),\n", + " # (supertrendm30.short,\"STshort30\")\n", + " # ],\n", + " left = [(macd.macd,\"macd30\"),\n", + " (macd.macdsignal,\"macdsignal30\")\n", + " ],\n", + " # right=[(bbands.upperband, \"upperband\",),\n", + " # (bbands.lowerband, \"lowerband\",),\n", + " # (bbands.middleband, \"middleband\",)\n", + " # ], #[(series, name, 
entries, exits, other_markers)]\n", + " middle1=[],\n", + " middle2=[],\n", + " title = \"30m_macd\")\n", + "\n", + "ch = chart([pane1, pane2, pane3], sync=False, size=\"l\", xloc=slice(\"2024-02-12 09:30\",\"2024-03-12\"))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "buyvolume = t1data.data[\"BAC\"].buyvolume\n", + "sellvolume = t1data.data[\"BAC\"].sellvolume\n", + "totalvolume = buyvolume + sellvolume\n", + "\n", + "#adjust to minimal value to avoid division by zero\n", + "sellvolume_adjusted = sellvolume.replace(0, 1e-10)\n", + "oibratio = buyvolume / sellvolume\n", + "\n", + "#cumulative order flow (net difference)\n", + "cof = buyvolume - sellvolume\n", + "\n", + "# Calculate the order imbalance (net differene) normalize the order imbalance by calculating the difference between buy and sell volumes and then scaling it by the total volume.\n", + "order_imbalance = cof / totalvolume\n", + "order_imbalance = order_imbalance.fillna(0) #nan nahradime 0\n", + "\n", + "order_imbalance_allvolume = cof / t1data.data[\"BAC\"].volume\n", + "\n", + "order_imbalance_sma = vbt.indicator(\"talib:EMA\").run(order_imbalance, timeperiod=5)\n", + "short_signals = order_imbalance.vbt < -0.5\n", + "#short_entries = oibratio.vbt < 0.01\n", + "short_signals.value_counts()\n", + "short_signals.name = \"short_entries\"\n", + "#.fillna(False)\n", + "short_exits = short_signals.shift(-2).fillna(False).astype(bool)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pane1 = Panel(\n", + " ohlcv=(t1data.data[\"BAC\"],), #(series, entries, exits, other_markers)\n", + " histogram=[(order_imbalance_allvolume, \"oib_allvolume\", \"rgba(53, 94, 59, 0.6)\",0.5),\n", + " (t1data.data[\"BAC\"].trades, \"trades\",None,0.4),\n", + " ], # [(series, name, \"rgba(53, 94, 59, 0.6)\", opacity)]\n", + " # right=[\n", + " # (supertrend.trend,\"STtrend\"),\n", + " # 
(supertrend.long,\"STlong\"),\n", + " # (supertrend.short,\"STshort\")\n", + " # ],\n", + " # left = [(supertrend.direction,\"STdirection\")],\n", + " # right=[(bbands.upperband, \"upperband\",),\n", + " # (bbands.lowerband, \"lowerband\",),\n", + " # (bbands.middleband, \"middleband\",)\n", + " # ], #[(series, name, entries, exits, other_markers)]\n", + " middle1=[],\n", + " middle2=[],\n", + ")\n", + "\n", + "pane2 = Panel(\n", + " ohlcv=(basic_data.data[\"BAC\"],), #(series, entries, exits, other_markers)\n", + " left=[(basic_data.data[\"BAC\"].trades, \"trades\")],\n", + " histogram=[(basic_data.data[\"BAC\"].trades, \"trades_hist\", \"white\", 0.5)], #\"rgba(53, 94, 59, 0.6)\"\n", + " # ], # [(series, name, \"rgba(53, 94, 59, 0.6)\")]\n", + " # right=[\n", + " # (supertrend.trend,\"STtrend\"),\n", + " # (supertrend.long,\"STlong\"),\n", + " # (supertrend.short,\"STshort\")\n", + " # ],\n", + " # left = [(supertrend.direction,\"STdirection\")],\n", + " # right=[(bbands.upperband, \"upperband\",),\n", + " # (bbands.lowerband, \"lowerband\",),\n", + " # (bbands.middleband, \"middleband\",)\n", + " # ], #[(series, name, entries, exits, other_markers)]\n", + " middle1=[],\n", + " middle2=[],\n", + ")\n", + "\n", + "\n", + "ch = chart([pane1, pane2], size=\"m\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "##z tohoto si udelat plot funkci (i pro entries,exits)\n", + "#t1data = t1data[[\"open\", \"high\", \"low\", \"close\", \"volume\"]]\n", + "chart = JupyterChart(width=1000, height=600, inner_width=1, inner_height=0.5, leftScale=True)\n", + "#set resolution\n", + "t1data = basic_data[['open', 'high', 'low', 'close', 'volume','vwap','buyvolume','sellvolume']].resample(\"1T\")\n", + "t1data = t1data.transform(lambda df: df.between_time('09:30', '16:00').dropna())\n", + "chart.set(t1data.data[\"BAC\"])\n", + "line_vwap = chart.create_line(name=\"vwap\")#, color=\"blue\")\n", + 
"line_vwap.set(t1data.vwap)\n", + "\n", + "\n", + "chart.topbar.textbox(\"title\",\"Nadpis\")\n", + "chart2 = chart.create_subchart(position='right', width=1, height=0.5, sync=True, leftScale=True)\n", + "t2data = basic_data[['open', 'high', 'low', 'close', 'volume','vwap','buyvolume','sellvolume']].resample(\"5T\")\n", + "t2data = t2data.transform(lambda df: df.between_time('09:30', '16:00').dropna())\n", + "\n", + "\n", + "#5min close realigned to 1T\n", + "close_realigned = t2data.close.vbt.realign_closing(\"1T\").between_time('09:30', '16:00').dropna()\n", + "close_realigned = close_realigned[close_realigned.index.dayofweek < 5]\n", + "#close_realigned = close_realigned[close_realigned.index.dayofweek < 5]\n", + "line1 = chart.create_line(name=\"5minclose\")#, color=\"green\")\n", + "line1.set(close_realigned)\n", + "\n", + "#sma z realigned dat\n", + "sma_tp = 20\n", + "sma_t2 = vbt.indicator(\"talib:EMA\").run(close_realigned, timeperiod=sma_tp)\n", + "smaline = chart.create_line(name=f\"sma{sma_tp}\")#, color=\"blue\")\n", + "smaline.set(sma_t2)\n", + "\n", + "\n", + "#sma z puvodnich resamplovanych dat plus navic realign, melo by byt stejne \n", + "sma_real = vbt.indicator(\"talib:EMA\").run(t2data.close, timeperiod=sma_tp)\n", + "sma_real_value = sma_real.real.vbt.realign_closing(\"1T\").between_time('09:30', '16:00').dropna()\n", + "sma_real_value = sma_real_value[sma_real_value.index.dayofweek < 5]\n", + "smaline_real = chart.create_line(name=f\"smareal{sma_tp}\", color=\"yellow\")\n", + "smaline_real.set(sma_real_value)\n", + "\n", + "#resample 15T\n", + "t15data = basic_data[['open', 'high', 'low', 'close', 'volume','vwap','buyvolume','sellvolume']].resample(\"15T\")\n", + "t15data = t15data.transform(lambda df: df.between_time('09:30', '16:00').dropna())\n", + "\n", + "#5min close realigned to 1T\n", + "close_15realigned = t15data.close.vbt.realign_closing(\"1T\").between_time('09:30', '16:00').dropna()\n", + "close_15realigned = 
close_15realigned[close_15realigned.index.dayofweek < 5]\n", + "#close_realigned = close_realigned[close_realigned.index.dayofweek < 5]\n", + "line2 = chart.create_line(name=\"15minclose\")#, color=\"pink\")\n", + "line2.set(close_15realigned)\n", + "\n", + "\n", + "chart.legend(True)\n", + "hst = chart2.create_histogram(name=\"buyvolume\", color=\"rgba(53, 94, 59, 0.6)\") #green transparent\n", + "hst1 = chart2.create_histogram(name=\"sellvolume\", color=\"rgba(165, 42, 42, 0.6)\") #red transparent\n", + "hst.set(t1data.data[\"BAC\"])\n", + "hst1.set(t1data.data[\"BAC\"])\n", + "line2 = chart2.create_line(name=\"5minclose\")#, color=\"green\")\n", + "line2.set(close_realigned)\n", + "\n", + "lineoib = chart2.create_line(name=\"oib\", priceScaleId=\"left\") #color=\"violet\", \n", + "#lineoib.scale(0.7,0)\n", + "lineoib.set(order_imbalance_allvolume)\n", + "\n", + "lineoib_sma = chart2.create_line(name=\"oibsma5\", priceScaleId=\"left\") #, color=\"blue\", \n", + "lineoib_sma.set(order_imbalance_sma)\n", + "\n", + "chart.fit()\n", + "chart2.legend(True)\n", + "#\n", + "line2.markers_set(short_signals, \"entries\")\n", + "# TODO jelikoz se davaji do jednoho pole je treba zajistit spravne sortovani\n", + "# domyslet jak to pojmout iterativni doplnovani markeru\n", + "line2.markers_set(short_exits, \"exits\")\n", + "\n", + "\n", + "chart2.fit()\n", + "chart.load()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "short_signals.info()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#vbt.IF.list_indicators(\"*ma\")\n", + "vbt.phelp(vbt.indicator(\"talib:EMA\").run)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sma = vbt.indicator(\"talib:EMA\").run(t1data.close, 
timeperiod=20)\n", + "sma.real.info()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "rr = vbt.RSI.run(t1data.close)\n", + "type(rr)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "buyvolume = t1data.data[\"BAC\"].buyvolume\n", + "sellvolume = t1data.data[\"BAC\"].sellvolume\n", + "totalvolume = buyvolume + sellvolume\n", + "\n", + "#adjust to minimal value to avoid division by zero\n", + "sellvolume_adjusted = sellvolume.replace(0, 1e-10)\n", + "oibratio = buyvolume / sellvolume\n", + "\n", + "#cumulative order flow (net difference)\n", + "cof = buyvolume - sellvolume\n", + "\n", + "# Calculate the order imbalance (net differene) normalize the order imbalance by calculating the difference between buy and sell volumes and then scaling it by the total volume.\n", + "order_imbalance = cof / totalvolume\n", + "order_imbalance = order_imbalance.fillna(0) #nan nahradime 0\n", + "\n", + "order_imbalance_allvolume = cof / t1data.data[\"BAC\"].volume" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "order_imbalance_sma = vbt.indicator(\"talib:EMA\").run(order_imbalance, timeperiod=5)\n", + "short_signals = order_imbalance.vbt < -0.5\n", + "#short_entries = oibratio.vbt < 0.01\n", + "short_signals.value_counts()\n", + "short_signals.name = \"short_entries\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "short_signals.info()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "short_signals" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "order_imbalance.fillna(0)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + 
}, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "order_imbalance.vbt.plot()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "order_imbalance" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "chartN = JupyterChart(width=500, height=300, inner_width=1, inner_height=0.3, leftScale=True)\n", + "t1data = basic_data[['open', 'high', 'low', 'close', 'volume','vwap','buyvolume','sellvolume']].resample(\"1T\")\n", + "t1data = t1data.transform(lambda df: df.between_time('09:30', '16:00').dropna())\n", + "chartN.set(t1data.data[\"BAC\"])\n", + "line_sma = chartN.create_line(name=\"sma\", priceScaleId=\"right\")#, color=\"blue\")\n", + "line_sma.set(sma)\n", + "# line_sma.markers_set(short_signals, \"entries\")\n", + "# line_sma.markers_set(short_exits, \"exits\")\n", + "# hst = chartN.create_histogram(name=\"oivol\")\n", + "# hst.set(order_imbalance_allvolume)\n", + "# chartN.legend(True)\n", + "# chartN.fit()\n", + "\n", + "# subchart = chartN.create_subchart(position='right', width=1, height=0.5, sync=False, leftScale=True)\n", + "# # subchart.set(t1data.data[\"BAC\"])\n", + "# line_sma1 = subchart.create_line(name=\"smao\", priceScaleId=\"left\")#, color=\"blue\")\n", + "# line_sma1.set(sma)\n", + "# # line_sma1.markers_set(short_signals, \"entries\")\n", + "# # line_sma1.markers_set(short_exits, \"exits\")\n", + "# hsto = subchart.create_histogram(name=\"oivolo\")\n", + "# hsto.set(order_imbalance_sma)\n", + "\n", + "chart2 = chartN.create_subchart(position='left', width=1, height=0.5, sync=True, leftScale=True, toolbox=True)\n", + "# hst = chart2.create_histogram(name=\"buyvolume\", color=\"rgba(53, 94, 59, 0.6)\") #green transparent\n", + "# hst1 = chart2.create_histogram(name=\"sellvolume\", color=\"rgba(165, 42, 42, 0.6)\") #red transparent\n", + "# 
hst.set(t1data.data[\"BAC\"])\n", + "# hst1.set(t1data.data[\"BAC\"])\n", + "line2 = chart2.create_line(name=\"sma\")#, color=\"green\")\n", + "line2.set(sma)\n", + "chart2.topbar.textbox(\"title\",\"Nadpis\")\n", + "# chartN.topbar.textbox(\"title\",\"NadpisT\")\n", + "\n", + "# subchart.legend(True)\n", + "# subchart.fit()\n", + "chartN.load()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "##z tohoto si udelat plot funkci (i pro entries,exits)\n", + "#t1data = t1data[[\"open\", \"high\", \"low\", \"close\", \"volume\"]]\n", + "chart = JupyterChart(width=1000, height=600, inner_width=1, inner_height=0.5, leftScale=True)\n", + "#set resolution\n", + "t1data = basic_data[['open', 'high', 'low', 'close', 'volume','vwap','buyvolume','sellvolume']].resample(\"1T\")\n", + "t1data = t1data.transform(lambda df: df.between_time('09:30', '16:00').dropna())\n", + "chart.set(t1data.data[\"BAC\"])\n", + "line_vwap = chart.create_line(name=\"vwap\")#, color=\"blue\")\n", + "line_vwap.set(t1data.vwap)\n", + "\n", + "\n", + "chart.topbar.textbox(\"title\",\"Nadpis\")\n", + "chart2 = chart.create_subchart(position='right', width=1, height=0.5, sync=True, leftScale=True)\n", + "t2data = basic_data[['open', 'high', 'low', 'close', 'volume','vwap','buyvolume','sellvolume']].resample(\"5T\")\n", + "t2data = t2data.transform(lambda df: df.between_time('09:30', '16:00').dropna())\n", + "\n", + "\n", + "#5min close realigned to 1T\n", + "close_realigned = t2data.close.vbt.realign_closing(\"1T\").between_time('09:30', '16:00').dropna()\n", + "close_realigned = close_realigned[close_realigned.index.dayofweek < 5]\n", + "#close_realigned = close_realigned[close_realigned.index.dayofweek < 5]\n", + "line1 = chart.create_line(name=\"5minclose\")#, color=\"green\")\n", + "line1.set(close_realigned)\n", + "\n", + "#sma z realigned dat\n", + "sma_tp = 20\n", + "sma_t2 = vbt.indicator(\"talib:EMA\").run(close_realigned, 
timeperiod=sma_tp)\n", + "smaline = chart.create_line(name=f\"sma{sma_tp}\")#, color=\"blue\")\n", + "smaline.set(sma_t2)\n", + "\n", + "\n", + "#sma z puvodnich resamplovanych dat plus navic realign, melo by byt stejne \n", + "sma_real = vbt.indicator(\"talib:EMA\").run(t2data.close, timeperiod=sma_tp)\n", + "sma_real_value = sma_real.real.vbt.realign_closing(\"1T\").between_time('09:30', '16:00').dropna()\n", + "sma_real_value = sma_real_value[sma_real_value.index.dayofweek < 5]\n", + "smaline_real = chart.create_line(name=f\"smareal{sma_tp}\", color=\"yellow\")\n", + "smaline_real.set(sma_real_value)\n", + "\n", + "#resample 15T\n", + "t15data = basic_data[['open', 'high', 'low', 'close', 'volume','vwap','buyvolume','sellvolume']].resample(\"15T\")\n", + "t15data = t15data.transform(lambda df: df.between_time('09:30', '16:00').dropna())\n", + "\n", + "#5min close realigned to 1T\n", + "close_15realigned = t15data.close.vbt.realign_closing(\"1T\").between_time('09:30', '16:00').dropna()\n", + "close_15realigned = close_15realigned[close_15realigned.index.dayofweek < 5]\n", + "#close_realigned = close_realigned[close_realigned.index.dayofweek < 5]\n", + "line2 = chart.create_line(name=\"15minclose\")#, color=\"pink\")\n", + "line2.set(close_15realigned)\n", + "\n", + "\n", + "chart.legend(True)\n", + "hst = chart2.create_histogram(name=\"buyvolume\", color=\"rgba(53, 94, 59, 0.6)\") #green transparent\n", + "hst1 = chart2.create_histogram(name=\"sellvolume\", color=\"rgba(165, 42, 42, 0.6)\") #red transparent\n", + "hst.set(t1data.data[\"BAC\"])\n", + "hst1.set(t1data.data[\"BAC\"])\n", + "line2 = chart2.create_line(name=\"5minclose\")#, color=\"green\")\n", + "line2.set(close_realigned)\n", + "\n", + "lineoib = chart2.create_line(name=\"oib\", priceScaleId=\"left\") #color=\"violet\", \n", + "#lineoib.scale(0.7,0)\n", + "lineoib.set(order_imbalance_allvolume)\n", + "\n", + "lineoib_sma = chart2.create_line(name=\"oibsma5\", priceScaleId=\"left\") #, 
color=\"blue\", \n", + "lineoib_sma.set(order_imbalance_sma)\n", + "\n", + "chart.fit()\n", + "chart2.legend(True)\n", + "#\n", + "line2.markers_set(short_signals, \"entries\")\n", + "# TODO jelikoz se davaji do jednoho pole je treba zajistit spravne sortovani\n", + "# domyslet jak to pojmout iterativni doplnovani markeru\n", + "line2.markers_set(short_exits, \"exits\")\n", + "\n", + "\n", + "chart2.fit()\n", + "chart.load()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sma.info()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#priminds list (same Y as price), secinds list (secondary Y napr. rsi), close, voluminds (volume based) list\n", + "def plot_2y_close(priminds, secinds, close, volumeinds, ohlcv=None):\n", + " fig = vbt.make_subplots(rows=2, cols=1, shared_xaxes=True, \n", + " specs=[[{\"secondary_y\": True}], [{\"secondary_y\": False}]], \n", + " vertical_spacing=0.02, subplot_titles=(\"Price and Indicators\", \"Volume\"))\n", + "\n", + " if ohlcv is not None:\n", + " ohlcv.vbt.ohlcv.plot(fig=fig, add_trace_kwargs=dict(row=1, col=1))\n", + "\n", + " # Plotting the close price\n", + " close.vbt.plot(fig=fig, add_trace_kwargs=dict(secondary_y=False,row=1, col=1), trace_kwargs=dict(line=dict(color=\"blue\")))\n", + " \n", + " # Plotting primary indicators on the first row\n", + " for ind in priminds:\n", + " if isinstance(ind, pd.Series):\n", + " #if series has no name, make the name same as the variable name\n", + " \n", + " ind = ind.vbt\n", + " ind.plot(fig=fig, add_trace_kwargs=dict(secondary_y=False, row=1, col=1))\n", + " \n", + " # Plotting secondary indicators on the first row\n", + " for ind in secinds:\n", + " #ind = ind.rename(str(ind.name))\n", + " if isinstance(ind, pd.Series):\n", + " ind = ind.vbt\n", + " ind.plot(fig=fig, add_trace_kwargs=dict(secondary_y=True, row=1, col=1), 
trace_kwargs=dict(line=dict(color=\"rgba(255, 0, 0, 0.4)\")))\n", + " \n", + " for indvolume in volumeinds:\n", + " # Plotting the volume on the second row\n", + " indvolume.rename(str(indvolume.name)).vbt.barplot(fig=fig, add_trace_kwargs=dict(secondary_y=False, row=2, col=1))\n", + " #vbt.Bar(indvolume, fig=fig, add_trace_kwargs=dict(secondary_y=False, row=2, col=1))\n", + " \n", + " return fig\n", + "\n", + "fig = plot_2y_close([sma], [order_imbalance.rename(\"order_imbalance_norm\"),order_imbalance_sma.real.rename(\"oib_sma\")], t1data.close, [t1data.data[\"BAC\"].buyvolume, t1data.data[\"BAC\"].sellvolume, t1data.volume], t1data.data[\"BAC\"])\n", + "fig.update_yaxes(range=[33,34], secondary_y=False, row=1, col=1) #update y axis range\n", + "fig.update_yaxes(range=[-1,1], secondary_y=True, row=1, col=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib inline\n", + "t0data = basic_data\n", + "t1data = basic_data[['open', 'high', 'low', 'close', 'volume','vwap']].resample(\"1T\")\n", + "t2data = basic_data[['open', 'high', 'low', 'close', 'volume','vwap']].resample(\"15T\")\n", + "t3data = basic_data[['open', 'high', 'low', 'close', 'volume','vwap']].resample(\"30T\")\n", + "t4data = basic_data[['open', 'high', 'low', 'close', 'volume', 'vwap']].resample(\"D\").dropna()\n", + "\n", + "t1data = t1data.transform(lambda df: df.between_time('09:30', '16:00').dropna())\n", + "t2data = t2data.transform(lambda df: df.between_time('09:30', '16:00').dropna())\n", + "t3data = t3data.transform(lambda df: df.between_time('09:30', '16:00').dropna())\n", + "\n", + "#30min data to daily\n", + "# t4data = t3data.resample(\"D\").dropna()\n", + "\n", + "#t4data = t4data.transform(lambda df: df.between_time('09:30', '16:00').dropna())\n", + "#m1data.data[\"SPY\"].info()\n", + "\n", + "#m1data.data[\"SPY\"].vbt.ohlcv.plot()\n", + "#h2data.data[\"SPY\"].vbt.ohlcv.plot()\n", + "#ddata.data[\"SPY\"]\n", + 
"t2data.data[\"BAC\"].vbt.ohlcv.plot().show()\n", + "\n", + "\n", + "#t4data.data[\"BAC\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "t2data.close\n", + "\n", + "#in df remove rows with nan\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#realign na 1T = t1data + oriznout main session\n", + "t2data_vwap = t2data.vwap.vbt.realign_closing(\"1T\").between_time('09:30', '16:00').dropna()\n", + "t3data_vwap = t3data.vwap.vbt.realign_closing(\"1T\").between_time('09:30', '16:00').dropna()\n", + "t4data_vwap = t4data.vwap.vbt.realign_closing(\"1T\").dropna()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "t2data_vwap" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def plot_2y_close(priminds, secinds, close):\n", + " fig = vbt.make_subplots(rows=1, cols=1, shared_xaxes=True, specs=[[{\"secondary_y\": True}]], vertical_spacing=0.02, subplot_titles=(\"MOM\", \"Price\" ))\n", + " close.vbt.plot(fig=fig, add_trace_kwargs=dict(secondary_y=False), trace_kwargs=dict(line=dict(color=\"blue\")))\n", + " for ind in priminds:\n", + " if isinstance(ind, pd.Series):\n", + " ind = ind.vbt\n", + " ind.plot(fig=fig, add_trace_kwargs=dict(secondary_y=False))\n", + " for ind in secinds:\n", + " if isinstance(ind, pd.Series):\n", + " ind = ind.vbt\n", + " ind.plot(fig=fig, add_trace_kwargs=dict(secondary_y=True))\n", + " return fig" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "t4data.clos.vbt \n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "obvind = vbt.indicator.obv.run(t1data.close, t1data.volume)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + 
"metadata": {}, + "outputs": [], + "source": [ + "t1_lengtgh = 15\n", + "t2_length = 15\n", + "t3_length = 15\n", + "t4_length = 5\n", + "t1_th = 0.1\n", + "t2_th = 0.1\n", + "t3_th = 0.1\n", + "t4_th = 0.1\n", + "\n", + "\n", + "\n", + "#minute\n", + "t1slope = vbt.indicator(\"talib:LINEARREG_SLOPE \").run(t1data.close, timeperiod=t1_lengtgh) # -0.09, 0.09\n", + "t2slope = vbt.indicator(\"talib:LINEARREG_SLOPE \").run(t2data.vwap, timeperiod=t2_length) # -0.08 , 0.079\n", + "t3slope = vbt.indicator(\"talib:LINEARREG_SLOPE \").run(t3data.vwap, timeperiod=t3_length) # -0.08, 0.08\n", + "#daily\n", + "t4slope = vbt.indicator(\"talib:LINEARREG_SLOPE \").run(t4data.vwap, timeperiod=t4_length) # -0.1, 0.09\n", + "\n", + "plot_2y_close(priminds=[], secinds=[t1slope, t2slope, t3slope, t4slope], close=t1data.close).show()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#thirtymin_slope = thirtymin_slope.real.rename(\"30min\") #timto se prejmenuje real na 30min\n", + "t3slope = t3slope.real.vbt.realign_closing(\"1T\").between_time('09:30', '16:00').dropna()\n", + "##filter daily_slope_to_compare to only monday to friday\n", + "t3slope = t3slope[t3slope.index.dayofweek < 5]\n", + "\n", + "#t3slope.info()\n", + "\n", + "t2slope = t2slope.real.vbt.realign_closing(\"1T\").between_time('09:30', '16:00').dropna()\n", + "##filter daily_slope_to_compare to only monday to friday\n", + "t2slope = t2slope[t2slope.index.dayofweek < 5]\n", + "\n", + "t2slope.info()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "oibratio" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "#\n", + "short_signals = order_imbalance.vbt < -0.3\n", + "#short_entries = oibratio.vbt < 0.01\n", + "short_signals.value_counts()\n", + "\n", + "long_signals = order_imbalance.vbt > 0.3\n", + "#entries = 
oibratio.vbt > 10\n", + "long_signals.value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig = vbt.make_subplots(rows=3, cols=1, shared_xaxes=True, \n", + " specs=[[{\"secondary_y\": True}], [{\"secondary_y\": True}], [{\"secondary_y\": False}]], \n", + " vertical_spacing=0.02, subplot_titles=(\"Price and Indicators\", \"Volume\"))\n", + "t1data.data[\"BAC\"].vbt.ohlcv.plot(fig=fig, add_trace_kwargs=dict(secondary_y=False, row=1, col=1))\n", + "#oibratio.vbt.plot(fig=fig, add_trace_kwargs=dict(secondary_y=True, row=1, col=1))\n", + "order_imbalance.vbt.plot(fig=fig, add_trace_kwargs=dict(secondary_y=True, row=1, col=1))\n", + "long_signals.vbt.signals.plot_as_entries(t1data.close, fig=fig, trace_kwargs=dict(name=\"LONGS\",\n", + " line=dict(color=\"#ffe476\"),\n", + " marker=dict(color=\"limegreen\"),\n", + " fill=None,\n", + " connectgaps=True,\n", + " ), add_trace_kwargs=dict(secondary_y=False, row=1, col=1))\n", + "\n", + "short_signals.vbt.signals.plot_as_entries(t1data.close, fig=fig, trace_kwargs=dict(name=\"SHORTS\",\n", + " line=dict(color=\"#ffe476\"),\n", + " marker=dict(color=\"red\"),\n", + " fill=None,\n", + " connectgaps=True,\n", + " ), add_trace_kwargs=dict(secondary_y=False, row=1, col=1))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# thirtymin_slope_to_compare.vbt.xloc[\"04-16-2024\"].get()\n", + "thirty_down_signal.vbt.xloc[\"04-16-2024\"].get()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#short_signal = t1slope.real_below(t1_th) & t2slope.real_below(t2_th) & t3slope.real_below(t3_th) & t4slope.real_below(t4_th)\n", + "#long_signal = t1slope.real_above(t1_th) & t2slope.real_above(t2_th) & t3slope.real_above(t3_th) & t4slope.real_above(t4_th)\n", + "\n", + "#test na daily s reversem crossed 0\n", + "short_signal = 
t2slope.vbt < -0.01 & t3slope.vbt < -0.01 #min value of threshold\n", + "long_signal = t2slope.vbt > 0.01 & t3slope.vbt > 0.01 #min\n", + "\n", + "# thirty_up_signal = t3slope.vbt.crossed_above(0.01)\n", + "# thirty_down_signal = t3slope.vbt.crossed_below(-0.01)\n", + "\n", + "fig = plot_2y_close(priminds=[], secinds=[t3slope], close=t1data.close)\n", + "#short_signal.vbt.signals.plot_as_entries(basic_data.close, fig=fig)\n", + "\n", + "short_signal.vbt.signals.plot_as_entries(t1data.close, fig=fig, trace_kwargs=dict(name=\"SHORTS\",\n", + " line=dict(color=\"#ffe476\"),\n", + " marker=dict(color=\"red\", symbol=\"triangle-down\"),\n", + " fill=None,\n", + " connectgaps=True,\n", + " ))\n", + "long_signal.vbt.signals.plot_as_entries(t1data.close, fig=fig, trace_kwargs=dict(name=\"LONGS\",\n", + " line=dict(color=\"#ffe476\"),\n", + " marker=dict(color=\"limegreen\"),\n", + " fill=None,\n", + " connectgaps=True,\n", + " ))\n", + "\n", + "# thirty_down_signal.vbt.signals.plot_as_entries(t1data.close, fig=fig, trace_kwargs=dict(name=\"DOWN30\",\n", + "# line=dict(color=\"#ffe476\"),\n", + "# marker=dict(color=\"yellow\", symbol=\"triangle-down\"),\n", + "# fill=None,\n", + "# connectgaps=True,\n", + "# ))\n", + "# thirty_up_signal.vbt.signals.plot_as_entries(t1data.close, fig=fig, trace_kwargs=dict(name=\"UP30\",\n", + "# line=dict(color=\"#ffe476\"),\n", + "# marker=dict(color=\"grey\"),\n", + "# fill=None,\n", + "# connectgaps=True,\n", + "# ))\n", + "\n", + "# thirtymin_slope_to_compare.vbt.plot(fig=fig, add_trace_kwargs=dict(secondary_y=True), trace_kwargs=dict(name=\"30min slope\",\n", + "# line=dict(color=\"yellow\"), \n", + "# fill=None,\n", + "# connectgaps=True,\n", + "# ))\n", + "\n", + "fig.show()\n", + "# print(\"short signal\")\n", + "# print(short_signal.value_counts())\n", + "\n", + "#forced_exit = pd.Series(False, index=close.index)\n", + "forced_exit = basic_data.symbol_wrapper.fill(False)\n", + "#entry_window_open = pd.Series(False, 
index=close.index)\n", + "entry_window_open= basic_data.symbol_wrapper.fill(False)\n", + "\n", + "# Calculate the time difference in minutes from market open for each timestamp\n", + "elapsed_min_from_open = (forced_exit.index.hour - market_open.hour) * 60 + (forced_exit.index.minute - market_open.minute)\n", + "\n", + "entry_window_open[(elapsed_min_from_open >= entry_window_opens) & (elapsed_min_from_open < entry_window_closes)] = True\n", + "\n", + "#print(entry_window_open.value_counts())\n", + "\n", + "forced_exit[(elapsed_min_from_open >= forced_exit_start) & (elapsed_min_from_open < forced_exit_end)] = True\n", + "short_entries = (short_signal & entry_window_open)\n", + "short_exits = forced_exit\n", + "\n", + "entries = (long_signal & entry_window_open)\n", + "exits = forced_exit\n", + "#long_entries.info()\n", + "#number of trues and falses in long_entries\n", + "# print(short_exits.value_counts())\n", + "# print(short_entries.value_counts())\n", + "\n", + "#fig = plot_2y_close([],[momshort, rocp], close)\n", + "#short_signal.vbt.signals.plot_as_entries(close, fig=fig, add_trace_kwargs=dict(secondary_y=False))\n", + "#print(sl_stop)\n", + "#short_entries=short_entries, short_exits=short_exits,\n", + "# pf = vbt.Portfolio.from_signals(close=basic_data, entries=short_entries, exits=exits, tsl_stop=0.005, tp_stop = 0.05, fees=0.0167/100, freq=\"1s\") #sl_stop=sl_stop, tp_stop = sl_stop,\n", + "\n", + "# pf.stats()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "forced_exit = t1data.symbol_wrapper.fill(False)\n", + "#entry_window_open = pd.Series(False, index=close.index)\n", + "entry_window_open= t1data.symbol_wrapper.fill(False)\n", + "\n", + "# Calculate the time difference in minutes from market open for each timestamp\n", + "elapsed_min_from_open = (forced_exit.index.hour - market_open.hour) * 60 + (forced_exit.index.minute - market_open.minute)\n", + "\n", + 
"entry_window_open[(elapsed_min_from_open >= entry_window_opens) & (elapsed_min_from_open < entry_window_closes)] = True\n", + "\n", + "#print(entry_window_open.value_counts())\n", + "\n", + "forced_exit[(elapsed_min_from_open >= forced_exit_start) & (elapsed_min_from_open < forced_exit_end)] = True\n", + "short_entries = (short_signals & entry_window_open)\n", + "short_exits = forced_exit\n", + "\n", + "entries = (long_signals & entry_window_open)\n", + "exits = forced_exit\n", + "\n", + "pf = vbt.Portfolio.from_signals(close=t1data, entries=entries, exits=exits, short_entries=short_entries, short_exits=exits,\n", + "td_stop=2, time_delta_format=\"rows\",\n", + "tsl_stop=0.005, tp_stop = 0.005, fees=0.0167/100)#, freq=\"1s\") #sl_stop=sl_stop, tp_stop = sl_stop,\n", + "\n", + "pf.stats()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf.plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf.get_drawdowns().records_readable" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf.orders.records_readable" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/research/strat_SUPERTREND/.ipynb_checkpoints/v2_SINGLE-checkpoint.ipynb b/research/strat_SUPERTREND/.ipynb_checkpoints/v2_SINGLE-checkpoint.ipynb new file mode 100644 index 0000000..58a878c --- /dev/null +++ b/research/strat_SUPERTREND/.ipynb_checkpoints/v2_SINGLE-checkpoint.ipynb @@ -0,0 +1,1118 @@ +{ + "cells": [ + { + "cell_type": 
"markdown", + "metadata": {}, + "source": [ + "# ORDER Imbalance\n", + "\n", + "* introduced buyvolume and sellvolume on bar level.\n", + "* calculated order imbalance ratio (buyvolume-sellvolume/totalvolume)\n", + "* calculated on multiple timeframes\n", + "* entry based on confluences imbalances\n", + "\n", + "## Note\n", + "\n", + "Order disbalance nepodminuje zmenu ceny (tzn. muze byt order disbalance na buy stranu, ale cena nemusi jit nahoru a naopak)\n", + "Nicmene pokud je disbalance delsi a nedochazi ke zmene ceny - může to něco indikovat. \n", + "Vytvořit si kumulativní disbalance - kumulátory, které se budou načítat, když se budou silné disbalance, bez změny ceny. Tento akumulátor se bude nabíjet disbalancí a vybíjet příslušnou změnou ceny." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from dotenv import load_dotenv\n", + "\n", + "#as V2realbot is client , load env variables here\n", + "env_file = \"/Users/davidbrazda/Documents/Development/python/.env\"\n", + "# Load the .env file\n", + "load_dotenv(env_file)\n", + "\n", + "from v2realbot.utils.utils import zoneNY\n", + "import pandas as pd\n", + "import numpy as np\n", + "import vectorbtpro as vbt\n", + "# from itables import init_notebook_mode, show\n", + "import datetime\n", + "from itertools import product\n", + "from v2realbot.config import DATA_DIR\n", + "from lightweight_charts import JupyterChart, chart, Panel\n", + "from IPython.display import display\n", + "\n", + "# init_notebook_mode(all_interactive=True)\n", + "\n", + "vbt.settings.set_theme(\"dark\")\n", + "vbt.settings['plotting']['layout']['width'] = 1280\n", + "vbt.settings.plotting.auto_rangebreaks = True\n", + "# Set the option to display with pagination\n", + "pd.set_option('display.notebook_repr_html', True)\n", + "pd.set_option('display.max_rows', 10) # Number of rows per page\n", + "\n", + "# Define the market open and close times\n", + "market_open = 
datetime.time(9, 30)\n", + "market_close = datetime.time(16, 0)\n", + "entry_window_opens = 1\n", + "entry_window_closes = 370\n", + "\n", + "forced_exit_start = 380\n", + "forced_exit_end = 390\n", + "\n", + "#LOAD FROM PARQUET\n", + "#list all files is dir directory with parquet extension\n", + "dir = DATA_DIR + \"/notebooks/\"\n", + "import os\n", + "files = [f for f in os.listdir(dir) if f.endswith(\".parquet\")]\n", + "print('\\n'.join(map(str, files)))\n", + "file_name = \"ohlcv_df-BAC-2023-01-01T09_30_00-2024-05-25T15_30_00-47BCFOPUVWZ-100.parquet\"\n", + "ohlcv_df = pd.read_parquet(dir+file_name,engine='pyarrow')\n", + "#filter ohlcv_df to certain date range (assuming datetime index)\n", + "#ohlcv_df = ohlcv_df.loc[\"2024-02-12 9:30\":\"2024-02-14 16:00\"]\n", + "\n", + "#add vwap column to ohlcv_df\n", + "#ohlcv_df[\"hlcc4\"] = (ohlcv_df[\"close\"] + ohlcv_df[\"high\"] + ohlcv_df[\"low\"] + ohlcv_df[\"close\"]) / 4\n", + "\n", + "basic_data = vbt.Data.from_data(vbt.symbol_dict({\"BAC\": ohlcv_df}), tz_convert=zoneNY)\n", + "ohlcv_df= None\n", + "basic_data.wrapper.index.normalize().nunique()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Add resample function to custom columns" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from vectorbtpro.utils.config import merge_dicts, Config, HybridConfig\n", + "from vectorbtpro import _typing as tp\n", + "from vectorbtpro.generic import nb as generic_nb\n", + "\n", + "_feature_config: tp.ClassVar[Config] = HybridConfig(\n", + " {\n", + " \"buyvolume\": dict(\n", + " resample_func=lambda self, obj, resampler: obj.vbt.resample_apply(\n", + " resampler,\n", + " generic_nb.sum_reduce_nb,\n", + " )\n", + " ),\n", + " \"sellvolume\": dict(\n", + " resample_func=lambda self, obj, resampler: obj.vbt.resample_apply(\n", + " 
resampler,\n", + " generic_nb.sum_reduce_nb,\n", + " )\n", + " ),\n", + " \"trades\": dict(\n", + " resample_func=lambda self, obj, resampler: obj.vbt.resample_apply(\n", + " resampler,\n", + " generic_nb.sum_reduce_nb,\n", + " )\n", + " )\n", + " }\n", + ")\n", + "\n", + "basic_data._feature_config = _feature_config" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "basic_data.data[\"BAC\"].info()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "t1data = basic_data[['open', 'high', 'low', 'close', 'volume','vwap','buyvolume','trades','sellvolume']].resample(\"1T\")\n", + "t1data = t1data.transform(lambda df: df.between_time('09:30', '16:00').dropna())\n", + "t1data.data[\"BAC\"].info()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "bbands = vbt.talib(\"BBANDS\").run(\n", + " t1data.get(\"Close\"))\n", + "\n", + "\n", + "supertrend = vbt.SUPERTREND.run(t1data.high, t1data.low, t1data.close, period=14, multiplier=3)\n", + "#supertrend.output_names" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "serka = supertrend.direction\n", + "\n", + "#a = serka.vbt.xloc[\"2024-02-12 09:30\":\"2024-02-12 09:32\"].obj\n", + "\n", + "a = serka.vbt.xloc[slice(\"2024-02-12 09:30\",\"2024-02-12 09:32\")].obj\n", + " \n", + "a\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df = t1data.data[\"BAC\"]\n", + "\n", + "df.vbt.xloc[\"2024-02-12 09:30\":\"2024-02-12 09:32\"].obj" + ] + }, + { + "cell_type": "code", + "execution_count": null, + 
"metadata": {}, + "outputs": [], + "source": [ + "supertrend.trend" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pane1 = Panel(\n", + " ohlcv=(t1data.data[\"BAC\"],), #(series, entries, exits, other_markers)\n", + " histogram=[], # [(series, name, \"rgba(53, 94, 59, 0.6), opacity\")]\n", + " right=[#(bbands,), #[(series, name, entries, exits, other_markers)]\n", + " (supertrend.trend,\"STtrend\"),\n", + " (supertrend.long,\"STlong\"),\n", + " (supertrend.short,\"STshort\")\n", + " ],\n", + " left = [(supertrend.direction,\"STdirection\")],\n", + " # right=[(bbands.upperband, \"upperband\",),\n", + " # (bbands.lowerband, \"lowerband\",),\n", + " # (bbands.middleband, \"middleband\",)\n", + " # ], #[(series, name, entries, exits, other_markers)]\n", + " middle1=[],\n", + " middle2=[],\n", + ")\n", + "\n", + "pane2 = Panel(\n", + " ohlcv=(t1data.data[\"BAC\"],), #(series, entries, exits, other_markers)\n", + " histogram=[], # [(series, name, \"rgba(53, 94, 59, 0.6), opacity\")]\n", + " right=[(bbands,), #[(series, name, entries, exits, other_markers)]\n", + " # (supertrend.trend,\"STtrend\"),\n", + " # (supertrend.long,\"STlong\"),\n", + " # (supertrend.short,\"STshort\")\n", + " ],\n", + " # left = [(supertrend.direction,\"STdirection\")],\n", + " # right=[(bbands.upperband, \"upperband\",),\n", + " # (bbands.lowerband, \"lowerband\",),\n", + " # (bbands.middleband, \"middleband\",)\n", + " # ], #[(series, name, entries, exits, other_markers)]\n", + " middle1=[],\n", + " middle2=[],\n", + ")\n", + "\n", + "ch = chart([pane1, pane2], sync=True, size=\"s\", xloc=slice(\"2024-02-12 09:30\",\"2024-03-12\"))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "buyvolume = t1data.data[\"BAC\"].buyvolume\n", + "sellvolume = t1data.data[\"BAC\"].sellvolume\n", + "totalvolume = buyvolume + sellvolume\n", + "\n", + "#adjust to minimal value to 
avoid division by zero\n", + "sellvolume_adjusted = sellvolume.replace(0, 1e-10)\n", + "oibratio = buyvolume / sellvolume\n", + "\n", + "#cumulative order flow (net difference)\n", + "cof = buyvolume - sellvolume\n", + "\n", + "# Calculate the order imbalance (net differene) normalize the order imbalance by calculating the difference between buy and sell volumes and then scaling it by the total volume.\n", + "order_imbalance = cof / totalvolume\n", + "order_imbalance = order_imbalance.fillna(0) #nan nahradime 0\n", + "\n", + "order_imbalance_allvolume = cof / t1data.data[\"BAC\"].volume\n", + "\n", + "order_imbalance_sma = vbt.indicator(\"talib:EMA\").run(order_imbalance, timeperiod=5)\n", + "short_signals = order_imbalance.vbt < -0.5\n", + "#short_entries = oibratio.vbt < 0.01\n", + "short_signals.value_counts()\n", + "short_signals.name = \"short_entries\"\n", + "#.fillna(False)\n", + "short_exits = short_signals.shift(-2).fillna(False).astype(bool)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pane1 = Panel(\n", + " ohlcv=(t1data.data[\"BAC\"],), #(series, entries, exits, other_markers)\n", + " histogram=[(order_imbalance_allvolume, \"oib_allvolume\", \"rgba(53, 94, 59, 0.6)\",0.5),\n", + " (t1data.data[\"BAC\"].trades, \"trades\",None,0.4),\n", + " ], # [(series, name, \"rgba(53, 94, 59, 0.6)\", opacity)]\n", + " # right=[\n", + " # (supertrend.trend,\"STtrend\"),\n", + " # (supertrend.long,\"STlong\"),\n", + " # (supertrend.short,\"STshort\")\n", + " # ],\n", + " # left = [(supertrend.direction,\"STdirection\")],\n", + " # right=[(bbands.upperband, \"upperband\",),\n", + " # (bbands.lowerband, \"lowerband\",),\n", + " # (bbands.middleband, \"middleband\",)\n", + " # ], #[(series, name, entries, exits, other_markers)]\n", + " middle1=[],\n", + " middle2=[],\n", + ")\n", + "\n", + "pane2 = Panel(\n", + " ohlcv=(basic_data.data[\"BAC\"],), #(series, entries, exits, other_markers)\n", + " 
left=[(basic_data.data[\"BAC\"].trades, \"trades\")],\n", + " histogram=[(basic_data.data[\"BAC\"].trades, \"trades_hist\", \"white\", 0.5)], #\"rgba(53, 94, 59, 0.6)\"\n", + " # ], # [(series, name, \"rgba(53, 94, 59, 0.6)\")]\n", + " # right=[\n", + " # (supertrend.trend,\"STtrend\"),\n", + " # (supertrend.long,\"STlong\"),\n", + " # (supertrend.short,\"STshort\")\n", + " # ],\n", + " # left = [(supertrend.direction,\"STdirection\")],\n", + " # right=[(bbands.upperband, \"upperband\",),\n", + " # (bbands.lowerband, \"lowerband\",),\n", + " # (bbands.middleband, \"middleband\",)\n", + " # ], #[(series, name, entries, exits, other_markers)]\n", + " middle1=[],\n", + " middle2=[],\n", + ")\n", + "\n", + "\n", + "ch = chart([pane1, pane2], size=\"m\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "##z tohoto si udelat plot funkci (i pro entries,exits)\n", + "#t1data = t1data[[\"open\", \"high\", \"low\", \"close\", \"volume\"]]\n", + "chart = JupyterChart(width=1000, height=600, inner_width=1, inner_height=0.5, leftScale=True)\n", + "#set resolution\n", + "t1data = basic_data[['open', 'high', 'low', 'close', 'volume','vwap','buyvolume','sellvolume']].resample(\"1T\")\n", + "t1data = t1data.transform(lambda df: df.between_time('09:30', '16:00').dropna())\n", + "chart.set(t1data.data[\"BAC\"])\n", + "line_vwap = chart.create_line(name=\"vwap\")#, color=\"blue\")\n", + "line_vwap.set(t1data.vwap)\n", + "\n", + "\n", + "chart.topbar.textbox(\"title\",\"Nadpis\")\n", + "chart2 = chart.create_subchart(position='right', width=1, height=0.5, sync=True, leftScale=True)\n", + "t2data = basic_data[['open', 'high', 'low', 'close', 'volume','vwap','buyvolume','sellvolume']].resample(\"5T\")\n", + "t2data = t2data.transform(lambda df: df.between_time('09:30', '16:00').dropna())\n", + "\n", + "\n", + "#5min close realigned to 1T\n", + "close_realigned = 
t2data.close.vbt.realign_closing(\"1T\").between_time('09:30', '16:00').dropna()\n", + "close_realigned = close_realigned[close_realigned.index.dayofweek < 5]\n", + "#close_realigned = close_realigned[close_realigned.index.dayofweek < 5]\n", + "line1 = chart.create_line(name=\"5minclose\")#, color=\"green\")\n", + "line1.set(close_realigned)\n", + "\n", + "#sma z realigned dat\n", + "sma_tp = 20\n", + "sma_t2 = vbt.indicator(\"talib:EMA\").run(close_realigned, timeperiod=sma_tp)\n", + "smaline = chart.create_line(name=f\"sma{sma_tp}\")#, color=\"blue\")\n", + "smaline.set(sma_t2)\n", + "\n", + "\n", + "#sma z puvodnich resamplovanych dat plus navic realign, melo by byt stejne \n", + "sma_real = vbt.indicator(\"talib:EMA\").run(t2data.close, timeperiod=sma_tp)\n", + "sma_real_value = sma_real.real.vbt.realign_closing(\"1T\").between_time('09:30', '16:00').dropna()\n", + "sma_real_value = sma_real_value[sma_real_value.index.dayofweek < 5]\n", + "smaline_real = chart.create_line(name=f\"smareal{sma_tp}\", color=\"yellow\")\n", + "smaline_real.set(sma_real_value)\n", + "\n", + "#resample 15T\n", + "t15data = basic_data[['open', 'high', 'low', 'close', 'volume','vwap','buyvolume','sellvolume']].resample(\"15T\")\n", + "t15data = t15data.transform(lambda df: df.between_time('09:30', '16:00').dropna())\n", + "\n", + "#5min close realigned to 1T\n", + "close_15realigned = t15data.close.vbt.realign_closing(\"1T\").between_time('09:30', '16:00').dropna()\n", + "close_15realigned = close_15realigned[close_15realigned.index.dayofweek < 5]\n", + "#close_realigned = close_realigned[close_realigned.index.dayofweek < 5]\n", + "line2 = chart.create_line(name=\"15minclose\")#, color=\"pink\")\n", + "line2.set(close_15realigned)\n", + "\n", + "\n", + "chart.legend(True)\n", + "hst = chart2.create_histogram(name=\"buyvolume\", color=\"rgba(53, 94, 59, 0.6)\") #green transparent\n", + "hst1 = chart2.create_histogram(name=\"sellvolume\", color=\"rgba(165, 42, 42, 0.6)\") #red 
transparent\n", + "hst.set(t1data.data[\"BAC\"])\n", + "hst1.set(t1data.data[\"BAC\"])\n", + "line2 = chart2.create_line(name=\"5minclose\")#, color=\"green\")\n", + "line2.set(close_realigned)\n", + "\n", + "lineoib = chart2.create_line(name=\"oib\", priceScaleId=\"left\") #color=\"violet\", \n", + "#lineoib.scale(0.7,0)\n", + "lineoib.set(order_imbalance_allvolume)\n", + "\n", + "lineoib_sma = chart2.create_line(name=\"oibsma5\", priceScaleId=\"left\") #, color=\"blue\", \n", + "lineoib_sma.set(order_imbalance_sma)\n", + "\n", + "chart.fit()\n", + "chart2.legend(True)\n", + "#\n", + "line2.markers_set(short_signals, \"entries\")\n", + "# TODO jelikoz se davaji do jednoho pole je treba zajistit spravne sortovani\n", + "# domyslet jak to pojmout iterativni doplnovani markeru\n", + "line2.markers_set(short_exits, \"exits\")\n", + "\n", + "\n", + "chart2.fit()\n", + "chart.load()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "short_signals.info()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#vbt.IF.list_indicators(\"*ma\")\n", + "vbt.phelp(vbt.indicator(\"talib:EMA\").run)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sma = vbt.indicator(\"talib:EMA\").run(t1data.close, timeperiod=20)\n", + "sma.real.info()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "rr = vbt.RSI.run(t1data.close)\n", + "type(rr)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "buyvolume = t1data.data[\"BAC\"].buyvolume\n", + "sellvolume = t1data.data[\"BAC\"].sellvolume\n", + "totalvolume = buyvolume + sellvolume\n", + "\n", + "#adjust to minimal 
value to avoid division by zero\n", + "sellvolume_adjusted = sellvolume.replace(0, 1e-10)\n", + "oibratio = buyvolume / sellvolume\n", + "\n", + "#cumulative order flow (net difference)\n", + "cof = buyvolume - sellvolume\n", + "\n", + "# Calculate the order imbalance (net differene) normalize the order imbalance by calculating the difference between buy and sell volumes and then scaling it by the total volume.\n", + "order_imbalance = cof / totalvolume\n", + "order_imbalance = order_imbalance.fillna(0) #nan nahradime 0\n", + "\n", + "order_imbalance_allvolume = cof / t1data.data[\"BAC\"].volume" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "order_imbalance_sma = vbt.indicator(\"talib:EMA\").run(order_imbalance, timeperiod=5)\n", + "short_signals = order_imbalance.vbt < -0.5\n", + "#short_entries = oibratio.vbt < 0.01\n", + "short_signals.value_counts()\n", + "short_signals.name = \"short_entries\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "short_signals.info()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "short_signals" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "order_imbalance.fillna(0)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "order_imbalance.vbt.plot()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "order_imbalance" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "chartN = JupyterChart(width=500, height=300, inner_width=1, inner_height=0.3, leftScale=True)\n", + 
"t1data = basic_data[['open', 'high', 'low', 'close', 'volume','vwap','buyvolume','sellvolume']].resample(\"1T\")\n", + "t1data = t1data.transform(lambda df: df.between_time('09:30', '16:00').dropna())\n", + "chartN.set(t1data.data[\"BAC\"])\n", + "line_sma = chartN.create_line(name=\"sma\", priceScaleId=\"right\")#, color=\"blue\")\n", + "line_sma.set(sma)\n", + "# line_sma.markers_set(short_signals, \"entries\")\n", + "# line_sma.markers_set(short_exits, \"exits\")\n", + "# hst = chartN.create_histogram(name=\"oivol\")\n", + "# hst.set(order_imbalance_allvolume)\n", + "# chartN.legend(True)\n", + "# chartN.fit()\n", + "\n", + "# subchart = chartN.create_subchart(position='right', width=1, height=0.5, sync=False, leftScale=True)\n", + "# # subchart.set(t1data.data[\"BAC\"])\n", + "# line_sma1 = subchart.create_line(name=\"smao\", priceScaleId=\"left\")#, color=\"blue\")\n", + "# line_sma1.set(sma)\n", + "# # line_sma1.markers_set(short_signals, \"entries\")\n", + "# # line_sma1.markers_set(short_exits, \"exits\")\n", + "# hsto = subchart.create_histogram(name=\"oivolo\")\n", + "# hsto.set(order_imbalance_sma)\n", + "\n", + "chart2 = chartN.create_subchart(position='left', width=1, height=0.5, sync=True, leftScale=True, toolbox=True)\n", + "# hst = chart2.create_histogram(name=\"buyvolume\", color=\"rgba(53, 94, 59, 0.6)\") #green transparent\n", + "# hst1 = chart2.create_histogram(name=\"sellvolume\", color=\"rgba(165, 42, 42, 0.6)\") #red transparent\n", + "# hst.set(t1data.data[\"BAC\"])\n", + "# hst1.set(t1data.data[\"BAC\"])\n", + "line2 = chart2.create_line(name=\"sma\")#, color=\"green\")\n", + "line2.set(sma)\n", + "chart2.topbar.textbox(\"title\",\"Nadpis\")\n", + "# chartN.topbar.textbox(\"title\",\"NadpisT\")\n", + "\n", + "# subchart.legend(True)\n", + "# subchart.fit()\n", + "chartN.load()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "##z tohoto si udelat plot funkci (i pro 
entries,exits)\n", + "#t1data = t1data[[\"open\", \"high\", \"low\", \"close\", \"volume\"]]\n", + "chart = JupyterChart(width=1000, height=600, inner_width=1, inner_height=0.5, leftScale=True)\n", + "#set resolution\n", + "t1data = basic_data[['open', 'high', 'low', 'close', 'volume','vwap','buyvolume','sellvolume']].resample(\"1T\")\n", + "t1data = t1data.transform(lambda df: df.between_time('09:30', '16:00').dropna())\n", + "chart.set(t1data.data[\"BAC\"])\n", + "line_vwap = chart.create_line(name=\"vwap\")#, color=\"blue\")\n", + "line_vwap.set(t1data.vwap)\n", + "\n", + "\n", + "chart.topbar.textbox(\"title\",\"Nadpis\")\n", + "chart2 = chart.create_subchart(position='right', width=1, height=0.5, sync=True, leftScale=True)\n", + "t2data = basic_data[['open', 'high', 'low', 'close', 'volume','vwap','buyvolume','sellvolume']].resample(\"5T\")\n", + "t2data = t2data.transform(lambda df: df.between_time('09:30', '16:00').dropna())\n", + "\n", + "\n", + "#5min close realigned to 1T\n", + "close_realigned = t2data.close.vbt.realign_closing(\"1T\").between_time('09:30', '16:00').dropna()\n", + "close_realigned = close_realigned[close_realigned.index.dayofweek < 5]\n", + "#close_realigned = close_realigned[close_realigned.index.dayofweek < 5]\n", + "line1 = chart.create_line(name=\"5minclose\")#, color=\"green\")\n", + "line1.set(close_realigned)\n", + "\n", + "#sma z realigned dat\n", + "sma_tp = 20\n", + "sma_t2 = vbt.indicator(\"talib:EMA\").run(close_realigned, timeperiod=sma_tp)\n", + "smaline = chart.create_line(name=f\"sma{sma_tp}\")#, color=\"blue\")\n", + "smaline.set(sma_t2)\n", + "\n", + "\n", + "#sma z puvodnich resamplovanych dat plus navic realign, melo by byt stejne \n", + "sma_real = vbt.indicator(\"talib:EMA\").run(t2data.close, timeperiod=sma_tp)\n", + "sma_real_value = sma_real.real.vbt.realign_closing(\"1T\").between_time('09:30', '16:00').dropna()\n", + "sma_real_value = sma_real_value[sma_real_value.index.dayofweek < 5]\n", + "smaline_real = 
chart.create_line(name=f\"smareal{sma_tp}\", color=\"yellow\")\n", + "smaline_real.set(sma_real_value)\n", + "\n", + "#resample 15T\n", + "t15data = basic_data[['open', 'high', 'low', 'close', 'volume','vwap','buyvolume','sellvolume']].resample(\"15T\")\n", + "t15data = t15data.transform(lambda df: df.between_time('09:30', '16:00').dropna())\n", + "\n", + "#5min close realigned to 1T\n", + "close_15realigned = t15data.close.vbt.realign_closing(\"1T\").between_time('09:30', '16:00').dropna()\n", + "close_15realigned = close_15realigned[close_15realigned.index.dayofweek < 5]\n", + "#close_realigned = close_realigned[close_realigned.index.dayofweek < 5]\n", + "line2 = chart.create_line(name=\"15minclose\")#, color=\"pink\")\n", + "line2.set(close_15realigned)\n", + "\n", + "\n", + "chart.legend(True)\n", + "hst = chart2.create_histogram(name=\"buyvolume\", color=\"rgba(53, 94, 59, 0.6)\") #green transparent\n", + "hst1 = chart2.create_histogram(name=\"sellvolume\", color=\"rgba(165, 42, 42, 0.6)\") #red transparent\n", + "hst.set(t1data.data[\"BAC\"])\n", + "hst1.set(t1data.data[\"BAC\"])\n", + "line2 = chart2.create_line(name=\"5minclose\")#, color=\"green\")\n", + "line2.set(close_realigned)\n", + "\n", + "lineoib = chart2.create_line(name=\"oib\", priceScaleId=\"left\") #color=\"violet\", \n", + "#lineoib.scale(0.7,0)\n", + "lineoib.set(order_imbalance_allvolume)\n", + "\n", + "lineoib_sma = chart2.create_line(name=\"oibsma5\", priceScaleId=\"left\") #, color=\"blue\", \n", + "lineoib_sma.set(order_imbalance_sma)\n", + "\n", + "chart.fit()\n", + "chart2.legend(True)\n", + "#\n", + "line2.markers_set(short_signals, \"entries\")\n", + "# TODO jelikoz se davaji do jednoho pole je treba zajistit spravne sortovani\n", + "# domyslet jak to pojmout iterativni doplnovani markeru\n", + "line2.markers_set(short_exits, \"exits\")\n", + "\n", + "\n", + "chart2.fit()\n", + "chart.load()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + 
"outputs": [], + "source": [ + "sma.info()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#priminds list (same Y as price), secinds list (secondary Y napr. rsi), close, voluminds (volume based) list\n", + "def plot_2y_close(priminds, secinds, close, volumeinds, ohlcv=None):\n", + " fig = vbt.make_subplots(rows=2, cols=1, shared_xaxes=True, \n", + " specs=[[{\"secondary_y\": True}], [{\"secondary_y\": False}]], \n", + " vertical_spacing=0.02, subplot_titles=(\"Price and Indicators\", \"Volume\"))\n", + "\n", + " if ohlcv is not None:\n", + " ohlcv.vbt.ohlcv.plot(fig=fig, add_trace_kwargs=dict(row=1, col=1))\n", + "\n", + " # Plotting the close price\n", + " close.vbt.plot(fig=fig, add_trace_kwargs=dict(secondary_y=False,row=1, col=1), trace_kwargs=dict(line=dict(color=\"blue\")))\n", + " \n", + " # Plotting primary indicators on the first row\n", + " for ind in priminds:\n", + " if isinstance(ind, pd.Series):\n", + " #if series has no name, make the name same as the variable name\n", + " \n", + " ind = ind.vbt\n", + " ind.plot(fig=fig, add_trace_kwargs=dict(secondary_y=False, row=1, col=1))\n", + " \n", + " # Plotting secondary indicators on the first row\n", + " for ind in secinds:\n", + " #ind = ind.rename(str(ind.name))\n", + " if isinstance(ind, pd.Series):\n", + " ind = ind.vbt\n", + " ind.plot(fig=fig, add_trace_kwargs=dict(secondary_y=True, row=1, col=1), trace_kwargs=dict(line=dict(color=\"rgba(255, 0, 0, 0.4)\")))\n", + " \n", + " for indvolume in volumeinds:\n", + " # Plotting the volume on the second row\n", + " indvolume.rename(str(indvolume.name)).vbt.barplot(fig=fig, add_trace_kwargs=dict(secondary_y=False, row=2, col=1))\n", + " #vbt.Bar(indvolume, fig=fig, add_trace_kwargs=dict(secondary_y=False, row=2, col=1))\n", + " \n", + " return fig\n", + "\n", + "fig = plot_2y_close([sma], [order_imbalance.rename(\"order_imbalance_norm\"),order_imbalance_sma.real.rename(\"oib_sma\")], 
t1data.close, [t1data.data[\"BAC\"].buyvolume, t1data.data[\"BAC\"].sellvolume, t1data.volume], t1data.data[\"BAC\"])\n", + "fig.update_yaxes(range=[33,34], secondary_y=False, row=1, col=1) #update y axis range\n", + "fig.update_yaxes(range=[-1,1], secondary_y=True, row=1, col=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib inline\n", + "t0data = basic_data\n", + "t1data = basic_data[['open', 'high', 'low', 'close', 'volume','vwap']].resample(\"1T\")\n", + "t2data = basic_data[['open', 'high', 'low', 'close', 'volume','vwap']].resample(\"15T\")\n", + "t3data = basic_data[['open', 'high', 'low', 'close', 'volume','vwap']].resample(\"30T\")\n", + "t4data = basic_data[['open', 'high', 'low', 'close', 'volume', 'vwap']].resample(\"D\").dropna()\n", + "\n", + "t1data = t1data.transform(lambda df: df.between_time('09:30', '16:00').dropna())\n", + "t2data = t2data.transform(lambda df: df.between_time('09:30', '16:00').dropna())\n", + "t3data = t3data.transform(lambda df: df.between_time('09:30', '16:00').dropna())\n", + "\n", + "#30min data to daily\n", + "# t4data = t3data.resample(\"D\").dropna()\n", + "\n", + "#t4data = t4data.transform(lambda df: df.between_time('09:30', '16:00').dropna())\n", + "#m1data.data[\"SPY\"].info()\n", + "\n", + "#m1data.data[\"SPY\"].vbt.ohlcv.plot()\n", + "#h2data.data[\"SPY\"].vbt.ohlcv.plot()\n", + "#ddata.data[\"SPY\"]\n", + "t2data.data[\"BAC\"].vbt.ohlcv.plot().show()\n", + "\n", + "\n", + "#t4data.data[\"BAC\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "t2data.close\n", + "\n", + "#in df remove rows with nan\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#realign na 1T = t1data + oriznout main session\n", + "t2data_vwap = t2data.vwap.vbt.realign_closing(\"1T\").between_time('09:30', '16:00').dropna()\n", + 
"t3data_vwap = t3data.vwap.vbt.realign_closing(\"1T\").between_time('09:30', '16:00').dropna()\n", + "t4data_vwap = t4data.vwap.vbt.realign_closing(\"1T\").dropna()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "t2data_vwap" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def plot_2y_close(priminds, secinds, close):\n", + " fig = vbt.make_subplots(rows=1, cols=1, shared_xaxes=True, specs=[[{\"secondary_y\": True}]], vertical_spacing=0.02, subplot_titles=(\"MOM\", \"Price\" ))\n", + " close.vbt.plot(fig=fig, add_trace_kwargs=dict(secondary_y=False), trace_kwargs=dict(line=dict(color=\"blue\")))\n", + " for ind in priminds:\n", + " if isinstance(ind, pd.Series):\n", + " ind = ind.vbt\n", + " ind.plot(fig=fig, add_trace_kwargs=dict(secondary_y=False))\n", + " for ind in secinds:\n", + " if isinstance(ind, pd.Series):\n", + " ind = ind.vbt\n", + " ind.plot(fig=fig, add_trace_kwargs=dict(secondary_y=True))\n", + " return fig" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "t4data.clos.vbt \n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "obvind = vbt.indicator.obv.run(t1data.close, t1data.volume)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "t1_lengtgh = 15\n", + "t2_length = 15\n", + "t3_length = 15\n", + "t4_length = 5\n", + "t1_th = 0.1\n", + "t2_th = 0.1\n", + "t3_th = 0.1\n", + "t4_th = 0.1\n", + "\n", + "\n", + "\n", + "#minute\n", + "t1slope = vbt.indicator(\"talib:LINEARREG_SLOPE \").run(t1data.close, timeperiod=t1_lengtgh) # -0.09, 0.09\n", + "t2slope = vbt.indicator(\"talib:LINEARREG_SLOPE \").run(t2data.vwap, timeperiod=t2_length) # -0.08 , 0.079\n", + "t3slope = vbt.indicator(\"talib:LINEARREG_SLOPE 
\").run(t3data.vwap, timeperiod=t3_length) # -0.08, 0.08\n", + "#daily\n", + "t4slope = vbt.indicator(\"talib:LINEARREG_SLOPE \").run(t4data.vwap, timeperiod=t4_length) # -0.1, 0.09\n", + "\n", + "plot_2y_close(priminds=[], secinds=[t1slope, t2slope, t3slope, t4slope], close=t1data.close).show()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#thirtymin_slope = thirtymin_slope.real.rename(\"30min\") #timto se prejmenuje real na 30min\n", + "t3slope = t3slope.real.vbt.realign_closing(\"1T\").between_time('09:30', '16:00').dropna()\n", + "##filter daily_slope_to_compare to only monday to friday\n", + "t3slope = t3slope[t3slope.index.dayofweek < 5]\n", + "\n", + "#t3slope.info()\n", + "\n", + "t2slope = t2slope.real.vbt.realign_closing(\"1T\").between_time('09:30', '16:00').dropna()\n", + "##filter daily_slope_to_compare to only monday to friday\n", + "t2slope = t2slope[t2slope.index.dayofweek < 5]\n", + "\n", + "t2slope.info()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "oibratio" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "#\n", + "short_signals = order_imbalance.vbt < -0.3\n", + "#short_entries = oibratio.vbt < 0.01\n", + "short_signals.value_counts()\n", + "\n", + "long_signals = order_imbalance.vbt > 0.3\n", + "#entries = oibratio.vbt > 10\n", + "long_signals.value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig = vbt.make_subplots(rows=3, cols=1, shared_xaxes=True, \n", + " specs=[[{\"secondary_y\": True}], [{\"secondary_y\": True}], [{\"secondary_y\": False}]], \n", + " vertical_spacing=0.02, subplot_titles=(\"Price and Indicators\", \"Volume\"))\n", + "t1data.data[\"BAC\"].vbt.ohlcv.plot(fig=fig, add_trace_kwargs=dict(secondary_y=False, row=1, col=1))\n", + 
"#oibratio.vbt.plot(fig=fig, add_trace_kwargs=dict(secondary_y=True, row=1, col=1))\n", + "order_imbalance.vbt.plot(fig=fig, add_trace_kwargs=dict(secondary_y=True, row=1, col=1))\n", + "long_signals.vbt.signals.plot_as_entries(t1data.close, fig=fig, trace_kwargs=dict(name=\"LONGS\",\n", + " line=dict(color=\"#ffe476\"),\n", + " marker=dict(color=\"limegreen\"),\n", + " fill=None,\n", + " connectgaps=True,\n", + " ), add_trace_kwargs=dict(secondary_y=False, row=1, col=1))\n", + "\n", + "short_signals.vbt.signals.plot_as_entries(t1data.close, fig=fig, trace_kwargs=dict(name=\"SHORTS\",\n", + " line=dict(color=\"#ffe476\"),\n", + " marker=dict(color=\"red\"),\n", + " fill=None,\n", + " connectgaps=True,\n", + " ), add_trace_kwargs=dict(secondary_y=False, row=1, col=1))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# thirtymin_slope_to_compare.vbt.xloc[\"04-16-2024\"].get()\n", + "thirty_down_signal.vbt.xloc[\"04-16-2024\"].get()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#short_signal = t1slope.real_below(t1_th) & t2slope.real_below(t2_th) & t3slope.real_below(t3_th) & t4slope.real_below(t4_th)\n", + "#long_signal = t1slope.real_above(t1_th) & t2slope.real_above(t2_th) & t3slope.real_above(t3_th) & t4slope.real_above(t4_th)\n", + "\n", + "#test na daily s reversem crossed 0\n", + "short_signal = t2slope.vbt < -0.01 & t3slope.vbt < -0.01 #min value of threshold\n", + "long_signal = t2slope.vbt > 0.01 & t3slope.vbt > 0.01 #min\n", + "\n", + "# thirty_up_signal = t3slope.vbt.crossed_above(0.01)\n", + "# thirty_down_signal = t3slope.vbt.crossed_below(-0.01)\n", + "\n", + "fig = plot_2y_close(priminds=[], secinds=[t3slope], close=t1data.close)\n", + "#short_signal.vbt.signals.plot_as_entries(basic_data.close, fig=fig)\n", + "\n", + "short_signal.vbt.signals.plot_as_entries(t1data.close, fig=fig, 
trace_kwargs=dict(name=\"SHORTS\",\n", + " line=dict(color=\"#ffe476\"),\n", + " marker=dict(color=\"red\", symbol=\"triangle-down\"),\n", + " fill=None,\n", + " connectgaps=True,\n", + " ))\n", + "long_signal.vbt.signals.plot_as_entries(t1data.close, fig=fig, trace_kwargs=dict(name=\"LONGS\",\n", + " line=dict(color=\"#ffe476\"),\n", + " marker=dict(color=\"limegreen\"),\n", + " fill=None,\n", + " connectgaps=True,\n", + " ))\n", + "\n", + "# thirty_down_signal.vbt.signals.plot_as_entries(t1data.close, fig=fig, trace_kwargs=dict(name=\"DOWN30\",\n", + "# line=dict(color=\"#ffe476\"),\n", + "# marker=dict(color=\"yellow\", symbol=\"triangle-down\"),\n", + "# fill=None,\n", + "# connectgaps=True,\n", + "# ))\n", + "# thirty_up_signal.vbt.signals.plot_as_entries(t1data.close, fig=fig, trace_kwargs=dict(name=\"UP30\",\n", + "# line=dict(color=\"#ffe476\"),\n", + "# marker=dict(color=\"grey\"),\n", + "# fill=None,\n", + "# connectgaps=True,\n", + "# ))\n", + "\n", + "# thirtymin_slope_to_compare.vbt.plot(fig=fig, add_trace_kwargs=dict(secondary_y=True), trace_kwargs=dict(name=\"30min slope\",\n", + "# line=dict(color=\"yellow\"), \n", + "# fill=None,\n", + "# connectgaps=True,\n", + "# ))\n", + "\n", + "fig.show()\n", + "# print(\"short signal\")\n", + "# print(short_signal.value_counts())\n", + "\n", + "#forced_exit = pd.Series(False, index=close.index)\n", + "forced_exit = basic_data.symbol_wrapper.fill(False)\n", + "#entry_window_open = pd.Series(False, index=close.index)\n", + "entry_window_open= basic_data.symbol_wrapper.fill(False)\n", + "\n", + "# Calculate the time difference in minutes from market open for each timestamp\n", + "elapsed_min_from_open = (forced_exit.index.hour - market_open.hour) * 60 + (forced_exit.index.minute - market_open.minute)\n", + "\n", + "entry_window_open[(elapsed_min_from_open >= entry_window_opens) & (elapsed_min_from_open < entry_window_closes)] = True\n", + "\n", + "#print(entry_window_open.value_counts())\n", + "\n", + 
"forced_exit[(elapsed_min_from_open >= forced_exit_start) & (elapsed_min_from_open < forced_exit_end)] = True\n", + "short_entries = (short_signal & entry_window_open)\n", + "short_exits = forced_exit\n", + "\n", + "entries = (long_signal & entry_window_open)\n", + "exits = forced_exit\n", + "#long_entries.info()\n", + "#number of trues and falses in long_entries\n", + "# print(short_exits.value_counts())\n", + "# print(short_entries.value_counts())\n", + "\n", + "#fig = plot_2y_close([],[momshort, rocp], close)\n", + "#short_signal.vbt.signals.plot_as_entries(close, fig=fig, add_trace_kwargs=dict(secondary_y=False))\n", + "#print(sl_stop)\n", + "#short_entries=short_entries, short_exits=short_exits,\n", + "# pf = vbt.Portfolio.from_signals(close=basic_data, entries=short_entries, exits=exits, tsl_stop=0.005, tp_stop = 0.05, fees=0.0167/100, freq=\"1s\") #sl_stop=sl_stop, tp_stop = sl_stop,\n", + "\n", + "# pf.stats()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "forced_exit = t1data.symbol_wrapper.fill(False)\n", + "#entry_window_open = pd.Series(False, index=close.index)\n", + "entry_window_open= t1data.symbol_wrapper.fill(False)\n", + "\n", + "# Calculate the time difference in minutes from market open for each timestamp\n", + "elapsed_min_from_open = (forced_exit.index.hour - market_open.hour) * 60 + (forced_exit.index.minute - market_open.minute)\n", + "\n", + "entry_window_open[(elapsed_min_from_open >= entry_window_opens) & (elapsed_min_from_open < entry_window_closes)] = True\n", + "\n", + "#print(entry_window_open.value_counts())\n", + "\n", + "forced_exit[(elapsed_min_from_open >= forced_exit_start) & (elapsed_min_from_open < forced_exit_end)] = True\n", + "short_entries = (short_signals & entry_window_open)\n", + "short_exits = forced_exit\n", + "\n", + "entries = (long_signals & entry_window_open)\n", + "exits = forced_exit\n", + "\n", + "pf = 
vbt.Portfolio.from_signals(close=t1data, entries=entries, exits=exits, short_entries=short_entries, short_exits=exits,\n", + "td_stop=2, time_delta_format=\"rows\",\n", + "tsl_stop=0.005, tp_stop = 0.005, fees=0.0167/100)#, freq=\"1s\") #sl_stop=sl_stop, tp_stop = sl_stop,\n", + "\n", + "pf.stats()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf.plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf.get_drawdowns().records_readable" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf.orders.records_readable" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/research/strat_SUPERTREND/SUPERTREND_v1_MULTI.ipynb b/research/strat_SUPERTREND/SUPERTREND_v1_MULTI.ipynb new file mode 100644 index 0000000..99c2982 --- /dev/null +++ b/research/strat_SUPERTREND/SUPERTREND_v1_MULTI.ipynb @@ -0,0 +1,932 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from v2realbot.tools.loadbatch import load_batch\n", + "from v2realbot.utils.utils import zoneNY\n", + "import pandas as pd\n", + "import numpy as np\n", + "import vectorbtpro as vbt\n", + "from itables import init_notebook_mode, show\n", + "import datetime\n", + "from itertools import product\n", + "from v2realbot.config import ACCOUNT1_PAPER_API_KEY, ACCOUNT1_PAPER_SECRET_KEY, DATA_DIR\n", + "\n", + "init_notebook_mode(all_interactive=True)\n", + "\n", + 
"vbt.settings.set_theme(\"dark\")\n", + "vbt.settings['plotting']['layout']['width'] = 1280\n", + "vbt.settings.plotting.auto_rangebreaks = True\n", + "# Set the option to display with pagination\n", + "pd.set_option('display.notebook_repr_html', True)\n", + "pd.set_option('display.max_rows', 10) # Number of rows per page\n", + "\n", + "# Define the market open and close times\n", + "market_open = datetime.time(9, 30)\n", + "market_close = datetime.time(16, 0)\n", + "entry_window_opens = 1\n", + "entry_window_closes = 370\n", + "\n", + "forced_exit_start = 380\n", + "forced_exit_end = 390\n", + "\n", + "#LOAD FROM PARQUET\n", + "#list all files is dir directory with parquet extension\n", + "dir = DATA_DIR + \"/notebooks/\"\n", + "import os\n", + "files = [f for f in os.listdir(dir) if f.endswith(\".parquet\")]\n", + "#print('\\n'.join(map(str, files)))\n", + "file_name = \"ohlcv_df-SPY-2024-01-01T09:30:00-2024-05-14T16:00:00.parquet\"\n", + "ohlcv_df = pd.read_parquet(dir+file_name,engine='pyarrow')\n", + "basic_data = vbt.Data.from_data(vbt.symbol_dict({\"SPY\": ohlcv_df}), tz_convert=zoneNY)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#parameters (primary y line, secondary y line, close)\n", + "def plot_2y_close(priminds, secinds, close):\n", + " fig = vbt.make_subplots(rows=1, cols=1, shared_xaxes=True, specs=[[{\"secondary_y\": True}]], vertical_spacing=0.02, subplot_titles=(\"MOM\", \"Price\" ))\n", + " close.vbt.plot(fig=fig, add_trace_kwargs=dict(secondary_y=False), trace_kwargs=dict(line=dict(color=\"blue\")))\n", + " for ind in priminds:\n", + " ind.plot(fig=fig, add_trace_kwargs=dict(secondary_y=False))\n", + " for ind in secinds:\n", + " ind.plot(fig=fig, add_trace_kwargs=dict(secondary_y=True))\n", + " return fig\n", + "\n", + "# close = basic_data.xloc[\"09:30\":\"10:00\"].close" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + 
"source": [ + "#PIPELINE - FOR - LOOP\n", + "\n", + "#indicator parameters\n", + "mom_timeperiod = list(range(2, 12))\n", + "\n", + "#uzavreni okna od 1 do 200\n", + "#entry_window_closes = list(range(2, 50, 3))\n", + "entry_window_closes = [5, 10, 30, 45]\n", + "#entry_window_closes = 30\n", + "#threshold entries parameters\n", + "#long\n", + "mom_th = np.round(np.arange(0.01, 0.5 + 0.02, 0.02),4).tolist()#-0.02\n", + "# short\n", + "#mom_th = np.round(np.arange(-0.01, -0.3 - 0.02, -0.02),4).tolist()#-0.02\n", + "roc_th = np.round(np.arange(-0.2, -0.8 - 0.05, -0.05),4).tolist()#-0.2\n", + "#print(mom_th, roc_th)\n", + "\n", + "#portfolio simulation parameters\n", + "sl_stop =np.round(np.arange(0.02/100, 0.7/100, 0.05/100),4).tolist()\n", + "tp_stop = np.round(np.arange(0.02/100, 0.7/100, 0.05/100),4).tolist()\n", + "\n", + "combs = list(product(mom_timeperiod, mom_th, roc_th, sl_stop, tp_stop))\n", + "\n", + "@vbt.parameterized(merge_func = \"concat\", random_subset = 2000, show_progress=True) \n", + "def test_strat(entry_window_closes=60,\n", + " mom_timeperiod=2,\n", + " mom_th=-0.04,\n", + " #roc_th=-0.2,\n", + " sl_stop=0.19/100,\n", + " tp_stop=0.19/100):\n", + " # mom_timeperiod=2\n", + " # mom_th=-0.06\n", + " # roc_th=-0.2\n", + " # sl_stop=0.04/100\n", + " # tp_stop=0.04/100\n", + "\n", + " momshort = vbt.indicator(\"talib:MOM\").run(basic_data.close, timeperiod=mom_timeperiod, short_name = \"slope_short\")\n", + " rocp = vbt.indicator(\"talib:ROC\").run(basic_data.close, short_name = \"rocp\")\n", + " #rate of change + momentum\n", + "\n", + " #momshort.plot rocp.real_crossed_below(roc_th) & \n", + " #short_signal = momshort.real_crossed_below(mom_th)\n", + " long_signal = momshort.real_crossed_above(mom_th)\n", + " # print(\"short signal\")\n", + " # print(short_signal.value_counts())\n", + "\n", + " #forced_exit = pd.Series(False, index=close.index)\n", + " forced_exit = basic_data.symbol_wrapper.fill(False)\n", + " #entry_window_open = 
pd.Series(False, index=close.index)\n", + " entry_window_open= basic_data.symbol_wrapper.fill(False)\n", + "\n", + " #print(entry_window_closes, \"entry window closes\")\n", + " # Calculate the time difference in minutes from market open for each timestamp\n", + " elapsed_min_from_open = (forced_exit.index.hour - market_open.hour) * 60 + (forced_exit.index.minute - market_open.minute)\n", + "\n", + " entry_window_open[(elapsed_min_from_open >= entry_window_opens) & (elapsed_min_from_open < entry_window_closes)] = True\n", + "\n", + " #print(entry_window_open.value_counts())\n", + "\n", + " forced_exit[(elapsed_min_from_open >= forced_exit_start) & (elapsed_min_from_open < forced_exit_end)] = True\n", + " #short_entries = (short_signal & entry_window_open)\n", + " #short_exits = forced_exit\n", + " entries = (long_signal & entry_window_open)\n", + " exits = forced_exit\n", + " #long_entries.info()\n", + " #number of trues and falses in long_entries\n", + " #print(short_exits.value_counts())\n", + " #print(short_entries.value_counts())\n", + "\n", + " #fig = plot_2y_close([],[momshort, rocp], close)\n", + " #short_signal.vbt.signals.plot_as_entries(close, fig=fig, add_trace_kwargs=dict(secondary_y=False))\n", + " #print(sl_stop)\n", + " #tsl_th=sl_stop, \n", + " #short_entries=short_entries, short_exits=short_exits,\n", + " pf = vbt.Portfolio.from_signals(close=basic_data.close, entries=entries, exits=exits, tsl_stop=sl_stop, tp_stop = tp_stop, fees=0.0167/100, freq=\"1s\", price=\"close\") #sl_stop=sl_stop, tp_stop = sl_stop,\n", + " \n", + " return pf.stats([\n", + " 'total_return',\n", + " 'max_dd', \n", + " 'total_trades', \n", + " 'win_rate', \n", + " 'expectancy'\n", + " ])\n", + "\n", + "pf_results = test_strat(vbt.Param(entry_window_closes),\n", + " vbt.Param(mom_timeperiod),\n", + " vbt.Param(mom_th),\n", + " #vbt.Param(roc_th)\n", + " vbt.Param(sl_stop),\n", + " vbt.Param(tp_stop, condition=\"tp_stop > sl_stop\"))\n", + "pf_results = 
pf_results.unstack(level=-1)\n", + "pf_results.sort_values(by=[\"Total Return [%]\", \"Max Drawdown [%]\"], ascending=[False, True])\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#pf_results.load(\"10tiscomb.pickle\")\n", + "#pf_results.info()\n", + "\n", + "vbt.save(pf_results, \"8tiscomb_tsl.pickle\")\n", + "\n", + "# pf_results = vbt.load(\"8tiscomb_tsl.pickle\")\n", + "# pf_results\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# parallel_coordinates method¶\n", + "\n", + "# attach_px_methods..plot_func(\n", + "# *args,\n", + "# layout=None,\n", + "# **kwargs\n", + "# )\n", + "\n", + "# pf_results.vbt.px.parallel_coordinates() #ocdf\n", + "\n", + "res = pf_results.reset_index()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf_results" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "from sklearn.decomposition import PCA\n", + "from sklearn.preprocessing import StandardScaler\n", + "import matplotlib.pyplot as plt\n", + "\n", + "# Assuming pf_results is your DataFrame\n", + "# Convert columns to numeric, assuming NaNs where conversion fails\n", + "metrics = ['Total Return [%]', 'Max Drawdown [%]', 'Total Trades']\n", + "for metric in metrics:\n", + " pf_results[metric] = pd.to_numeric(pf_results[metric], errors='coerce')\n", + "\n", + "# Handle missing values, for example filling with the median\n", + "pf_results['Max Drawdown [%]'].fillna(pf_results['Max Drawdown [%]'].median(), inplace=True)\n", + "\n", + "# Extract the metrics into a new DataFrame\n", + "data_for_pca = pf_results[metrics]\n", + "\n", + "# Standardize the data before applying PCA\n", + 
"scaler = StandardScaler()\n", + "data_scaled = scaler.fit_transform(data_for_pca)\n", + "\n", + "# Apply PCA\n", + "pca = PCA(n_components=2) # Adjust components as needed\n", + "principal_components = pca.fit_transform(data_scaled)\n", + "\n", + "# Create a DataFrame with the principal components\n", + "pca_results = pd.DataFrame(data=principal_components, columns=['PC1', 'PC2'])\n", + "\n", + "# Visualize the results\n", + "plt.figure(figsize=(8,6))\n", + "plt.scatter(pca_results['PC1'], pca_results['PC2'], alpha=0.5)\n", + "plt.xlabel('Principal Component 1')\n", + "plt.ylabel('Principal Component 2')\n", + "plt.title('PCA of Strategy Optimization Results')\n", + "plt.grid(True)\n", + "plt.savefig(\"ddd.png\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Check if there is any unnamed level and rename it\n", + "if None in df.index.names:\n", + " # Generate new names list replacing None with 'stat'\n", + " new_names = ['stat' if name is None else name for name in df.index.names]\n", + " df.index.set_names(new_names, inplace=True)\n", + "\n", + "rs= df\n", + "\n", + "rs.info()\n", + "\n", + "\n", + "# # Now, 'stat' is the name of the previously unnamed level\n", + "\n", + "# # Filter for 'Total Return' assuming it is a correct identifier in the 'stat' level\n", + "# total_return_series = df.xs('Total Return [%]', level='stat')\n", + "\n", + "# # Sort the Series to get the largest 'Total Return' values\n", + "# sorted_series = total_return_series.sort_values(ascending=False)\n", + "\n", + "# # Print the sorted filtered data\n", + "# sorted_series.head(20)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sorted_series.vbt.save()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#df.info()\n", + "total_return_series = df.xs('Total Return [%]')\n", + 
"sorted_series = total_return_series.sort_values(ascending=False)\n", + "\n", + "# Display the top N entries, e.g., top 5\n", + "sorted_series.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "comb_stats_df.nlargest(10, 'Total Return [%]')\n", + "#stats_df.info()\n", + "\n", + "\n", + "8\t-0.06\t-0.2\t0.0028\t0.0048\t4.156254\n", + "4 -0.02 -0.25 0.0028 0.0048 0.84433\n", + "3 -0.02 -0.25 0.0033 0.0023 Total Return [%] 0.846753\n", + "#2\t-0.04\t-0.2\t0.0019\t0.0019\n", + "# 2\t-0.04\t-0.2\t0.0019\t0.0019\t0.556919\t91\t60.43956\t0.00612\n", + "# 2\t-0.04\t-0.25\t0.0019\t0.0019\t0.556919\t91\t60.43956\t0.00612\n", + "# 2\t-0.04\t-0.3\t0.0019\t0.0019\t0.556919\t91\t60.43956\t0.00612\n", + "# 2\t-0.04\t-0.35\t0.0019\t0.0019\t0.556919\t91\t60.43956\t0.00612\n", + "# 2\t-0.04\t-0.4\t0.0019\t0.0019\t0.556919\t91\t60.43956\t0.00612\n", + "# 2\t-0.04\t-0.2\t0.0019\t0.0017\t0.451338\t93\t63.44086\t0.004853\n", + "# 2\t-0.04\t-0.25\t0.0019\t0.0017\t0.451338\t93\t63.44086\t0.004853\n", + "# 2\t-0.04\t-0.3\t0.0019\t0.0017\t0.451338\t93\t63.44086\t0.004853\n", + "# 2\t-0.04\t-0.35\t0.0019\t0.0017\t0.451338\t93\t63.44086\t0.004853\n", + "# 2\t-0.04\t-0.4\t0.0019\t0.0017\t0.451338\t93\t63.44086\t0.004853" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf.plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "basic_data.symbols" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + ">>> def apply_func(ts, entries, exits, fastw, sloww, minp=None):\n", + "... fast_ma = vbt.nb.rolling_mean_nb(ts, fastw, minp=minp)\n", + "... slow_ma = vbt.nb.rolling_mean_nb(ts, sloww, minp=minp)\n", + "... 
entries[:] = vbt.nb.crossed_above_nb(fast_ma, slow_ma) \n", + "... exits[:] = vbt.nb.crossed_above_nb(slow_ma, fast_ma)\n", + "... return (fast_ma, slow_ma) \n", + "\n", + ">>> CrossSig = vbt.IF(\n", + "... class_name=\"CrossSig\",\n", + "... input_names=['ts'],\n", + "... in_output_names=['entries', 'exits'],\n", + "... param_names=['fastw', 'sloww'],\n", + "... output_names=['fast_ma', 'slow_ma']\n", + "... ).with_apply_func(\n", + "... apply_func,\n", + "... in_output_settings=dict(\n", + "... entries=dict(dtype=np.bool_), #initialize output with bool\n", + "... exits=dict(dtype=np.bool_)\n", + "... )\n", + "... )\n", + ">>> cross_sig = CrossSig.run(ts2, 2, 4)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#PIPELINE - parameters in one go\n", + "\n", + "\n", + "#TOTO prepsat do FOR-LOOPu\n", + "\n", + "\n", + "#indicator parameters\n", + "mom_timeperiod = list(range(2, 6))\n", + "\n", + "#threshold entries parameters\n", + "mom_th = np.round(np.arange(-0.02, -0.1 - 0.02, -0.02),4).tolist()#-0.02\n", + "roc_th = np.round(np.arange(-0.2, -0.4 - 0.05, -0.05),4).tolist()#-0.2\n", + "#print(mom_th, roc_th)\n", + "#jejich product\n", + "# mom_th_prod, roc_th_prod = zip(*product(mom_th, roc_th))\n", + "\n", + "# #convert threshold to vbt param\n", + "# mom_th_index = vbt.Param(mom_th_prod, name='mom_th_th') \n", + "# roc_th_index = vbt.Param(roc_th_prod, name='roc_th_th')\n", + "\n", + "mom_th = vbt.Param(mom_th, name='mom_th')\n", + "roc_th = vbt.Param(roc_th, name='roc_th')\n", + "\n", + "#portfolio simulation parameters\n", + "sl_stop = np.arange(0.03/100, 0.2/100, 0.02/100).tolist()\n", + "# Using the round function\n", + "sl_stop = [round(val, 4) for val in sl_stop]\n", + "tp_stop = np.arange(0.03/100, 0.2/100, 0.02/100).tolist()\n", + "# Using the round function\n", + "tp_stop = [round(val, 4) for val in tp_stop]\n", + "sl_stop = vbt.Param(sl_stop) #np.nan mean s no stoploss\n", + "tp_stop = 
vbt.Param(tp_stop) #np.nan mean s no stoploss\n", + "\n", + "\n", + "#def test_mom(window=14, mom_th=0.2, roc_th=0.2, sl_stop=0.03/100, tp_stop=0.03/100):\n", + "#close = basic_data.xloc[\"09:30\":\"10:00\"].close\n", + "momshort = vbt.indicator(\"talib:MOM\").run(basic_data.get(\"Close\"), timeperiod=mom_timeperiod, short_name = \"slope_short\")\n", + "\n", + "#ht_trendline = vbt.indicator(\"talib:HT_TRENDLINE\").run(close, short_name = \"httrendline\")\n", + "rocp = vbt.indicator(\"talib:ROC\").run(basic_data.get(\"Close\"), short_name = \"rocp\")\n", + "#rate of change + momentum\n", + "\n", + "rocp_signal = rocp.real_crossed_below(mom_th)\n", + "mom_signal = momshort.real_crossed_below(roc_th)\n", + "\n", + "#mom_signal\n", + "print(rocp_signal.info())\n", + "print(mom_signal.info())\n", + "#print(rocp.real)\n", + "\n", + "\n", + "short_signal = (mom_signal.vbt & rocp_signal)\n", + "\n", + "# #short_signal = (rocp.real_crossed_below(roc_th_index) & momshort.real_crossed_below(mom_th_index))\n", + "# forced_exit = m1_data.symbol_wrapper.fill(False)\n", + "# entry_window_open= m1_data.symbol_wrapper.fill(False)\n", + "\n", + "\n", + "# # Calculate the time difference in minutes from market open for each timestamp\n", + "# elapsed_min_from_open = (forced_exit.index.hour - market_open.hour) * 60 + (forced_exit.index.minute - market_open.minute)\n", + "\n", + "# entry_window_open[(elapsed_min_from_open >= entry_window_opens) & (elapsed_min_from_open < entry_window_closes)] = True\n", + "# forced_exit[(elapsed_min_from_open >= forced_exit_start) & (elapsed_min_from_open < forced_exit_end)] = True\n", + "# short_entries = (short_signal & entry_window_open)\n", + "# short_exits = forced_exit\n", + "# #long_entries.info()\n", + "# #number of trues and falses in long_entries\n", + "# #short_exits.value_counts()\n", + "# #short_entries.value_counts()\n", + "\n", + "\n", + "# pf = vbt.Portfolio.from_signals(close=close, short_entries=short_entries, short_exits=short_exits, 
sl_stop=sl_stop, tp_stop = tp_stop, fees=0.0167/100, freq=\"1s\") #sl_stop=sl_stop, tp_stop = sl_stop,\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# filter dates" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#filter na dny\n", + "dates_of_interest = pd.to_datetime(['2024-04-22']).tz_localize('US/Eastern')\n", + "filtered_df = df.loc[df.index.normalize().isin(dates_of_interest)]\n", + "\n", + "df = filtered_df\n", + "df.info()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# import plotly.io as pio\n", + "# pio.renderers.default = 'notebook'\n", + "\n", + "#naloadujeme do vbt symbol as column\n", + "basic_data = vbt.Data.from_data({\"BAC\": df}, tz_convert=zoneNY)\n", + "\n", + "vbt.settings.plotting.auto_rangebreaks = True\n", + "#basic_data.data[\"BAC\"].vbt.ohlcv.plot()\n", + "\n", + "#basic_data.data[\"BAC\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "m1_data = basic_data[['Open', 'High', 'Low', 'Close', 'Volume']]\n", + "\n", + "m1_data.data[\"BAC\"]\n", + "#m5_data = m1_data.resample(\"5T\")\n", + "\n", + "#m5_data.data[\"BAC\"].head(10)\n", + "\n", + "# m15_data = m1_data.resample(\"15T\")\n", + "\n", + "# m15 = m15_data.data[\"BAC\"]\n", + "\n", + "# m15.vbt.ohlcv.plot()\n", + "\n", + "# m1_data.wrapper.index\n", + "\n", + "# m1_resampler = m1_data.wrapper.get_resampler(\"1T\")\n", + "# m1_resampler.index_difference(reverse=True)\n", + "\n", + "\n", + "# m5_resampler.prettify()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# MOM indicator" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vbt.phelp(vbt.indicator(\"talib:ROCP\").run)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "vyuzití 
rychleho klesani na sekundove urovni behem open rush\n", + "- MOM + ROC during open rush\n", + "- short signal\n", + "- pipeline kombinace thresholdu pro vstup mom_th, roc_th + hodnota sl_stop a tp_stop (pripadne trailing) - nalezeni optimalni kombinace atributu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# fig = plot_2y_close([ht_trendline],[momshort, rocp], close)\n", + "# short_signal.vbt.signals.plot_as_entries(close, fig=fig, add_trace_kwargs=dict(secondary_y=False)) \n", + "\n", + "#parameters (primary y line, secondary y line, close)\n", + "def plot_2y_close(priminds, secinds, close):\n", + " fig = vbt.make_subplots(rows=1, cols=1, shared_xaxes=True, specs=[[{\"secondary_y\": True}]], vertical_spacing=0.02, subplot_titles=(\"MOM\", \"Price\" ))\n", + " close.vbt.plot(fig=fig, add_trace_kwargs=dict(secondary_y=False), trace_kwargs=dict(line=dict(color=\"blue\")))\n", + " for ind in priminds:\n", + " ind.plot(fig=fig, add_trace_kwargs=dict(secondary_y=False))\n", + " for ind in secinds:\n", + " ind.plot(fig=fig, add_trace_kwargs=dict(secondary_y=True))\n", + " return fig\n", + "\n", + "close = m1_data.xloc[\"09:30\":\"10:00\"].close\n", + "momshort = vbt.indicator(\"talib:MOM\").run(close, timeperiod=3, short_name = \"slope_short\")\n", + "ht_trendline = vbt.indicator(\"talib:HT_TRENDLINE\").run(close, short_name = \"httrendline\")\n", + "rocp = vbt.indicator(\"talib:ROC\").run(close, short_name = \"rocp\")\n", + "#rate of change + momentum\n", + "short_signal = (rocp.real_crossed_below(-0.2) & momshort.real_crossed_below(-0.02))\n", + "#indlong = vbt.indicator(\"talib:MOM\").run(close, timeperiod=10, short_name = \"slope_long\")\n", + "fig = plot_2y_close([ht_trendline],[momshort, rocp], close)\n", + "short_signal.vbt.signals.plot_as_entries(close, fig=fig, add_trace_kwargs=dict(secondary_y=False)) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + 
"outputs": [], + "source": [ + "close = m1_data.close\n", + "#vbt.phelp(vbt.OLS.run)\n", + "\n", + "#oer steepmnes of regression line\n", + "#talib.LINEARREG_SLOPE(close, timeperiod=timeperiod)\n", + "#a také ON BALANCE VOLUME - http://5.161.179.223:8000/static/js/vbt/api/indicators/custom/obv/index.html\n", + "\n", + "\n", + "\n", + "mom_ind = vbt.indicator(\"talib:MOM\") \n", + "#vbt.phelp(mom_ind.run)\n", + "\n", + "mom = mom_ind.run(close, timeperiod=10)\n", + "\n", + "plot_2y_close(mom, close)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# defining ENTRY WINDOW and forced EXIT window" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#m1_data.data[\"BAC\"].info()\n", + "import datetime\n", + "# Define the market open and close times\n", + "market_open = datetime.time(9, 30)\n", + "market_close = datetime.time(16, 0)\n", + "entry_window_opens = 2\n", + "entry_window_closes = 30\n", + "\n", + "forced_exit_start = 380\n", + "forced_exit_end = 390\n", + "\n", + "forced_exit = m1_data.symbol_wrapper.fill(False)\n", + "entry_window_open= m1_data.symbol_wrapper.fill(False)\n", + "\n", + "# Calculate the time difference in minutes from market open for each timestamp\n", + "elapsed_min_from_open = (forced_exit.index.hour - market_open.hour) * 60 + (forced_exit.index.minute - market_open.minute)\n", + "\n", + "entry_window_open[(elapsed_min_from_open >= entry_window_opens) & (elapsed_min_from_open < entry_window_closes)] = True\n", + "forced_exit[(elapsed_min_from_open >= forced_exit_start) & (elapsed_min_from_open < forced_exit_end)] = True\n", + "\n", + "#entry_window_open.info()\n", + "# forced_exit.tail(100)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "close = m1_data.close\n", + "\n", + "#rsi = vbt.RSI.run(close, window=14)\n", + "\n", + "short_entries = (short_signal & entry_window_open)\n", + 
"short_exits = forced_exit\n", + "#long_entries.info()\n", + "#number of trues and falses in long_entries\n", + "#short_exits.value_counts()\n", + "short_entries.value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def plot_rsi(close, entries, exits):\n", + " fig = vbt.make_subplots(rows=1, cols=1, shared_xaxes=True, specs=[[{\"secondary_y\": True}]], vertical_spacing=0.02, subplot_titles=(\"RSI\", \"Price\" ))\n", + " close.vbt.plot(fig=fig, add_trace_kwargs=dict(secondary_y=True))\n", + " #rsi.plot(fig=fig, add_trace_kwargs=dict(secondary_y=False))\n", + " entries.vbt.signals.plot_as_entries(close, fig=fig, add_trace_kwargs=dict(secondary_y=False)) \n", + " exits.vbt.signals.plot_as_exits(close, fig=fig, add_trace_kwargs=dict(secondary_y=False)) \n", + " return fig\n", + "\n", + "plot_rsi(close, short_entries, short_exits)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vbt.phelp(vbt.Portfolio.from_signals)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sl_stop = np.arange(0.03/100, 0.2/100, 0.02/100).tolist()\n", + "# Using the round function\n", + "sl_stop = [round(val, 4) for val in sl_stop]\n", + "print(sl_stop)\n", + "sl_stop = vbt.Param(sl_stop) #np.nan mean s no stoploss\n", + "\n", + "pf = vbt.Portfolio.from_signals(close=close, short_entries=short_entries, short_exits=short_exits, sl_stop=0.03/100, tp_stop = 0.03/100, fees=0.0167/100, freq=\"1s\") #sl_stop=sl_stop, tp_stop = sl_stop,\n", + "\n", + "#pf.stats()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#list of orders\n", + "#pf.orders.records_readable\n", + "#pf.orders.plots()\n", + "#pf.stats()\n", + "pf.stats()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + 
"source": [ + "pf.plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf[(0.0015,0.0013)].plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf[0.03].plot_trade_signals()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# pristup k pf jako multi index" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#pf[0.03].plot()\n", + "#pf.order_records\n", + "pf[(0.03)].stats()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#zgrupovane statistiky\n", + "stats_df = pf.stats([\n", + " 'total_return',\n", + " 'total_trades',\n", + " 'win_rate',\n", + " 'expectancy'\n", + "], agg_func=None)\n", + "stats_df\n", + "\n", + "\n", + "stats_df.nlargest(10, 'Total Return [%]')\n", + "#stats_df.info()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf[(0.0011,0.0013000000000000002)].plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pandas.tseries.offsets import DateOffset\n", + "\n", + "temp_data = basic_data['2024-4-22']\n", + "temp_data\n", + "res1m = temp_data[[\"Open\", \"High\", \"Low\", \"Close\", \"Volume\"]]\n", + "\n", + "# Define a custom date offset that starts at 9:30 AM and spans 4 hours\n", + "custom_offset = DateOffset(hours=4, minutes=30)\n", + "\n", + "# res1m = res1m.get().resample(\"4H\").agg({ \n", + "# \"Open\": \"first\",\n", + "# \"High\": \"max\",\n", + "# \"Low\": \"min\",\n", + "# \"Close\": \"last\",\n", + "# \"Volume\": \"sum\"\n", + "# })\n", + "\n", + "res4h = res1m.resample(\"1h\", resample_kwargs=dict(origin=\"start\"))\n", + "\n", + "res4h.data\n", + "\n", + "res15m = res1m.resample(\"15T\", 
resample_kwargs=dict(origin=\"start\"))\n", + "\n", + "res15m.data[\"BAC\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "@vbt.njit\n", + "def long_entry_place_func_nb(c, low, close, time_in_ns, rsi14, window_open, window_close):\n", + " market_open_minutes = 570 # 9 hours * 60 minutes + 30 minutes\n", + "\n", + " for out_i in range(len(c.out)):\n", + " i = c.from_i + out_i\n", + "\n", + " current_minutes = vbt.dt_nb.hour_nb(time_in_ns[i]) * 60 + vbt.dt_nb.minute_nb(time_in_ns[i])\n", + " #print(\"current_minutes\", current_minutes)\n", + " # Calculate elapsed minutes since market open at 9:30 AM\n", + " elapsed_from_open = current_minutes - market_open_minutes\n", + " elapsed_from_open = elapsed_from_open if elapsed_from_open >= 0 else 0\n", + " #print( \"elapsed_from_open\", elapsed_from_open)\n", + "\n", + " #elapsed_from_open = elapsed_minutes_from_open_nb(time_in_ns) \n", + " in_window = elapsed_from_open > window_open and elapsed_from_open < window_close\n", + " #print(\"in_window\", in_window)\n", + " # if in_window:\n", + " # print(\"in window\")\n", + "\n", + " if in_window and rsi14[i] > 60: # and low[i, c.col] <= hit_price: # and hour == 9: # (4)!\n", + " return out_i\n", + " return -1\n", + "\n", + "@vbt.njit\n", + "def long_exit_place_func_nb(c, high, close, time_index, tp, sl): # (5)!\n", + " entry_i = c.from_i - c.wait\n", + " entry_price = close[entry_i, c.col]\n", + " hit_price = entry_price * (1 + tp)\n", + " stop_price = entry_price * (1 - sl)\n", + " for out_i in range(len(c.out)):\n", + " i = c.from_i + out_i\n", + " last_bar_of_day = vbt.dt_nb.day_changed_nb(time_index[i], time_index[i + 1])\n", + "\n", + " #print(next_day)\n", + " if last_bar_of_day: #pokud je dalsi next day, tak zavirame posledni\n", + " print(\"ted\",out_i)\n", + " return out_i\n", + " if close[i, c.col] >= hit_price or close[i, c.col] <= stop_price :\n", + " return out_i\n", + " return -1\n", + "\n", + 
"\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.DataFrame(np.random.random(size=(5, 10)), columns=list('abcdefghij'))\n", + "\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df.sum()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/research/strat_SUPERTREND/SUPERTREND_v1_SINGLE.ipynb b/research/strat_SUPERTREND/SUPERTREND_v1_SINGLE.ipynb new file mode 100644 index 0000000..9eac6c3 --- /dev/null +++ b/research/strat_SUPERTREND/SUPERTREND_v1_SINGLE.ipynb @@ -0,0 +1,679 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# SUPERTREND\n", + "\n", + "* kombinace supertrendu na vice urovnich" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from dotenv import load_dotenv\n", + "\n", + "#as V2realbot is client , load env variables here\n", + "env_file = \"/Users/davidbrazda/Documents/Development/python/.env\"\n", + "# Load the .env file\n", + "load_dotenv(env_file)\n", + "\n", + "from v2realbot.utils.utils import zoneNY\n", + "import pandas as pd\n", + "import numpy as np\n", + "import vectorbtpro as vbt\n", + "# from itables import init_notebook_mode, show\n", + "import datetime\n", + "from itertools import product\n", + "from v2realbot.config import DATA_DIR\n", + "from lightweight_charts import JupyterChart, chart, Panel, PlotAccessor\n", + "from IPython.display import display\n", + "\n", + "# 
init_notebook_mode(all_interactive=True)\n", + "\n", + "vbt.settings.set_theme(\"dark\")\n", + "vbt.settings['plotting']['layout']['width'] = 1280\n", + "vbt.settings.plotting.auto_rangebreaks = True\n", + "# Set the option to display with pagination\n", + "pd.set_option('display.notebook_repr_html', True)\n", + "pd.set_option('display.max_rows', 10) # Number of rows per page" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Define the market open and close times\n", + "market_open = datetime.time(9, 30)\n", + "market_close = datetime.time(16, 0)\n", + "entry_window_opens = 1\n", + "entry_window_closes = 370\n", + "forced_exit_start = 380\n", + "forced_exit_end = 390\n", + "\n", + "#LOAD FROM PARQUET\n", + "#list all files is dir directory with parquet extension\n", + "dir = DATA_DIR + \"/notebooks/\"\n", + "import os\n", + "files = [f for f in os.listdir(dir) if f.endswith(\".parquet\")]\n", + "print('\\n'.join(map(str, files)))\n", + "file_name = \"ohlcv_df-BAC-2023-01-01T09_30_00-2024-05-25T15_30_00-47BCFOPUVWZ-100.parquet\"\n", + "ohlcv_df = pd.read_parquet(dir+file_name,engine='pyarrow')\n", + "#filter ohlcv_df to certain date range (assuming datetime index)\n", + "#ohlcv_df = ohlcv_df.loc[\"2024-02-12 9:30\":\"2024-02-14 16:00\"]\n", + "\n", + "#add vwap column to ohlcv_df\n", + "#ohlcv_df[\"hlcc4\"] = (ohlcv_df[\"close\"] + ohlcv_df[\"high\"] + ohlcv_df[\"low\"] + ohlcv_df[\"close\"]) / 4\n", + "\n", + "basic_data = vbt.Data.from_data(vbt.symbol_dict({\"BAC\": ohlcv_df}), tz_convert=zoneNY)\n", + "ohlcv_df= None\n", + "basic_data.wrapper.index.normalize().nunique()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "basic_data.data[\"BAC\"].info()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Add resample function to custom columns" + ] + }, + { + "cell_type": "code", + "execution_count": null, + 
"metadata": {}, + "outputs": [], + "source": [ + "from vectorbtpro.utils.config import merge_dicts, Config, HybridConfig\n", + "from vectorbtpro import _typing as tp\n", + "from vectorbtpro.generic import nb as generic_nb\n", + "\n", + "_feature_config: tp.ClassVar[Config] = HybridConfig(\n", + " {\n", + " \"buyvolume\": dict(\n", + " resample_func=lambda self, obj, resampler: obj.vbt.resample_apply(\n", + " resampler,\n", + " generic_nb.sum_reduce_nb,\n", + " )\n", + " ),\n", + " \"sellvolume\": dict(\n", + " resample_func=lambda self, obj, resampler: obj.vbt.resample_apply(\n", + " resampler,\n", + " generic_nb.sum_reduce_nb,\n", + " )\n", + " ),\n", + " \"trades\": dict(\n", + " resample_func=lambda self, obj, resampler: obj.vbt.resample_apply(\n", + " resampler,\n", + " generic_nb.sum_reduce_nb,\n", + " )\n", + " )\n", + " }\n", + ")\n", + "\n", + "basic_data._feature_config = _feature_config" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "s1data = basic_data[['open', 'high', 'low', 'close', 'volume','vwap','buyvolume','trades','sellvolume']]\n", + "\n", + "s5data = s1data.resample(\"5s\")\n", + "s5data = s5data.transform(lambda df: df.between_time('09:30', '16:00').dropna())\n", + "\n", + "t1data = basic_data[['open', 'high', 'low', 'close', 'volume','vwap','buyvolume','trades','sellvolume']].resample(\"1T\")\n", + "t1data = t1data.transform(lambda df: df.between_time('09:30', '16:00').dropna())\n", + "# t1data.data[\"BAC\"].info()\n", + "\n", + "t30data = basic_data[['open', 'high', 'low', 'close', 'volume','vwap','buyvolume','trades','sellvolume']].resample(\"30T\")\n", + "t30data = t30data.transform(lambda df: df.between_time('09:30', '16:00').dropna())\n", + "# t30data.data[\"BAC\"].info()\n", + "\n", + "s1close = s1data.close\n", + "t1close = t1data.close\n", + "t30close = t30data.close\n", + "t30volume = t30data.volume\n", + "\n", + "#resample on specific index \n", + "resampler = 
vbt.Resampler(t30data.index, s1data.index, source_freq=\"30T\", target_freq=\"1s\")\n", + "t30close_realigned = t30close.vbt.realign_closing(resampler)\n", + "\n", + "#resample 1min to s\n", + "resampler_s = vbt.Resampler(t1data.index, s1data.index, source_freq=\"1T\", target_freq=\"1s\")\n", + "t1close_realigned = t1close.vbt.realign_closing(resampler_s)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vbt.IF.list_indicators(\"*vwap\")\n", + "vbt.phelp(vbt.VWAP.run)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# VWAP" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "t1vwap_h = vbt.VWAP.run(t1data.high, t1data.low, t1data.close, t1data.volume, anchor=\"H\")\n", + "t1vwap_d = vbt.VWAP.run(t1data.high, t1data.low, t1data.close, t1data.volume, anchor=\"D\")\n", + "t1vwap_t = vbt.VWAP.run(t1data.high, t1data.low, t1data.close, t1data.volume, anchor=\"T\")\n", + "\n", + "t1vwap_h_real = t1vwap_h.vwap.vbt.realign_closing(resampler_s)\n", + "t1vwap_d_real = t1vwap_d.vwap.vbt.realign_closing(resampler_s)\n", + "t1vwap_t_real = t1vwap_t.vwap.vbt.realign_closing(resampler_s)\n", + "\n", + "#t1vwap_5t.xloc[\"2024-01-3 09:30:00\":\"2024-01-03 16:00:00\"].plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#m30data.close.lw.plot()\n", + "#quick few liner\n", + "pane1 = Panel(\n", + " histogram=[\n", + " #(s1data.volume, \"volume\",None, 0.8),\n", + " #(m30volume, \"m30volume\",None, 1)\n", + " ], # [(series, name, \"rgba(53, 94, 59, 0.6)\", opacity)]\n", + " right=[\n", + " (s1data.close, \"1s close\"),\n", + " (t1data.close, \"1min close\"),\n", + " (t1vwap_t, \"1mvwap_t\"),\n", + " (t1vwap_h, \"1mvwap_h\"),\n", + " (t1vwap_d, \"1mvwap_d\"),\n", + " (t1vwap_t_real, \"1mvwap_t_real\"),\n", + " (t1vwap_h_real, \"1mvwap_h_real\"),\n", + " 
(t1vwap_d_real, \"1mvwap_d_real\")\n", + " # (t1close_realigned, \"1min close realigned\"),\n", + " # (m30data.close, \"30min-close\"),\n", + " # (m30close_realigned, \"30min close realigned\"),\n", + " ],\n", + ")\n", + "ch = chart([pane1], size=\"s\", xloc=slice(\"2024-05-1 09:30:00\",\"2024-05-25 16:00:00\"))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# SUPERTREND" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "supertrend_s1 = vbt.SUPERTREND.run(s1data.high, s1data.low, s1data.close, period=5, multiplier=3)\n", + "direction_series_s1 = supertrend_s1.direction\n", + "supertrend_t1 = vbt.SUPERTREND.run(t1data.high, t1data.low, t1data.close, period=14, multiplier=3)\n", + "direction_series_t1 = supertrend_t1.direction\n", + "supertrend_t30 = vbt.SUPERTREND.run(t30data.high, t30data.low, t30data.close, period=14, multiplier=3)\n", + "direction_series_t30 = supertrend_t30.direction\n", + "\n", + "resampler_1t_sec = vbt.Resampler(direction_series_t1.index, direction_series_s1.index, source_freq=\"1T\", target_freq=\"1s\")\n", + "resampler_30t_sec = vbt.Resampler(direction_series_t30.index, direction_series_s1.index, source_freq=\"30T\", target_freq=\"1s\")\n", + "direction_series_t1_realigned = direction_series_t1.vbt.realign_closing(resampler_1t_sec)\n", + "direction_series_t30_realigned = direction_series_t30.vbt.realign_closing(resampler_30t_sec)\n", + "\n", + "#supertrend_s1.xloc[\"2024-01-3 09:30:00\":\"2024-01-03 16:00:00\"].plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# aligned_ups= pd.Series(False, index=direction_real.index)\n", + "# aligned_downs= pd.Series(False, index=direction_real.index)\n", + "\n", + "# aligned_ups = direction_real == 1 & supertrend.direction == 1\n", + "# aligned_ups" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": 
[], + "source": [ + "s5close = s5data.data[\"BAC\"].close\n", + "s5open = s5data.data[\"BAC\"].open\n", + "s5high = s5data.data[\"BAC\"].high\n", + "s5close_prev = s5close.shift(1)\n", + "s5open_prev = s5open.shift(1)\n", + "s5high_prev = s5high.shift(1)\n", + "#gap nahoru od byci svicky a nevraci se zpet na jeji uroven\n", + "entry_ups = (s5close_prev > s5open_prev) & (s5open > s5high_prev + 0.010) & (s5close > s5close_prev)\n", + "\n", + "entry_ups.value_counts()\n", + "\n", + "#entry_ups.info()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Entry window" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "market_open = datetime.time(9, 30)\n", + "market_close = datetime.time(16, 0)\n", + "entry_window_opens = 10\n", + "entry_window_closes = 370" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "entry_window_open= pd.Series(False, index=entry_ups.index)\n", + "# Calculate the time difference in minutes from market open for each timestamp\n", + "elapsed_min_from_open = (entry_ups.index.hour - market_open.hour) * 60 + (entry_ups.index.minute - market_open.minute)\n", + "entry_window_open[(elapsed_min_from_open >= entry_window_opens) & (elapsed_min_from_open < entry_window_closes)] = True\n", + "#entry_window_open\n", + "\n", + "entry_ups = entry_ups & entry_window_open\n", + "# entry_ups\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "s5vwap_h = vbt.VWAP.run(s5data.high, s5data.low, s5data.close, s5data.volume, anchor=\"H\")\n", + "s5vwap_d = vbt.VWAP.run(s5data.high, s5data.low, s5data.close, s5data.volume, anchor=\"D\")\n", + "\n", + "# s5vwap_h_real = s5vwap_h.vwap.vbt.realign_closing(resampler_s)\n", + "# s5vwap_d_real = s5vwap_d.vwap.vbt.realign_closing(resampler_s)" + ] + }, + { + "cell_type": "code", + "execution_count": 
null, + "metadata": {}, + "outputs": [], + "source": [ + "pane1 = Panel(\n", + " ohlcv=(s5data.data[\"BAC\"],), #(series, entries, exits, other_markers)\n", + " histogram=[], # [(series, name, \"rgba(53, 94, 59, 0.6), opacity\")]\n", + " right=[#(bbands,), #[(series, name, entries, exits, other_markers)]\n", + " (s5data.data[\"BAC\"].close, \"close\", entry_ups),\n", + " (s5data.data[\"BAC\"].open, \"open\"),\n", + " (s5vwap_h, \"vwap5s_H\",),\n", + " (s5vwap_d, \"vwap5s_D\",)\n", + " # (t1data.data[\"BAC\"].vwap, \"vwap\"),\n", + " # (t1data.close, \"1min close\"),\n", + " # (supertrend_s1.trend,\"STtrend\"),\n", + " # (supertrend_s1.long,\"STlong\"),\n", + " # (supertrend_s1.short,\"STshort\")\n", + " ],\n", + " left = [\n", + " #(direction_series_s1,\"direction_s1\"),\n", + " # (direction_series_t1,\"direction_t1\"),\n", + " # (direction_series_t30,\"direction_t30\")\n", + " \n", + " ],\n", + " # right=[(bbands.upperband, \"upperband\",),\n", + " # (bbands.lowerband, \"lowerband\",),\n", + " # (bbands.middleband, \"middleband\",)\n", + " # ], #[(series, name, entries, exits, other_markers)]\n", + " middle1=[],\n", + " middle2=[],\n", + ")\n", + "\n", + "# pane2 = Panel(\n", + "# ohlcv=(t1data.data[\"BAC\"],uptrend_m30, downtrend_m30), #(series, entries, exits, other_markers)\n", + "# histogram=[], # [(series, name, \"rgba(53, 94, 59, 0.6), opacity\")]\n", + "# left=[#(bbands,), #[(series, name, entries, exits, other_markers)]\n", + "# (direction_real,\"direction30min_real\"),\n", + "# ],\n", + "# # left = [(supertrendm30.direction,\"STdirection30\")],\n", + "# # # right=[(bbands.upperband, \"upperband\",),\n", + "# # # (bbands.lowerband, \"lowerband\",),\n", + "# # # (bbands.middleband, \"middleband\",)\n", + "# # # ], #[(series, name, entries, exits, other_markers)]\n", + "# middle1=[],\n", + "# middle2=[],\n", + "# title = \"1m\")\n", + "\n", + "ch = chart([pane1], sync=True, size=\"s\", xloc=slice(\"2024-02-20 09:30:00\",\"2024-02-22 16:00:00\"), 
precision=6)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# data = s5data.xloc[\"2024-01-03 09:30:00\":\"2024-03-10 16:00:00\"]\n", + "# entry = entry_ups.vbt.xloc[\"2024-01-03 09:30:00\":\"2024-03-10 16:00:00\"].obj\n", + "\n", + "pf = vbt.Portfolio.from_signals(close=s5data, entries=entry_ups, direction=\"longonly\", sl_stop=0.05/100, tp_stop = 0.05/100, fees=0.0167/100, freq=\"5s\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf.xloc[\"2024-01-26 09:30:00\":\"2024-02-28 16:00:00\"].positions.plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf.xloc[\"2024-01-26 09:30:00\":\"2024-01-28 16:00:00\"].plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pd.set_option('display.max_rows', None)\n", + "pf.stats()\n", + "# pf.xloc[\"monday\"].stats()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "buyvolume = t1data.data[\"BAC\"].buyvolume\n", + "sellvolume = t1data.data[\"BAC\"].sellvolume\n", + "totalvolume = buyvolume + sellvolume\n", + "\n", + "#adjust to minimal value to avoid division by zero\n", + "sellvolume_adjusted = sellvolume.replace(0, 1e-10)\n", + "oibratio = buyvolume / sellvolume\n", + "\n", + "#cumulative order flow (net difference)\n", + "cof = buyvolume - sellvolume\n", + "\n", + "# Calculate the order imbalance (net differene) normalize the order imbalance by calculating the difference between buy and sell volumes and then scaling it by the total volume.\n", + "order_imbalance = cof / totalvolume\n", + "order_imbalance = order_imbalance.fillna(0) #nan nahradime 0\n", + "\n", + "order_imbalance_allvolume = cof / t1data.data[\"BAC\"].volume\n", + "\n", + "order_imbalance_sma = 
vbt.indicator(\"talib:EMA\").run(order_imbalance, timeperiod=5)\n", + "short_signals = order_imbalance.vbt < -0.5\n", + "#short_entries = oibratio.vbt < 0.01\n", + "short_signals.value_counts()\n", + "short_signals.name = \"short_entries\"\n", + "#.fillna(False)\n", + "short_exits = short_signals.shift(-2).fillna(False).astype(bool)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pane1 = Panel(\n", + " ohlcv=(t1data.data[\"BAC\"],), #(series, entries, exits, other_markers)\n", + " histogram=[(order_imbalance_allvolume, \"oib_allvolume\", \"rgba(53, 94, 59, 0.6)\",0.5),\n", + " (t1data.data[\"BAC\"].trades, \"trades\",None,0.4),\n", + " ], # [(series, name, \"rgba(53, 94, 59, 0.6)\", opacity)]\n", + " # right=[\n", + " # (supertrend.trend,\"STtrend\"),\n", + " # (supertrend.long,\"STlong\"),\n", + " # (supertrend.short,\"STshort\")\n", + " # ],\n", + " # left = [(supertrend.direction,\"STdirection\")],\n", + " # right=[(bbands.upperband, \"upperband\",),\n", + " # (bbands.lowerband, \"lowerband\",),\n", + " # (bbands.middleband, \"middleband\",)\n", + " # ], #[(series, name, entries, exits, other_markers)]\n", + " middle1=[],\n", + " middle2=[],\n", + ")\n", + "\n", + "pane2 = Panel(\n", + " ohlcv=(basic_data.data[\"BAC\"],), #(series, entries, exits, other_markers)\n", + " left=[(basic_data.data[\"BAC\"].trades, \"trades\")],\n", + " histogram=[(basic_data.data[\"BAC\"].trades, \"trades_hist\", \"white\", 0.5)], #\"rgba(53, 94, 59, 0.6)\"\n", + " # ], # [(series, name, \"rgba(53, 94, 59, 0.6)\")]\n", + " # right=[\n", + " # (supertrend.trend,\"STtrend\"),\n", + " # (supertrend.long,\"STlong\"),\n", + " # (supertrend.short,\"STshort\")\n", + " # ],\n", + " # left = [(supertrend.direction,\"STdirection\")],\n", + " # right=[(bbands.upperband, \"upperband\",),\n", + " # (bbands.lowerband, \"lowerband\",),\n", + " # (bbands.middleband, \"middleband\",)\n", + " # ], #[(series, name, entries, exits, 
other_markers)]\n", + " middle1=[],\n", + " middle2=[],\n", + ")\n", + "\n", + "\n", + "ch = chart([pane1, pane2], size=\"m\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#short_signal = t1slope.real_below(t1_th) & t2slope.real_below(t2_th) & t3slope.real_below(t3_th) & t4slope.real_below(t4_th)\n", + "#long_signal = t1slope.real_above(t1_th) & t2slope.real_above(t2_th) & t3slope.real_above(t3_th) & t4slope.real_above(t4_th)\n", + "\n", + "#test na daily s reversem crossed 0\n", + "short_signal = t2slope.vbt < -0.01 & t3slope.vbt < -0.01 #min value of threshold\n", + "long_signal = t2slope.vbt > 0.01 & t3slope.vbt > 0.01 #min\n", + "\n", + "# thirty_up_signal = t3slope.vbt.crossed_above(0.01)\n", + "# thirty_down_signal = t3slope.vbt.crossed_below(-0.01)\n", + "\n", + "fig = plot_2y_close(priminds=[], secinds=[t3slope], close=t1data.close)\n", + "#short_signal.vbt.signals.plot_as_entries(basic_data.close, fig=fig)\n", + "\n", + "short_signal.vbt.signals.plot_as_entries(t1data.close, fig=fig, trace_kwargs=dict(name=\"SHORTS\",\n", + " line=dict(color=\"#ffe476\"),\n", + " marker=dict(color=\"red\", symbol=\"triangle-down\"),\n", + " fill=None,\n", + " connectgaps=True,\n", + " ))\n", + "long_signal.vbt.signals.plot_as_entries(t1data.close, fig=fig, trace_kwargs=dict(name=\"LONGS\",\n", + " line=dict(color=\"#ffe476\"),\n", + " marker=dict(color=\"limegreen\"),\n", + " fill=None,\n", + " connectgaps=True,\n", + " ))\n", + "\n", + "# thirty_down_signal.vbt.signals.plot_as_entries(t1data.close, fig=fig, trace_kwargs=dict(name=\"DOWN30\",\n", + "# line=dict(color=\"#ffe476\"),\n", + "# marker=dict(color=\"yellow\", symbol=\"triangle-down\"),\n", + "# fill=None,\n", + "# connectgaps=True,\n", + "# ))\n", + "# thirty_up_signal.vbt.signals.plot_as_entries(t1data.close, fig=fig, trace_kwargs=dict(name=\"UP30\",\n", + "# line=dict(color=\"#ffe476\"),\n", + "# marker=dict(color=\"grey\"),\n", + "# 
fill=None,\n", + "# connectgaps=True,\n", + "# ))\n", + "\n", + "# thirtymin_slope_to_compare.vbt.plot(fig=fig, add_trace_kwargs=dict(secondary_y=True), trace_kwargs=dict(name=\"30min slope\",\n", + "# line=dict(color=\"yellow\"), \n", + "# fill=None,\n", + "# connectgaps=True,\n", + "# ))\n", + "\n", + "fig.show()\n", + "# print(\"short signal\")\n", + "# print(short_signal.value_counts())\n", + "\n", + "#forced_exit = pd.Series(False, index=close.index)\n", + "forced_exit = basic_data.symbol_wrapper.fill(False)\n", + "#entry_window_open = pd.Series(False, index=close.index)\n", + "entry_window_open= basic_data.symbol_wrapper.fill(False)\n", + "\n", + "# Calculate the time difference in minutes from market open for each timestamp\n", + "elapsed_min_from_open = (forced_exit.index.hour - market_open.hour) * 60 + (forced_exit.index.minute - market_open.minute)\n", + "\n", + "entry_window_open[(elapsed_min_from_open >= entry_window_opens) & (elapsed_min_from_open < entry_window_closes)] = True\n", + "\n", + "#print(entry_window_open.value_counts())\n", + "\n", + "forced_exit[(elapsed_min_from_open >= forced_exit_start) & (elapsed_min_from_open < forced_exit_end)] = True\n", + "short_entries = (short_signal & entry_window_open)\n", + "short_exits = forced_exit\n", + "\n", + "entries = (long_signal & entry_window_open)\n", + "exits = forced_exit\n", + "#long_entries.info()\n", + "#number of trues and falses in long_entries\n", + "# print(short_exits.value_counts())\n", + "# print(short_entries.value_counts())\n", + "\n", + "#fig = plot_2y_close([],[momshort, rocp], close)\n", + "#short_signal.vbt.signals.plot_as_entries(close, fig=fig, add_trace_kwargs=dict(secondary_y=False))\n", + "#print(sl_stop)\n", + "#short_entries=short_entries, short_exits=short_exits,\n", + "# pf = vbt.Portfolio.from_signals(close=basic_data, entries=short_entries, exits=exits, tsl_stop=0.005, tp_stop = 0.05, fees=0.0167/100, freq=\"1s\") #sl_stop=sl_stop, tp_stop = sl_stop,\n", + "\n", + "# 
pf.stats()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "forced_exit = t1data.symbol_wrapper.fill(False)\n", + "#entry_window_open = pd.Series(False, index=close.index)\n", + "entry_window_open= t1data.symbol_wrapper.fill(False)\n", + "\n", + "# Calculate the time difference in minutes from market open for each timestamp\n", + "elapsed_min_from_open = (forced_exit.index.hour - market_open.hour) * 60 + (forced_exit.index.minute - market_open.minute)\n", + "\n", + "entry_window_open[(elapsed_min_from_open >= entry_window_opens) & (elapsed_min_from_open < entry_window_closes)] = True\n", + "\n", + "#print(entry_window_open.value_counts())\n", + "\n", + "forced_exit[(elapsed_min_from_open >= forced_exit_start) & (elapsed_min_from_open < forced_exit_end)] = True\n", + "short_entries = (short_signals & entry_window_open)\n", + "short_exits = forced_exit\n", + "\n", + "entries = (long_signals & entry_window_open)\n", + "exits = forced_exit\n", + "\n", + "pf = vbt.Portfolio.from_signals(close=t1data, entries=entries, exits=exits, short_entries=short_entries, short_exits=exits,\n", + "td_stop=2, time_delta_format=\"rows\",\n", + "tsl_stop=0.005, tp_stop = 0.005, fees=0.0167/100)#, freq=\"1s\") #sl_stop=sl_stop, tp_stop = sl_stop,\n", + "\n", + "pf.stats()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf.plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf.get_drawdowns().records_readable" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf.orders.records_readable" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + 
"mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/research/strat_TIME_ENTRIES copy/v1_MULTI.ipynb b/research/strat_TIME_ENTRIES copy/v1_MULTI.ipynb new file mode 100644 index 0000000..99c2982 --- /dev/null +++ b/research/strat_TIME_ENTRIES copy/v1_MULTI.ipynb @@ -0,0 +1,932 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from v2realbot.tools.loadbatch import load_batch\n", + "from v2realbot.utils.utils import zoneNY\n", + "import pandas as pd\n", + "import numpy as np\n", + "import vectorbtpro as vbt\n", + "from itables import init_notebook_mode, show\n", + "import datetime\n", + "from itertools import product\n", + "from v2realbot.config import ACCOUNT1_PAPER_API_KEY, ACCOUNT1_PAPER_SECRET_KEY, DATA_DIR\n", + "\n", + "init_notebook_mode(all_interactive=True)\n", + "\n", + "vbt.settings.set_theme(\"dark\")\n", + "vbt.settings['plotting']['layout']['width'] = 1280\n", + "vbt.settings.plotting.auto_rangebreaks = True\n", + "# Set the option to display with pagination\n", + "pd.set_option('display.notebook_repr_html', True)\n", + "pd.set_option('display.max_rows', 10) # Number of rows per page\n", + "\n", + "# Define the market open and close times\n", + "market_open = datetime.time(9, 30)\n", + "market_close = datetime.time(16, 0)\n", + "entry_window_opens = 1\n", + "entry_window_closes = 370\n", + "\n", + "forced_exit_start = 380\n", + "forced_exit_end = 390\n", + "\n", + "#LOAD FROM PARQUET\n", + "#list all files is dir directory with parquet extension\n", + "dir = DATA_DIR + \"/notebooks/\"\n", + "import os\n", + "files = [f for f in os.listdir(dir) if f.endswith(\".parquet\")]\n", + "#print('\\n'.join(map(str, files)))\n", + "file_name = \"ohlcv_df-SPY-2024-01-01T09:30:00-2024-05-14T16:00:00.parquet\"\n", + "ohlcv_df = 
pd.read_parquet(dir+file_name,engine='pyarrow')\n", + "basic_data = vbt.Data.from_data(vbt.symbol_dict({\"SPY\": ohlcv_df}), tz_convert=zoneNY)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#parameters (primary y line, secondary y line, close)\n", + "def plot_2y_close(priminds, secinds, close):\n", + " fig = vbt.make_subplots(rows=1, cols=1, shared_xaxes=True, specs=[[{\"secondary_y\": True}]], vertical_spacing=0.02, subplot_titles=(\"MOM\", \"Price\" ))\n", + " close.vbt.plot(fig=fig, add_trace_kwargs=dict(secondary_y=False), trace_kwargs=dict(line=dict(color=\"blue\")))\n", + " for ind in priminds:\n", + " ind.plot(fig=fig, add_trace_kwargs=dict(secondary_y=False))\n", + " for ind in secinds:\n", + " ind.plot(fig=fig, add_trace_kwargs=dict(secondary_y=True))\n", + " return fig\n", + "\n", + "# close = basic_data.xloc[\"09:30\":\"10:00\"].close" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#PIPELINE - FOR - LOOP\n", + "\n", + "#indicator parameters\n", + "mom_timeperiod = list(range(2, 12))\n", + "\n", + "#uzavreni okna od 1 do 200\n", + "#entry_window_closes = list(range(2, 50, 3))\n", + "entry_window_closes = [5, 10, 30, 45]\n", + "#entry_window_closes = 30\n", + "#threshold entries parameters\n", + "#long\n", + "mom_th = np.round(np.arange(0.01, 0.5 + 0.02, 0.02),4).tolist()#-0.02\n", + "# short\n", + "#mom_th = np.round(np.arange(-0.01, -0.3 - 0.02, -0.02),4).tolist()#-0.02\n", + "roc_th = np.round(np.arange(-0.2, -0.8 - 0.05, -0.05),4).tolist()#-0.2\n", + "#print(mom_th, roc_th)\n", + "\n", + "#portfolio simulation parameters\n", + "sl_stop =np.round(np.arange(0.02/100, 0.7/100, 0.05/100),4).tolist()\n", + "tp_stop = np.round(np.arange(0.02/100, 0.7/100, 0.05/100),4).tolist()\n", + "\n", + "combs = list(product(mom_timeperiod, mom_th, roc_th, sl_stop, tp_stop))\n", + "\n", + "@vbt.parameterized(merge_func = \"concat\", 
random_subset = 2000, show_progress=True) \n", + "def test_strat(entry_window_closes=60,\n", + " mom_timeperiod=2,\n", + " mom_th=-0.04,\n", + " #roc_th=-0.2,\n", + " sl_stop=0.19/100,\n", + " tp_stop=0.19/100):\n", + " # mom_timeperiod=2\n", + " # mom_th=-0.06\n", + " # roc_th=-0.2\n", + " # sl_stop=0.04/100\n", + " # tp_stop=0.04/100\n", + "\n", + " momshort = vbt.indicator(\"talib:MOM\").run(basic_data.close, timeperiod=mom_timeperiod, short_name = \"slope_short\")\n", + " rocp = vbt.indicator(\"talib:ROC\").run(basic_data.close, short_name = \"rocp\")\n", + " #rate of change + momentum\n", + "\n", + " #momshort.plot rocp.real_crossed_below(roc_th) & \n", + " #short_signal = momshort.real_crossed_below(mom_th)\n", + " long_signal = momshort.real_crossed_above(mom_th)\n", + " # print(\"short signal\")\n", + " # print(short_signal.value_counts())\n", + "\n", + " #forced_exit = pd.Series(False, index=close.index)\n", + " forced_exit = basic_data.symbol_wrapper.fill(False)\n", + " #entry_window_open = pd.Series(False, index=close.index)\n", + " entry_window_open= basic_data.symbol_wrapper.fill(False)\n", + "\n", + " #print(entry_window_closes, \"entry window closes\")\n", + " # Calculate the time difference in minutes from market open for each timestamp\n", + " elapsed_min_from_open = (forced_exit.index.hour - market_open.hour) * 60 + (forced_exit.index.minute - market_open.minute)\n", + "\n", + " entry_window_open[(elapsed_min_from_open >= entry_window_opens) & (elapsed_min_from_open < entry_window_closes)] = True\n", + "\n", + " #print(entry_window_open.value_counts())\n", + "\n", + " forced_exit[(elapsed_min_from_open >= forced_exit_start) & (elapsed_min_from_open < forced_exit_end)] = True\n", + " #short_entries = (short_signal & entry_window_open)\n", + " #short_exits = forced_exit\n", + " entries = (long_signal & entry_window_open)\n", + " exits = forced_exit\n", + " #long_entries.info()\n", + " #number of trues and falses in long_entries\n", + " 
#print(short_exits.value_counts())\n", + " #print(short_entries.value_counts())\n", + "\n", + " #fig = plot_2y_close([],[momshort, rocp], close)\n", + " #short_signal.vbt.signals.plot_as_entries(close, fig=fig, add_trace_kwargs=dict(secondary_y=False))\n", + " #print(sl_stop)\n", + " #tsl_th=sl_stop, \n", + " #short_entries=short_entries, short_exits=short_exits,\n", + " pf = vbt.Portfolio.from_signals(close=basic_data.close, entries=entries, exits=exits, tsl_stop=sl_stop, tp_stop = tp_stop, fees=0.0167/100, freq=\"1s\", price=\"close\") #sl_stop=sl_stop, tp_stop = sl_stop,\n", + " \n", + " return pf.stats([\n", + " 'total_return',\n", + " 'max_dd', \n", + " 'total_trades', \n", + " 'win_rate', \n", + " 'expectancy'\n", + " ])\n", + "\n", + "pf_results = test_strat(vbt.Param(entry_window_closes),\n", + " vbt.Param(mom_timeperiod),\n", + " vbt.Param(mom_th),\n", + " #vbt.Param(roc_th)\n", + " vbt.Param(sl_stop),\n", + " vbt.Param(tp_stop, condition=\"tp_stop > sl_stop\"))\n", + "pf_results = pf_results.unstack(level=-1)\n", + "pf_results.sort_values(by=[\"Total Return [%]\", \"Max Drawdown [%]\"], ascending=[False, True])\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#pf_results.load(\"10tiscomb.pickle\")\n", + "#pf_results.info()\n", + "\n", + "vbt.save(pf_results, \"8tiscomb_tsl.pickle\")\n", + "\n", + "# pf_results = vbt.load(\"8tiscomb_tsl.pickle\")\n", + "# pf_results\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# parallel_coordinates method¶\n", + "\n", + "# attach_px_methods..plot_func(\n", + "# *args,\n", + "# layout=None,\n", + "# **kwargs\n", + "# )\n", + "\n", + "# pf_results.vbt.px.parallel_coordinates() #ocdf\n", + "\n", + "res = pf_results.reset_index()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf_results" + ] + }, + { + "cell_type": 
"code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "from sklearn.decomposition import PCA\n", + "from sklearn.preprocessing import StandardScaler\n", + "import matplotlib.pyplot as plt\n", + "\n", + "# Assuming pf_results is your DataFrame\n", + "# Convert columns to numeric, assuming NaNs where conversion fails\n", + "metrics = ['Total Return [%]', 'Max Drawdown [%]', 'Total Trades']\n", + "for metric in metrics:\n", + " pf_results[metric] = pd.to_numeric(pf_results[metric], errors='coerce')\n", + "\n", + "# Handle missing values, for example filling with the median\n", + "pf_results['Max Drawdown [%]'].fillna(pf_results['Max Drawdown [%]'].median(), inplace=True)\n", + "\n", + "# Extract the metrics into a new DataFrame\n", + "data_for_pca = pf_results[metrics]\n", + "\n", + "# Standardize the data before applying PCA\n", + "scaler = StandardScaler()\n", + "data_scaled = scaler.fit_transform(data_for_pca)\n", + "\n", + "# Apply PCA\n", + "pca = PCA(n_components=2) # Adjust components as needed\n", + "principal_components = pca.fit_transform(data_scaled)\n", + "\n", + "# Create a DataFrame with the principal components\n", + "pca_results = pd.DataFrame(data=principal_components, columns=['PC1', 'PC2'])\n", + "\n", + "# Visualize the results\n", + "plt.figure(figsize=(8,6))\n", + "plt.scatter(pca_results['PC1'], pca_results['PC2'], alpha=0.5)\n", + "plt.xlabel('Principal Component 1')\n", + "plt.ylabel('Principal Component 2')\n", + "plt.title('PCA of Strategy Optimization Results')\n", + "plt.grid(True)\n", + "plt.savefig(\"ddd.png\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Check if there is any unnamed level and rename it\n", + "if None in df.index.names:\n", + " # Generate new names list replacing None with 
'stat'\n", + " new_names = ['stat' if name is None else name for name in df.index.names]\n", + " df.index.set_names(new_names, inplace=True)\n", + "\n", + "rs= df\n", + "\n", + "rs.info()\n", + "\n", + "\n", + "# # Now, 'stat' is the name of the previously unnamed level\n", + "\n", + "# # Filter for 'Total Return' assuming it is a correct identifier in the 'stat' level\n", + "# total_return_series = df.xs('Total Return [%]', level='stat')\n", + "\n", + "# # Sort the Series to get the largest 'Total Return' values\n", + "# sorted_series = total_return_series.sort_values(ascending=False)\n", + "\n", + "# # Print the sorted filtered data\n", + "# sorted_series.head(20)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sorted_series.vbt.save()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#df.info()\n", + "total_return_series = df.xs('Total Return [%]')\n", + "sorted_series = total_return_series.sort_values(ascending=False)\n", + "\n", + "# Display the top N entries, e.g., top 5\n", + "sorted_series.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "comb_stats_df.nlargest(10, 'Total Return [%]')\n", + "#stats_df.info()\n", + "\n", + "\n", + "8\t-0.06\t-0.2\t0.0028\t0.0048\t4.156254\n", + "4 -0.02 -0.25 0.0028 0.0048 0.84433\n", + "3 -0.02 -0.25 0.0033 0.0023 Total Return [%] 0.846753\n", + "#2\t-0.04\t-0.2\t0.0019\t0.0019\n", + "# 2\t-0.04\t-0.2\t0.0019\t0.0019\t0.556919\t91\t60.43956\t0.00612\n", + "# 2\t-0.04\t-0.25\t0.0019\t0.0019\t0.556919\t91\t60.43956\t0.00612\n", + "# 2\t-0.04\t-0.3\t0.0019\t0.0019\t0.556919\t91\t60.43956\t0.00612\n", + "# 2\t-0.04\t-0.35\t0.0019\t0.0019\t0.556919\t91\t60.43956\t0.00612\n", + "# 
2\t-0.04\t-0.4\t0.0019\t0.0019\t0.556919\t91\t60.43956\t0.00612\n", + "# 2\t-0.04\t-0.2\t0.0019\t0.0017\t0.451338\t93\t63.44086\t0.004853\n", + "# 2\t-0.04\t-0.25\t0.0019\t0.0017\t0.451338\t93\t63.44086\t0.004853\n", + "# 2\t-0.04\t-0.3\t0.0019\t0.0017\t0.451338\t93\t63.44086\t0.004853\n", + "# 2\t-0.04\t-0.35\t0.0019\t0.0017\t0.451338\t93\t63.44086\t0.004853\n", + "# 2\t-0.04\t-0.4\t0.0019\t0.0017\t0.451338\t93\t63.44086\t0.004853" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf.plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "basic_data.symbols" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + ">>> def apply_func(ts, entries, exits, fastw, sloww, minp=None):\n", + "... fast_ma = vbt.nb.rolling_mean_nb(ts, fastw, minp=minp)\n", + "... slow_ma = vbt.nb.rolling_mean_nb(ts, sloww, minp=minp)\n", + "... entries[:] = vbt.nb.crossed_above_nb(fast_ma, slow_ma) \n", + "... exits[:] = vbt.nb.crossed_above_nb(slow_ma, fast_ma)\n", + "... return (fast_ma, slow_ma) \n", + "\n", + ">>> CrossSig = vbt.IF(\n", + "... class_name=\"CrossSig\",\n", + "... input_names=['ts'],\n", + "... in_output_names=['entries', 'exits'],\n", + "... param_names=['fastw', 'sloww'],\n", + "... output_names=['fast_ma', 'slow_ma']\n", + "... ).with_apply_func(\n", + "... apply_func,\n", + "... in_output_settings=dict(\n", + "... entries=dict(dtype=np.bool_), #initialize output with bool\n", + "... exits=dict(dtype=np.bool_)\n", + "... )\n", + "... 
)\n", + ">>> cross_sig = CrossSig.run(ts2, 2, 4)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#PIPELINE - parameters in one go\n", + "\n", + "\n", + "#TOTO prepsat do FOR-LOOPu\n", + "\n", + "\n", + "#indicator parameters\n", + "mom_timeperiod = list(range(2, 6))\n", + "\n", + "#threshold entries parameters\n", + "mom_th = np.round(np.arange(-0.02, -0.1 - 0.02, -0.02),4).tolist()#-0.02\n", + "roc_th = np.round(np.arange(-0.2, -0.4 - 0.05, -0.05),4).tolist()#-0.2\n", + "#print(mom_th, roc_th)\n", + "#jejich product\n", + "# mom_th_prod, roc_th_prod = zip(*product(mom_th, roc_th))\n", + "\n", + "# #convert threshold to vbt param\n", + "# mom_th_index = vbt.Param(mom_th_prod, name='mom_th_th') \n", + "# roc_th_index = vbt.Param(roc_th_prod, name='roc_th_th')\n", + "\n", + "mom_th = vbt.Param(mom_th, name='mom_th')\n", + "roc_th = vbt.Param(roc_th, name='roc_th')\n", + "\n", + "#portfolio simulation parameters\n", + "sl_stop = np.arange(0.03/100, 0.2/100, 0.02/100).tolist()\n", + "# Using the round function\n", + "sl_stop = [round(val, 4) for val in sl_stop]\n", + "tp_stop = np.arange(0.03/100, 0.2/100, 0.02/100).tolist()\n", + "# Using the round function\n", + "tp_stop = [round(val, 4) for val in tp_stop]\n", + "sl_stop = vbt.Param(sl_stop) #np.nan mean s no stoploss\n", + "tp_stop = vbt.Param(tp_stop) #np.nan mean s no stoploss\n", + "\n", + "\n", + "#def test_mom(window=14, mom_th=0.2, roc_th=0.2, sl_stop=0.03/100, tp_stop=0.03/100):\n", + "#close = basic_data.xloc[\"09:30\":\"10:00\"].close\n", + "momshort = vbt.indicator(\"talib:MOM\").run(basic_data.get(\"Close\"), timeperiod=mom_timeperiod, short_name = \"slope_short\")\n", + "\n", + "#ht_trendline = vbt.indicator(\"talib:HT_TRENDLINE\").run(close, short_name = \"httrendline\")\n", + "rocp = vbt.indicator(\"talib:ROC\").run(basic_data.get(\"Close\"), short_name = \"rocp\")\n", + "#rate of change + momentum\n", + "\n", + "rocp_signal = 
rocp.real_crossed_below(mom_th)\n", + "mom_signal = momshort.real_crossed_below(roc_th)\n", + "\n", + "#mom_signal\n", + "print(rocp_signal.info())\n", + "print(mom_signal.info())\n", + "#print(rocp.real)\n", + "\n", + "\n", + "short_signal = (mom_signal.vbt & rocp_signal)\n", + "\n", + "# #short_signal = (rocp.real_crossed_below(roc_th_index) & momshort.real_crossed_below(mom_th_index))\n", + "# forced_exit = m1_data.symbol_wrapper.fill(False)\n", + "# entry_window_open= m1_data.symbol_wrapper.fill(False)\n", + "\n", + "\n", + "# # Calculate the time difference in minutes from market open for each timestamp\n", + "# elapsed_min_from_open = (forced_exit.index.hour - market_open.hour) * 60 + (forced_exit.index.minute - market_open.minute)\n", + "\n", + "# entry_window_open[(elapsed_min_from_open >= entry_window_opens) & (elapsed_min_from_open < entry_window_closes)] = True\n", + "# forced_exit[(elapsed_min_from_open >= forced_exit_start) & (elapsed_min_from_open < forced_exit_end)] = True\n", + "# short_entries = (short_signal & entry_window_open)\n", + "# short_exits = forced_exit\n", + "# #long_entries.info()\n", + "# #number of trues and falses in long_entries\n", + "# #short_exits.value_counts()\n", + "# #short_entries.value_counts()\n", + "\n", + "\n", + "# pf = vbt.Portfolio.from_signals(close=close, short_entries=short_entries, short_exits=short_exits, sl_stop=sl_stop, tp_stop = tp_stop, fees=0.0167/100, freq=\"1s\") #sl_stop=sl_stop, tp_stop = sl_stop,\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# filter dates" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#filter na dny\n", + "dates_of_interest = pd.to_datetime(['2024-04-22']).tz_localize('US/Eastern')\n", + "filtered_df = df.loc[df.index.normalize().isin(dates_of_interest)]\n", + "\n", + "df = filtered_df\n", + "df.info()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + 
"metadata": {}, + "outputs": [], + "source": [ + "# import plotly.io as pio\n", + "# pio.renderers.default = 'notebook'\n", + "\n", + "#naloadujeme do vbt symbol as column\n", + "basic_data = vbt.Data.from_data({\"BAC\": df}, tz_convert=zoneNY)\n", + "\n", + "vbt.settings.plotting.auto_rangebreaks = True\n", + "#basic_data.data[\"BAC\"].vbt.ohlcv.plot()\n", + "\n", + "#basic_data.data[\"BAC\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "m1_data = basic_data[['Open', 'High', 'Low', 'Close', 'Volume']]\n", + "\n", + "m1_data.data[\"BAC\"]\n", + "#m5_data = m1_data.resample(\"5T\")\n", + "\n", + "#m5_data.data[\"BAC\"].head(10)\n", + "\n", + "# m15_data = m1_data.resample(\"15T\")\n", + "\n", + "# m15 = m15_data.data[\"BAC\"]\n", + "\n", + "# m15.vbt.ohlcv.plot()\n", + "\n", + "# m1_data.wrapper.index\n", + "\n", + "# m1_resampler = m1_data.wrapper.get_resampler(\"1T\")\n", + "# m1_resampler.index_difference(reverse=True)\n", + "\n", + "\n", + "# m5_resampler.prettify()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# MOM indicator" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vbt.phelp(vbt.indicator(\"talib:ROCP\").run)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "vyuzití rychleho klesani na sekundove urovni behem open rush\n", + "- MOM + ROC during open rush\n", + "- short signal\n", + "- pipeline kombinace thresholdu pro vstup mom_th, roc_th + hodnota sl_stop a tp_stop (pripadne trailing) - nalezeni optimalni kombinace atributu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# fig = plot_2y_close([ht_trendline],[momshort, rocp], close)\n", + "# short_signal.vbt.signals.plot_as_entries(close, fig=fig, add_trace_kwargs=dict(secondary_y=False)) \n", + "\n", + "#parameters (primary y line, secondary y 
line, close)\n", + "def plot_2y_close(priminds, secinds, close):\n", + " fig = vbt.make_subplots(rows=1, cols=1, shared_xaxes=True, specs=[[{\"secondary_y\": True}]], vertical_spacing=0.02, subplot_titles=(\"MOM\", \"Price\" ))\n", + " close.vbt.plot(fig=fig, add_trace_kwargs=dict(secondary_y=False), trace_kwargs=dict(line=dict(color=\"blue\")))\n", + " for ind in priminds:\n", + " ind.plot(fig=fig, add_trace_kwargs=dict(secondary_y=False))\n", + " for ind in secinds:\n", + " ind.plot(fig=fig, add_trace_kwargs=dict(secondary_y=True))\n", + " return fig\n", + "\n", + "close = m1_data.xloc[\"09:30\":\"10:00\"].close\n", + "momshort = vbt.indicator(\"talib:MOM\").run(close, timeperiod=3, short_name = \"slope_short\")\n", + "ht_trendline = vbt.indicator(\"talib:HT_TRENDLINE\").run(close, short_name = \"httrendline\")\n", + "rocp = vbt.indicator(\"talib:ROC\").run(close, short_name = \"rocp\")\n", + "#rate of change + momentum\n", + "short_signal = (rocp.real_crossed_below(-0.2) & momshort.real_crossed_below(-0.02))\n", + "#indlong = vbt.indicator(\"talib:MOM\").run(close, timeperiod=10, short_name = \"slope_long\")\n", + "fig = plot_2y_close([ht_trendline],[momshort, rocp], close)\n", + "short_signal.vbt.signals.plot_as_entries(close, fig=fig, add_trace_kwargs=dict(secondary_y=False)) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "close = m1_data.close\n", + "#vbt.phelp(vbt.OLS.run)\n", + "\n", + "#oer steepmnes of regression line\n", + "#talib.LINEARREG_SLOPE(close, timeperiod=timeperiod)\n", + "#a také ON BALANCE VOLUME - http://5.161.179.223:8000/static/js/vbt/api/indicators/custom/obv/index.html\n", + "\n", + "\n", + "\n", + "mom_ind = vbt.indicator(\"talib:MOM\") \n", + "#vbt.phelp(mom_ind.run)\n", + "\n", + "mom = mom_ind.run(close, timeperiod=10)\n", + "\n", + "plot_2y_close(mom, close)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# defining ENTRY WINDOW and 
forced EXIT window" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#m1_data.data[\"BAC\"].info()\n", + "import datetime\n", + "# Define the market open and close times\n", + "market_open = datetime.time(9, 30)\n", + "market_close = datetime.time(16, 0)\n", + "entry_window_opens = 2\n", + "entry_window_closes = 30\n", + "\n", + "forced_exit_start = 380\n", + "forced_exit_end = 390\n", + "\n", + "forced_exit = m1_data.symbol_wrapper.fill(False)\n", + "entry_window_open= m1_data.symbol_wrapper.fill(False)\n", + "\n", + "# Calculate the time difference in minutes from market open for each timestamp\n", + "elapsed_min_from_open = (forced_exit.index.hour - market_open.hour) * 60 + (forced_exit.index.minute - market_open.minute)\n", + "\n", + "entry_window_open[(elapsed_min_from_open >= entry_window_opens) & (elapsed_min_from_open < entry_window_closes)] = True\n", + "forced_exit[(elapsed_min_from_open >= forced_exit_start) & (elapsed_min_from_open < forced_exit_end)] = True\n", + "\n", + "#entry_window_open.info()\n", + "# forced_exit.tail(100)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "close = m1_data.close\n", + "\n", + "#rsi = vbt.RSI.run(close, window=14)\n", + "\n", + "short_entries = (short_signal & entry_window_open)\n", + "short_exits = forced_exit\n", + "#long_entries.info()\n", + "#number of trues and falses in long_entries\n", + "#short_exits.value_counts()\n", + "short_entries.value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def plot_rsi(close, entries, exits):\n", + " fig = vbt.make_subplots(rows=1, cols=1, shared_xaxes=True, specs=[[{\"secondary_y\": True}]], vertical_spacing=0.02, subplot_titles=(\"RSI\", \"Price\" ))\n", + " close.vbt.plot(fig=fig, add_trace_kwargs=dict(secondary_y=True))\n", + " #rsi.plot(fig=fig, 
add_trace_kwargs=dict(secondary_y=False))\n", + " entries.vbt.signals.plot_as_entries(close, fig=fig, add_trace_kwargs=dict(secondary_y=False)) \n", + " exits.vbt.signals.plot_as_exits(close, fig=fig, add_trace_kwargs=dict(secondary_y=False)) \n", + " return fig\n", + "\n", + "plot_rsi(close, short_entries, short_exits)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vbt.phelp(vbt.Portfolio.from_signals)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sl_stop = np.arange(0.03/100, 0.2/100, 0.02/100).tolist()\n", + "# Using the round function\n", + "sl_stop = [round(val, 4) for val in sl_stop]\n", + "print(sl_stop)\n", + "sl_stop = vbt.Param(sl_stop) #np.nan mean s no stoploss\n", + "\n", + "pf = vbt.Portfolio.from_signals(close=close, short_entries=short_entries, short_exits=short_exits, sl_stop=0.03/100, tp_stop = 0.03/100, fees=0.0167/100, freq=\"1s\") #sl_stop=sl_stop, tp_stop = sl_stop,\n", + "\n", + "#pf.stats()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#list of orders\n", + "#pf.orders.records_readable\n", + "#pf.orders.plots()\n", + "#pf.stats()\n", + "pf.stats()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf.plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf[(0.0015,0.0013)].plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf[0.03].plot_trade_signals()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# pristup k pf jako multi index" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#pf[0.03].plot()\n", + "#pf.order_records\n", + 
"pf[(0.03)].stats()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#zgrupovane statistiky\n", + "stats_df = pf.stats([\n", + " 'total_return',\n", + " 'total_trades',\n", + " 'win_rate',\n", + " 'expectancy'\n", + "], agg_func=None)\n", + "stats_df\n", + "\n", + "\n", + "stats_df.nlargest(10, 'Total Return [%]')\n", + "#stats_df.info()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf[(0.0011,0.0013000000000000002)].plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pandas.tseries.offsets import DateOffset\n", + "\n", + "temp_data = basic_data['2024-4-22']\n", + "temp_data\n", + "res1m = temp_data[[\"Open\", \"High\", \"Low\", \"Close\", \"Volume\"]]\n", + "\n", + "# Define a custom date offset that starts at 9:30 AM and spans 4 hours\n", + "custom_offset = DateOffset(hours=4, minutes=30)\n", + "\n", + "# res1m = res1m.get().resample(\"4H\").agg({ \n", + "# \"Open\": \"first\",\n", + "# \"High\": \"max\",\n", + "# \"Low\": \"min\",\n", + "# \"Close\": \"last\",\n", + "# \"Volume\": \"sum\"\n", + "# })\n", + "\n", + "res4h = res1m.resample(\"1h\", resample_kwargs=dict(origin=\"start\"))\n", + "\n", + "res4h.data\n", + "\n", + "res15m = res1m.resample(\"15T\", resample_kwargs=dict(origin=\"start\"))\n", + "\n", + "res15m.data[\"BAC\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "@vbt.njit\n", + "def long_entry_place_func_nb(c, low, close, time_in_ns, rsi14, window_open, window_close):\n", + " market_open_minutes = 570 # 9 hours * 60 minutes + 30 minutes\n", + "\n", + " for out_i in range(len(c.out)):\n", + " i = c.from_i + out_i\n", + "\n", + " current_minutes = vbt.dt_nb.hour_nb(time_in_ns[i]) * 60 + vbt.dt_nb.minute_nb(time_in_ns[i])\n", + " #print(\"current_minutes\", 
current_minutes)\n", + " # Calculate elapsed minutes since market open at 9:30 AM\n", + " elapsed_from_open = current_minutes - market_open_minutes\n", + " elapsed_from_open = elapsed_from_open if elapsed_from_open >= 0 else 0\n", + " #print( \"elapsed_from_open\", elapsed_from_open)\n", + "\n", + " #elapsed_from_open = elapsed_minutes_from_open_nb(time_in_ns) \n", + " in_window = elapsed_from_open > window_open and elapsed_from_open < window_close\n", + " #print(\"in_window\", in_window)\n", + " # if in_window:\n", + " # print(\"in window\")\n", + "\n", + " if in_window and rsi14[i] > 60: # and low[i, c.col] <= hit_price: # and hour == 9: # (4)!\n", + " return out_i\n", + " return -1\n", + "\n", + "@vbt.njit\n", + "def long_exit_place_func_nb(c, high, close, time_index, tp, sl): # (5)!\n", + " entry_i = c.from_i - c.wait\n", + " entry_price = close[entry_i, c.col]\n", + " hit_price = entry_price * (1 + tp)\n", + " stop_price = entry_price * (1 - sl)\n", + " for out_i in range(len(c.out)):\n", + " i = c.from_i + out_i\n", + " last_bar_of_day = vbt.dt_nb.day_changed_nb(time_index[i], time_index[i + 1])\n", + "\n", + " #print(next_day)\n", + " if last_bar_of_day: #pokud je dalsi next day, tak zavirame posledni\n", + " print(\"ted\",out_i)\n", + " return out_i\n", + " if close[i, c.col] >= hit_price or close[i, c.col] <= stop_price :\n", + " return out_i\n", + " return -1\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.DataFrame(np.random.random(size=(5, 10)), columns=list('abcdefghij'))\n", + "\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df.sum()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": 
"text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/research/strat_TIME_ENTRIES copy/v1_SINGLE.ipynb b/research/strat_TIME_ENTRIES copy/v1_SINGLE.ipynb new file mode 100644 index 0000000..edf4cb8 --- /dev/null +++ b/research/strat_TIME_ENTRIES copy/v1_SINGLE.ipynb @@ -0,0 +1,595 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# TIME based entries, exits\n", + "\n", + "Recurring time bases entries and exits" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from v2realbot.tools.loadbatch import load_batch\n", + "from v2realbot.utils.utils import zoneNY\n", + "import pandas as pd\n", + "import numpy as np\n", + "import vectorbtpro as vbt\n", + "from itables import init_notebook_mode, show\n", + "import datetime\n", + "from itertools import product\n", + "from v2realbot.config import ACCOUNT1_PAPER_API_KEY, ACCOUNT1_PAPER_SECRET_KEY, DATA_DIR\n", + "\n", + "init_notebook_mode(all_interactive=True)\n", + "\n", + "vbt.settings.set_theme(\"dark\")\n", + "vbt.settings['plotting']['layout']['width'] = 1280\n", + "vbt.settings.plotting.auto_rangebreaks = True\n", + "# Set the option to display with pagination\n", + "pd.set_option('display.notebook_repr_html', True)\n", + "pd.set_option('display.max_rows', 10) # Number of rows per page\n", + "\n", + "# Define the market open and close times\n", + "market_open = datetime.time(9, 30)\n", + "market_close = datetime.time(16, 0)\n", + "entry_window_opens = 1\n", + "entry_window_closes = 370\n", + "\n", + "forced_exit_start = 380\n", + "forced_exit_end = 390\n", + "\n", + "#LOAD FROM PARQUET\n", + "#list all files is dir directory with parquet extension\n", + "dir = DATA_DIR + \"/notebooks/\"\n", + "import os\n", + "files = [f for f in os.listdir(dir) if f.endswith(\".parquet\")]\n", + 
"print('\\n'.join(map(str, files)))\n", + "file_name = \"ohlcv_df-BAC-2023-01-01T09_30_00-2024-05-25T15_30_00-47BCFOPUVWZ-100.parquet\"\n", + "ohlcv_df = pd.read_parquet(dir+file_name,engine='pyarrow')\n", + "#filter ohlcv_df to certain date range (assuming datetime index)\n", + "ohlcv_df = ohlcv_df.loc[\"2024-02-12 9:30\":\"2024-02-14 16:00\"]\n", + "\n", + "#add vwap column to ohlcv_df\n", + "#ohlcv_df[\"hlcc4\"] = (ohlcv_df[\"close\"] + ohlcv_df[\"high\"] + ohlcv_df[\"low\"] + ohlcv_df[\"close\"]) / 4\n", + "\n", + "basic_data = vbt.Data.from_data(vbt.symbol_dict({\"BAC\": ohlcv_df}), tz_convert=zoneNY)\n", + "ohlcv_df= None" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Add resample function to custom columns" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from vectorbtpro.utils.config import merge_dicts, Config, HybridConfig\n", + "from vectorbtpro import _typing as tp\n", + "from vectorbtpro.generic import nb as generic_nb\n", + "\n", + "_feature_config: tp.ClassVar[Config] = HybridConfig(\n", + " {\n", + " \"buyvolume\": dict(\n", + " resample_func=lambda self, obj, resampler: obj.vbt.resample_apply(\n", + " resampler,\n", + " generic_nb.sum_reduce_nb,\n", + " )\n", + " ),\n", + " \"sellvolume\": dict(\n", + " resample_func=lambda self, obj, resampler: obj.vbt.resample_apply(\n", + " resampler,\n", + " generic_nb.sum_reduce_nb,\n", + " )\n", + " )\n", + " }\n", + ")\n", + "\n", + "basic_data._feature_config = _feature_config" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#asic_data.stats()\n", + "basic_data.wrapper.index.normalize().nunique()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "basic_data.ohlcv.plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + 
"t1data = basic_data[['open', 'high', 'low', 'close', 'volume','vwap','buyvolume','sellvolume']].resample(\"1T\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "t1data = t1data.xloc[\"2024-02-12 9:30\":\"2024-02-12 10:20\"]\n", + "#t1data = t1data.transform(lambda df: df.between_time('09:30', '10:00').dropna())\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "t1data = basic_data[['open', 'high', 'low', 'close', 'volume','vwap','buyvolume','sellvolume']]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "buyvolume = t1data.data[\"BAC\"].buyvolume\n", + "sellvolume = t1data.data[\"BAC\"].sellvolume\n", + "totalvolume = buyvolume + sellvolume\n", + "\n", + "#adjust to minimal value to avoid division by zero\n", + "sellvolume_adjusted = sellvolume.replace(0, 1e-10)\n", + "oibratio = buyvolume / sellvolume\n", + "\n", + "#cumulative order flow (net difference)\n", + "cof = buyvolume - sellvolume\n", + "\n", + "# Calculate the order imbalance (net differene) normalize the order imbalance by calculating the difference between buy and sell volumes and then scaling it by the total volume.\n", + "order_imbalance = cof / totalvolume\n", + "order_imbalance.fillna(0) #nan nahradime 0\n", + "\n", + "order_imbalance_allvolume = cof / t1data.data[\"BAC\"].volume" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cof\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "order_imbalance.vbt.plot()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "order_imbalance" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#priminds list (same Y as price), secinds list (secondary Y napr. rsi), close, voluminds (volume based) list\n", + "def plot_2y_close(priminds, secinds, close, volumeinds):\n", + " fig = vbt.make_subplots(rows=2, cols=1, shared_xaxes=True, \n", + " specs=[[{\"secondary_y\": True}], [{\"secondary_y\": False}]], \n", + " vertical_spacing=0.02, subplot_titles=(\"Price and Indicators\", \"Volume\"))\n", + "\n", + " # Plotting the close price\n", + " close.vbt.plot(fig=fig, add_trace_kwargs=dict(secondary_y=False,row=1, col=1), trace_kwargs=dict(line=dict(color=\"blue\")))\n", + " \n", + " # Plotting primary indicators on the first row\n", + " for ind in priminds:\n", + " if isinstance(ind, pd.Series):\n", + " #if series has no name, make the name same as the variable name\n", + " \n", + " ind = ind.vbt\n", + " ind.plot(fig=fig, add_trace_kwargs=dict(secondary_y=False, row=1, col=1))\n", + " \n", + " # Plotting secondary indicators on the first row\n", + " for ind in secinds:\n", + " #ind = ind.rename(str(ind.name))\n", + " if isinstance(ind, pd.Series):\n", + " ind = ind.vbt\n", + " ind.plot(fig=fig, add_trace_kwargs=dict(secondary_y=True, row=1, col=1), trace_kwargs=dict(line=dict(color=\"rgba(255, 0, 0, 0.4)\")))\n", + " \n", + " for indvolume in volumeinds:\n", + " # Plotting the volume on the second row\n", + " indvolume.rename(str(indvolume.name)).vbt.barplot(fig=fig, add_trace_kwargs=dict(secondary_y=False, row=2, col=1))\n", + " #vbt.Bar(indvolume, fig=fig, add_trace_kwargs=dict(secondary_y=False, row=2, col=1))\n", + " \n", + " return fig\n", + "\n", + "plot_2y_close([], [order_imbalance.rename(\"order_imbalance_norm\")], t1data.close, [t1data.data[\"BAC\"].buyvolume, t1data.data[\"BAC\"].sellvolume, t1data.volume])\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib inline\n", + 
"t0data = basic_data\n", + "t1data = basic_data[['open', 'high', 'low', 'close', 'volume','vwap']].resample(\"1T\")\n", + "t2data = basic_data[['open', 'high', 'low', 'close', 'volume','vwap']].resample(\"15T\")\n", + "t3data = basic_data[['open', 'high', 'low', 'close', 'volume','vwap']].resample(\"30T\")\n", + "t4data = basic_data[['open', 'high', 'low', 'close', 'volume', 'vwap']].resample(\"D\").dropna()\n", + "\n", + "t1data = t1data.transform(lambda df: df.between_time('09:30', '16:00').dropna())\n", + "t2data = t2data.transform(lambda df: df.between_time('09:30', '16:00').dropna())\n", + "t3data = t3data.transform(lambda df: df.between_time('09:30', '16:00').dropna())\n", + "\n", + "#30min data to daily\n", + "# t4data = t3data.resample(\"D\").dropna()\n", + "\n", + "#t4data = t4data.transform(lambda df: df.between_time('09:30', '16:00').dropna())\n", + "#m1data.data[\"SPY\"].info()\n", + "\n", + "#m1data.data[\"SPY\"].vbt.ohlcv.plot()\n", + "#h2data.data[\"SPY\"].vbt.ohlcv.plot()\n", + "#ddata.data[\"SPY\"]\n", + "t2data.data[\"BAC\"].vbt.ohlcv.plot().show()\n", + "\n", + "\n", + "#t4data.data[\"BAC\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "t2data.close\n", + "\n", + "#in df remove rows with nan\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#realign na 1T = t1data + oriznout main session\n", + "t2data_vwap = t2data.vwap.vbt.realign_closing(\"1T\").between_time('09:30', '16:00').dropna()\n", + "t3data_vwap = t3data.vwap.vbt.realign_closing(\"1T\").between_time('09:30', '16:00').dropna()\n", + "t4data_vwap = t4data.vwap.vbt.realign_closing(\"1T\").dropna()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "t2data_vwap" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def 
plot_2y_close(priminds, secinds, close):\n", + " fig = vbt.make_subplots(rows=1, cols=1, shared_xaxes=True, specs=[[{\"secondary_y\": True}]], vertical_spacing=0.02, subplot_titles=(\"MOM\", \"Price\" ))\n", + " close.vbt.plot(fig=fig, add_trace_kwargs=dict(secondary_y=False), trace_kwargs=dict(line=dict(color=\"blue\")))\n", + " for ind in priminds:\n", + " if isinstance(ind, pd.Series):\n", + " ind = ind.vbt\n", + " ind.plot(fig=fig, add_trace_kwargs=dict(secondary_y=False))\n", + " for ind in secinds:\n", + " if isinstance(ind, pd.Series):\n", + " ind = ind.vbt\n", + " ind.plot(fig=fig, add_trace_kwargs=dict(secondary_y=True))\n", + " return fig" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "t4data.clos.vbt \n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "obvind = vbt.indicator.obv.run(t1data.close, t1data.volume)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "t1_lengtgh = 15\n", + "t2_length = 15\n", + "t3_length = 15\n", + "t4_length = 5\n", + "t1_th = 0.1\n", + "t2_th = 0.1\n", + "t3_th = 0.1\n", + "t4_th = 0.1\n", + "\n", + "\n", + "\n", + "#minute\n", + "t1slope = vbt.indicator(\"talib:LINEARREG_SLOPE \").run(t1data.close, timeperiod=t1_lengtgh) # -0.09, 0.09\n", + "t2slope = vbt.indicator(\"talib:LINEARREG_SLOPE \").run(t2data.vwap, timeperiod=t2_length) # -0.08 , 0.079\n", + "t3slope = vbt.indicator(\"talib:LINEARREG_SLOPE \").run(t3data.vwap, timeperiod=t3_length) # -0.08, 0.08\n", + "#daily\n", + "t4slope = vbt.indicator(\"talib:LINEARREG_SLOPE \").run(t4data.vwap, timeperiod=t4_length) # -0.1, 0.09\n", + "\n", + "plot_2y_close(priminds=[], secinds=[t1slope, t2slope, t3slope, t4slope], close=t1data.close).show()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + 
"#thirtymin_slope = thirtymin_slope.real.rename(\"30min\") #timto se prejmenuje real na 30min\n", + "t3slope = t3slope.real.vbt.realign_closing(\"1T\").between_time('09:30', '16:00').dropna()\n", + "##filter daily_slope_to_compare to only monday to friday\n", + "t3slope = t3slope[t3slope.index.dayofweek < 5]\n", + "\n", + "#t3slope.info()\n", + "\n", + "t2slope = t2slope.real.vbt.realign_closing(\"1T\").between_time('09:30', '16:00').dropna()\n", + "##filter daily_slope_to_compare to only monday to friday\n", + "t2slope = t2slope[t2slope.index.dayofweek < 5]\n", + "\n", + "t2slope.info()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "oibratio" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "#\n", + "short_entries = order_imbalance.vbt < 0.0002\n", + "#short_entries = oibratio.vbt < 0.01\n", + "short_entries.value_counts()\n", + "\n", + "entries = order_imbalance.vbt > 0.7\n", + "#entries = oibratio.vbt > 10\n", + "entries.value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig = vbt.make_subplots(rows=3, cols=1, shared_xaxes=True, \n", + " specs=[[{\"secondary_y\": True}], [{\"secondary_y\": True}], [{\"secondary_y\": False}]], \n", + " vertical_spacing=0.02, subplot_titles=(\"Price and Indicators\", \"Volume\"))\n", + "t1data.data[\"BAC\"].vbt.ohlcv.plot(fig=fig, add_trace_kwargs=dict(secondary_y=False, row=1, col=1))\n", + "#oibratio.vbt.plot(fig=fig, add_trace_kwargs=dict(secondary_y=True, row=1, col=1))\n", + "order_imbalance.vbt.plot(fig=fig, add_trace_kwargs=dict(secondary_y=True, row=1, col=1))\n", + "entries.vbt.signals.plot_as_entries(t1data.close, fig=fig, trace_kwargs=dict(name=\"LONGS\",\n", + " line=dict(color=\"#ffe476\"),\n", + " marker=dict(color=\"limegreen\"),\n", + " fill=None,\n", + " connectgaps=True,\n", + " ), 
add_trace_kwargs=dict(secondary_y=False, row=1, col=1))\n", + "\n", + "short_entries.vbt.signals.plot_as_entries(t1data.close, fig=fig, trace_kwargs=dict(name=\"SHORTS\",\n", + " line=dict(color=\"#ffe476\"),\n", + " marker=dict(color=\"red\"),\n", + " fill=None,\n", + " connectgaps=True,\n", + " ), add_trace_kwargs=dict(secondary_y=False, row=1, col=1))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# thirtymin_slope_to_compare.vbt.xloc[\"04-16-2024\"].get()\n", + "thirty_down_signal.vbt.xloc[\"04-16-2024\"].get()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#short_signal = t1slope.real_below(t1_th) & t2slope.real_below(t2_th) & t3slope.real_below(t3_th) & t4slope.real_below(t4_th)\n", + "#long_signal = t1slope.real_above(t1_th) & t2slope.real_above(t2_th) & t3slope.real_above(t3_th) & t4slope.real_above(t4_th)\n", + "\n", + "#test na daily s reversem crossed 0\n", + "short_signal = t2slope.vbt < -0.01 & t3slope.vbt < -0.01 #min value of threshold\n", + "long_signal = t2slope.vbt > 0.01 & t3slope.vbt > 0.01 #min\n", + "\n", + "# thirty_up_signal = t3slope.vbt.crossed_above(0.01)\n", + "# thirty_down_signal = t3slope.vbt.crossed_below(-0.01)\n", + "\n", + "fig = plot_2y_close(priminds=[], secinds=[t3slope], close=t1data.close)\n", + "#short_signal.vbt.signals.plot_as_entries(basic_data.close, fig=fig)\n", + "\n", + "short_signal.vbt.signals.plot_as_entries(t1data.close, fig=fig, trace_kwargs=dict(name=\"SHORTS\",\n", + " line=dict(color=\"#ffe476\"),\n", + " marker=dict(color=\"red\", symbol=\"triangle-down\"),\n", + " fill=None,\n", + " connectgaps=True,\n", + " ))\n", + "long_signal.vbt.signals.plot_as_entries(t1data.close, fig=fig, trace_kwargs=dict(name=\"LONGS\",\n", + " line=dict(color=\"#ffe476\"),\n", + " marker=dict(color=\"limegreen\"),\n", + " fill=None,\n", + " connectgaps=True,\n", + " ))\n", + "\n", + "# 
thirty_down_signal.vbt.signals.plot_as_entries(t1data.close, fig=fig, trace_kwargs=dict(name=\"DOWN30\",\n", + "# line=dict(color=\"#ffe476\"),\n", + "# marker=dict(color=\"yellow\", symbol=\"triangle-down\"),\n", + "# fill=None,\n", + "# connectgaps=True,\n", + "# ))\n", + "# thirty_up_signal.vbt.signals.plot_as_entries(t1data.close, fig=fig, trace_kwargs=dict(name=\"UP30\",\n", + "# line=dict(color=\"#ffe476\"),\n", + "# marker=dict(color=\"grey\"),\n", + "# fill=None,\n", + "# connectgaps=True,\n", + "# ))\n", + "\n", + "# thirtymin_slope_to_compare.vbt.plot(fig=fig, add_trace_kwargs=dict(secondary_y=True), trace_kwargs=dict(name=\"30min slope\",\n", + "# line=dict(color=\"yellow\"), \n", + "# fill=None,\n", + "# connectgaps=True,\n", + "# ))\n", + "\n", + "fig.show()\n", + "# print(\"short signal\")\n", + "# print(short_signal.value_counts())\n", + "\n", + "#forced_exit = pd.Series(False, index=close.index)\n", + "forced_exit = basic_data.symbol_wrapper.fill(False)\n", + "#entry_window_open = pd.Series(False, index=close.index)\n", + "entry_window_open= basic_data.symbol_wrapper.fill(False)\n", + "\n", + "# Calculate the time difference in minutes from market open for each timestamp\n", + "elapsed_min_from_open = (forced_exit.index.hour - market_open.hour) * 60 + (forced_exit.index.minute - market_open.minute)\n", + "\n", + "entry_window_open[(elapsed_min_from_open >= entry_window_opens) & (elapsed_min_from_open < entry_window_closes)] = True\n", + "\n", + "#print(entry_window_open.value_counts())\n", + "\n", + "forced_exit[(elapsed_min_from_open >= forced_exit_start) & (elapsed_min_from_open < forced_exit_end)] = True\n", + "short_entries = (short_signal & entry_window_open)\n", + "short_exits = forced_exit\n", + "\n", + "entries = (long_signal & entry_window_open)\n", + "exits = forced_exit\n", + "#long_entries.info()\n", + "#number of trues and falses in long_entries\n", + "# print(short_exits.value_counts())\n", + "# print(short_entries.value_counts())\n", 
+ "\n", + "#fig = plot_2y_close([],[momshort, rocp], close)\n", + "#short_signal.vbt.signals.plot_as_entries(close, fig=fig, add_trace_kwargs=dict(secondary_y=False))\n", + "#print(sl_stop)\n", + "#short_entries=short_entries, short_exits=short_exits,\n", + "# pf = vbt.Portfolio.from_signals(close=basic_data, entries=short_entries, exits=exits, tsl_stop=0.005, tp_stop = 0.05, fees=0.0167/100, freq=\"1s\") #sl_stop=sl_stop, tp_stop = sl_stop,\n", + "\n", + "# pf.stats()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# pf.plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf.get_drawdowns().records_readable" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf.orders.records_readable" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/research/test.ipynb b/research/test.ipynb new file mode 100644 index 0000000..ee7dbb3 --- /dev/null +++ b/research/test.ipynb @@ -0,0 +1,161 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from v2realbot.tools.loadbatch import load_batch\n", + "from v2realbot.utils.utils import zoneNY\n", + "import pandas as pd\n", + "import numpy as np\n", + "import vectorbtpro as vbt\n", + "\n", + "vbt.settings.set_theme(\"dark\")\n", + "vbt.settings['plotting']['layout']['width'] = 1280\n", + "vbt.settings.plotting.auto_rangebreaks = True\n", + "# Set the option to display with 
pagination\n", + "pd.set_option('display.notebook_repr_html', True)\n", + "pd.set_option('display.max_rows', 10) # Number of rows per page\n", + "\n", + "res, df = load_batch(batch_id=\"e44a5075\",\n", + " space_resolution_evenly=False,\n", + " indicators_columns=[\"Rsi14\"],\n", + " main_session_only=True)\n", + "if res < 0:\n", + " print(\"Error\" + str(res) + str(df))\n", + "df = df[\"bars\"]\n", + "\n", + "#df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# filter dates" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#naloadujeme do vbt symbol as column\n", + "basic_data = vbt.Data.from_data({\"BAC\": df}, tz_convert=zoneNY)\n", + "start_date = pd.Timestamp('2024-03-12 09:30', tz=zoneNY)\n", + "end_date = pd.Timestamp('2024-03-13 16:00', tz=zoneNY)\n", + "\n", + "#basic_data = basic_data.transform(lambda df: df[df.index.date == start_date.date()])\n", + "basic_data = basic_data.transform(lambda df: df[(df.index >= start_date) & (df.index <= end_date)])\n", + "#basic_data.data[\"BAC\"].info()\n", + "\n", + "# fig = basic_data.plot(plot_volume=False)\n", + "# pivot_info = basic_data.run(\"pivotinfo\", up_th=0.003, down_th=0.002)\n", + "# #pivot_info.plot()\n", + "# pivot_info.plot(fig=fig, conf_value_trace_kwargs=dict(visible=True))\n", + "# fig.show()\n", + "\n", + "\n", + "# rsi14 = basic_data.data[\"BAC\"][\"Rsi14\"].rename(\"Rsi14\")\n", + "\n", + "# rsi14.vbt.plot().show()\n", + "# basic_data.data[\"BAC\"].vbt.ohlcv.plot().show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "@vbt.njit\n", + "def long_entry_place_func_nb(c, low, close, time_in_ns, rsi14, window_open, window_close):\n", + " market_open_minutes = 570 # 9 hours * 60 minutes + 30 minutes\n", + "\n", + " for out_i in range(len(c.out)):\n", + " i = c.from_i + out_i\n", + "\n", + " current_minutes = vbt.dt_nb.hour_nb(time_in_ns[i]) * 
60 + vbt.dt_nb.minute_nb(time_in_ns[i])\n", + " #print(\"current_minutes\", current_minutes)\n", + " # Calculate elapsed minutes since market open at 9:30 AM\n", + " elapsed_from_open = current_minutes - market_open_minutes\n", + " elapsed_from_open = elapsed_from_open if elapsed_from_open >= 0 else 0\n", + " #print( \"elapsed_from_open\", elapsed_from_open)\n", + "\n", + " #elapsed_from_open = elapsed_minutes_from_open_nb(time_in_ns) \n", + " in_window = elapsed_from_open > window_open and elapsed_from_open < window_close\n", + " #print(\"in_window\", in_window)\n", + " # if in_window:\n", + " # print(\"in window\")\n", + "\n", + " if in_window and rsi14[i] > 60: # and low[i, c.col] <= hit_price: # and hour == 9: # (4)!\n", + " return out_i\n", + " return -1\n", + "\n", + "@vbt.njit\n", + "def long_exit_place_func_nb(c, high, close, time_index, tp, sl): # (5)!\n", + " entry_i = c.from_i - c.wait\n", + " entry_price = close[entry_i, c.col]\n", + " hit_price = entry_price * (1 + tp)\n", + " stop_price = entry_price * (1 - sl)\n", + " for out_i in range(len(c.out)):\n", + " i = c.from_i + out_i\n", + " last_bar_of_day = vbt.dt_nb.day_changed_nb(time_index[i], time_index[i + 1])\n", + "\n", + " #print(next_day)\n", + " if last_bar_of_day: #pokud je dalsi next day, tak zavirame posledni\n", + " print(\"ted\",out_i)\n", + " return out_i\n", + " if close[i, c.col] >= hit_price or close[i, c.col] <= stop_price :\n", + " return out_i\n", + " return -1\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.DataFrame(np.random.random(size=(5, 10)), columns=list('abcdefghij'))\n", + "\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df.sum()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + 
"name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/research/test1.ipynb b/research/test1.ipynb new file mode 100644 index 0000000..94f2ae0 --- /dev/null +++ b/research/test1.ipynb @@ -0,0 +1,82 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import statsmodels.api as sm\n", + "\n", + "# Example time series data\n", + "np.random.seed(0)\n", + "dates = pd.date_range('2023-01-01', periods=100)\n", + "data = pd.Series(np.random.randn(100).cumsum(), index=dates)\n", + "\n", + "# Parameters\n", + "window_size = 20\n", + "\n", + "# Function to calculate rolling window linear regression\n", + "def rolling_linreg(series, window):\n", + " intercepts = []\n", + " slopes = []\n", + " for i in range(len(series) - window + 1):\n", + " y = series[i:i + window]\n", + " x = np.arange(window)\n", + " x = sm.add_constant(x)\n", + " model = sm.OLS(y, x).fit()\n", + " intercepts.append(model.params[0])\n", + " slopes.append(model.params[1])\n", + " return intercepts, slopes\n", + "\n", + "# Calculate rolling linear regression parameters\n", + "intercepts, slopes = rolling_linreg(data, window_size)\n", + "\n", + "# Create a DataFrame for plotting\n", + "rolling_dates = dates[window_size - 1:]\n", + "rolling_intercepts = pd.Series(intercepts, index=rolling_dates)\n", + "rolling_slopes = pd.Series(slopes, index=rolling_dates)\n", + "\n", + "# Plot the original data and the rolling linear regression\n", + "plt.figure(figsize=(14, 7))\n", + "plt.plot(data, label='Original Data')\n", + "for i in range(len(rolling_intercepts)):\n", + " start_date = rolling_dates[i] - pd.DateOffset(days=window_size-1)\n", + " 
end_date = rolling_dates[i]\n", + " plt.plot([start_date, end_date],\n", + " [rolling_intercepts[i], rolling_intercepts[i] + rolling_slopes[i] * (window_size - 1)],\n", + " color='red', alpha=0.5)\n", + "\n", + "plt.legend()\n", + "plt.title('Rolling Window Linear Regression')\n", + "plt.xlabel('Date')\n", + "plt.ylabel('Value')\n", + "plt.show()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/research/test1sbars.ipynb b/research/test1sbars.ipynb new file mode 100644 index 0000000..d4f3dc3 --- /dev/null +++ b/research/test1sbars.ipynb @@ -0,0 +1,421 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from v2realbot.tools.loadbatch import load_batch\n", + "from v2realbot.utils.utils import zoneNY\n", + "import pandas as pd\n", + "import numpy as np\n", + "import vectorbtpro as vbt\n", + "from itables import init_notebook_mode, show\n", + "\n", + "init_notebook_mode(all_interactive=True)\n", + "\n", + "vbt.settings.set_theme(\"dark\")\n", + "vbt.settings['plotting']['layout']['width'] = 1280\n", + "vbt.settings.plotting.auto_rangebreaks = True\n", + "# Set the option to display with pagination\n", + "pd.set_option('display.notebook_repr_html', True)\n", + "pd.set_option('display.max_rows', 10) # Number of rows per page\n", + "\n", + "res, df = load_batch(batch_id=\"0fb5043a\", #46 days 1.3 - 6.5.\n", + " space_resolution_evenly=False,\n", + " indicators_columns=[\"Rsi14\"],\n", + " main_session_only=True,\n", + " verbose = False)\n", + "if res < 0:\n", + " print(\"Error\" + str(res) + str(df))\n", + "df = 
df[\"bars\"]\n", + "\n", + "df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# filter dates" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#filter na dny\n", + "# dates_of_interest = pd.to_datetime(['2024-04-22', '2024-04-23']).tz_localize('US/Eastern')\n", + "# filtered_df = df.loc[df.index.normalize().isin(dates_of_interest)]\n", + "\n", + "# df = filtered_df\n", + "# df.info()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import plotly.io as pio\n", + "pio.renderers.default = 'notebook'\n", + "\n", + "#naloadujeme do vbt symbol as column\n", + "basic_data = vbt.Data.from_data({\"BAC\": df}, tz_convert=zoneNY)\n", + "start_date = pd.Timestamp('2024-03-12 09:30', tz=zoneNY)\n", + "end_date = pd.Timestamp('2024-03-13 16:00', tz=zoneNY)\n", + "\n", + "#basic_data = basic_data.transform(lambda df: df[df.index.date == start_date.date()])\n", + "#basic_data = basic_data.transform(lambda df: df[(df.index >= start_date) & (df.index <= end_date)])\n", + "#basic_data.data[\"BAC\"].info()\n", + "\n", + "# fig = basic_data.plot(plot_volume=False)\n", + "# pivot_info = basic_data.run(\"pivotinfo\", up_th=0.003, down_th=0.002)\n", + "# #pivot_info.plot()\n", + "# pivot_info.plot(fig=fig, conf_value_trace_kwargs=dict(visible=True))\n", + "# fig.show()\n", + "\n", + "\n", + "# rsi14 = basic_data.data[\"BAC\"][\"Rsi14\"].rename(\"Rsi14\")\n", + "\n", + "# rsi14.vbt.plot().show()\n", + "#basic_data.xloc[\"09:30\":\"10:00\"].data[\"BAC\"].vbt.ohlcv.plot().show()\n", + "\n", + "vbt.settings.plotting.auto_rangebreaks = True\n", + "#basic_data.data[\"BAC\"].vbt.ohlcv.plot()\n", + "\n", + "#basic_data.data[\"BAC\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "m1_data = basic_data[['Open', 'High', 'Low', 'Close', 'Volume']]\n", + "\n", + 
"m1_data.data[\"BAC\"]\n", + "#m5_data = m1_data.resample(\"5T\")\n", + "\n", + "#m5_data.data[\"BAC\"].head(10)\n", + "\n", + "# m15_data = m1_data.resample(\"15T\")\n", + "\n", + "# m15 = m15_data.data[\"BAC\"]\n", + "\n", + "# m15.vbt.ohlcv.plot()\n", + "\n", + "# m1_data.wrapper.index\n", + "\n", + "# m1_resampler = m1_data.wrapper.get_resampler(\"1T\")\n", + "# m1_resampler.index_difference(reverse=True)\n", + "\n", + "\n", + "# m5_resampler.prettify()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# defining ENTRY WINDOW and forced EXIT window" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#m1_data.data[\"BAC\"].info()\n", + "import datetime\n", + "# Define the market open and close times\n", + "market_open = datetime.time(9, 30)\n", + "market_close = datetime.time(16, 0)\n", + "entry_window_opens = 1\n", + "entry_window_closes = 350\n", + "\n", + "forced_exit_start = 380\n", + "forced_exit_end = 390\n", + "\n", + "forced_exit = m1_data.symbol_wrapper.fill(False)\n", + "entry_window_open= m1_data.symbol_wrapper.fill(False)\n", + "\n", + "# Calculate the time difference in minutes from market open for each timestamp\n", + "elapsed_min_from_open = (forced_exit.index.hour - market_open.hour) * 60 + (forced_exit.index.minute - market_open.minute)\n", + "\n", + "entry_window_open[(elapsed_min_from_open >= entry_window_opens) & (elapsed_min_from_open < entry_window_closes)] = True\n", + "forced_exit[(elapsed_min_from_open >= forced_exit_start) & (elapsed_min_from_open < forced_exit_end)] = True\n", + "\n", + "#entry_window_open.info()\n", + "# forced_exit.tail(100)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "close = m1_data.close\n", + "\n", + "rsi = vbt.RSI.run(close, window=14)\n", + "\n", + "long_entries = (rsi.rsi.vbt.crossed_below(20) & entry_window_open)\n", + "long_exits = 
(rsi.rsi.vbt.crossed_above(70) | forced_exit)\n", + "#long_entries.info()\n", + "#number of trues and falses in long_entries\n", + "long_entries.value_counts()\n", + "#long_exits.value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def plot_rsi(rsi, close, entries, exits):\n", + " fig = vbt.make_subplots(rows=1, cols=1, shared_xaxes=True, specs=[[{\"secondary_y\": True}]], vertical_spacing=0.02, subplot_titles=(\"RSI\", \"Price\" ))\n", + " close.vbt.plot(fig=fig, add_trace_kwargs=dict(secondary_y=True))\n", + " rsi.plot(fig=fig, add_trace_kwargs=dict(secondary_y=False))\n", + " entries.vbt.signals.plot_as_entries(rsi.rsi, fig=fig, add_trace_kwargs=dict(secondary_y=False)) \n", + " exits.vbt.signals.plot_as_exits(rsi.rsi, fig=fig, add_trace_kwargs=dict(secondary_y=False)) \n", + " return fig\n", + "\n", + "plot_rsi(rsi, close, long_entries, long_exits)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vbt.phelp(vbt.Portfolio.from_signals)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sl_stop = np.arange(0.03/100, 0.2/100, 0.02/100).tolist()\n", + "# Using the round function\n", + "sl_stop = [round(val, 4) for val in sl_stop]\n", + "print(sl_stop)\n", + "sl_stop = vbt.Param(sl_stop) #np.nan mean s no stoploss\n", + "\n", + "pf = vbt.Portfolio.from_signals(close=close, entries=long_entries, sl_stop=sl_stop, tp_stop = sl_stop, exits=long_exits,fees=0.0167/100, freq=\"1s\") #sl_stop=sl_stop, tp_stop = sl_stop, \n", + "\n", + "#pf.stats()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf.plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf[(0.0015,0.0013)].plot()" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf[0.03].plot_trade_signals()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# pristup k pf jako multi index" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#pf[0.03].plot()\n", + "#pf.order_records\n", + "pf[(0.03)].stats()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#zgrupovane statistiky\n", + "stats_df = pf.stats([\n", + " 'total_return',\n", + " 'total_trades',\n", + " 'win_rate',\n", + " 'expectancy'\n", + "], agg_func=None)\n", + "stats_df\n", + "\n", + "\n", + "stats_df.nlargest(50, 'Total Return [%]')\n", + "#stats_df.info()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf[(0.0011,0.0013)].plot()\n", + "\n", + "#pf[(0.0011,0.0013000000000000002)].plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pandas.tseries.offsets import DateOffset\n", + "\n", + "temp_data = basic_data['2024-4-22']\n", + "temp_data\n", + "res1m = temp_data[[\"Open\", \"High\", \"Low\", \"Close\", \"Volume\"]]\n", + "\n", + "# Define a custom date offset that starts at 9:30 AM and spans 4 hours\n", + "custom_offset = DateOffset(hours=4, minutes=30)\n", + "\n", + "# res1m = res1m.get().resample(\"4H\").agg({ \n", + "# \"Open\": \"first\",\n", + "# \"High\": \"max\",\n", + "# \"Low\": \"min\",\n", + "# \"Close\": \"last\",\n", + "# \"Volume\": \"sum\"\n", + "# })\n", + "\n", + "res4h = res1m.resample(\"1h\", resample_kwargs=dict(origin=\"start\"))\n", + "\n", + "res4h.data\n", + "\n", + "res15m = res1m.resample(\"15T\", resample_kwargs=dict(origin=\"start\"))\n", + "\n", + "res15m.data[\"BAC\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + 
"source": [ + "@vbt.njit\n", + "def long_entry_place_func_nb(c, low, close, time_in_ns, rsi14, window_open, window_close):\n", + " market_open_minutes = 570 # 9 hours * 60 minutes + 30 minutes\n", + "\n", + " for out_i in range(len(c.out)):\n", + " i = c.from_i + out_i\n", + "\n", + " current_minutes = vbt.dt_nb.hour_nb(time_in_ns[i]) * 60 + vbt.dt_nb.minute_nb(time_in_ns[i])\n", + " #print(\"current_minutes\", current_minutes)\n", + " # Calculate elapsed minutes since market open at 9:30 AM\n", + " elapsed_from_open = current_minutes - market_open_minutes\n", + " elapsed_from_open = elapsed_from_open if elapsed_from_open >= 0 else 0\n", + " #print( \"elapsed_from_open\", elapsed_from_open)\n", + "\n", + " #elapsed_from_open = elapsed_minutes_from_open_nb(time_in_ns) \n", + " in_window = elapsed_from_open > window_open and elapsed_from_open < window_close\n", + " #print(\"in_window\", in_window)\n", + " # if in_window:\n", + " # print(\"in window\")\n", + "\n", + " if in_window and rsi14[i] > 60: # and low[i, c.col] <= hit_price: # and hour == 9: # (4)!\n", + " return out_i\n", + " return -1\n", + "\n", + "@vbt.njit\n", + "def long_exit_place_func_nb(c, high, close, time_index, tp, sl): # (5)!\n", + " entry_i = c.from_i - c.wait\n", + " entry_price = close[entry_i, c.col]\n", + " hit_price = entry_price * (1 + tp)\n", + " stop_price = entry_price * (1 - sl)\n", + " for out_i in range(len(c.out)):\n", + " i = c.from_i + out_i\n", + " last_bar_of_day = vbt.dt_nb.day_changed_nb(time_index[i], time_index[i + 1])\n", + "\n", + " #print(next_day)\n", + " if last_bar_of_day: #pokud je dalsi next day, tak zavirame posledni\n", + " print(\"ted\",out_i)\n", + " return out_i\n", + " if close[i, c.col] >= hit_price or close[i, c.col] <= stop_price :\n", + " return out_i\n", + " return -1\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.DataFrame(np.random.random(size=(5, 10)), 
columns=list('abcdefghij'))\n", + "\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df.sum()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/research/test1sbars_roc.ipynb b/research/test1sbars_roc.ipynb new file mode 100644 index 0000000..fcf2d6b --- /dev/null +++ b/research/test1sbars_roc.ipynb @@ -0,0 +1,935 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from v2realbot.tools.loadbatch import load_batch\n", + "from v2realbot.utils.utils import zoneNY\n", + "import pandas as pd\n", + "import numpy as np\n", + "import vectorbtpro as vbt\n", + "from itables import init_notebook_mode, show\n", + "import datetime\n", + "from itertools import product\n", + "\n", + "init_notebook_mode(all_interactive=True)\n", + "\n", + "vbt.settings.set_theme(\"dark\")\n", + "vbt.settings['plotting']['layout']['width'] = 1280\n", + "vbt.settings.plotting.auto_rangebreaks = True\n", + "# Set the option to display with pagination\n", + "pd.set_option('display.notebook_repr_html', True)\n", + "pd.set_option('display.max_rows', 10) # Number of rows per page\n", + "\n", + "# Define the market open and close times\n", + "market_open = datetime.time(9, 30)\n", + "market_close = datetime.time(16, 0)\n", + "entry_window_opens = 1\n", + "entry_window_closes = 370\n", + "\n", + "forced_exit_start = 380\n", + "forced_exit_end = 390\n", + "\n", + "res, df = load_batch(batch_id=\"0fb5043a\", #0fb5043a bde6d0be\n", + " space_resolution_evenly=False,\n", + " 
indicators_columns=[\"Rsi14\"],\n", + " main_session_only=True,\n", + " verbose = False)\n", + "if res < 0:\n", + " print(\"Error\" + str(res) + str(df))\n", + "df = df[\"bars\"]\n", + "\n", + "#df\n", + "\n", + "basic_data = vbt.Data.from_data(vbt.symbol_dict({\"BAC\": df}), tz_convert=zoneNY)\n", + "#m1_data = basic_data[['Open', 'High', 'Low', 'Close', 'Volume']]\n", + "basic_data = basic_data.transform(lambda df: df.between_time('09:30', '16:00'))\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#parameters (primary y line, secondary y line, close)\n", + "def plot_2y_close(priminds, secinds, close):\n", + " fig = vbt.make_subplots(rows=1, cols=1, shared_xaxes=True, specs=[[{\"secondary_y\": True}]], vertical_spacing=0.02, subplot_titles=(\"MOM\", \"Price\" ))\n", + " close.vbt.plot(fig=fig, add_trace_kwargs=dict(secondary_y=False), trace_kwargs=dict(line=dict(color=\"blue\")))\n", + " for ind in priminds:\n", + " ind.plot(fig=fig, add_trace_kwargs=dict(secondary_y=False))\n", + " for ind in secinds:\n", + " ind.plot(fig=fig, add_trace_kwargs=dict(secondary_y=True))\n", + " return fig\n", + "\n", + "# close = basic_data.xloc[\"09:30\":\"10:00\"].close" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#PIPELINE - FOR - LOOP\n", + "\n", + "#indicator parameters\n", + "mom_timeperiod = list(range(2, 12))\n", + "\n", + "#uzavreni okna od 1 do 200\n", + "entry_window_closes = list(range(2, 50, 3))\n", + "#entry_window_closes = [3, 7, 15, 29, 45, 50, 70]\n", + "#threshold entries parameters\n", + "#long\n", + "mom_th = np.round(np.arange(0.01, 0.5 + 0.02, 0.02),4).tolist()#-0.02\n", + "# short mom_th = np.round(np.arange(-0.01, -0.3 - 0.02, -0.02),4).tolist()#-0.02\n", + "roc_th = np.round(np.arange(-0.2, -0.8 - 0.05, -0.05),4).tolist()#-0.2\n", + "#print(mom_th, roc_th)\n", + "\n", + "#portfolio simulation parameters\n", + 
"sl_stop =np.round(np.arange(0.03/100, 0.7/100, 0.05/100),4).tolist()\n", + "tp_stop = np.round(np.arange(0.03/100, 0.7/100, 0.05/100),4).tolist()\n", + "\n", + "combs = list(product(mom_timeperiod, mom_th, roc_th, sl_stop, tp_stop))\n", + "\n", + "@vbt.parameterized(merge_func = \"concat\", random_subset = 1000, show_progress=True) \n", + "def test_strat(entry_window_closes=60,\n", + " mom_timeperiod=2,\n", + " mom_th=-0.04,\n", + " #roc_th=-0.2,\n", + " sl_stop=0.19/100,\n", + " tp_stop=0.19/100):\n", + " # mom_timeperiod=2\n", + " # mom_th=-0.06\n", + " # roc_th=-0.2\n", + " # sl_stop=0.04/100\n", + " # tp_stop=0.04/100\n", + "\n", + " momshort = vbt.indicator(\"talib:MOM\").run(basic_data.close, timeperiod=mom_timeperiod, short_name = \"slope_short\")\n", + " rocp = vbt.indicator(\"talib:ROC\").run(basic_data.close, short_name = \"rocp\")\n", + " #rate of change + momentum\n", + "\n", + " #momshort.plot rocp.real_crossed_below(roc_th) & \n", + " short_signal = momshort.real_crossed_below(mom_th)\n", + " long_signal = momshort.real_crossed_above(mom_th)\n", + " # print(\"short signal\")\n", + " # print(short_signal.value_counts())\n", + "\n", + " #forced_exit = pd.Series(False, index=close.index)\n", + " forced_exit = basic_data.symbol_wrapper.fill(False)\n", + " #entry_window_open = pd.Series(False, index=close.index)\n", + " entry_window_open= basic_data.symbol_wrapper.fill(False)\n", + "\n", + " #print(entry_window_closes, \"entry window closes\")\n", + " # Calculate the time difference in minutes from market open for each timestamp\n", + " elapsed_min_from_open = (forced_exit.index.hour - market_open.hour) * 60 + (forced_exit.index.minute - market_open.minute)\n", + "\n", + " entry_window_open[(elapsed_min_from_open >= entry_window_opens) & (elapsed_min_from_open < entry_window_closes)] = True\n", + "\n", + " #print(entry_window_open.value_counts())\n", + "\n", + " forced_exit[(elapsed_min_from_open >= forced_exit_start) & (elapsed_min_from_open < 
forced_exit_end)] = True\n", + " short_entries = (short_signal & entry_window_open)\n", + " short_exits = forced_exit\n", + " entries = (long_signal & entry_window_open)\n", + " exits = forced_exit\n", + " #long_entries.info()\n", + " #number of trues and falses in long_entries\n", + " # print(short_exits.value_counts())\n", + " # print(short_entries.value_counts())\n", + "\n", + " #fig = plot_2y_close([],[momshort, rocp], close)\n", + " #short_signal.vbt.signals.plot_as_entries(close, fig=fig, add_trace_kwargs=dict(secondary_y=False))\n", + " #print(sl_stop)\n", + " #tsl_th=sl_stop, \n", + " #short_entries=short_entries, short_exits=short_exits,\n", + " pf = vbt.Portfolio.from_signals(close=basic_data.close, entries=entries, exits=exits, tsl_stop=sl_stop, tp_stop = tp_stop, fees=0.0167/100, freq=\"1s\", price=\"nextopen\") #sl_stop=sl_stop, tp_stop = sl_stop,\n", + " \n", + " return pf.stats([\n", + " 'total_return',\n", + " 'max_dd', \n", + " 'total_trades', \n", + " 'win_rate', \n", + " 'expectancy'\n", + " ])\n", + "\n", + "pf_results = test_strat(vbt.Param(entry_window_closes),\n", + " vbt.Param(mom_timeperiod),\n", + " vbt.Param(mom_th),\n", + " #vbt.Param(roc_th)\n", + " vbt.Param(sl_stop),\n", + " vbt.Param(tp_stop, condition=\"tp_stop > sl_stop\"))\n", + "pf_results = pf_results.unstack(level=-1)\n", + "pf_results.sort_values(by=[\"Total Return [%]\", \"Max Drawdown [%]\"], ascending=[False, True])\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#pf_results.load(\"10tiscomb.pickle\")\n", + "#pf_results.info()\n", + "\n", + "vbt.save(pf_results, \"8tiscomb_tsl.pickle\")\n", + "\n", + "# pf_results = vbt.load(\"8tiscomb_tsl.pickle\")\n", + "# pf_results\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# parallel_coordinates method¶\n", + "\n", + "# attach_px_methods..plot_func(\n", + "# *args,\n", + "# layout=None,\n", + 
"# **kwargs\n", + "# )\n", + "\n", + "# pf_results.vbt.px.parallel_coordinates() #ocdf\n", + "\n", + "res = pf_results.reset_index()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf_results" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "from sklearn.decomposition import PCA\n", + "from sklearn.preprocessing import StandardScaler\n", + "import matplotlib.pyplot as plt\n", + "\n", + "# Assuming pf_results is your DataFrame\n", + "# Convert columns to numeric, assuming NaNs where conversion fails\n", + "metrics = ['Total Return [%]', 'Max Drawdown [%]', 'Total Trades']\n", + "for metric in metrics:\n", + " pf_results[metric] = pd.to_numeric(pf_results[metric], errors='coerce')\n", + "\n", + "# Handle missing values, for example filling with the median\n", + "pf_results['Max Drawdown [%]'].fillna(pf_results['Max Drawdown [%]'].median(), inplace=True)\n", + "\n", + "# Extract the metrics into a new DataFrame\n", + "data_for_pca = pf_results[metrics]\n", + "\n", + "# Standardize the data before applying PCA\n", + "scaler = StandardScaler()\n", + "data_scaled = scaler.fit_transform(data_for_pca)\n", + "\n", + "# Apply PCA\n", + "pca = PCA(n_components=2) # Adjust components as needed\n", + "principal_components = pca.fit_transform(data_scaled)\n", + "\n", + "# Create a DataFrame with the principal components\n", + "pca_results = pd.DataFrame(data=principal_components, columns=['PC1', 'PC2'])\n", + "\n", + "# Visualize the results\n", + "plt.figure(figsize=(8,6))\n", + "plt.scatter(pca_results['PC1'], pca_results['PC2'], alpha=0.5)\n", + "plt.xlabel('Principal Component 1')\n", + "plt.ylabel('Principal Component 2')\n", + "plt.title('PCA of Strategy Optimization Results')\n", + "plt.grid(True)\n", + 
"plt.savefig(\"ddd.png\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Check if there is any unnamed level and rename it\n", + "if None in df.index.names:\n", + " # Generate new names list replacing None with 'stat'\n", + " new_names = ['stat' if name is None else name for name in df.index.names]\n", + " df.index.set_names(new_names, inplace=True)\n", + "\n", + "rs= df\n", + "\n", + "rs.info()\n", + "\n", + "\n", + "# # Now, 'stat' is the name of the previously unnamed level\n", + "\n", + "# # Filter for 'Total Return' assuming it is a correct identifier in the 'stat' level\n", + "# total_return_series = df.xs('Total Return [%]', level='stat')\n", + "\n", + "# # Sort the Series to get the largest 'Total Return' values\n", + "# sorted_series = total_return_series.sort_values(ascending=False)\n", + "\n", + "# # Print the sorted filtered data\n", + "# sorted_series.head(20)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sorted_series.vbt.save()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#df.info()\n", + "total_return_series = df.xs('Total Return [%]')\n", + "sorted_series = total_return_series.sort_values(ascending=False)\n", + "\n", + "# Display the top N entries, e.g., top 5\n", + "sorted_series.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "comb_stats_df.nlargest(10, 'Total Return [%]')\n", + "#stats_df.info()\n", + "\n", + "\n", + "8\t-0.06\t-0.2\t0.0028\t0.0048\t4.156254\n", + "4 -0.02 -0.25 0.0028 0.0048 0.84433\n", + "3 -0.02 -0.25 0.0033 0.0023 Total Return [%] 0.846753\n", + "#2\t-0.04\t-0.2\t0.0019\t0.0019\n", + "# 
2\t-0.04\t-0.2\t0.0019\t0.0019\t0.556919\t91\t60.43956\t0.00612\n", + "# 2\t-0.04\t-0.25\t0.0019\t0.0019\t0.556919\t91\t60.43956\t0.00612\n", + "# 2\t-0.04\t-0.3\t0.0019\t0.0019\t0.556919\t91\t60.43956\t0.00612\n", + "# 2\t-0.04\t-0.35\t0.0019\t0.0019\t0.556919\t91\t60.43956\t0.00612\n", + "# 2\t-0.04\t-0.4\t0.0019\t0.0019\t0.556919\t91\t60.43956\t0.00612\n", + "# 2\t-0.04\t-0.2\t0.0019\t0.0017\t0.451338\t93\t63.44086\t0.004853\n", + "# 2\t-0.04\t-0.25\t0.0019\t0.0017\t0.451338\t93\t63.44086\t0.004853\n", + "# 2\t-0.04\t-0.3\t0.0019\t0.0017\t0.451338\t93\t63.44086\t0.004853\n", + "# 2\t-0.04\t-0.35\t0.0019\t0.0017\t0.451338\t93\t63.44086\t0.004853\n", + "# 2\t-0.04\t-0.4\t0.0019\t0.0017\t0.451338\t93\t63.44086\t0.004853" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf.plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "basic_data.symbols" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + ">>> def apply_func(ts, entries, exits, fastw, sloww, minp=None):\n", + "... fast_ma = vbt.nb.rolling_mean_nb(ts, fastw, minp=minp)\n", + "... slow_ma = vbt.nb.rolling_mean_nb(ts, sloww, minp=minp)\n", + "... entries[:] = vbt.nb.crossed_above_nb(fast_ma, slow_ma) \n", + "... exits[:] = vbt.nb.crossed_above_nb(slow_ma, fast_ma)\n", + "... return (fast_ma, slow_ma) \n", + "\n", + ">>> CrossSig = vbt.IF(\n", + "... class_name=\"CrossSig\",\n", + "... input_names=['ts'],\n", + "... in_output_names=['entries', 'exits'],\n", + "... param_names=['fastw', 'sloww'],\n", + "... output_names=['fast_ma', 'slow_ma']\n", + "... ).with_apply_func(\n", + "... apply_func,\n", + "... in_output_settings=dict(\n", + "... entries=dict(dtype=np.bool_), #initialize output with bool\n", + "... exits=dict(dtype=np.bool_)\n", + "... )\n", + "... 
)\n", + ">>> cross_sig = CrossSig.run(ts2, 2, 4)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#PIPELINE - parameters in one go\n", + "\n", + "\n", + "#TOTO prepsat do FOR-LOOPu\n", + "\n", + "\n", + "#indicator parameters\n", + "mom_timeperiod = list(range(2, 6))\n", + "\n", + "#threshold entries parameters\n", + "mom_th = np.round(np.arange(-0.02, -0.1 - 0.02, -0.02),4).tolist()#-0.02\n", + "roc_th = np.round(np.arange(-0.2, -0.4 - 0.05, -0.05),4).tolist()#-0.2\n", + "#print(mom_th, roc_th)\n", + "#jejich product\n", + "# mom_th_prod, roc_th_prod = zip(*product(mom_th, roc_th))\n", + "\n", + "# #convert threshold to vbt param\n", + "# mom_th_index = vbt.Param(mom_th_prod, name='mom_th_th') \n", + "# roc_th_index = vbt.Param(roc_th_prod, name='roc_th_th')\n", + "\n", + "mom_th = vbt.Param(mom_th, name='mom_th')\n", + "roc_th = vbt.Param(roc_th, name='roc_th')\n", + "\n", + "#portfolio simulation parameters\n", + "sl_stop = np.arange(0.03/100, 0.2/100, 0.02/100).tolist()\n", + "# Using the round function\n", + "sl_stop = [round(val, 4) for val in sl_stop]\n", + "tp_stop = np.arange(0.03/100, 0.2/100, 0.02/100).tolist()\n", + "# Using the round function\n", + "tp_stop = [round(val, 4) for val in tp_stop]\n", + "sl_stop = vbt.Param(sl_stop) #np.nan mean s no stoploss\n", + "tp_stop = vbt.Param(tp_stop) #np.nan mean s no stoploss\n", + "\n", + "\n", + "#def test_mom(window=14, mom_th=0.2, roc_th=0.2, sl_stop=0.03/100, tp_stop=0.03/100):\n", + "#close = basic_data.xloc[\"09:30\":\"10:00\"].close\n", + "momshort = vbt.indicator(\"talib:MOM\").run(basic_data.get(\"Close\"), timeperiod=mom_timeperiod, short_name = \"slope_short\")\n", + "\n", + "#ht_trendline = vbt.indicator(\"talib:HT_TRENDLINE\").run(close, short_name = \"httrendline\")\n", + "rocp = vbt.indicator(\"talib:ROC\").run(basic_data.get(\"Close\"), short_name = \"rocp\")\n", + "#rate of change + momentum\n", + "\n", + "rocp_signal = 
rocp.real_crossed_below(mom_th)\n", + "mom_signal = momshort.real_crossed_below(roc_th)\n", + "\n", + "#mom_signal\n", + "print(rocp_signal.info())\n", + "print(mom_signal.info())\n", + "#print(rocp.real)\n", + "\n", + "\n", + "short_signal = (mom_signal.vbt & rocp_signal)\n", + "\n", + "# #short_signal = (rocp.real_crossed_below(roc_th_index) & momshort.real_crossed_below(mom_th_index))\n", + "# forced_exit = m1_data.symbol_wrapper.fill(False)\n", + "# entry_window_open= m1_data.symbol_wrapper.fill(False)\n", + "\n", + "\n", + "# # Calculate the time difference in minutes from market open for each timestamp\n", + "# elapsed_min_from_open = (forced_exit.index.hour - market_open.hour) * 60 + (forced_exit.index.minute - market_open.minute)\n", + "\n", + "# entry_window_open[(elapsed_min_from_open >= entry_window_opens) & (elapsed_min_from_open < entry_window_closes)] = True\n", + "# forced_exit[(elapsed_min_from_open >= forced_exit_start) & (elapsed_min_from_open < forced_exit_end)] = True\n", + "# short_entries = (short_signal & entry_window_open)\n", + "# short_exits = forced_exit\n", + "# #long_entries.info()\n", + "# #number of trues and falses in long_entries\n", + "# #short_exits.value_counts()\n", + "# #short_entries.value_counts()\n", + "\n", + "\n", + "# pf = vbt.Portfolio.from_signals(close=close, short_entries=short_entries, short_exits=short_exits, sl_stop=sl_stop, tp_stop = tp_stop, fees=0.0167/100, freq=\"1s\") #sl_stop=sl_stop, tp_stop = sl_stop,\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# filter dates" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#filter na dny\n", + "dates_of_interest = pd.to_datetime(['2024-04-22']).tz_localize('US/Eastern')\n", + "filtered_df = df.loc[df.index.normalize().isin(dates_of_interest)]\n", + "\n", + "df = filtered_df\n", + "df.info()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + 
"metadata": {}, + "outputs": [], + "source": [ + "# import plotly.io as pio\n", + "# pio.renderers.default = 'notebook'\n", + "\n", + "#naloadujeme do vbt symbol as column\n", + "basic_data = vbt.Data.from_data({\"BAC\": df}, tz_convert=zoneNY)\n", + "\n", + "vbt.settings.plotting.auto_rangebreaks = True\n", + "#basic_data.data[\"BAC\"].vbt.ohlcv.plot()\n", + "\n", + "#basic_data.data[\"BAC\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "m1_data = basic_data[['Open', 'High', 'Low', 'Close', 'Volume']]\n", + "\n", + "m1_data.data[\"BAC\"]\n", + "#m5_data = m1_data.resample(\"5T\")\n", + "\n", + "#m5_data.data[\"BAC\"].head(10)\n", + "\n", + "# m15_data = m1_data.resample(\"15T\")\n", + "\n", + "# m15 = m15_data.data[\"BAC\"]\n", + "\n", + "# m15.vbt.ohlcv.plot()\n", + "\n", + "# m1_data.wrapper.index\n", + "\n", + "# m1_resampler = m1_data.wrapper.get_resampler(\"1T\")\n", + "# m1_resampler.index_difference(reverse=True)\n", + "\n", + "\n", + "# m5_resampler.prettify()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# MOM indicator" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vbt.phelp(vbt.indicator(\"talib:ROCP\").run)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "vyuzití rychleho klesani na sekundove urovni behem open rush\n", + "- MOM + ROC during open rush\n", + "- short signal\n", + "- pipeline kombinace thresholdu pro vstup mom_th, roc_th + hodnota sl_stop a tp_stop (pripadne trailing) - nalezeni optimalni kombinace atributu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# fig = plot_2y_close([ht_trendline],[momshort, rocp], close)\n", + "# short_signal.vbt.signals.plot_as_entries(close, fig=fig, add_trace_kwargs=dict(secondary_y=False)) \n", + "\n", + "#parameters (primary y line, secondary y 
line, close)\n", + "def plot_2y_close(priminds, secinds, close):\n", + " fig = vbt.make_subplots(rows=1, cols=1, shared_xaxes=True, specs=[[{\"secondary_y\": True}]], vertical_spacing=0.02, subplot_titles=(\"MOM\", \"Price\" ))\n", + " close.vbt.plot(fig=fig, add_trace_kwargs=dict(secondary_y=False), trace_kwargs=dict(line=dict(color=\"blue\")))\n", + " for ind in priminds:\n", + " ind.plot(fig=fig, add_trace_kwargs=dict(secondary_y=False))\n", + " for ind in secinds:\n", + " ind.plot(fig=fig, add_trace_kwargs=dict(secondary_y=True))\n", + " return fig\n", + "\n", + "close = m1_data.xloc[\"09:30\":\"10:00\"].close\n", + "momshort = vbt.indicator(\"talib:MOM\").run(close, timeperiod=3, short_name = \"slope_short\")\n", + "ht_trendline = vbt.indicator(\"talib:HT_TRENDLINE\").run(close, short_name = \"httrendline\")\n", + "rocp = vbt.indicator(\"talib:ROC\").run(close, short_name = \"rocp\")\n", + "#rate of change + momentum\n", + "short_signal = (rocp.real_crossed_below(-0.2) & momshort.real_crossed_below(-0.02))\n", + "#indlong = vbt.indicator(\"talib:MOM\").run(close, timeperiod=10, short_name = \"slope_long\")\n", + "fig = plot_2y_close([ht_trendline],[momshort, rocp], close)\n", + "short_signal.vbt.signals.plot_as_entries(close, fig=fig, add_trace_kwargs=dict(secondary_y=False)) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "close = m1_data.close\n", + "#vbt.phelp(vbt.OLS.run)\n", + "\n", + "#oer steepmnes of regression line\n", + "#talib.LINEARREG_SLOPE(close, timeperiod=timeperiod)\n", + "#a také ON BALANCE VOLUME - http://5.161.179.223:8000/static/js/vbt/api/indicators/custom/obv/index.html\n", + "\n", + "\n", + "\n", + "mom_ind = vbt.indicator(\"talib:MOM\") \n", + "#vbt.phelp(mom_ind.run)\n", + "\n", + "mom = mom_ind.run(close, timeperiod=10)\n", + "\n", + "plot_2y_close(mom, close)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# defining ENTRY WINDOW and 
forced EXIT window" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#m1_data.data[\"BAC\"].info()\n", + "import datetime\n", + "# Define the market open and close times\n", + "market_open = datetime.time(9, 30)\n", + "market_close = datetime.time(16, 0)\n", + "entry_window_opens = 2\n", + "entry_window_closes = 30\n", + "\n", + "forced_exit_start = 380\n", + "forced_exit_end = 390\n", + "\n", + "forced_exit = m1_data.symbol_wrapper.fill(False)\n", + "entry_window_open= m1_data.symbol_wrapper.fill(False)\n", + "\n", + "# Calculate the time difference in minutes from market open for each timestamp\n", + "elapsed_min_from_open = (forced_exit.index.hour - market_open.hour) * 60 + (forced_exit.index.minute - market_open.minute)\n", + "\n", + "entry_window_open[(elapsed_min_from_open >= entry_window_opens) & (elapsed_min_from_open < entry_window_closes)] = True\n", + "forced_exit[(elapsed_min_from_open >= forced_exit_start) & (elapsed_min_from_open < forced_exit_end)] = True\n", + "\n", + "#entry_window_open.info()\n", + "# forced_exit.tail(100)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "close = m1_data.close\n", + "\n", + "#rsi = vbt.RSI.run(close, window=14)\n", + "\n", + "short_entries = (short_signal & entry_window_open)\n", + "short_exits = forced_exit\n", + "#long_entries.info()\n", + "#number of trues and falses in long_entries\n", + "#short_exits.value_counts()\n", + "short_entries.value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def plot_rsi(close, entries, exits):\n", + " fig = vbt.make_subplots(rows=1, cols=1, shared_xaxes=True, specs=[[{\"secondary_y\": True}]], vertical_spacing=0.02, subplot_titles=(\"RSI\", \"Price\" ))\n", + " close.vbt.plot(fig=fig, add_trace_kwargs=dict(secondary_y=True))\n", + " #rsi.plot(fig=fig, 
add_trace_kwargs=dict(secondary_y=False))\n", + " entries.vbt.signals.plot_as_entries(close, fig=fig, add_trace_kwargs=dict(secondary_y=False)) \n", + " exits.vbt.signals.plot_as_exits(close, fig=fig, add_trace_kwargs=dict(secondary_y=False)) \n", + " return fig\n", + "\n", + "plot_rsi(close, short_entries, short_exits)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vbt.phelp(vbt.Portfolio.from_signals)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sl_stop = np.arange(0.03/100, 0.2/100, 0.02/100).tolist()\n", + "# Using the round function\n", + "sl_stop = [round(val, 4) for val in sl_stop]\n", + "print(sl_stop)\n", + "sl_stop = vbt.Param(sl_stop) #np.nan mean s no stoploss\n", + "\n", + "pf = vbt.Portfolio.from_signals(close=close, short_entries=short_entries, short_exits=short_exits, sl_stop=0.03/100, tp_stop = 0.03/100, fees=0.0167/100, freq=\"1s\") #sl_stop=sl_stop, tp_stop = sl_stop,\n", + "\n", + "#pf.stats()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#list of orders\n", + "#pf.orders.records_readable\n", + "#pf.orders.plots()\n", + "#pf.stats()\n", + "pf.stats()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf.plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf[(0.0015,0.0013)].plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf[0.03].plot_trade_signals()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# pristup k pf jako multi index" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#pf[0.03].plot()\n", + "#pf.order_records\n", + 
"pf[(0.03)].stats()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#zgrupovane statistiky\n", + "stats_df = pf.stats([\n", + " 'total_return',\n", + " 'total_trades',\n", + " 'win_rate',\n", + " 'expectancy'\n", + "], agg_func=None)\n", + "stats_df\n", + "\n", + "\n", + "stats_df.nlargest(10, 'Total Return [%]')\n", + "#stats_df.info()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf[(0.0011,0.0013000000000000002)].plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pandas.tseries.offsets import DateOffset\n", + "\n", + "temp_data = basic_data['2024-4-22']\n", + "temp_data\n", + "res1m = temp_data[[\"Open\", \"High\", \"Low\", \"Close\", \"Volume\"]]\n", + "\n", + "# Define a custom date offset that starts at 9:30 AM and spans 4 hours\n", + "custom_offset = DateOffset(hours=4, minutes=30)\n", + "\n", + "# res1m = res1m.get().resample(\"4H\").agg({ \n", + "# \"Open\": \"first\",\n", + "# \"High\": \"max\",\n", + "# \"Low\": \"min\",\n", + "# \"Close\": \"last\",\n", + "# \"Volume\": \"sum\"\n", + "# })\n", + "\n", + "res4h = res1m.resample(\"1h\", resample_kwargs=dict(origin=\"start\"))\n", + "\n", + "res4h.data\n", + "\n", + "res15m = res1m.resample(\"15T\", resample_kwargs=dict(origin=\"start\"))\n", + "\n", + "res15m.data[\"BAC\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "@vbt.njit\n", + "def long_entry_place_func_nb(c, low, close, time_in_ns, rsi14, window_open, window_close):\n", + " market_open_minutes = 570 # 9 hours * 60 minutes + 30 minutes\n", + "\n", + " for out_i in range(len(c.out)):\n", + " i = c.from_i + out_i\n", + "\n", + " current_minutes = vbt.dt_nb.hour_nb(time_in_ns[i]) * 60 + vbt.dt_nb.minute_nb(time_in_ns[i])\n", + " #print(\"current_minutes\", 
current_minutes)\n", + " # Calculate elapsed minutes since market open at 9:30 AM\n", + " elapsed_from_open = current_minutes - market_open_minutes\n", + " elapsed_from_open = elapsed_from_open if elapsed_from_open >= 0 else 0\n", + " #print( \"elapsed_from_open\", elapsed_from_open)\n", + "\n", + " #elapsed_from_open = elapsed_minutes_from_open_nb(time_in_ns) \n", + " in_window = elapsed_from_open > window_open and elapsed_from_open < window_close\n", + " #print(\"in_window\", in_window)\n", + " # if in_window:\n", + " # print(\"in window\")\n", + "\n", + " if in_window and rsi14[i] > 60: # and low[i, c.col] <= hit_price: # and hour == 9: # (4)!\n", + " return out_i\n", + " return -1\n", + "\n", + "@vbt.njit\n", + "def long_exit_place_func_nb(c, high, close, time_index, tp, sl): # (5)!\n", + " entry_i = c.from_i - c.wait\n", + " entry_price = close[entry_i, c.col]\n", + " hit_price = entry_price * (1 + tp)\n", + " stop_price = entry_price * (1 - sl)\n", + " for out_i in range(len(c.out)):\n", + " i = c.from_i + out_i\n", + " last_bar_of_day = vbt.dt_nb.day_changed_nb(time_index[i], time_index[i + 1])\n", + "\n", + " #print(next_day)\n", + " if last_bar_of_day: #pokud je dalsi next day, tak zavirame posledni\n", + " print(\"ted\",out_i)\n", + " return out_i\n", + " if close[i, c.col] >= hit_price or close[i, c.col] <= stop_price :\n", + " return out_i\n", + " return -1\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.DataFrame(np.random.random(size=(5, 10)), columns=list('abcdefghij'))\n", + "\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df.sum()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": 
"text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..ff91ddf --- /dev/null +++ b/setup.py @@ -0,0 +1,16 @@ +from setuptools import find_packages, setup + +setup(name='v2realbot_research', + version='0.1', + description='Research for v2realbot', + author='David Brazda', + author_email='davidbrazda61@gmail.com', + packages=find_packages(), + install_requires=[ + 'pandas', + 'pywebview>=5.0.5', + 'orjson', + 'v2trading @ git+https://github.com/drew2323/v2trading.git@master#egg=v2trading', + 'lightweight-charts-python @ https://github.com/drew2323/lightweight-charts-python.git@main#egg=lightweight-charts-python' + ] + ) \ No newline at end of file diff --git a/to_explore/PQN_Patterns.ipynb b/to_explore/PQN_Patterns.ipynb new file mode 100644 index 0000000..3c251c4 --- /dev/null +++ b/to_explore/PQN_Patterns.ipynb @@ -0,0 +1,499 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "a7f13baa-bf3a-41e2-b4f4-bef957746b6a", + "metadata": {}, + "source": [ + "# How to backtest chart patterns with VectorBT PRO" + ] + }, + { + "cell_type": "markdown", + "id": "1e5237a6-cb1e-42b2-8b74-841af2e8859a", + "metadata": {}, + "source": [ + "VectorBT PRO (https://vectorbt.pro/) is a proprietary Python package designed for backtesting and analyzing quantitative trading strategies. It provides a comprehensive suite of tools for every stage of an algorithmic trading workflow, including data acquisition, signal generation and analysis, portfolio optimization, strategy simulation, hyperparameter tuning, and cross-validation. These modular components empower users to flexibly customize their analysis, setting it apart from monolithic backtesting frameworks." 
+ ] + }, + { + "cell_type": "markdown", + "id": "51ad2b2b-3ffa-4600-9f03-547f83d8babb", + "metadata": {}, + "source": [ + "One of these components is a data pattern detector that efficiently scans data using variable-length windows, assessing their similarity to a specified pattern. This process, optimized with Numba (https://numba.pydata.org/), operates on any hardware without the need for machine learning. To showcase the detector's capabilities, we will conduct backtesting on a range of patterns and their combinations on a single dataset." + ] + }, + { + "cell_type": "markdown", + "id": "36f9e6a9-eedf-4595-b214-2d00f02d9c90", + "metadata": {}, + "source": [ + "## Imports and set up" + ] + }, + { + "cell_type": "markdown", + "id": "33459b0c-c21f-4251-b13b-6492c9171f6c", + "metadata": {}, + "source": [ + "Due to VectorBT PRO's self-contained design, only minimal imports are necessary." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bc8d53d7-0290-4e6c-b760-6c9ba8a6873e", + "metadata": {}, + "outputs": [], + "source": [ + "from vectorbtpro import *\n", + "# whats_imported()\n", + "\n", + "vbt.settings.set_theme(\"dark\")" + ] + }, + { + "cell_type": "markdown", + "id": "0aec0980-6ee2-41b1-a713-4a062a823fe5", + "metadata": {}, + "source": [ + "VectorBT PRO features built-in data downloading from sources such as Yahoo Finance, Alpaca, Polygon, TradingView, and many more. We will perform pattern detection on hourly price data pulled from TradingView." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d12bcb35-95ba-424e-8dfc-0e9edff8df99", + "metadata": {}, + "outputs": [], + "source": [ + "symbols = [\n", + " \"NASDAQ:META\",\n", + " \"NASDAQ:AMZN\",\n", + " \"NASDAQ:AAPL\",\n", + " \"NASDAQ:NFLX\",\n", + " \"NASDAQ:GOOG\",\n", + "]\n", + "\n", + "data = vbt.TVData.pull(symbols, timeframe=\"hourly\")" + ] + }, + { + "cell_type": "markdown", + "id": "77e48d78-436d-4a52-95d4-8ff8c1e8ff4c", + "metadata": {}, + "source": [ + "TradingView does not offer the option to specify a date range in advance, so we will need to select it afterward." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b96e37aa-3c00-4373-8030-ca3d97f872b1", + "metadata": {}, + "outputs": [], + "source": [ + "start_date = \"2020\"\n", + "end_date = None\n", + "\n", + "data = data.xloc[start_date:end_date]" + ] + }, + { + "cell_type": "markdown", + "id": "3c9c8009-3a78-4799-bc98-2bd191e22851", + "metadata": {}, + "source": [ + "Ensure that our data spans the correct date period and is free of NaN values." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "76b530eb-f42e-4bdf-b270-20298a66eb6b", + "metadata": {}, + "outputs": [], + "source": [ + "print(data.stats())" + ] + }, + { + "cell_type": "markdown", + "id": "4cf31468-ce25-4284-b0c6-dec873e62268", + "metadata": {}, + "source": [ + "As pattern detection requires only a single time series, we must choose the suitable feature. We'll utilize HLC/3, which effectively captures price fluctuations." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "650c3662-684c-4e57-b7fa-45ba8b2f7f1d", + "metadata": {}, + "outputs": [], + "source": [ + "price = data.hlc3" + ] + }, + { + "cell_type": "markdown", + "id": "74dcad43-dd64-435a-9a9d-591681514209", + "metadata": {}, + "source": [ + "## Define patterns" + ] + }, + { + "cell_type": "markdown", + "id": "f387c42a-1224-46d9-9397-b6479e6e21e7", + "metadata": {}, + "source": [ + "Numerous chart patterns can be translated into numerical sequences, like the \"Double Top\" pattern (https://www.investopedia.com/terms/d/doubletop.asp) represented as [1, 3, 2, 3, 1]. It's important to note that while the numbers themselves can be arbitrary, their relative spacing should mirror the relative distance between the pattern's chart points. For instance, in this sequence, 2 aligns with the midpoint between valley point 1 and peak point 3. The same principle applies to temporal distribution: points should be equidistant from one another." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "40827a2a-ee12-4feb-9f6a-4505ed24060d", + "metadata": {}, + "outputs": [], + "source": [ + "bullish_patterns = {\n", + " \"double_bottom\": [5, 1, 3, 1, 5],\n", + " \"exp_triangle\": [3, 4, 2, 5, 1, 6],\n", + " \"asc_triangle\": [1, 5, 2, 5, 3, 6],\n", + " \"symm_triangle\": [1, 6, 2, 5, 3, 6],\n", + " \"pennant\": [6, 1, 5, 2, 4, 3, 6]\n", + "}\n", + "bearish_patterns = {\n", + " \"head_and_shoulders\": [1, 4, 2, 6, 2, 4, 1],\n", + " \"double_top\": [1, 5, 3, 5, 1],\n", + " \"desc_triangle\": [6, 2, 5, 2, 4, 1],\n", + " \"symm_triangle\": [6, 1, 5, 2, 4, 1],\n", + " \"pennant\": [1, 6, 2, 5, 3, 4, 1]\n", + "}" + ] + }, + { + "cell_type": "markdown", + "id": "af76a114-d588-443a-8c62-19274c97c416", + "metadata": {}, + "source": [ + "Confirm the visual representation of a pattern by plotting its corresponding line graph." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0114e669-fff0-48b1-922b-412ad6941914", + "metadata": {}, + "outputs": [], + "source": [ + "pd.Series(bullish_patterns[\"double_bottom\"]).vbt.plot().show_svg()" + ] + }, + { + "cell_type": "markdown", + "id": "11172c01-2675-4c12-ab51-ae21137c097a", + "metadata": {}, + "source": [ + "Each generated sequence serves as a rough approximation of the desired chart pattern, and there's no need for precise adjustments: VectorBT PRO's similarity-based algorithm is flexible and can identify patterns, even if they are not perfectly consistent in their design." + ] + }, + { + "cell_type": "markdown", + "id": "4292665d-4168-436a-a59d-94b42bfd9482", + "metadata": {}, + "source": [ + "## Detect patterns in data" + ] + }, + { + "cell_type": "markdown", + "id": "0a355587-347a-4f4f-9f7a-fa041127f36a", + "metadata": {}, + "source": [ + "Iterate through each pattern, dataset, and timestamp within the dataset. Search for matches within windows spanning from 1 to 30 days, and create a record for each match that exceeds a pre-defined minimum similarity score, which is set by default to 85%." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d1a9af95-b7d1-4f29-9a6b-b40d57e5f597", + "metadata": {}, + "outputs": [], + "source": [ + "min_window = 24\n", + "max_window = 24 * 30\n", + "\n", + "def detect_patterns(patterns):\n", + " return vbt.PatternRanges.from_pattern_search(\n", + " price,\n", + " open=data.open, # OHLC for plotting\n", + " high=data.high,\n", + " low=data.low,\n", + " close=data.close,\n", + " pattern=patterns,\n", + " window=min_window,\n", + " max_window=max_window,\n", + " execute_kwargs=dict( # multithreading\n", + " engine=\"threadpool\", \n", + " chunk_len=\"auto\", \n", + " )\n", + " )\n", + "\n", + "bullish_matches = detect_patterns(vbt.Param(bullish_patterns, name=\"bullish_pattern\"))\n", + "bearish_matches = detect_patterns(vbt.Param(bearish_patterns, name=\"bearish_pattern\"))" + ] + }, + { + "cell_type": "markdown", + "id": "12733006-548c-4c28-a4ac-902aa066f0b3", + "metadata": {}, + "source": [ + "In just several minutes, VectorBT PRO seamlessly detected matches among all patterns. This process, involving around 230 million unique pattern and window combinations, was executed in parallel." + ] + }, + { + "cell_type": "markdown", + "id": "714ddd1f-f5a5-420e-9d4d-707e4b5e4685", + "metadata": {}, + "source": [ + "Get the number of matches for each pattern and dataset." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77c5957e-a906-4c0c-998a-5b2e92fd652d", + "metadata": {}, + "outputs": [], + "source": [ + "print(bullish_matches.count())" + ] + }, + { + "cell_type": "markdown", + "id": "88b7627b-f48c-4d51-986e-cc269abf9604", + "metadata": {}, + "source": [ + "Plot the pattern and dataset with the most matches." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "708b0d74-8c5e-4921-87e2-4704050ce7ed", + "metadata": {}, + "outputs": [], + "source": [ + "vbt.settings.plotting.auto_rangebreaks = True # for stocks\n", + "\n", + "display_column = bullish_matches.count().idxmax()\n", + "\n", + "bullish_matches.plot(column=display_column, fit_ranges=True).show_svg()" + ] + }, + { + "cell_type": "markdown", + "id": "af6a60cf-0d98-49e5-ad48-cc872f6d2ce9", + "metadata": {}, + "source": [ + "Zoom in on a match." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f22c6c82-cc7c-4d5d-94b9-e14753e82072", + "metadata": {}, + "outputs": [], + "source": [ + "display_match = 3\n", + "\n", + "bullish_matches.plot(column=display_column, fit_ranges=display_match).show_svg()" + ] + }, + { + "cell_type": "markdown", + "id": "45f49c51-bd6f-4952-8ff4-76f6ebc00f7f", + "metadata": {}, + "source": [ + "The window data closely aligns with the pattern. This functionality is highly comprehensive, offering the flexibility to adjust fitness levels, modify rescaling and interpolation algorithms, and more to suit specific requirements." + ] + }, + { + "cell_type": "markdown", + "id": "16779944-3cae-44e8-a63d-36194479217c", + "metadata": {}, + "source": [ + "## Transform matches to signals" + ] + }, + { + "cell_type": "markdown", + "id": "26c9e03d-95ff-44a3-bd56-2a581673aa27", + "metadata": {}, + "source": [ + "To conduct backtesting on the identified patterns, we will convert them into signals, triggering a signal once a pattern has fully developed." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ce91720c-dd56-496e-aadb-faad71e1a529", + "metadata": {}, + "outputs": [], + "source": [ + "entries = bullish_matches.last_pd_mask\n", + "exits = bearish_matches.last_pd_mask" + ] + }, + { + "cell_type": "markdown", + "id": "d049c224-03b7-42fa-8927-51a502812e54", + "metadata": {}, + "source": [ + "Generate a Cartesian product of bullish and bearish patterns to systematically test each bullish pattern against each bearish pattern." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a5f0a9db-632d-4705-af3f-1c33dfb6f884", + "metadata": {}, + "outputs": [], + "source": [ + "entries, exits = entries.vbt.x(exits)" + ] + }, + { + "cell_type": "markdown", + "id": "90a043fe-c990-4358-94be-b8f4b92dec4f", + "metadata": {}, + "source": [ + "Both arrays have been converted into equally-shaped DataFrames, each comprising 125 columns. Each column represents an individual backtest, encompassing three parameters: bullish pattern, bearish pattern, and symbol." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8b3a1466-246b-42b5-a61d-a0ae1137c54d", + "metadata": {}, + "outputs": [], + "source": [ + "print(entries.columns)" + ] + }, + { + "cell_type": "markdown", + "id": "05669332-15a4-4ac5-b376-bdc08006d952", + "metadata": {}, + "source": [ + "## Backtest signals" + ] + }, + { + "cell_type": "markdown", + "id": "a44e90d0-f172-445a-9f4b-865444ae0cb3", + "metadata": {}, + "source": [ + "Establish a portfolio by simulating signals." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5d3ec70d-73e4-407d-8ea7-2a6b0f4436ba", + "metadata": {}, + "outputs": [], + "source": [ + "pf = vbt.Portfolio.from_signals(data, entries, exits)" + ] + }, + { + "cell_type": "markdown", + "id": "ff7d821d-f20e-45c3-83d5-ad1aa2ba109b", + "metadata": {}, + "source": [ + "Get the mean total return for every combination of bullish and bearish patterns." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ad8b789d-c543-4e42-8df7-1a351cceda5f", + "metadata": {}, + "outputs": [], + "source": [ + "mean_total_return = pf.total_return.groupby([\"bullish_pattern\", \"bearish_pattern\"]).mean()\n", + "\n", + "print(mean_total_return)" + ] + }, + { + "cell_type": "markdown", + "id": "946aa00a-b183-496e-8a63-7f11485ad3dc", + "metadata": {}, + "source": [ + "As visual beings, let's represent these values as a heatmap." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d74ac965-0461-4563-813e-56b9cce979c8", + "metadata": {}, + "outputs": [], + "source": [ + "mean_total_return.vbt.heatmap(x_level=\"bearish_pattern\", y_level=\"bullish_pattern\").show_svg()" + ] + }, + { + "cell_type": "markdown", + "id": "5d9f9706-d3da-480e-8b65-5eaa47196049", + "metadata": {}, + "source": [ + "Although the displayed performance of each pattern combination does not guarantee future results, it provides insight into how the market responded to pattern events in the past. For instance, it's noteworthy that the \"Bearish Symmetrical Triangle\" exhibited a notably bullish trend. Cross-validation and robustness testing are next essential steps for a comprehensive assessment." 
+ ] + }, + { + "cell_type": "markdown", + "id": "2b5b8516-8620-41aa-a11c-96b48798c343", + "metadata": {}, + "source": [ + "Read more at https://vectorbt.pro/tutorials/patterns-and-projections/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9efd6597-880f-4769-a486-65e17b1c5475", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/to_explore/PQN_Projections.ipynb b/to_explore/PQN_Projections.ipynb new file mode 100644 index 0000000..870f05b --- /dev/null +++ b/to_explore/PQN_Projections.ipynb @@ -0,0 +1,266 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "9d9b5c91-f3a3-4709-a36f-40ecc86595d6", + "metadata": {}, + "source": [ + "# Forecasting future price trends by projecting historical price patterns" + ] + }, + { + "cell_type": "markdown", + "id": "2cffb873-e431-44f3-b243-e35969bbd2c1", + "metadata": {}, + "source": [ + "In our previous newsletter focusing on VectorBT PRO (VBT), we dived into the pattern detection capabilities of this powerful library. An additional key functionality is VBT's capacity to extrapolate identified price segments into the future and aggregate them for statistical analysis. This feature can be an invaluable tool for real-time decision-making in market analysis." 
+ ] + }, + { + "cell_type": "markdown", + "id": "c472968b-1863-4d79-a299-ec67c1757455", + "metadata": {}, + "source": [ + "## Imports and set up" + ] + }, + { + "cell_type": "markdown", + "id": "ddf68612-622b-4803-87fc-a1ad80341536", + "metadata": {}, + "source": [ + "Given the self-contained design of VBT, a single import suffices." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a42ccb91-bc73-4ad5-9327-18c7c22af598", + "metadata": {}, + "outputs": [], + "source": [ + "from vectorbtpro import *\n", + "# whats_imported()\n", + "\n", + "vbt.settings.set_theme(\"dark\")" + ] + }, + { + "cell_type": "markdown", + "id": "15412fda-c27f-4820-9273-17366164b2b3", + "metadata": {}, + "source": [ + "Let's define a set of variables for our analysis." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fc016fe0-5ae6-416f-bb4d-84a33a91fce8", + "metadata": {}, + "outputs": [], + "source": [ + "SYMBOL = \"BTCUSDT\"\n", + "TIMEFRAME = \"1 hour\"\n", + "START = \"one year ago\"\n", + "\n", + "LAST_N_BARS = 24\n", + "PRED_N_BARS = 12\n", + "\n", + "GIF_FNAME = \"projections.gif\"\n", + "GIF_N_BARS = 72\n", + "GIF_FPS = 4\n", + "GIF_PAD = 0.01" + ] + }, + { + "cell_type": "markdown", + "id": "e4667d70-f1d9-4f34-81ff-fdf8320477ae", + "metadata": {}, + "source": [ + "We will execute the analysis using price data retrieved from BinanceData, based on the parameters we previously defined." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b797e0ff-320d-456b-91df-1e0e369d83a9", + "metadata": {}, + "outputs": [], + "source": [ + "data = vbt.BinanceData.pull(SYMBOL, timeframe=TIMEFRAME, start=START)" + ] + }, + { + "cell_type": "markdown", + "id": "43fade8d-2d1f-492b-88bb-95facd21ceda", + "metadata": {}, + "source": [ + "## Find and plot projections" + ] + }, + { + "cell_type": "markdown", + "id": "0013fab2-d1fa-4777-99e9-2081a90444e3", + "metadata": {}, + "source": [ + "Let's write a function that analyzes the most recent price trend and employs it as a pattern to identify similar price movements in historical data. This pattern recognition function will focus exclusively on segments of price history having a comparable percentage change from their respective starting points." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f7f4ead3-c4db-47d5-8a30-3f7dbe4347dc", + "metadata": {}, + "outputs": [], + "source": [ + "def find_patterns(data):\n", + " price = data.hlc3\n", + " pattern = price.values[-LAST_N_BARS:]\n", + " pattern_ranges = price.vbt.find_pattern(\n", + " pattern=pattern,\n", + " rescale_mode=\"rebase\",\n", + " overlap_mode=\"allow\",\n", + " wrapper_kwargs=dict(freq=TIMEFRAME)\n", + " )\n", + " pattern_ranges = pattern_ranges.status_closed\n", + " return pattern_ranges\n", + "\n", + "pattern_ranges = find_patterns(data)\n", + "print(pattern_ranges.count())" + ] + }, + { + "cell_type": "markdown", + "id": "6dc1f00c-f0a2-4b74-831f-3043c14f1195", + "metadata": {}, + "source": [ + "We have identified a number of price segments that closely resemble the latest price trend. Now, we'll write a function that extracts the price data immediately succeeding each identified segment and plots these as extensions of the price trend. 
These subsequent segments are known as \"projections.\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9fb7b02c-190a-488e-bfa6-843db23c324e", + "metadata": {}, + "outputs": [], + "source": [ + "def plot_projections(data, pattern_ranges, **kwargs):\n", + " projection_ranges = pattern_ranges.with_delta(\n", + " PRED_N_BARS,\n", + " open=data.open,\n", + " high=data.high,\n", + " low=data.low,\n", + " close=data.close,\n", + " )\n", + " projection_ranges = projection_ranges.status_closed\n", + " return projection_ranges.plot_projections(\n", + " plot_past_period=LAST_N_BARS, \n", + " **kwargs,\n", + " )\n", + "\n", + "plot_projections(data, pattern_ranges, plot_bands=False).show_svg()" + ] + }, + { + "cell_type": "markdown", + "id": "8df73436-c6ae-411b-8c44-e5764f9c1812", + "metadata": {}, + "source": [ + "As we can see, similar price movements have historically branched into a diverse set of trajectories. For a visually compelling and statistically robust forecast, we will display the confidence bands encompassing all the projections, with 60% of these projections falling between the upper and lower bands." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b97458a5-7428-4877-80c6-a522aef4b5ce", + "metadata": {}, + "outputs": [], + "source": [ + "plot_projections(data, pattern_ranges, plot_bands=True).show_svg()" + ] + }, + { + "cell_type": "markdown", + "id": "9011e2c5-1745-480c-b9da-c031f6ba9ae2", + "metadata": {}, + "source": [ + "## Generate animation" + ] + }, + { + "cell_type": "markdown", + "id": "ac05a0ea-6883-4736-a815-619f76607966", + "metadata": {}, + "source": [ + "Lastly, we will compile a GIF animation that iterates through a specified range of bars, applying the aforementioned procedure to each bar within that range." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6238530e-9d06-4da4-a71d-3ae7489c2c9a", + "metadata": {}, + "outputs": [], + "source": [ + "def plot_frame(frame_index, **kwargs):\n", + " sub_data = data.loc[:frame_index[-1]]\n", + " pattern_ranges = find_patterns(sub_data)\n", + " if pattern_ranges.count() < 3:\n", + " return None\n", + " return plot_projections(sub_data, pattern_ranges, **kwargs)\n", + "\n", + "vbt.save_animation(\n", + " GIF_FNAME,\n", + " data.index[-GIF_N_BARS:],\n", + " plot_frame,\n", + " plot_projections=False,\n", + " delta=1,\n", + " fps=GIF_FPS,\n", + " writer_kwargs=dict(loop=0),\n", + " yaxis_range=[\n", + " data.low.iloc[-GIF_N_BARS:].min() * (1 - GIF_PAD), \n", + " data.high.iloc[-GIF_N_BARS:].max() * (1 + GIF_PAD)\n", + " ],\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "91b825fb-7e4c-4d48-ae73-bffe633a6f52", + "metadata": {}, + "source": [ + "Bear in mind that while the confidence bands describe past performance, they should not be used as guarantees of future results." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "319a24bb-e210-4d02-ab2c-0ce58b3dc82c", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/to_explore/notebooks/BasicRSI.ipynb b/to_explore/notebooks/BasicRSI.ipynb new file mode 100644 index 0000000..00a425b --- /dev/null +++ b/to_explore/notebooks/BasicRSI.ipynb @@ -0,0 +1,700 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "be764faf-65cc-408a-8ebd-96b8b4f14b60", + "metadata": {}, + "source": [ + "# Basic RSI strategy" + ] + }, + { + "cell_type": "markdown", + "id": "d15aa106-cb66-4fc8-b07d-347c078c634a", + "metadata": { + "tags": [] + }, + "source": [ + "## Single backtest" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dee14aee-a14c-4e54-bf82-dbe9f64cbd62", + "metadata": {}, + "outputs": [], + "source": [ + "from vectorbtpro import *\n", + "# whats_imported()\n", + "\n", + "vbt.settings.set_theme('dark')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "32105542-882d-4403-a86c-fdcd8002e258", + "metadata": {}, + "outputs": [], + "source": [ + "data = vbt.BinanceData.pull('BTCUSDT')\n", + "data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "843dc221-4353-47b7-9f3a-68b6b8ba8752", + "metadata": {}, + "outputs": [], + "source": [ + "data.data['BTCUSDT'].vbt.ohlcv.plot().show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f14f65d8-a8b8-4cb0-a135-258b01b5ff83", + "metadata": {}, + "outputs": [], + "source": [ + "data.data['BTCUSDT'].info()" + ] 
+ }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3ef3cc2-c70f-4469-8067-fe5e1c6a06f3", + "metadata": {}, + "outputs": [], + "source": [ + "open_price = data.get('Open')\n", + "close_price = data.get('Close')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4de98bf9-7f0e-48d8-b532-a917500d1f18", + "metadata": {}, + "outputs": [], + "source": [ + "vbt.IF.list_indicators(\"RSI*\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "af3df97f-b81e-4186-bf8e-adb50b29c5c0", + "metadata": {}, + "outputs": [], + "source": [ + "vbt.indicator(\"talib:RSI\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3ce59b4c-7e47-4484-be62-0e26a2dc177e", + "metadata": {}, + "outputs": [], + "source": [ + "vbt.RSI" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "70589f4f-fa0f-448f-95f6-77c4833beede", + "metadata": {}, + "outputs": [], + "source": [ + "vbt.talib('RSI')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6ba90109-ca96-49c5-8ea9-2ad90755b43a", + "metadata": {}, + "outputs": [], + "source": [ + "vbt.ta('RSIIndicator')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cd56e11e-bf9e-4304-842f-6df7b2b0b738", + "metadata": {}, + "outputs": [], + "source": [ + "vbt.pandas_ta('RSI')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "37d1140d-5ef0-4d22-9100-2b29e0e9f2ed", + "metadata": {}, + "outputs": [], + "source": [ + "print(vbt.format_func(vbt.RSI.run))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bee38f74-6c29-4cc9-a37e-25fe44c08bc8", + "metadata": {}, + "outputs": [], + "source": [ + "rsi = vbt.RSI.run(open_price)\n", + "rsi" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "943a0d3c-c8f3-4359-9387-744e5bb3da44", + "metadata": {}, + "outputs": [], + "source": [ + "rsi.rsi" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": 
"69636dd4-8592-48b8-b4cb-fabf7cbd7b15", + "metadata": {}, + "outputs": [], + "source": [ + "entries = rsi.rsi.vbt.crossed_below(30)\n", + "entries" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b073280b-69ea-43ba-8694-4c2b506be57e", + "metadata": {}, + "outputs": [], + "source": [ + "exits = rsi.rsi.vbt.crossed_above(70)\n", + "exits" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "93ea75b8-5b75-4767-8adf-b535c9588147", + "metadata": {}, + "outputs": [], + "source": [ + "entries = rsi.rsi_crossed_below(30)\n", + "exits = rsi.rsi_crossed_above(70)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c127d5f5-eee4-4d6a-ae26-7592f5d3ab65", + "metadata": {}, + "outputs": [], + "source": [ + "def plot_rsi(rsi, entries, exits):\n", + " fig = rsi.plot()\n", + " entries.vbt.signals.plot_as_entries(rsi.rsi, fig=fig)\n", + " exits.vbt.signals.plot_as_exits(rsi.rsi, fig=fig)\n", + " return fig" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6743e20c-f2e6-4b64-9934-022fcc10acb7", + "metadata": {}, + "outputs": [], + "source": [ + "plot_rsi(rsi, entries, exits).show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3e1ef2e8-e225-4d60-891a-6c3e9cc2821b", + "metadata": {}, + "outputs": [], + "source": [ + "clean_entries, clean_exits = entries.vbt.signals.clean(exits)\n", + "\n", + "plot_rsi(rsi, clean_entries, clean_exits).show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1d2d4028-034b-4211-9a24-b82944b264da", + "metadata": {}, + "outputs": [], + "source": [ + "clean_entries.vbt.signals.total()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "aeb5bbf3-1217-4c2f-a53a-3d426ac069c0", + "metadata": {}, + "outputs": [], + "source": [ + "clean_exits.vbt.signals.total()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f64bd28c-6dc9-476d-9bb1-a2e137a39dfe", + "metadata": {}, + 
"outputs": [], + "source": [ + "ranges = clean_entries.vbt.signals.between_ranges(target=clean_exits)\n", + "ranges.duration.mean(wrap_kwargs=dict(to_timedelta=True))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8f55e1c9-9960-43ea-b148-41a6131caa7b", + "metadata": {}, + "outputs": [], + "source": [ + "pf = vbt.Portfolio.from_signals(\n", + " close=close_price, \n", + " entries=clean_entries, \n", + " exits=clean_exits,\n", + " size=100,\n", + " size_type='value',\n", + " init_cash='auto'\n", + ")\n", + "pf" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "02c8de50-5101-4a23-9d58-9c06c1ae4460", + "metadata": {}, + "outputs": [], + "source": [ + "pf.stats()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a6377f72-4174-49a1-8a9c-ad8095bc4437", + "metadata": {}, + "outputs": [], + "source": [ + "pf.plot(settings=dict(bm_returns=False)).show_svg()" + ] + }, + { + "cell_type": "markdown", + "id": "c1f78a0a-7bc9-4b32-9f3c-92d0da44d87b", + "metadata": {}, + "source": [ + "## Multiple backtests" + ] + }, + { + "cell_type": "markdown", + "id": "909f506d-8edb-4fa8-8425-5e9c4b6e7b1b", + "metadata": {}, + "source": [ + "### Using for-loop" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a781ea63-d4df-41c5-820e-195afd3770c9", + "metadata": {}, + "outputs": [], + "source": [ + "def test_rsi(window=14, wtype=\"wilder\", lower_th=30, upper_th=70):\n", + " rsi = vbt.RSI.run(open_price, window=window, wtype=wtype)\n", + " entries = rsi.rsi_crossed_below(lower_th)\n", + " exits = rsi.rsi_crossed_above(upper_th)\n", + " pf = vbt.Portfolio.from_signals(\n", + " close=close_price, \n", + " entries=entries, \n", + " exits=exits,\n", + " size=100,\n", + " size_type='value',\n", + " init_cash='auto')\n", + " return pf.stats([\n", + " 'total_return', \n", + " 'total_trades', \n", + " 'win_rate', \n", + " 'expectancy'\n", + " ])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + 
"id": "d60e7721-a0bb-4f2d-af92-d074ee523582", + "metadata": {}, + "outputs": [], + "source": [ + "test_rsi()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "14e9e01d-9bcf-4e2c-8959-f5c66e1768c7", + "metadata": {}, + "outputs": [], + "source": [ + "test_rsi(lower_th=20, upper_th=80)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a7864dbb-5a24-456a-bda2-468e737fa8ea", + "metadata": {}, + "outputs": [], + "source": [ + "from itertools import product\n", + "\n", + "lower_ths = range(20, 31)\n", + "upper_ths = range(70, 81)\n", + "th_combs = list(product(lower_ths, upper_ths))\n", + "len(th_combs)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fbc7c48e-e5e3-4ec7-8f76-7169d58231de", + "metadata": {}, + "outputs": [], + "source": [ + "comb_stats = [\n", + " test_rsi(lower_th=lower_th, upper_th=upper_th)\n", + " for lower_th, upper_th in th_combs\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8fbae677-56f9-49bc-88f7-325ff8ee00c4", + "metadata": {}, + "outputs": [], + "source": [ + "comb_stats_df = pd.DataFrame(comb_stats)\n", + "print(comb_stats_df)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a99e9ded-2954-40c8-93c5-ebff4a174036", + "metadata": {}, + "outputs": [], + "source": [ + "comb_stats_df.index = pd.MultiIndex.from_tuples(\n", + " th_combs, \n", + " names=['lower_th', 'upper_th'])\n", + "print(comb_stats_df)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "69511f79-00cd-40e2-b64f-097137bba69e", + "metadata": {}, + "outputs": [], + "source": [ + "comb_stats_df['Expectancy'].vbt.heatmap().show_svg()" + ] + }, + { + "cell_type": "markdown", + "id": "037bfdd0-e3dc-4ac5-9c0e-91c3320fcafd", + "metadata": {}, + "source": [ + "### Using columns" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac5bc6ba-c6e1-4ca8-98fd-19b141a4220a", + "metadata": {}, + "outputs": [], + "source": [ + 
"windows = list(range(8, 21))\n", + "wtypes = [\"simple\", \"exp\", \"wilder\"]\n", + "lower_ths = list(range(20, 31))\n", + "upper_ths = list(range(70, 81))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb6145b0-6ee7-4ac2-a8c3-5bd7127032be", + "metadata": {}, + "outputs": [], + "source": [ + "rsi = vbt.RSI.run(\n", + " open_price, \n", + " window=windows, \n", + " wtype=wtypes, \n", + " param_product=True)\n", + "rsi.rsi.columns" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f374086a-bf84-425c-a857-0ce4ea8d6a28", + "metadata": {}, + "outputs": [], + "source": [ + "lower_ths_prod, upper_ths_prod = zip(*product(lower_ths, upper_ths))\n", + "len(lower_ths_prod)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "069d9ffe-cdda-45af-832d-62bfb47ede42", + "metadata": {}, + "outputs": [], + "source": [ + "len(upper_ths_prod)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bd96a283-b9bd-4824-9218-86efe06b8cc6", + "metadata": {}, + "outputs": [], + "source": [ + "lower_th_index = vbt.Param(lower_ths_prod, name='lower_th')\n", + "entries = rsi.rsi_crossed_below(lower_th_index)\n", + "entries.columns" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c8510247-f1e1-4af7-8b14-0fb6d6f5813b", + "metadata": {}, + "outputs": [], + "source": [ + "upper_th_index = vbt.Param(upper_ths_prod, name='upper_th')\n", + "exits = rsi.rsi_crossed_above(upper_th_index)\n", + "exits.columns" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "209be8ac-393d-4a43-b752-022f3cfd8698", + "metadata": {}, + "outputs": [], + "source": [ + "pf = vbt.Portfolio.from_signals(\n", + " close=close_price, \n", + " entries=entries, \n", + " exits=exits,\n", + " size=100,\n", + " size_type='value',\n", + " init_cash='auto'\n", + ")\n", + "pf" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "15143c2b-b795-493c-9e8f-5aa0d877d391", + "metadata": {}, + 
"outputs": [], + "source": [ + "stats_df = pf.stats([\n", + " 'total_return', \n", + " 'total_trades', \n", + " 'win_rate', \n", + " 'expectancy'\n", + "], agg_func=None)\n", + "print(stats_df)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3d65043d-2081-4179-bc94-d42d1465a898", + "metadata": {}, + "outputs": [], + "source": [ + ">>> print(pf.getsize())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "862f2e98-dea6-4c7e-b5e9-21c16ca27de2", + "metadata": {}, + "outputs": [], + "source": [ + ">>> np.product(pf.wrapper.shape) * 8 / 1024 / 1024" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "abea668f-5f95-44d3-a158-38440f70433d", + "metadata": {}, + "outputs": [], + "source": [ + "stats_df['Expectancy'].groupby('rsi_window').mean()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "df815592-9f63-496e-8d88-99b1462d1030", + "metadata": {}, + "outputs": [], + "source": [ + "print(stats_df.sort_values(by='Expectancy', ascending=False).head())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1cb39bea-298c-4202-a996-c84b157f1ab9", + "metadata": {}, + "outputs": [], + "source": [ + "pf[(22, 80, 20, \"wilder\")].plot_value().show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bced4fa8-f7eb-49e4-980b-f0f4acd20540", + "metadata": {}, + "outputs": [], + "source": [ + "data = vbt.BinanceData.pull(['BTCUSDT', 'ETHUSDT'])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "55e86189-8244-4955-b736-f4f4f840b80a", + "metadata": {}, + "outputs": [], + "source": [ + "open_price = data.get('Open')\n", + "close_price = data.get('Close')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bd7aefae-3f83-48ac-8f20-7d0a4760cd4b", + "metadata": {}, + "outputs": [], + "source": [ + "rsi = vbt.RSI.run(\n", + " open_price, \n", + " window=windows, \n", + " wtype=wtypes, \n", + " param_product=True)\n", + 
"entries = rsi.rsi_crossed_below(lower_th_index)\n", + "exits = rsi.rsi_crossed_above(upper_th_index)\n", + "pf = vbt.Portfolio.from_signals(\n", + " close=close_price, \n", + " entries=entries, \n", + " exits=exits,\n", + " size=100,\n", + " size_type='value',\n", + " init_cash='auto'\n", + ")\n", + "stats_df = pf.stats([\n", + " 'total_return', \n", + " 'total_trades', \n", + " 'win_rate', \n", + " 'expectancy'\n", + "], agg_func=None)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a7007b6a-bf08-40a6-a4be-091fec376290", + "metadata": {}, + "outputs": [], + "source": [ + "stats_df.index" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a67661e7-d8df-4efe-8479-93d194ca255d", + "metadata": {}, + "outputs": [], + "source": [ + "eth_mask = stats_df.index.get_level_values('symbol') == 'ETHUSDT'\n", + "btc_mask = stats_df.index.get_level_values('symbol') == 'BTCUSDT'\n", + "pd.DataFrame({\n", + " 'ETHUSDT': stats_df[eth_mask]['Expectancy'].values,\n", + " 'BTCUSDT': stats_df[btc_mask]['Expectancy'].values\n", + "}).vbt.histplot(xaxis=dict(title=\"Expectancy\")).show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a64e903-1256-4e4b-9d22-667f2706c9da", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/to_explore/notebooks/CrossValidation.ipynb b/to_explore/notebooks/CrossValidation.ipynb new file mode 100644 index 0000000..e4ecffc --- /dev/null +++ b/to_explore/notebooks/CrossValidation.ipynb @@ -0,0 +1,3245 @@ +{ + "cells": [ + { + "cell_type": 
"markdown", + "id": "c69786c2-7c5b-4b1e-bd0c-52380c8df261", + "metadata": {}, + "source": [ + "# Cross-validation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b2751478-95ce-47d2-9e80-4d47ed0c7b36", + "metadata": {}, + "outputs": [], + "source": [ + "from vectorbtpro import *\n", + "# whats_imported()\n", + "\n", + "vbt.settings.set_theme(\"dark\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6b45492f-d152-43e2-88a9-ab95bbffd546", + "metadata": {}, + "outputs": [], + "source": [ + "data = vbt.BinanceData.pull(\"BTCUSDT\", end=\"2022-11-01 UTC\")\n", + "data.index" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "45122110-905e-4549-92f4-205c98bd9faa", + "metadata": {}, + "outputs": [], + "source": [ + "@vbt.parameterized(merge_func=\"concat\")\n", + "def sma_crossover_perf(data, fast_window, slow_window):\n", + " fast_sma = data.run(\"sma\", fast_window, short_name=\"fast_sma\")\n", + " slow_sma = data.run(\"sma\", slow_window, short_name=\"slow_sma\")\n", + " entries = fast_sma.real_crossed_above(slow_sma)\n", + " exits = fast_sma.real_crossed_below(slow_sma)\n", + " pf = vbt.Portfolio.from_signals(\n", + " data, entries, exits, direction=\"both\")\n", + " return pf.sharpe_ratio" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "93b5c9e1-7d18-4268-9c2f-91a5404bba0a", + "metadata": {}, + "outputs": [], + "source": [ + "perf = sma_crossover_perf(\n", + " data[\"2020\":\"2020\"],\n", + " vbt.Param(np.arange(5, 50), condition=\"x < slow_window\"),\n", + " vbt.Param(np.arange(5, 50)),\n", + " _execute_kwargs=dict(\n", + " clear_cache=50,\n", + " collect_garbage=50\n", + " )\n", + ")\n", + "perf" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "804d9d36-d0c6-46ab-b0c4-5d00285f6b36", + "metadata": {}, + "outputs": [], + "source": [ + "perf.sort_values(ascending=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": 
"5f9a5d58-aebe-4a34-8847-0bc91817e083", + "metadata": {}, + "outputs": [], + "source": [ + "best_fast_window, best_slow_window = perf.idxmax()\n", + "sma_crossover_perf(\n", + " data[\"2021\":\"2021\"],\n", + " best_fast_window,\n", + " best_slow_window\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "933ad509-009a-4f7a-9de2-6e04d23a9c6f", + "metadata": {}, + "outputs": [], + "source": [ + "data[\"2021\":\"2021\"].run(\"from_holding\").sharpe_ratio" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "075e0d7a-bb55-4d69-bcbe-277497058dc2", + "metadata": {}, + "outputs": [], + "source": [ + "start_index = data.index[0]\n", + "period = pd.Timedelta(days=180)\n", + "all_is_bounds = {}\n", + "all_is_bl_perf = {}\n", + "all_is_perf = {}\n", + "all_oos_bounds = {}\n", + "all_oos_bl_perf = {}\n", + "all_oos_perf = {}\n", + "split_idx = 0\n", + "period_idx = 0\n", + "\n", + "with vbt.ProgressBar() as pbar:\n", + " while start_index + 2 * period <= data.index[-1]:\n", + " pbar.set_prefix(str(start_index))\n", + "\n", + " is_start_index = start_index\n", + " is_end_index = start_index + period - pd.Timedelta(nanoseconds=1)\n", + " is_data = data[is_start_index : is_end_index]\n", + " is_bl_perf = is_data.run(\"from_holding\").sharpe_ratio\n", + " is_perf = sma_crossover_perf(\n", + " is_data,\n", + " vbt.Param(np.arange(5, 50), condition=\"x < slow_window\"),\n", + " vbt.Param(np.arange(5, 50)),\n", + " _execute_kwargs=dict(\n", + " clear_cache=50,\n", + " collect_garbage=50\n", + " )\n", + " )\n", + "\n", + " oos_start_index = start_index + period\n", + " oos_end_index = start_index + 2 * period - pd.Timedelta(nanoseconds=1)\n", + " oos_data = data[oos_start_index : oos_end_index]\n", + " oos_bl_perf = oos_data.run(\"from_holding\").sharpe_ratio\n", + " best_fw, best_sw = is_perf.idxmax()\n", + " oos_perf = sma_crossover_perf(oos_data, best_fw, best_sw)\n", + " oos_perf_index = is_perf.index[is_perf.index == (best_fw, 
best_sw)]\n", + " oos_perf = pd.Series([oos_perf], index=oos_perf_index)\n", + "\n", + " all_is_bounds[period_idx] = (is_start_index, is_end_index)\n", + " all_oos_bounds[period_idx + 1] = (oos_start_index, oos_end_index)\n", + " all_is_bl_perf[(split_idx, period_idx)] = is_bl_perf\n", + " all_oos_bl_perf[(split_idx, period_idx + 1)] = oos_bl_perf\n", + " all_is_perf[(split_idx, period_idx)] = is_perf\n", + " all_oos_perf[(split_idx, period_idx + 1)] = oos_perf\n", + " start_index = start_index + period\n", + " split_idx += 1\n", + " period_idx += 1\n", + " pbar.update()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e9cb8334-c44c-4580-819a-6eb501e6bb1b", + "metadata": {}, + "outputs": [], + "source": [ + "is_period_ranges = pd.DataFrame.from_dict(\n", + " all_is_bounds, \n", + " orient=\"index\",\n", + " columns=[\"start\", \"end\"]\n", + ")\n", + "is_period_ranges.index.name = \"period\"\n", + "oos_period_ranges = pd.DataFrame.from_dict(\n", + " all_oos_bounds, \n", + " orient=\"index\",\n", + " columns=[\"start\", \"end\"]\n", + ")\n", + "oos_period_ranges.index.name = \"period\"\n", + "period_ranges = pd.concat((is_period_ranges, oos_period_ranges))\n", + "period_ranges = period_ranges.drop_duplicates()\n", + "print(period_ranges)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9800c089-7514-4a16-a72c-65ab7455ae26", + "metadata": {}, + "outputs": [], + "source": [ + "is_bl_perf = pd.Series(all_is_bl_perf)\n", + "is_bl_perf.index.names = [\"split\", \"period\"]\n", + "oos_bl_perf = pd.Series(all_oos_bl_perf)\n", + "oos_bl_perf.index.names = [\"split\", \"period\"]\n", + "bl_perf = pd.concat((\n", + " is_bl_perf.vbt.select_levels(\"period\"), \n", + " oos_bl_perf.vbt.select_levels(\"period\")\n", + "))\n", + "bl_perf = bl_perf.drop_duplicates()\n", + "bl_perf" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "44f04653-946d-47ec-8857-f063f07e7cc3", + "metadata": {}, + "outputs": [], + 
"source": [ + "is_perf = pd.concat(all_is_perf, names=[\"split\", \"period\"])\n", + "is_perf" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f20ae091-36b8-4811-b789-2a99dc9f0f81", + "metadata": {}, + "outputs": [], + "source": [ + "oos_perf = pd.concat(all_oos_perf, names=[\"split\", \"period\"])\n", + "oos_perf" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e73f8adc-bf80-4955-a86e-ebcb5f8254b5", + "metadata": {}, + "outputs": [], + "source": [ + "is_best_mask = is_perf.index.vbt.drop_levels(\"period\").isin(\n", + " oos_perf.index.vbt.drop_levels(\"period\"))\n", + "is_best_perf = is_perf[is_best_mask]\n", + "is_best_perf" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b77ffdd0-dbd2-4473-9eb4-ad7d36fc7625", + "metadata": {}, + "outputs": [], + "source": [ + "print(pd.concat((\n", + " is_perf.describe(),\n", + " is_best_perf.describe(),\n", + " is_bl_perf.describe(),\n", + " oos_perf.describe(),\n", + " oos_bl_perf.describe()\n", + "), axis=1, keys=[\n", + " \"IS\", \n", + " \"IS (Best)\", \n", + " \"IS (Baseline)\", \n", + " \"OOS (Test)\", \n", + " \"OOS (Baseline)\"\n", + "]))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a0f37d37-8051-4f48-b6bd-ad9ee882bd0b", + "metadata": {}, + "outputs": [], + "source": [ + "fig = is_perf.vbt.boxplot(\n", + " by_level=\"period\",\n", + " trace_kwargs=dict(\n", + " line=dict(color=\"lightskyblue\"), \n", + " opacity=0.4,\n", + " showlegend=False\n", + " ),\n", + " xaxis_title=\"Period\", \n", + " yaxis_title=\"Sharpe\",\n", + ")\n", + "fig = is_best_perf.vbt.select_levels(\"period\").vbt.plot(\n", + " trace_kwargs=dict(\n", + " name=\"Best\", \n", + " line=dict(color=\"limegreen\", dash=\"dash\")\n", + " ), \n", + " fig=fig\n", + ")\n", + "fig = bl_perf.vbt.plot(\n", + " trace_kwargs=dict(\n", + " name=\"Baseline\", \n", + " line=dict(color=\"orange\", dash=\"dash\")\n", + " ), \n", + " fig=fig\n", + ")\n", + "fig = 
oos_perf.vbt.select_levels(\"period\").vbt.plot(\n", + " trace_kwargs=dict(\n", + " name=\"Test\", \n", + " line=dict(color=\"orangered\")\n", + " ), \n", + " fig=fig\n", + ")\n", + "fig.show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a3e52247-958a-40ba-af30-7a5f1a55b744", + "metadata": {}, + "outputs": [], + "source": [ + "is_perf_split6 = is_perf.xs(6, level=\"split\")\n", + "is_perf_split6.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "53a0d648-a49e-48c5-b7a2-23fb8b18db03", + "metadata": {}, + "outputs": [], + "source": [ + "first_left_bound = period_ranges.loc[6, \"start\"]\n", + "first_right_bound = period_ranges.loc[6, \"end\"]\n", + "data[first_left_bound : first_right_bound].plot().show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "11a80453-5f53-4a7e-92b8-79834dd546af", + "metadata": {}, + "outputs": [], + "source": [ + "oos_perf.xs(6, level=\"period\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8b01f9be-a5b8-433d-aab3-c0c51219ce56", + "metadata": {}, + "outputs": [], + "source": [ + "is_perf_split6.quantile(0.25)" + ] + }, + { + "cell_type": "markdown", + "id": "c0b946fb-81de-4183-9d47-9defc4f0f44e", + "metadata": {}, + "source": [ + "## Splitter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "113d8739-60b1-40d3-b031-984d7c7adf17", + "metadata": {}, + "outputs": [], + "source": [ + "splitter = vbt.Splitter.from_rolling(\n", + " data.index, \n", + " length=360, \n", + " split=0.5,\n", + " set_labels=[\"IS\", \"OOS\"]\n", + ")\n", + "splitter.plot().show_svg()" + ] + }, + { + "cell_type": "markdown", + "id": "9e0edb2e-8e5b-4554-8c3b-8a84b54279fe", + "metadata": {}, + "source": [ + "### Schema" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "92af91ef-5b3e-4869-9ffd-cf5451714e94", + "metadata": {}, + "outputs": [], + "source": [ + "print(splitter.splits)" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "id": "31376ef0-288f-461e-8fa7-2023c85248bf", + "metadata": {}, + "outputs": [], + "source": [ + "splitter.index" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ed969ea9-de5b-4501-9093-7628fb3241ae", + "metadata": {}, + "outputs": [], + "source": [ + "splitter.wrapper.index" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c204a8e8-a640-4056-bbdd-2bfb81bbd7a7", + "metadata": {}, + "outputs": [], + "source": [ + "splitter.wrapper.columns" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac099a17-7a4a-4227-8782-f71f089e5943", + "metadata": {}, + "outputs": [], + "source": [ + "oos_splitter = splitter[\"OOS\"]\n", + "print(oos_splitter.splits)" + ] + }, + { + "cell_type": "markdown", + "id": "f043f402-5a79-4a74-a355-12bac03e0235", + "metadata": {}, + "source": [ + "#### Range format" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "87ab6012-b22b-4a7d-81f7-0b2d8f6ffce9", + "metadata": {}, + "outputs": [], + "source": [ + "index = vbt.date_range(\"2020\", periods=14)\n", + "index[slice(1, 7)]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c6462f8-0f46-4237-a499-b63cd961b55b", + "metadata": {}, + "outputs": [], + "source": [ + "index[1], index[6]" + ] + }, + { + "cell_type": "markdown", + "id": "c2289e7e-6dbc-46cd-9026-09716dd283c4", + "metadata": {}, + "source": [ + "##### Relative" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "980722dd-e524-4ab7-9e75-4871fdbcf31d", + "metadata": {}, + "outputs": [], + "source": [ + "rel_range = vbt.RelRange(offset=10, length=40)\n", + "rel_range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9e6c987f-ead6-47f2-978a-2c46bbe995b0", + "metadata": {}, + "outputs": [], + "source": [ + "rel_range.to_slice(total_len=len(splitter.index), prev_end=100)" + ] + }, + { + "cell_type": "markdown", + "id": 
"f835ba92-9d19-484f-aead-6c11c99a4ad6", + "metadata": {}, + "source": [ + "#### Array format" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "41c25ca4-bf22-4e11-bade-d115f4b692f0", + "metadata": {}, + "outputs": [], + "source": [ + "index = vbt.date_range(\"2020\", \"2021\", freq=\"1min\")\n", + "range_ = np.arange(len(index))\n", + "range_.nbytes / 1024 / 1024" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "91d67769-695f-4a29-83f6-1b09d77f9397", + "metadata": {}, + "outputs": [], + "source": [ + "range_ = np.full(len(index), True)\n", + "range_.nbytes / 1024 / 1024" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "95a932ae-317d-4168-81ba-4045716d6eda", + "metadata": {}, + "outputs": [], + "source": [ + "splitter.splits_arr.dtype" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f73dab8c-8488-48bc-8824-9f263116fa9b", + "metadata": {}, + "outputs": [], + "source": [ + "id(slice(0, 180, None))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "328d2ec2-d2cc-4550-b0ae-273287cdd05f", + "metadata": {}, + "outputs": [], + "source": [ + "range_00 = np.arange(0, 5)\n", + "range_01 = np.arange(5, 15)\n", + "range_10 = np.arange(15, 30)\n", + "range_11 = np.arange(30, 50)\n", + "\n", + "ind_splitter = vbt.Splitter.from_splits(\n", + " data.index,\n", + " [[range_00, range_01], [range_10, range_11]],\n", + " fix_ranges=False\n", + ")\n", + "print(ind_splitter.splits)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "deae0ac0-0218-4835-aedd-6dbc5696cc75", + "metadata": {}, + "outputs": [], + "source": [ + "ind_splitter.splits.loc[0, \"set_1\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "35f60dad-3a36-4c62-b07d-c36fb7a1b9da", + "metadata": {}, + "outputs": [], + "source": [ + "ind_splitter.splits.loc[0, \"set_1\"].range_" + ] + }, + { + "cell_type": "markdown", + "id": "6326885f-9be3-4416-a868-187bfc2808d6", + 
"metadata": {}, + "source": [ + "### Preparation" + ] + }, + { + "cell_type": "markdown", + "id": "79a85a1f-eae2-49ff-b239-574ff90eadc4", + "metadata": {}, + "source": [ + "#### Splits" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "acad6284-91bb-48d0-86bd-f3324236fa40", + "metadata": {}, + "outputs": [], + "source": [ + "vbt.Splitter.split_range(\n", + " slice(None),\n", + " (vbt.RelRange(length=0.75), vbt.RelRange()),\n", + " index=data.index\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4674b573-34fd-4d15-bc81-0adda3784086", + "metadata": {}, + "outputs": [], + "source": [ + "splitter.split_range(\n", + " slice(None),\n", + " (vbt.RelRange(length=0.75), vbt.RelRange())\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "244f8814-d37e-4804-97fc-4586223f308d", + "metadata": {}, + "outputs": [], + "source": [ + "data[slice(0, 1426, None)]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a2637af2-a0d0-47bf-bf97-a9cfc9143a06", + "metadata": {}, + "outputs": [], + "source": [ + "vbt.Splitter.split_range(\n", + " slice(None), \n", + " 0.75, \n", + " index=data.index\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d79151bf-1d01-436b-a4b4-3f73e1016e4f", + "metadata": {}, + "outputs": [], + "source": [ + "vbt.Splitter.split_range(\n", + " slice(None), \n", + " -0.25,\n", + " index=data.index\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ab0e6009-1b0e-4f0f-838a-cfe4a87f7406", + "metadata": {}, + "outputs": [], + "source": [ + "int(0.75 * len(data.index))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c87ffde6-b565-457a-bb97-0be21789705e", + "metadata": {}, + "outputs": [], + "source": [ + "len(data.index) - int(0.25 * len(data.index))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "653cc6d7-2236-4b12-ba92-a115cab15656", + "metadata": {}, + 
"outputs": [], + "source": [ + "vbt.Splitter.split_range(\n", + " slice(None), \n", + " (vbt.RelRange(), vbt.RelRange(length=0.25)),\n", + " backwards=True,\n", + " index=data.index\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0065dda0-0bfd-4ce0-b6bc-693c7f0960a5", + "metadata": {}, + "outputs": [], + "source": [ + "vbt.Splitter.split_range(\n", + " slice(None), \n", + " (1.0, 30), \n", + " backwards=True,\n", + " index=data.index\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a4c1bc39-70e6-4b94-97d5-1472c705e70b", + "metadata": {}, + "outputs": [], + "source": [ + "vbt.Splitter.split_range(\n", + " slice(None), \n", + " (\n", + " vbt.RelRange(length=0.4, length_space=\"all\"), \n", + " vbt.RelRange(length=0.4, length_space=\"all\"),\n", + " vbt.RelRange()\n", + " ),\n", + " index=data.index\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d2c201c9-f114-4f3b-ad47-bcee006d7953", + "metadata": {}, + "outputs": [], + "source": [ + "vbt.Splitter.split_range(\n", + " slice(None),\n", + " (vbt.RelRange(length=0.75), vbt.RelRange(offset=1)),\n", + " index=data.index\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6f23a330-cb2f-4a15-8a09-08504c4cd48a", + "metadata": {}, + "outputs": [], + "source": [ + "vbt.Splitter.split_range(\n", + " slice(None), \n", + " (\n", + " vbt.RelRange(length=0.75), \n", + " vbt.RelRange(length=1, is_gap=True),\n", + " vbt.RelRange()\n", + " ),\n", + " index=data.index\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "958e0e1c-2735-4568-b05b-b29c8b4f7560", + "metadata": {}, + "outputs": [], + "source": [ + "vbt.Splitter.split_range(\n", + " slice(None), \n", + " (np.array([3, 4, 5]), np.array([6, 8, 10])),\n", + " index=data.index\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a60e0aaa-d581-4530-a747-3067c8678650", + "metadata": {}, + 
"outputs": [], + "source": [ + "vbt.Splitter.split_range(\n", + " slice(None), \n", + " (np.array([3, 4, 5]), np.array([6, 8, 10])),\n", + " range_format=\"indices\",\n", + " index=data.index\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4249fa3b-ef6f-41b7-8d8d-81723e6ce48a", + "metadata": {}, + "outputs": [], + "source": [ + "vbt.Splitter.split_range(\n", + " slice(None), \n", + " (slice(\"2020\", \"2021\"), slice(\"2021\", \"2022\")),\n", + " index=data.index\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bfbae80f-a314-41f9-8c49-2111f4df01d9", + "metadata": {}, + "outputs": [], + "source": [ + "data.index[867:1233]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "debcb754-e691-4e59-b318-61486af3fbf4", + "metadata": {}, + "outputs": [], + "source": [ + "data.index[1233:1598]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7d1340f2-d1b9-4fbe-9898-d41620e3d30d", + "metadata": {}, + "outputs": [], + "source": [ + "vbt.Splitter.split_range(\n", + " slice(None), \n", + " (\n", + " vbt.RelRange(length=\"180 days\"), \n", + " vbt.RelRange(offset=\"1 day\", length=\"90 days\")\n", + " ),\n", + " index=data.index\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "8638db99-b5a3-4ce8-adcc-f3ebf7c59b71", + "metadata": {}, + "source": [ + "#### Method" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b6a99b43-4326-4d37-85fc-2f6b0076ebd4", + "metadata": {}, + "outputs": [], + "source": [ + "manual_splitter = vbt.Splitter.from_splits(\n", + " data.index,\n", + " [\n", + " (vbt.RelRange(), vbt.RelRange(offset=0.5, length=0.25, length_space=\"all\")),\n", + " (vbt.RelRange(), vbt.RelRange(offset=0.25, length=0.25, length_space=\"all\")),\n", + " (vbt.RelRange(), vbt.RelRange(offset=0, length=0.25, length_space=\"all\")),\n", + " ],\n", + " split_range_kwargs=dict(backwards=True),\n", + " set_labels=[\"IS\", \"OOS\"]\n", + ")\n", + 
"print(manual_splitter.splits)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "871ceeb6-533d-48ce-803c-49106fc6d807", + "metadata": {}, + "outputs": [], + "source": [ + "manual_splitter.plot().show_svg()" + ] + }, + { + "cell_type": "markdown", + "id": "22246385-ae17-47db-8815-71788c60453d", + "metadata": {}, + "source": [ + "### Generation" + ] + }, + { + "cell_type": "markdown", + "id": "e6dbecb8-fd62-4a3d-9682-24deb81a8da0", + "metadata": {}, + "source": [ + "#### Rolling" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d0ab8ccc-dfe7-4612-8a39-3017795e0486", + "metadata": {}, + "outputs": [], + "source": [ + "vbt.Splitter.from_rolling(\n", + " data.index,\n", + " length=360,\n", + ").plot().show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "02d5d230-30c0-4156-a493-6ef4157d727a", + "metadata": {}, + "outputs": [], + "source": [ + "vbt.Splitter.from_rolling(\n", + " data.index, \n", + " length=360,\n", + " offset=90\n", + ").plot().show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ba270114-7b18-45d0-a542-7bcfe3c0ddc3", + "metadata": {}, + "outputs": [], + "source": [ + "vbt.Splitter.from_rolling(\n", + " data.index, \n", + " length=360,\n", + " offset=-0.5\n", + ").plot().show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f47fb9c2-eebd-40f0-b901-da3dd54ae721", + "metadata": {}, + "outputs": [], + "source": [ + "vbt.Splitter.from_rolling(\n", + " data.index, \n", + " length=360,\n", + " split=0.5\n", + ").plot().show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "66314ec5-2a84-4343-8b08-9712bccc2d37", + "metadata": {}, + "outputs": [], + "source": [ + "vbt.Splitter.from_rolling(\n", + " data.index,\n", + " length=360,\n", + " split=0.5,\n", + " offset_anchor_set=None\n", + ").plot().show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": 
"2c0661b1-df61-4792-9da6-720d6866e7c5", + "metadata": {}, + "outputs": [], + "source": [ + "vbt.Splitter.from_n_rolling(\n", + " data.index,\n", + " n=5,\n", + " split=0.5\n", + ").plot().show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "34c752b3-ed6c-4752-b835-e6a0003ff1e5", + "metadata": {}, + "outputs": [], + "source": [ + "vbt.Splitter.from_n_rolling(\n", + " data.index,\n", + " n=3,\n", + " length=360,\n", + " split=0.5\n", + ").plot().show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1b40afcc-b3d1-47c3-9f84-f30d1840333e", + "metadata": {}, + "outputs": [], + "source": [ + "vbt.Splitter.from_n_rolling(\n", + " data.index,\n", + " n=7,\n", + " length=360,\n", + " split=0.5\n", + ").plot().show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "21f304b2-205c-4cb3-a231-905e266d437a", + "metadata": {}, + "outputs": [], + "source": [ + "vbt.Splitter.from_expanding(\n", + " data.index, \n", + " min_length=360,\n", + " offset=180,\n", + " split=-180\n", + ").plot().show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "59a2a68e-b863-4526-aa79-6694b825ac4f", + "metadata": {}, + "outputs": [], + "source": [ + "vbt.Splitter.from_n_expanding(\n", + " data.index, \n", + " n=5,\n", + " min_length=360,\n", + " split=-180\n", + ").plot().show_svg()" + ] + }, + { + "cell_type": "markdown", + "id": "2bd6889b-8da8-4bc6-be03-23a4e7eca992", + "metadata": {}, + "source": [ + "#### Anchored" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1ef7b028-471b-4033-a3c4-1aaa3354a237", + "metadata": {}, + "outputs": [], + "source": [ + "vbt.Splitter.from_ranges(\n", + " data.index,\n", + " every=\"Y\",\n", + " split=0.5\n", + ").plot().show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2eed590b-93be-4ae5-967d-8b9c04bf3990", + "metadata": {}, + "outputs": [], + "source": [ + "vbt.Splitter.from_ranges(\n", + " 
data.index,\n", + " every=\"Q\",\n", + " lookback_period=\"Y\",\n", + " split=0.5\n", + ").plot().show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "69154548-b1e8-4ecd-95fd-9be2f5466ed4", + "metadata": {}, + "outputs": [], + "source": [ + "vbt.Splitter.from_ranges(\n", + " data.index,\n", + " every=\"Q\",\n", + " lookback_period=\"Y\",\n", + " split=(\n", + " vbt.RepEval(\"index.month != index.month[-1]\"),\n", + " vbt.RepEval(\"index.month == index.month[-1]\")\n", + " )\n", + ").plot().show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1562819f-2715-41b2-977e-6e046aba4e0b", + "metadata": {}, + "outputs": [], + "source": [ + "def qyear(index):\n", + " return index.to_period(\"Q\")\n", + "\n", + "vbt.Splitter.from_ranges(\n", + " data.index,\n", + " start=0,\n", + " fixed_start=True,\n", + " every=\"Q\",\n", + " closed_end=True,\n", + " split=(\n", + " lambda index: qyear(index) != qyear(index)[-1],\n", + " lambda index: qyear(index) == qyear(index)[-1]\n", + " )\n", + ").plot().show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "432485a6-f8f4-4970-8cec-56689fc1969d", + "metadata": {}, + "outputs": [], + "source": [ + "vbt.Splitter.from_grouper(\n", + " data.index,\n", + " by=\"Y\",\n", + " split=0.5\n", + ").plot().show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5cd1d516-094e-4f32-a202-6d49e700386c", + "metadata": {}, + "outputs": [], + "source": [ + "def is_split_complete(index, split):\n", + " first_range = split[0]\n", + " first_index = index[first_range][0]\n", + " last_range = split[-1]\n", + " last_index = index[last_range][-1]\n", + " return first_index.is_year_start and last_index.is_year_end\n", + "\n", + "vbt.Splitter.from_grouper(\n", + " data.index,\n", + " by=\"Y\",\n", + " split=0.5,\n", + " split_check_template=vbt.RepFunc(is_split_complete)\n", + ").plot().show_svg()" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "id": "6cc38232-edfb-4395-899f-47ed3aa0723c", + "metadata": {}, + "outputs": [], + "source": [ + "def format_split_labels(index, splits_arr):\n", + " years = map(lambda x: index[x[0]][0].year, splits_arr)\n", + " return pd.Index(years, name=\"split_year\")\n", + "\n", + "vbt.Splitter.from_grouper(\n", + " data.index,\n", + " by=\"Y\",\n", + " split=0.5,\n", + " split_check_template=vbt.RepFunc(is_split_complete),\n", + " split_labels=vbt.RepFunc(format_split_labels)\n", + ").plot().show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "11d702be-5df5-42de-9491-9b38ac7aae06", + "metadata": {}, + "outputs": [], + "source": [ + "vbt.Splitter.from_grouper(\n", + " data.index,\n", + " by=data.index.year,\n", + " split=0.5,\n", + " split_check_template=vbt.RepFunc(is_split_complete)\n", + ").plot().show_svg()" + ] + }, + { + "cell_type": "markdown", + "id": "38d4f68a-0372-48e5-bc1a-c45171942e7e", + "metadata": {}, + "source": [ + "#### Random" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e72d4e77-c387-4c7e-bcd5-07e8c15ac15c", + "metadata": {}, + "outputs": [], + "source": [ + "vbt.Splitter.from_n_random(\n", + " data.index,\n", + " n=50,\n", + " min_length=360,\n", + " seed=42,\n", + " split=0.5\n", + ").plot().show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dbeea5da-bceb-4e8e-866b-2a0e608995f2", + "metadata": {}, + "outputs": [], + "source": [ + "vbt.Splitter.from_n_random(\n", + " data.index,\n", + " n=50,\n", + " min_length=60,\n", + " max_length=480,\n", + " seed=42,\n", + " split=0.5\n", + ").plot().show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b2c41a6c-b579-499c-98af-b7332cdaa7ab", + "metadata": {}, + "outputs": [], + "source": [ + "def start_p_func(i, indices):\n", + " return indices / indices.sum()\n", + "\n", + "vbt.Splitter.from_n_random(\n", + " data.index,\n", + " n=50,\n", + " min_length=60,\n", + " 
max_length=480,\n", + " seed=42,\n", + " start_p_func=start_p_func,\n", + " split=0.5\n", + ").plot().show_svg()" + ] + }, + { + "cell_type": "markdown", + "id": "bf44a54a-5349-4b2e-a92a-03ecdda01e00", + "metadata": {}, + "source": [ + "#### Scikit-learn" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3992f4a8-d943-4256-939f-6f3be3767df6", + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.model_selection import KFold\n", + "\n", + "vbt.Splitter.from_sklearn(\n", + " data.index, \n", + " KFold(n_splits=5)\n", + ").plot().show_svg()" + ] + }, + { + "cell_type": "markdown", + "id": "43c93cc4-b494-4119-923a-c01e5e84b458", + "metadata": {}, + "source": [ + "#### Dynamic" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a55967e9-58b6-46a4-92eb-47de2c99ee40", + "metadata": {}, + "outputs": [], + "source": [ + "def split_func(index, prev_start):\n", + " if prev_start is None:\n", + " prev_start = index[0]\n", + " new_start = prev_start + pd.offsets.MonthBegin(1)\n", + " new_end = new_start + pd.DateOffset(years=1)\n", + " if new_end > index[-1] + index.freq:\n", + " return None\n", + " return [\n", + " slice(new_start, new_start + pd.offsets.MonthBegin(9)),\n", + " slice(new_start + pd.offsets.MonthBegin(9), new_end)\n", + " ]\n", + "\n", + "vbt.Splitter.from_split_func(\n", + " data.index,\n", + " split_func=split_func,\n", + " split_args=(vbt.Rep(\"index\"), vbt.Rep(\"prev_start\")),\n", + " range_bounds_kwargs=dict(index_bounds=True)\n", + ").plot().show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3dcf9638-36e8-4304-99e4-c30e1970cfd5", + "metadata": {}, + "outputs": [], + "source": [ + "def get_next_monday(from_date):\n", + " if from_date.weekday == 0 and from_date.ceil(\"H\").hour <= 9:\n", + " return from_date.floor(\"D\")\n", + " return from_date.floor(\"D\") + pd.offsets.Week(n=0, weekday=0)\n", + "\n", + "def get_next_business_range(from_date):\n", + " monday_0000 = 
get_next_monday(from_date)\n", + " monday_0900 = monday_0000 + pd.DateOffset(hours=9)\n", + " friday_1700 = monday_0900 + pd.DateOffset(days=4, hours=8)\n", + " return slice(monday_0900, friday_1700)\n", + "\n", + "def split_func(index, bounds):\n", + " if len(bounds) == 0:\n", + " from_date = index[0]\n", + " else:\n", + " from_date = bounds[-1][1][0]\n", + " train_range = get_next_business_range(from_date)\n", + " test_range = get_next_business_range(train_range.stop)\n", + " if test_range.stop > index[-1] + index.freq:\n", + " return None\n", + " return train_range, test_range\n", + "\n", + "vbt.Splitter.from_split_func(\n", + " vbt.date_range(\"2020-01\", \"2020-03\", freq=\"15min\"),\n", + " split_func=split_func,\n", + " split_args=(vbt.Rep(\"index\"), vbt.Rep(\"bounds\")),\n", + " range_bounds_kwargs=dict(index_bounds=True)\n", + ").plot().show_svg()" + ] + }, + { + "cell_type": "markdown", + "id": "4ab3b2d1-5fb5-4978-95e0-2ee627abe829", + "metadata": {}, + "source": [ + "### Validation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0b84d3f9-22a1-46b3-9593-4ebe18060fc1", + "metadata": {}, + "outputs": [], + "source": [ + "splitter = vbt.Splitter.from_ranges(\n", + " data.index,\n", + " every=\"Y\",\n", + " closed_end=True,\n", + " split=0.5,\n", + " set_labels=[\"IS\", \"OOS\"]\n", + ")\n", + "splitter.plot().show_svg()" + ] + }, + { + "cell_type": "markdown", + "id": "589b9b90-e825-4cf6-a402-4720fd82e6b0", + "metadata": {}, + "source": [ + "#### Bounds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9aa53234-fab9-4db4-b0f6-955f8c66f467", + "metadata": {}, + "outputs": [], + "source": [ + "bounds_arr = splitter.get_bounds_arr()\n", + "bounds_arr.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f364195f-9ef1-43ee-b8a9-af7835d147a2", + "metadata": {}, + "outputs": [], + "source": [ + "print(bounds_arr)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": 
"b3051c7d-3186-49a4-9092-d837f79c09c8", + "metadata": {}, + "outputs": [], + "source": [ + "bounds = splitter.get_bounds(index_bounds=True)\n", + "bounds.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9d76d199-12db-4574-a8f4-0b87107d8a0d", + "metadata": {}, + "outputs": [], + "source": [ + "print(bounds)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a0818b81-9735-481d-b510-14140ba6c576", + "metadata": {}, + "outputs": [], + "source": [ + "bounds.loc[(0, \"OOS\"), \"end\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "21737903-0707-410c-a1c1-133178784fcf", + "metadata": {}, + "outputs": [], + "source": [ + "bounds.loc[(1, \"IS\"), \"start\"]" + ] + }, + { + "cell_type": "markdown", + "id": "c5b5d61b-66fe-4057-b699-64d1863d061c", + "metadata": {}, + "source": [ + "#### Masks" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "32b1ad13-4523-41d9-b424-637f1b25b4a2", + "metadata": {}, + "outputs": [], + "source": [ + "mask = splitter.get_mask()\n", + "mask.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a21e4d35-4403-4d5e-ad67-0086da9444ce", + "metadata": {}, + "outputs": [], + "source": [ + "print(mask)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "328a93e7-7b14-42d8-acc6-3eba2a72ec4f", + "metadata": {}, + "outputs": [], + "source": [ + "mask[\"2021\":\"2021\"].any()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8bccca0a-65be-4810-bd5e-5222edfba6c4", + "metadata": {}, + "outputs": [], + "source": [ + "print(mask.resample(vbt.offset(\"Y\")).sum())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "629ea5be-73c7-4799-8d07-ab28a90df329", + "metadata": {}, + "outputs": [], + "source": [ + "results = []\n", + "for mask in splitter.get_iter_split_masks():\n", + " results.append(mask.resample(vbt.offset(\"Y\")).sum())\n", + "print(pd.concat(results, axis=1, 
keys=splitter.split_labels))" + ] + }, + { + "cell_type": "markdown", + "id": "ec5cbff7-c630-4d35-a607-cf4f7c482f0f", + "metadata": {}, + "source": [ + "#### Coverage" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "76bb4bb3-027b-465f-be37-fb44cb567bc6", + "metadata": {}, + "outputs": [], + "source": [ + "splitter.get_split_coverage()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "071e5e14-def1-4133-8682-d4e214810079", + "metadata": {}, + "outputs": [], + "source": [ + "splitter.get_set_coverage()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d885fac2-0a56-4e4e-81d0-52a29b8cd572", + "metadata": {}, + "outputs": [], + "source": [ + "splitter.get_range_coverage()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7c4301ee-8998-44d5-b112-0e463f0213ea", + "metadata": {}, + "outputs": [], + "source": [ + "splitter.get_coverage()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4d328277-71e9-49c1-8b54-d7ecf08682e2", + "metadata": {}, + "outputs": [], + "source": [ + "splitter.index_bounds.loc[(2, \"OOS\"), \"start\"].is_leap_year" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a8e94289-4550-43bf-9cca-f13cb6e195ad", + "metadata": {}, + "outputs": [], + "source": [ + "splitter.get_range_coverage(relative=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0c9e5444-4276-42bc-b0c6-0799c7606345", + "metadata": {}, + "outputs": [], + "source": [ + "splitter.get_set_coverage(relative=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cf6715aa-0a33-4202-9b38-3dc8f6b24ebc", + "metadata": {}, + "outputs": [], + "source": [ + "splitter.get_split_coverage(overlapping=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1567e6fe-85e5-4075-b4a4-f0490d69bfb4", + "metadata": {}, + "outputs": [], + "source": [ + "splitter.get_set_coverage(overlapping=True)" + ] + 
}, + { + "cell_type": "code", + "execution_count": null, + "id": "a1935522-f1f7-460d-939b-8134fe1287e1", + "metadata": {}, + "outputs": [], + "source": [ + "splitter.get_coverage(overlapping=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2466892b-1521-4b7d-94bd-54ab21adf816", + "metadata": {}, + "outputs": [], + "source": [ + "splitter.plot_coverage().show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b4f46cc9-edeb-42bb-b9d1-fbb26d9510e9", + "metadata": {}, + "outputs": [], + "source": [ + "print(splitter.get_overlap_matrix(by=\"range\", normalize=False))" + ] + }, + { + "cell_type": "markdown", + "id": "0f125754-29e4-4d22-9f0f-defb3db268ce", + "metadata": {}, + "source": [ + "#### Grouping" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7af58745-fa11-4a7a-bd1e-833df3d3a8b4", + "metadata": {}, + "outputs": [], + "source": [ + "print(splitter.get_bounds(index_bounds=True, set_group_by=True))" + ] + }, + { + "cell_type": "markdown", + "id": "b5d73373-ed8d-4943-860f-05342ca2c511", + "metadata": {}, + "source": [ + "### Manipulation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2d1d4116-8e59-47d2-9364-c44aa908d2f0", + "metadata": {}, + "outputs": [], + "source": [ + "splitter = vbt.Splitter.from_grouper(\n", + " data.index, \n", + " by=data.index.year.rename(\"split_year\")\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6b9c5b86-7aec-4496-94d9-4c592766e167", + "metadata": {}, + "outputs": [], + "source": [ + "splitter.stats()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0aa022d7-6eb4-471f-aa5a-ab26c5deee58", + "metadata": {}, + "outputs": [], + "source": [ + "splitter.plots().show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "98537c1f-efbb-4574-9f57-5bdb07b452b9", + "metadata": {}, + "outputs": [], + "source": [ + "splitter = splitter.iloc[1:-1]\n", + 
"splitter.stats()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a442aad5-ead3-4d4e-b049-7e3090200ddf", + "metadata": {}, + "outputs": [], + "source": [ + "def new_split(index):\n", + " return [\n", + " np.isin(index.quarter, [1, 2]), \n", + " index.quarter == 3, \n", + " index.quarter == 4\n", + " ]\n", + "\n", + "splitter = splitter.split_set(\n", + " vbt.RepFunc(new_split),\n", + " new_set_labels=[\"train\", \"valid\", \"test\"]\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a938abed-831a-4cb2-9729-9b015d819e38", + "metadata": {}, + "outputs": [], + "source": [ + "splitter.stats()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0f9125b7-6d85-4117-ad6b-37b24bf3c50d", + "metadata": {}, + "outputs": [], + "source": [ + "splitter.plots().show_svg()" + ] + }, + { + "cell_type": "markdown", + "id": "35f5cfe5-0626-4372-a90b-df68e155d994", + "metadata": {}, + "source": [ + "#### Homework" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3d9fb4df-5ca0-43f5-869c-4721c3993402", + "metadata": {}, + "outputs": [], + "source": [ + "splitter = splitter.merge_sets(columns=[\"valid\", \"test\"], new_set_label=\"test\")\n", + "splitter.plots().show_svg()" + ] + }, + { + "cell_type": "markdown", + "id": "d00fdcdf-0bd1-49ed-ade9-411a2ae3d79c", + "metadata": {}, + "source": [ + "## Applications" + ] + }, + { + "cell_type": "markdown", + "id": "3ebb6158-e395-49e9-bcc8-cd258c17783f", + "metadata": {}, + "source": [ + "### Taking" + ] + }, + { + "cell_type": "markdown", + "id": "1aa23c97-e221-48ed-8a5e-484fda11e955", + "metadata": {}, + "source": [ + "#### Without stacking" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ed524fc3-b782-43dc-a8a6-0bced61a3708", + "metadata": {}, + "outputs": [], + "source": [ + "close_slices = splitter.take(data.close)\n", + "close_slices" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": 
"74d768fe-ce73-41c1-945f-c21fbb7fd662", + "metadata": {}, + "outputs": [], + "source": [ + "close_slices[2020, \"test\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "79ac7f33-08aa-4bb7-87d2-f8e80c94db0c", + "metadata": {}, + "outputs": [], + "source": [ + "def get_total_return(sr):\n", + " return sr.vbt.to_returns().vbt.returns.total()\n", + "\n", + "close_slices.apply(get_total_return)" + ] + }, + { + "cell_type": "markdown", + "id": "02213852-3258-46d6-97a4-32a3a5f19820", + "metadata": {}, + "source": [ + "##### Complex objects" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c2d3208f-2454-4501-8cd2-e94694238a56", + "metadata": {}, + "outputs": [], + "source": [ + "trendlb = data.run(\"trendlb\", 1.0, 0.5)\n", + "trendlb.plot().show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4926dbe3-ecc6-445d-868a-46a88af5756c", + "metadata": {}, + "outputs": [], + "source": [ + "grouper = pd.Index(trendlb.labels.map({1: \"U\", 0: \"D\"}), name=\"trend\")\n", + "trend_splitter = vbt.Splitter.from_grouper(data.index, grouper)\n", + "trend_splitter.plot().show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f41b604c-84ca-4ea2-a293-0d40878eb65c", + "metadata": {}, + "outputs": [], + "source": [ + "hold_pf = vbt.Portfolio.from_holding(data)\n", + "hold_returns_acc = hold_pf.returns_acc\n", + "\n", + "fast_sma, slow_sma = vbt.talib(\"SMA\").run_combs(\n", + " data.close, np.arange(5, 50), short_names=[\"fast_sma\", \"slow_sma\"])\n", + "entries = fast_sma.real_crossed_above(slow_sma)\n", + "exits = fast_sma.real_crossed_below(slow_sma)\n", + "strat_pf = vbt.Portfolio.from_signals(\n", + " data, entries, exits, direction=\"both\")\n", + "strat_returns_acc = strat_pf.returns_acc" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "84335fef-a4c9-44a3-8f68-61ab14e6d01d", + "metadata": {}, + "outputs": [], + "source": [ + "hold_returns_acc_slices = 
trend_splitter.take(hold_returns_acc)\n", + "strat_returns_acc_slices = trend_splitter.take(strat_returns_acc)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "faae3461-56b6-4f50-aad8-f196745375e3", + "metadata": {}, + "outputs": [], + "source": [ + "hold_returns_acc_slices[\"U\"].sharpe_ratio()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "91ae1996-5fce-459d-a08c-58950ca7ac83", + "metadata": {}, + "outputs": [], + "source": [ + "strat_returns_acc_slices[\"U\"].sharpe_ratio().vbt.heatmap(\n", + " x_level=\"fast_sma_timeperiod\", \n", + " y_level=\"slow_sma_timeperiod\",\n", + " symmetric=True\n", + ").show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ea0824b5-c6b5-48a5-9abd-3b8d582afeca", + "metadata": {}, + "outputs": [], + "source": [ + "hold_returns_acc_slices[\"D\"].sharpe_ratio()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dff164c5-5840-4bda-b25a-b290be490d2e", + "metadata": {}, + "outputs": [], + "source": [ + "strat_returns_acc_slices[\"D\"].sharpe_ratio().vbt.heatmap(\n", + " x_level=\"fast_sma_timeperiod\", \n", + " y_level=\"slow_sma_timeperiod\",\n", + " symmetric=True\n", + ").show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "42039308-cbeb-4a20-928c-8c1b4588b84e", + "metadata": {}, + "outputs": [], + "source": [ + "trend_splitter = trend_splitter.break_up_splits(\"by_gap\", sort=True)\n", + "trend_splitter.plot().show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6e564cf0-f138-4552-afc6-0ce34502e3e8", + "metadata": {}, + "outputs": [], + "source": [ + "strat_pf_slices = strat_pf.split(trend_splitter)\n", + "strat_pf_slices" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "247f3770-249d-4612-bb7c-4c3d64e141f9", + "metadata": {}, + "outputs": [], + "source": [ + "trend_range_perf = strat_pf_slices.apply(lambda pf: pf.sharpe_ratio)\n", + "median_trend_perf 
= trend_range_perf.median(axis=1)\n", + "median_trend_perf" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "483eaa0d-cde1-4c30-873e-cc8d668139f8", + "metadata": {}, + "outputs": [], + "source": [ + "trend_perf_ts = data.symbol_wrapper.fill().rename(\"trend_perf\")\n", + "for label, sr in trend_splitter.bounds.iterrows():\n", + " trend_perf_ts.iloc[sr[\"start\"]:sr[\"end\"]] = median_trend_perf[label]\n", + "data.close.vbt.overlay_with_heatmap(trend_perf_ts).show_svg()" + ] + }, + { + "cell_type": "markdown", + "id": "7f013d68-bbc6-43b4-ae38-0414781602a4", + "metadata": {}, + "source": [ + "#### Column stacking" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c899aa78-29d8-4833-b307-2949132e0b78", + "metadata": {}, + "outputs": [], + "source": [ + "close_stacked = pd.concat(\n", + " close_slices.values.tolist(), \n", + " axis=1, \n", + " keys=close_slices.index\n", + ")\n", + "print(close_stacked)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4e83e120-ee01-4664-81bb-39b4c084d3b6", + "metadata": {}, + "outputs": [], + "source": [ + "get_total_return(close_stacked)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "20abc610-b2a9-4ecd-a724-f3c267e71636", + "metadata": {}, + "outputs": [], + "source": [ + "close_stacked = splitter.take(data.close, into=\"stacked\")\n", + "close_stacked.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5d67665d-ea55-400c-ace4-bd5a2adaef12", + "metadata": {}, + "outputs": [], + "source": [ + "close_stacked = splitter.take(data.close, into=\"reset_stacked\")\n", + "print(close_stacked)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c6a65f09-264e-40d4-a01b-f8ef208e331d", + "metadata": {}, + "outputs": [], + "source": [ + "close_stacked = splitter.take(data.close, into=\"from_end_stacked\")\n", + "print(close_stacked)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": 
"25c891ae-ffab-4ef1-a97d-0de12a1c0f63", + "metadata": {}, + "outputs": [], + "source": [ + "close_stacked = splitter.take(data.close, into=\"reset_stacked_by_set\")\n", + "close_stacked" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "78f294b1-a07b-4f1e-a43f-bf6b33ec0d13", + "metadata": {}, + "outputs": [], + "source": [ + "print(close_stacked[\"train\"])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "94613e5d-2c2f-48ba-9714-e95b6d706b7a", + "metadata": {}, + "outputs": [], + "source": [ + "print(182 * 8)\n", + "print(1461 * 8)\n", + "print(1 - 1456 / 11688)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f72feaad-a8d0-4249-9719-7e0f5811f9c5", + "metadata": {}, + "outputs": [], + "source": [ + "index_slices = splitter.take(data.index)\n", + "index_slices" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0e0b9776-b7a4-4dc2-bcf5-3f73c47d9d5d", + "metadata": {}, + "outputs": [], + "source": [ + "close_stacked_wb = splitter.take(\n", + " data.close, \n", + " into=\"reset_stacked_by_set\",\n", + " attach_bounds=\"index\",\n", + " right_inclusive=True\n", + ")\n", + "print(close_stacked_wb[\"train\"])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "48b59d45-5ddc-4777-969b-1582b4660055", + "metadata": {}, + "outputs": [], + "source": [ + "@vbt.parameterized(merge_func=\"concat\")\n", + "def set_sma_crossover_perf(close, fast_window, slow_window, freq):\n", + " fast_sma = vbt.talib(\"sma\").run(\n", + " close, fast_window, short_name=\"fast_sma\", hide_params=True) \n", + " slow_sma = vbt.talib(\"sma\").run(\n", + " close, slow_window, short_name=\"slow_sma\", hide_params=True) \n", + " entries = fast_sma.real_crossed_above(slow_sma)\n", + " exits = fast_sma.real_crossed_below(slow_sma)\n", + " pf = vbt.Portfolio.from_signals(\n", + " close, entries, exits, freq=freq, direction=\"both\")\n", + " return pf.sharpe_ratio" + ] + }, + { + "cell_type": 
"code", + "execution_count": null, + "id": "21ec3795-d820-4f7d-b6fa-de6710fac0cd", + "metadata": {}, + "outputs": [], + "source": [ + "train_perf = set_sma_crossover_perf(\n", + " close_stacked[\"train\"],\n", + " vbt.Param(np.arange(5, 50), condition=\"x < slow_window\"),\n", + " vbt.Param(np.arange(5, 50)),\n", + " data.index.freq,\n", + " _execute_kwargs=dict(\n", + " clear_cache=50,\n", + " collect_garbage=50\n", + " )\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "63202321-0689-4aca-9e82-7ece4fd18a0e", + "metadata": {}, + "outputs": [], + "source": [ + "train_perf" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "75669ef5-ad2a-4122-8be9-c510ca252fb0", + "metadata": {}, + "outputs": [], + "source": [ + "train_perf.vbt.heatmap(\n", + " x_level=\"fast_window\",\n", + " y_level=\"slow_window\",\n", + " slider_level=\"split_year\",\n", + " symmetric=True\n", + ").show_svg() # replace with show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "66734f37-624d-43ed-83ae-a9b33ace0085", + "metadata": {}, + "outputs": [], + "source": [ + "@njit\n", + "def prox_median_nb(arr):\n", + " if (~np.isnan(arr)).sum() < 20:\n", + " return np.nan\n", + " return np.nanmedian(arr)\n", + "\n", + "prox_perf_list = []\n", + "for split_label, perf_sr in train_perf.groupby(\"split_year\"):\n", + " perf_df = perf_sr.vbt.unstack_to_df(0, [1, 2])\n", + " prox_perf_df = perf_df.vbt.proximity_apply(2, prox_median_nb)\n", + " prox_perf_sr = prox_perf_df.stack([0, 1])\n", + " prox_perf_list.append(prox_perf_sr.reindex(perf_sr.index))\n", + "\n", + "train_prox_perf = pd.concat(prox_perf_list)\n", + "train_prox_perf" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d64b2b46-598e-4211-8e66-fd2824156f5a", + "metadata": {}, + "outputs": [], + "source": [ + "train_prox_perf.vbt.heatmap(\n", + " x_level=\"fast_window\",\n", + " y_level=\"slow_window\",\n", + " slider_level=\"split_year\",\n", + " 
symmetric=True\n", + ").show_svg() # replace with show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a2bd48a4-74c9-4e9d-a2a4-c1a726ca1619", + "metadata": {}, + "outputs": [], + "source": [ + "best_params = train_prox_perf.groupby(\"split_year\").idxmax()\n", + "best_params = train_prox_perf[best_params].index\n", + "train_prox_perf[best_params]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c99e5a80-3fa4-453b-ad33-88c825f2dfa7", + "metadata": {}, + "outputs": [], + "source": [ + "test_perf = set_sma_crossover_perf(\n", + " vbt.RepEval(\n", + " \"test_close.iloc[:, [config_idx]]\", \n", + " context=dict(test_close=close_stacked[\"test\"])\n", + " ),\n", + " vbt.Param(best_params.get_level_values(\"fast_window\"), level=0),\n", + " vbt.Param(best_params.get_level_values(\"slow_window\"), level=0),\n", + " data.index.freq\n", + ")\n", + "test_perf" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ee16a258-2788-402b-bf2b-c065254c1c3e", + "metadata": {}, + "outputs": [], + "source": [ + "def get_index_sharpe(index):\n", + " return data.loc[index].run(\"from_holding\").sharpe_ratio\n", + "\n", + "index_slices.xs(\"test\", level=\"set\").apply(get_index_sharpe)" + ] + }, + { + "cell_type": "markdown", + "id": "dd73c429-db22-40ff-8a34-f773ae0e7762", + "metadata": {}, + "source": [ + "#### Row stacking" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e890c631-33b8-4822-914d-35432379cedf", + "metadata": {}, + "outputs": [], + "source": [ + "block_size = int(3.15 * len(data.index) ** (1 / 3))\n", + "block_splitter = vbt.Splitter.from_rolling(\n", + " data.index, \n", + " length=block_size, \n", + " offset=1,\n", + " offset_anchor=\"prev_start\"\n", + ")\n", + "block_splitter.n_splits" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a04c2d4-c53a-4a42-9cf1-0f912b7df08e", + "metadata": {}, + "outputs": [], + "source": [ + "size = 
int(block_splitter.n_splits / block_size)\n", + "sample_splitter = block_splitter.shuffle_splits(size=size, replace=True)\n", + "sample_splitter.plot().show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "42ae502d-3e72-458c-9d8a-09d475968013", + "metadata": {}, + "outputs": [], + "source": [ + "returns = data.returns\n", + "sample_rets = sample_splitter.take(returns, into=\"stacked\", stack_axis=0)\n", + "sample_rets" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "59eb80cc-0738-4024-873b-98009964c5d4", + "metadata": {}, + "outputs": [], + "source": [ + "sample_rets.index = data.index[:len(sample_rets)]\n", + "sample_cumrets = data.close[0] * (sample_rets + 1).cumprod()\n", + "sample_cumrets.vbt.plot().show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "78e88fe1-8433-4ce8-9beb-4fc24878745a", + "metadata": {}, + "outputs": [], + "source": [ + "samples_rets_list = []\n", + "for i in vbt.ProgressBar(range(1000)):\n", + " sample_spl = block_splitter.shuffle_splits(size=size, replace=True)\n", + " sample_rets = sample_spl.take(returns, into=\"stacked\", stack_axis=0)\n", + " sample_rets.index = returns.index[:len(sample_rets)]\n", + " sample_rets.name = i\n", + " samples_rets_list.append(sample_rets)\n", + "sample_rets_stacked = pd.concat(samples_rets_list, axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e909ffcc-d18f-4351-9626-714c81c18321", + "metadata": {}, + "outputs": [], + "source": [ + "sample_sharpe = sample_rets_stacked.vbt.returns.sharpe_ratio()\n", + "sample_sharpe.vbt.boxplot(horizontal=True).show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0815456c-9b75-4ab8-b499-faa9947406ea", + "metadata": {}, + "outputs": [], + "source": [ + "sample_sharpe.quantile(0.025), sample_sharpe.quantile(0.975)" + ] + }, + { + "cell_type": "markdown", + "id": "1fee36b8-a009-4b73-aeaa-18f918c92c55", + "metadata": {}, + "source": 
[ + "### Applying" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b8ee38db-2b98-488c-b53c-f539e0bccedb", + "metadata": {}, + "outputs": [], + "source": [ + "splitter.apply(\n", + " get_total_return,\n", + " vbt.Takeable(data.close),\n", + " merge_func=\"concat\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6d86ce45-9b6b-43e2-8e2e-79944cd4f413", + "metadata": {}, + "outputs": [], + "source": [ + "splitter.apply(\n", + " get_total_return,\n", + " vbt.RepFunc(lambda range_: data.close[range_]),\n", + " merge_func=\"concat\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7ae1a59f-4466-4383-8a51-668c24f4321e", + "metadata": {}, + "outputs": [], + "source": [ + "def get_total_return(range_, data):\n", + " return data.returns[range_].vbt.returns.total()\n", + "\n", + "splitter.apply(\n", + " get_total_return,\n", + " vbt.Rep(\"range_\"),\n", + " data,\n", + " merge_func=\"concat\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "35f59608-ee6f-4e16-a9a7-ad9dcaaab0cd", + "metadata": {}, + "outputs": [], + "source": [ + "def get_total_return(data):\n", + " return data.returns.vbt.returns.total()\n", + "\n", + "splitter.apply(\n", + " get_total_return,\n", + " vbt.Takeable(data),\n", + " merge_func=\"concat\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "11b5fa1d-a6a7-4703-a897-be517ecc396b", + "metadata": {}, + "outputs": [], + "source": [ + "splitter.apply(\n", + " get_total_return,\n", + " vbt.Takeable(data),\n", + " set_group_by=True,\n", + " merge_func=\"concat\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "62816646-c23f-4c12-8dfb-c0e327badeb6", + "metadata": {}, + "outputs": [], + "source": [ + "splitter.apply(\n", + " get_total_return,\n", + " vbt.Takeable(data),\n", + " split=[2020, 2021],\n", + " set_=\"train\",\n", + " merge_func=\"concat\"\n", + ")" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "id": "f8f03eb7-2835-45c7-9879-7b951c7a4647", + "metadata": {}, + "outputs": [], + "source": [ + "train_perf = splitter.apply(\n", + " sma_crossover_perf,\n", + " vbt.Takeable(data),\n", + " vbt.Param(np.arange(5, 50), condition=\"x < slow_window\"),\n", + " vbt.Param(np.arange(5, 50)),\n", + " _execute_kwargs=dict(\n", + " clear_cache=50,\n", + " collect_garbage=50\n", + " ),\n", + " set_=\"train\",\n", + " merge_func=\"concat\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0ebb869c-1edf-491f-ad26-bcba207c0f26", + "metadata": {}, + "outputs": [], + "source": [ + "train_perf" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "261c2dd1-2f5e-480e-a2c8-7593ac26771b", + "metadata": {}, + "outputs": [], + "source": [ + "best_params = train_perf.groupby(\"split_year\").idxmax()\n", + "best_params = train_perf[best_params].index\n", + "train_perf[best_params]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1749022b-1d3c-4ccb-b6a1-9fe0a06e52a5", + "metadata": {}, + "outputs": [], + "source": [ + "best_fast_windows = best_params.get_level_values(\"fast_window\")\n", + "best_slow_windows = best_params.get_level_values(\"slow_window\")\n", + "\n", + "test_perf = splitter.apply(\n", + " sma_crossover_perf,\n", + " vbt.Takeable(data),\n", + " vbt.RepFunc(lambda split_idx: best_fast_windows[split_idx]),\n", + " vbt.RepFunc(lambda split_idx: best_slow_windows[split_idx]),\n", + " set_=\"test\",\n", + " merge_func=\"concat\"\n", + ")\n", + "test_perf" + ] + }, + { + "cell_type": "markdown", + "id": "432e35f6-98b2-4278-9616-54a2206eb182", + "metadata": {}, + "source": [ + "#### Iteration schemes" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e079d58d-f171-41ef-b685-80b502cb31b3", + "metadata": {}, + "outputs": [], + "source": [ + "def cv_sma_crossover(\n", + " data, \n", + " fast_windows, \n", + " slow_windows, \n", + " 
split_idx,\n", + " set_idx,\n", + " train_perf_list\n", + "):\n", + " if set_idx == 0:\n", + " train_perf = sma_crossover_perf(\n", + " data,\n", + " vbt.Param(fast_windows, condition=\"x < slow_window\"),\n", + " vbt.Param(slow_windows),\n", + " _execute_kwargs=dict(\n", + " clear_cache=50,\n", + " collect_garbage=50\n", + " )\n", + " )\n", + " train_perf_list.append(train_perf)\n", + " best_params = train_perf.idxmax()\n", + " return train_perf[[best_params]]\n", + " else:\n", + " train_perf = train_perf_list[split_idx]\n", + " best_params = train_perf.idxmax()\n", + " test_perf = sma_crossover_perf(\n", + " data,\n", + " vbt.Param([best_params[0]]),\n", + " vbt.Param([best_params[1]]),\n", + " )\n", + " return test_perf\n", + " \n", + "train_perf_list = []\n", + "cv_perf = splitter.apply(\n", + " cv_sma_crossover,\n", + " vbt.Takeable(data),\n", + " np.arange(5, 50),\n", + " np.arange(5, 50),\n", + " vbt.Rep(\"split_idx\"),\n", + " vbt.Rep(\"set_idx\"),\n", + " train_perf_list,\n", + " iteration=\"set_major\",\n", + " merge_func=\"concat\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c7f1f4a1-fe05-44c2-b2c3-fcbb432bdeaa", + "metadata": {}, + "outputs": [], + "source": [ + "train_perf = pd.concat(train_perf_list, keys=splitter.split_labels)\n", + "train_perf" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dd134bf0-8c5d-4365-871b-a5a0aa232e4b", + "metadata": {}, + "outputs": [], + "source": [ + "cv_perf" + ] + }, + { + "cell_type": "markdown", + "id": "8da1d36b-6b51-4429-be5c-ed0acbcfdf24", + "metadata": {}, + "source": [ + "#### Merging" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "23da7a75-db32-451d-8a7d-8bf729584e99", + "metadata": {}, + "outputs": [], + "source": [ + "def get_entries_and_exits(data, fast_window, slow_window):\n", + " fast_sma = data.run(\"sma\", fast_window, short_name=\"fast_sma\")\n", + " slow_sma = data.run(\"sma\", slow_window, 
short_name=\"slow_sma\")\n", + " entries = fast_sma.real_crossed_above(slow_sma)\n", + " exits = fast_sma.real_crossed_below(slow_sma)\n", + " return entries, exits\n", + "\n", + "entries, exits = splitter.apply(\n", + " get_entries_and_exits,\n", + " vbt.Takeable(data),\n", + " 20,\n", + " 30,\n", + " merge_func=\"column_stack\"\n", + ")\n", + "\n", + "print(entries)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2822ee69-5a35-4700-b025-442d2e9396fc", + "metadata": {}, + "outputs": [], + "source": [ + "entries, exits = splitter.apply(\n", + " get_entries_and_exits,\n", + " vbt.Takeable(data),\n", + " 20,\n", + " 30,\n", + " merge_all=False,\n", + " merge_func=\"row_stack\"\n", + ")\n", + "\n", + "entries.loc[2018]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "80266dbb-b700-4389-8956-5a253b18b816", + "metadata": {}, + "outputs": [], + "source": [ + "def get_signal_count(*args, **kwargs):\n", + " entries, exits = get_entries_and_exits(*args, **kwargs)\n", + " return entries.vbt.signals.total(), exits.vbt.signals.total()\n", + "\n", + "entry_count, exit_count = splitter.apply(\n", + " get_signal_count,\n", + " vbt.Takeable(data),\n", + " 20,\n", + " 30,\n", + " merge_func=\"concat\",\n", + " attach_bounds=\"index\"\n", + ")\n", + "entry_count" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "56842a7c-290c-41ca-8e39-2fe148b9a352", + "metadata": {}, + "outputs": [], + "source": [ + "def plot_entries_and_exits(results, data, keys):\n", + " set_labels = keys.get_level_values(\"set\")\n", + " fig = data.plot(plot_volume=False)\n", + " train_seen = False\n", + " test_seen = False\n", + "\n", + " for i in range(len(results)):\n", + " entries, exits = results[i]\n", + " set_label = set_labels[i]\n", + " if set_label == \"train\":\n", + " entries.vbt.signals.plot_as_entries(\n", + " data.close,\n", + " trace_kwargs=dict(\n", + " marker=dict(color=\"limegreen\"), \n", + " name=f\"Entries 
({set_label})\",\n", + " legendgroup=f\"Entries ({set_label})\",\n", + " showlegend=not train_seen\n", + " ),\n", + " fig=fig\n", + " ),\n", + " exits.vbt.signals.plot_as_exits(\n", + " data.close,\n", + " trace_kwargs=dict(\n", + " marker=dict(color=\"orange\"), \n", + " name=f\"Exits ({set_label})\",\n", + " legendgroup=f\"Exits ({set_label})\",\n", + " showlegend=not train_seen\n", + " ),\n", + " fig=fig\n", + " )\n", + " train_seen = True\n", + " else:\n", + " entries.vbt.signals.plot_as_entries(\n", + " data.close,\n", + " trace_kwargs=dict(\n", + " marker=dict(color=\"skyblue\"), \n", + " name=f\"Entries ({set_label})\",\n", + " legendgroup=f\"Entries ({set_label})\",\n", + " showlegend=not test_seen\n", + " ),\n", + " fig=fig\n", + " ),\n", + " exits.vbt.signals.plot_as_exits(\n", + " data.close,\n", + " trace_kwargs=dict(\n", + " marker=dict(color=\"magenta\"), \n", + " name=f\"Exits ({set_label})\",\n", + " legendgroup=f\"Entries ({set_label})\",\n", + " showlegend=not test_seen\n", + " ),\n", + " fig=fig\n", + " )\n", + " test_seen = True\n", + " return fig\n", + "\n", + "splitter.apply(\n", + " get_entries_and_exits,\n", + " vbt.Takeable(data),\n", + " 20,\n", + " 30,\n", + " merge_func=plot_entries_and_exits,\n", + " merge_kwargs=dict(data=data, keys=vbt.Rep(\"keys\")),\n", + ").show_svg()" + ] + }, + { + "cell_type": "markdown", + "id": "93378394-5bd8-49e4-ad0a-e9d0eb25f58d", + "metadata": {}, + "source": [ + "#### Decorators" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "63999a23-9b3e-41e9-ba9e-aefc4775c60c", + "metadata": {}, + "outputs": [], + "source": [ + "@vbt.split(splitter=splitter)\n", + "def get_split_total_return(data):\n", + " return data.returns.vbt.returns.total()\n", + "\n", + "get_split_total_return(vbt.Takeable(data))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "47f5be24-bd93-4f16-9849-2774d7d29617", + "metadata": {}, + "outputs": [], + "source": [ + "def get_total_return(data):\n", 
+ " return data.returns.vbt.returns.total()\n", + "\n", + "get_split_total_return = vbt.split(\n", + " get_total_return, \n", + " splitter=splitter\n", + ")\n", + "get_split_total_return(vbt.Takeable(data))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e508211b-3517-4925-ae8d-f248e94579d3", + "metadata": {}, + "outputs": [], + "source": [ + "@vbt.split\n", + "def get_split_total_return(data):\n", + " return data.returns.vbt.returns.total()\n", + "\n", + "get_split_total_return(vbt.Takeable(data), _splitter=splitter)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bfaab288-43f0-454e-88d2-4b8b36b3932c", + "metadata": {}, + "outputs": [], + "source": [ + "get_split_total_return(\n", + " vbt.Takeable(data.loc[\"2020\":\"2020\"]), \n", + " _splitter=\"from_rolling\", \n", + " _splitter_kwargs=dict(length=\"30d\")\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "20d941b7-4419-4dfb-81f0-72f1b0839888", + "metadata": {}, + "outputs": [], + "source": [ + "get_total_return_by_month = vbt.split(\n", + " get_total_return,\n", + " splitter=\"from_grouper\", \n", + " splitter_kwargs=dict(by=vbt.RepEval(\"index.to_period('M')\")),\n", + " takeable_args=[\"data\"]\n", + ")\n", + "\n", + "get_total_return_by_month(data.loc[\"2020\":\"2020\"])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d61a83eb-c72c-4257-a195-a34a82378f56", + "metadata": {}, + "outputs": [], + "source": [ + "cv_sma_crossover_perf = vbt.split(\n", + " sma_crossover_perf, \n", + " splitter=\"from_single\",\n", + " splitter_kwargs=dict(split=0.6, set_labels=[\"train\", \"test\"]),\n", + " takeable_args=[\"data\"],\n", + " merge_func=\"concat\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "27387cd2-f223-4cbd-b2e5-65aef60e512c", + "metadata": {}, + "outputs": [], + "source": [ + "train_perf = cv_sma_crossover_perf(\n", + " data.loc[\"2020\":\"2021\"],\n", + " 
vbt.Param(np.arange(5, 50), condition=\"x < slow_window\"),\n", + " vbt.Param(np.arange(5, 50)),\n", + " p_execute_kwargs=dict(\n", + " clear_cache=50,\n", + " collect_garbage=50\n", + " ),\n", + " _forward_kwargs_as={\n", + " \"p_execute_kwargs\": \"_execute_kwargs\"\n", + " },\n", + " _apply_kwargs=dict(set_=\"train\")\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9878d18a-7b05-4b83-9ca5-8d291dae2d5d", + "metadata": {}, + "outputs": [], + "source": [ + "train_perf" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9b4665fe-f78c-4257-922d-f05b351171ec", + "metadata": {}, + "outputs": [], + "source": [ + "test_perf = cv_sma_crossover_perf(\n", + " data.loc[\"2020\":\"2021\"],\n", + " train_perf.idxmax()[0],\n", + " train_perf.idxmax()[1],\n", + " _apply_kwargs=dict(set_=\"test\")\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7a921ee5-4dd4-4c8e-be52-430a912059a6", + "metadata": {}, + "outputs": [], + "source": [ + "test_perf" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8ebf0a46-778d-4186-bc7a-dfa4e897c872", + "metadata": {}, + "outputs": [], + "source": [ + "@njit(nogil=True)\n", + "def sma_crossover_perf_nb(close, fast_window, slow_window, ann_factor):\n", + " fast_sma = vbt.nb.ma_nb(close, fast_window)\n", + " slow_sma = vbt.nb.ma_nb(close, slow_window)\n", + " entries = vbt.nb.crossed_above_nb(fast_sma, slow_sma)\n", + " exits = vbt.nb.crossed_above_nb(slow_sma, fast_sma)\n", + " sim_out = vbt.pf_nb.from_signals_nb(\n", + " target_shape=close.shape,\n", + " group_lens=np.full(close.shape[1], 1),\n", + " close=close,\n", + " long_entries=entries,\n", + " short_entries=exits,\n", + " save_returns=True\n", + " )\n", + " return vbt.ret_nb.sharpe_ratio_nb(\n", + " sim_out.in_outputs.returns, \n", + " ann_factor\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "01f161db-2548-4627-a385-40a2d84c9034", + "metadata": {}, 
+ "outputs": [], + "source": [ + "sma_crossover_perf_nb(vbt.to_2d_array(data.close), 20, 30, 365)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9e35b428-f5e6-4000-afc7-8ef9bd3aeffc", + "metadata": {}, + "outputs": [], + "source": [ + "cv_sma_crossover_perf = vbt.cv_split(\n", + " sma_crossover_perf_nb,\n", + " splitter=\"from_rolling\",\n", + " splitter_kwargs=dict(\n", + " length=360, \n", + " split=0.5, \n", + " set_labels=[\"train\", \"test\"]\n", + " ),\n", + " takeable_args=[\"close\"],\n", + " merge_func=\"concat\",\n", + " parameterized_kwargs=dict(\n", + " engine=\"dask\", \n", + " chunk_len=\"auto\",\n", + " )\n", + ")\n", + "\n", + "grid_perf, best_perf = cv_sma_crossover_perf(\n", + " vbt.to_2d_array(data.close),\n", + " vbt.Param(np.arange(5, 50), condition=\"x < slow_window\"),\n", + " vbt.Param(np.arange(5, 50)),\n", + " pd.Timedelta(days=365) // data.index.freq,\n", + " _merge_kwargs=dict(wrapper=data.symbol_wrapper),\n", + " _index=data.index,\n", + " _return_grid=\"all\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ca4288fd-79ea-4b0b-a007-7ca2d8d85472", + "metadata": {}, + "outputs": [], + "source": [ + "grid_perf" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "75fd72d5-7004-46a5-a47d-d45ee3b95381", + "metadata": {}, + "outputs": [], + "source": [ + "best_perf" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "87e59f91-a076-4b75-a1a6-64ad04f35cbe", + "metadata": {}, + "outputs": [], + "source": [ + "best_train_perf = best_perf.xs(\"train\", level=\"set\")\n", + "best_test_perf = best_perf.xs(\"test\", level=\"set\")\n", + "best_train_perf.corr(best_test_perf)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e548f6f6-ac62-46c3-b6ef-40467a4564fe", + "metadata": {}, + "outputs": [], + "source": [ + "param_cross_set_corr = grid_perf\\\n", + " .unstack(\"set\")\\\n", + " .groupby([\"fast_window\", \"slow_window\"])\\\n", 
+ " .apply(lambda x: x[\"train\"].corr(x[\"test\"]))\n", + "param_cross_set_corr.vbt.heatmap(symmetric=True).show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4a40f160-6159-4b52-bc15-13f68c36be31", + "metadata": {}, + "outputs": [], + "source": [ + "grid_test_perf = grid_perf.xs(\"test\", level=\"set\")\n", + "grid_df = grid_test_perf.rename(\"grid\").reset_index()\n", + "del grid_df[\"fast_window\"]\n", + "del grid_df[\"slow_window\"]\n", + "best_df = best_test_perf.rename(\"best\").reset_index()\n", + "del best_df[\"fast_window\"]\n", + "del best_df[\"slow_window\"]\n", + "merged_df = pd.merge(grid_df, best_df, on=[\"split\", \"symbol\"])\n", + "grid_better_mask = merged_df[\"grid\"] > merged_df[\"best\"]\n", + "grid_better_mask.index = grid_test_perf.index\n", + "grid_better_cnt = grid_better_mask.groupby([\"split\", \"symbol\"]).mean()\n", + "grid_better_cnt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c0f127bb-3b1a-4425-a1bc-ec6be6a27f3e", + "metadata": {}, + "outputs": [], + "source": [ + "cv_splitter = cv_sma_crossover_perf(\n", + " _index=data.index, \n", + " _return_splitter=True\n", + ")\n", + "stacked_close = cv_splitter.take(\n", + " data.close, \n", + " into=\"reset_stacked\",\n", + " set_=\"test\"\n", + ")\n", + "hold_pf = vbt.Portfolio.from_holding(stacked_close, freq=\"daily\")\n", + "hold_perf = hold_pf.sharpe_ratio\n", + "hold_perf" + ] + }, + { + "cell_type": "markdown", + "id": "935ca582-ef09-40f9-b6ff-c303c98989b1", + "metadata": {}, + "source": [ + "### Modeling" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0f9cc747-d0b4-470f-a262-7cfc7ad1d62e", + "metadata": {}, + "outputs": [], + "source": [ + "X = data.run(\"talib\")\n", + "X.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7fdb5b1c-e4cc-4472-bdb1-4629492306e7", + "metadata": {}, + "outputs": [], + "source": [ + "trendlb = data.run(\"trendlb\", 1.0, 0.5, 
mode=\"binary\")\n", + "y = trendlb.labels\n", + "y.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ce1e42f0-7f1f-474c-865b-2fabd6dfc907", + "metadata": {}, + "outputs": [], + "source": [ + "X = X.replace([-np.inf, np.inf], np.nan)\n", + "invalid_column_mask = X.isnull().all(axis=0) | (X.nunique() == 1)\n", + "X = X.loc[:, ~invalid_column_mask]\n", + "invalid_row_mask = X.isnull().any(axis=1) | y.isnull()\n", + "X = X.loc[~invalid_row_mask]\n", + "y = y.loc[~invalid_row_mask]\n", + "X.shape, y.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bf1fe438-bf27-4fc3-9d9b-5c71a7b046ae", + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.ensemble import RandomForestClassifier\n", + "\n", + "clf = RandomForestClassifier(random_state=42)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f64bbb84-5340-4554-a78c-5a58bdbe5010", + "metadata": {}, + "outputs": [], + "source": [ + "cv = vbt.SplitterCV(\n", + " \"from_expanding\", \n", + " min_length=360, \n", + " offset=180, \n", + " split=-180,\n", + " set_labels=[\"train\", \"test\"]\n", + ")\n", + "\n", + "cv_splitter = cv.get_splitter(X)\n", + "cv_splitter.plot().show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5c26f554-c0ff-4973-a625-d95549bbbe36", + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.model_selection import cross_val_score\n", + "\n", + "cross_val_score(clf, X, y, cv=cv, scoring=\"accuracy\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "580ae7c7-d108-454f-903f-937c8dcf058e", + "metadata": {}, + "outputs": [], + "source": [ + "X_slices = cv_splitter.take(X)\n", + "y_slices = cv_splitter.take(y)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b9186a59-798a-404a-b763-f6360d098dc9", + "metadata": {}, + "outputs": [], + "source": [ + "test_labels = []\n", + "test_preds = []\n", + "for split in 
X_slices.index.unique(level=\"split\"):\n", + " X_train_slice = X_slices[(split, \"train\")]\n", + " y_train_slice = y_slices[(split, \"train\")]\n", + " X_test_slice = X_slices[(split, \"test\")]\n", + " y_test_slice = y_slices[(split, \"test\")]\n", + " slice_clf = clf.fit(X_train_slice, y_train_slice)\n", + " test_pred = slice_clf.predict(X_test_slice)\n", + " test_pred = pd.Series(test_pred, index=y_test_slice.index)\n", + " test_labels.append(y_test_slice)\n", + " test_preds.append(test_pred)\n", + " \n", + "test_labels = pd.concat(test_labels).rename(\"labels\")\n", + "test_preds = pd.concat(test_preds).rename(\"preds\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "411d961d-7e24-427a-92e1-07c6fbc52f4d", + "metadata": {}, + "outputs": [], + "source": [ + "data.close.vbt.overlay_with_heatmap(test_labels).show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a904c9f0-9514-4c17-bc14-a2d9980debe6", + "metadata": {}, + "outputs": [], + "source": [ + "data.close.vbt.overlay_with_heatmap(test_preds).show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c0592fce-7a89-4fc6-9206-1ca7b3a51700", + "metadata": {}, + "outputs": [], + "source": [ + "pf = vbt.Portfolio.from_signals(\n", + " data.close[test_preds.index], \n", + " test_preds == 1, \n", + " test_preds == 0, \n", + " direction=\"both\"\n", + ")\n", + "pf.stats()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "70cc579d-fb48-41ff-935a-1aa035c8a1f8", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" + } + }, + "nbformat": 4, + 
"nbformat_minor": 5 +} diff --git a/to_explore/notebooks/MTFAnalysis.ipynb b/to_explore/notebooks/MTFAnalysis.ipynb new file mode 100644 index 0000000..5766858 --- /dev/null +++ b/to_explore/notebooks/MTFAnalysis.ipynb @@ -0,0 +1,1525 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "257110ac-fb97-4051-adfe-81c1130b8cd2", + "metadata": {}, + "source": [ + "# MTF analysis" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "957fc6b1-f3b9-4778-ab72-28921aed1449", + "metadata": {}, + "outputs": [], + "source": [ + "from vectorbtpro import *\n", + "# whats_imported()\n", + "\n", + "vbt.settings.set_theme(\"dark\")" + ] + }, + { + "cell_type": "markdown", + "id": "c705f5df-b3ab-4a8f-8ea8-952d0cc6cd81", + "metadata": {}, + "source": [ + "## Data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8356ce73-60e4-469e-b3b3-2e414b82eb5e", + "metadata": {}, + "outputs": [], + "source": [ + "# h1_data = vbt.BinanceData.pull(\n", + "# \"BTCUSDT\", \n", + "# start=\"2020-01-01 UTC\", \n", + "# end=\"2021-01-01 UTC\",\n", + "# timeframe=\"1h\"\n", + "# )\n", + "\n", + "# h1_data.to_hdf()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "06507a95-fc57-49db-9b97-62ccc4184055", + "metadata": {}, + "outputs": [], + "source": [ + "h1_data = vbt.HDFData.pull('BinanceData.h5')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fab718a-09f2-4640-b477-dd681753ac53", + "metadata": {}, + "outputs": [], + "source": [ + "h1_data.wrapper.index" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2a930576-729d-4c06-a9a3-6a8ad3de34e6", + "metadata": {}, + "outputs": [], + "source": [ + "h1_resampler = h1_data.wrapper.get_resampler(\"1h\")\n", + "h1_resampler.index_difference(reverse=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d4124583-e303-4eda-bfb0-f97e3908ef91", + "metadata": {}, + "outputs": [], + "source": [ + "h1_data.wrapper.columns" + ] + 
}, + { + "cell_type": "code", + "execution_count": null, + "id": "f39bc0ee-6e5c-4c87-93a3-1b5a31fe86f4", + "metadata": {}, + "outputs": [], + "source": [ + "h1_ohlcv_data = h1_data[[\"Open\", \"High\", \"Low\", \"Close\", \"Volume\"]]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "461c8632-4e29-4b0d-8e85-72ea3b1754ae", + "metadata": {}, + "outputs": [], + "source": [ + "h4_ohlcv = h1_ohlcv_data.get().resample(\"4h\").agg({\n", + " \"Open\": \"first\",\n", + " \"High\": \"max\",\n", + " \"Low\": \"min\",\n", + " \"Close\": \"last\",\n", + " \"Volume\": \"sum\"\n", + "})\n", + "h4_ohlcv" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b84728de-a16b-417f-9ee8-ddafce9b24c3", + "metadata": {}, + "outputs": [], + "source": [ + "print(h1_ohlcv_data.get().iloc[:4])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "def713aa-9e46-488a-b100-83a563c5c30f", + "metadata": {}, + "outputs": [], + "source": [ + "print(h4_ohlcv.iloc[[0]])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8e0d8b54-f1b6-4ff0-851e-c0cb8b8c415a", + "metadata": {}, + "outputs": [], + "source": [ + "print(vbt.prettify(vbt.BinanceData.feature_config))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5fcdcdbd-4274-4078-9e8f-5b64c4cd1e46", + "metadata": {}, + "outputs": [], + "source": [ + "h1_data.use_feature_config_of(vbt.BinanceData)\n", + "\n", + "h4_data = h1_data.resample(\"4h\")\n", + "d1_data = h1_data.resample(\"1d\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5980b577-d6ee-484d-8faf-9db6647dd29b", + "metadata": {}, + "outputs": [], + "source": [ + "print(d1_data.get().iloc[[0, -1]])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "000ebb64-4228-4967-aaac-2dc88975da61", + "metadata": {}, + "outputs": [], + "source": [ + "print(vbt.BinanceData.pull(\n", + " \"BTCUSDT\", \n", + " start=\"2020-01-01 UTC\", \n", + " end=\"2021-01-01 
UTC\",\n", + " timeframe=\"1d\"\n", + ").get().iloc[[0, -1]])" + ] + }, + { + "cell_type": "markdown", + "id": "f47840ab-b31d-49b3-bf24-d6406a2b542c", + "metadata": {}, + "source": [ + "## Alignment" + ] + }, + { + "cell_type": "markdown", + "id": "0c0ebc21-981a-4c09-8b38-2339e204cf5d", + "metadata": {}, + "source": [ + "### Pandas" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "73b897e3-4f35-41b2-a5c1-8f1c4010c0c2", + "metadata": {}, + "outputs": [], + "source": [ + "h1_close = h1_data.get(\"Close\")\n", + "h4_close = h4_data.get(\"Close\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9dccb01e-8056-4dc0-b5f6-0be44e3f5099", + "metadata": {}, + "outputs": [], + "source": [ + "h1_close.iloc[:4]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0a3a3c1c-0747-4a18-9462-863162dcdddb", + "metadata": {}, + "outputs": [], + "source": [ + "h4_close.iloc[:1]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b1c22394-c1fc-4abc-8036-788848acbe92", + "metadata": {}, + "outputs": [], + "source": [ + "h1_h4_ratio = h1_close / h4_close\n", + "h1_h4_ratio.iloc[:4]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "acc7216c-0457-46a0-819d-a4675780f242", + "metadata": {}, + "outputs": [], + "source": [ + "h4_close_shifted = h4_close.shift()\n", + "h1_h4_ratio = h1_close / h4_close_shifted\n", + "h1_h4_ratio.iloc[:8]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e8dbbb29-8b79-4c8c-8ce8-5c2582434c95", + "metadata": {}, + "outputs": [], + "source": [ + "h1_h4_ratio.shift(-1).iloc[:8]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fd546cfa-559d-45a3-8e35-b2af75bd9df6", + "metadata": {}, + "outputs": [], + "source": [ + "h4_h1_close = h4_close.shift(1).resample(\"1h\").last().shift(-1).ffill()\n", + "h4_h1_close.iloc[:8]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": 
"ba830c23-074b-4ab9-ab4d-3b738a14be56", + "metadata": {}, + "outputs": [], + "source": [ + "fig = h1_close.rename(\"H1\").iloc[:16].vbt.plot()\n", + "h4_h1_close.rename(\"H4_H1\").iloc[:16].vbt.plot(fig=fig).show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6e8c73ee-5c05-4e95-89d0-ad195e138a96", + "metadata": {}, + "outputs": [], + "source": [ + "h1_h4_ratio = h1_close / h4_h1_close\n", + "h1_h4_ratio" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "023c506a-8e48-43e6-9244-9093c876edb0", + "metadata": {}, + "outputs": [], + "source": [ + "h1_open = h1_data.get(\"Open\")\n", + "h4_open = h4_data.get(\"Open\")\n", + "\n", + "h1_open.iloc[:8]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ca1bd16a-4157-43ba-be2f-61749ba83a3a", + "metadata": {}, + "outputs": [], + "source": [ + "h4_h1_open = h4_open.resample(\"1h\").first().ffill()\n", + "h4_h1_open.iloc[:8]" + ] + }, + { + "cell_type": "markdown", + "id": "ffcce240-8595-470d-b743-9b9f743f4543", + "metadata": {}, + "source": [ + "### VBT" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8e90d2b3-d4b0-4b4f-ae19-fc6e0260ac16", + "metadata": {}, + "outputs": [], + "source": [ + "h4_close.vbt.realign_closing(\"1h\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "328daf52-40c4-40a8-9614-ea83c702f429", + "metadata": {}, + "outputs": [], + "source": [ + "h4_open.vbt.realign_opening(\"1h\")" + ] + }, + { + "cell_type": "markdown", + "id": "fceafe15-dd02-495f-bbcb-7978981dbfc8", + "metadata": {}, + "source": [ + "#### Resampler" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d9a79ae2-812a-4693-b68f-4b20246f86d4", + "metadata": {}, + "outputs": [], + "source": [ + "h4_h1_resampler = h4_close.vbt.wrapper.get_resampler(\"1h\")\n", + "h4_h1_resampler.source_index" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "75260aef-922a-46d9-b2c2-268247e57c20", + 
"metadata": {}, + "outputs": [], + "source": [ + "h4_h1_resampler.target_index" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1bcb75a6-1808-4738-b746-80805817bd97", + "metadata": {}, + "outputs": [], + "source": [ + "h4_h1_resampler.source_freq" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "eb51ba6c-2d10-4f4e-9d5e-432d515d72c4", + "metadata": {}, + "outputs": [], + "source": [ + "h4_h1_resampler.target_freq" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ef759223-074b-4961-87f4-47d9ab48e31a", + "metadata": {}, + "outputs": [], + "source": [ + "pd_resampler = h4_close.resample(\"1h\")\n", + "vbt.Resampler.from_pd_resampler(pd_resampler)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "402d9361-3657-46de-bf30-5a57d3006c50", + "metadata": {}, + "outputs": [], + "source": [ + "resampler = vbt.Resampler.from_date_range(\n", + " source_index=h4_close.index,\n", + " source_freq=\"4h\",\n", + " start=\"2020-01-01 10:00:00\",\n", + " end=\"2020-01-01 22:00:00\",\n", + " freq=\"1h\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "52df63ec-3951-4ed7-a1ea-18628dc9a0c0", + "metadata": {}, + "outputs": [], + "source": [ + "h4_close.vbt.realign_closing(resampler)" + ] + }, + { + "cell_type": "markdown", + "id": "beef78cd-9046-456f-8484-14e1e04b3c2f", + "metadata": {}, + "source": [ + "#### Custom index" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "63ff2b4e-46b9-4ad3-8ef6-2f751ad7a9d8", + "metadata": {}, + "outputs": [], + "source": [ + "target_index = pd.Index([\n", + " \"2020-01-01\",\n", + " \"2020-02-01\",\n", + " \"2020-03-01\",\n", + " \"2020-04-01\",\n", + " \"2020-05-01\",\n", + " \"2020-06-01\",\n", + " \"2020-07-01\",\n", + " \"2020-08-01\",\n", + " \"2020-09-01\",\n", + " \"2020-10-01\",\n", + " \"2020-11-01\",\n", + " \"2020-12-01\",\n", + " \"2021-01-01\"\n", + "])\n", + "resampler = 
vbt.Resampler(h4_close.index, target_index, target_freq=False)\n", + "h4_close.vbt.realign_closing(resampler)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e5b6de84-d1a8-4d16-9677-fd8e7d8e5b1f", + "metadata": {}, + "outputs": [], + "source": [ + "h4_close[h4_close.index < \"2020-09-01\"].iloc[-1]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3612f6b6-c960-4a10-b581-ccfbaf583cf8", + "metadata": {}, + "outputs": [], + "source": [ + "h4_open.vbt.realign_opening(resampler)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4fed1a9d-0867-49a1-bd20-4acf2d974e91", + "metadata": {}, + "outputs": [], + "source": [ + "h4_open[h4_open.index <= \"2020-08-01\"].iloc[-1]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "063cfaaf-045c-40f4-be79-c177196c884b", + "metadata": {}, + "outputs": [], + "source": [ + "target_index = pd.Index([\n", + " \"2020-01-01\",\n", + " \"2020-02-01\",\n", + "])\n", + "resampler = vbt.Resampler(h4_close.index, target_index, target_freq=False)\n", + "h4_close.vbt.realign_closing(resampler)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5f3fb1cf-8646-4c71-b38a-b7631d94907e", + "metadata": {}, + "outputs": [], + "source": [ + "resampler = vbt.Resampler(h4_close.index, target_index, target_freq=\"30d\")\n", + "h4_close.vbt.realign_closing(resampler)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "07f61c2b-ea6a-4b6f-9633-760ce35901c1", + "metadata": {}, + "outputs": [], + "source": [ + "h4_open.vbt.realign(\"2020-06-07 12:15:00\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "42601b68-e4ea-4b70-b302-4f6bb2025969", + "metadata": {}, + "outputs": [], + "source": [ + "h4_close.vbt.realign(\n", + " \"2020-06-07 12:15:00\", \n", + " source_rbound=True\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8a5685b5-4847-4e59-8463-e7690bcf84f7", + "metadata": 
{}, + "outputs": [], + "source": [ + "h4_high = h4_data.get(\"High\")\n", + "h4_high.vbt.realign(\n", + " target_index, \n", + " source_rbound=True\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "65eb1d9f-a7a6-4c03-89ec-68db9685c1d1", + "metadata": {}, + "outputs": [], + "source": [ + "h4_high.index[h4_high.index < \"2020-02-01\"][-1]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a9873388-768a-4ff5-a034-081779b3f879", + "metadata": {}, + "outputs": [], + "source": [ + "h4_high.vbt.realign(\n", + " target_index, \n", + " source_rbound=True,\n", + " target_rbound=True\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3be78c1c-1424-406c-b959-64bdc8cbd826", + "metadata": {}, + "outputs": [], + "source": [ + "resampler = vbt.Resampler(h4_high.index, target_index)\n", + "resampler.target_rbound_index" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0631eb39-d180-461f-93c6-3b9c4b5efce2", + "metadata": {}, + "outputs": [], + "source": [ + "resampler = vbt.Resampler(\n", + " h4_high.index, \n", + " target_index, \n", + " target_freq=pd.offsets.MonthBegin(1))\n", + "resampler.target_rbound_index" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b96efd81-4a14-4f65-8663-5d049b01a42b", + "metadata": {}, + "outputs": [], + "source": [ + "h4_high.vbt.realign(\n", + " resampler.replace(\n", + " target_index=resampler.target_rbound_index, \n", + " target_freq=False\n", + " ), \n", + " wrap_kwargs=dict(index=target_index)\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1e3f96e1-8161-4417-8201-5084aaec43ee", + "metadata": {}, + "outputs": [], + "source": [ + "h4_high.vbt.realign(\n", + " target_index, \n", + " freq=pd.offsets.MonthBegin(1),\n", + " target_rbound=\"pandas\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bac8e960-94ac-420a-81db-58db70a623d3", + "metadata": {}, + 
"outputs": [], + "source": [ + "h4_high[h4_high.index < \"2020-03-01\"].resample(vbt.offset(\"M\")).last()" + ] + }, + { + "cell_type": "markdown", + "id": "fa904b11-4aa9-4a3c-9990-4dfb17e5092f", + "metadata": {}, + "source": [ + "#### Numeric index" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "acfcad7a-b094-4378-a380-8dd5dc8bc4ea", + "metadata": {}, + "outputs": [], + "source": [ + "resampler = vbt.Resampler(\n", + " source_index=np.arange(len(h4_high)),\n", + " target_index=np.arange(len(h4_high))[::6],\n", + " source_freq=1,\n", + " target_freq=6\n", + ")\n", + "h4_high.vbt.realign(\n", + " resampler, \n", + " source_rbound=True,\n", + " target_rbound=True\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "d3ec6678-66b8-4a9d-92cf-a41ca96a9adb", + "metadata": {}, + "source": [ + "#### Forward filling" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3f27ab10-cba8-4c5f-97e6-e0533943795a", + "metadata": {}, + "outputs": [], + "source": [ + "min5_index = vbt.date_range(start=\"2020\", freq=\"5min\", periods=3)\n", + "min1_index = vbt.date_range(start=\"2020\", freq=\"1min\", periods=15)\n", + "min5_mask = pd.Series(False, index=min5_index)\n", + "min5_mask.iloc[0] = True\n", + "min5_mask.iloc[2] = True\n", + "\n", + "resampler = vbt.Resampler(min5_index, min1_index)\n", + "min1_mask = min5_mask.vbt.realign_closing(resampler)\n", + "min1_mask" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ab594857-24ea-4316-83e9-aa65c0be621c", + "metadata": {}, + "outputs": [], + "source": [ + "min1_mask = min5_mask.vbt.realign_closing(resampler, ffill=False)\n", + "min1_mask" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "053aeb5f-b8c1-46d5-849a-ec431e64ce8c", + "metadata": {}, + "outputs": [], + "source": [ + "min1_mask = min1_mask.fillna(False).astype(bool)\n", + "min1_mask" + ] + }, + { + "cell_type": "markdown", + "id": "0a220a77-0bd8-43f3-92ea-d532b883a793", + 
"metadata": {}, + "source": [ + "### Indicators" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5f1b6073-9e03-49cf-a212-83953d9910e3", + "metadata": {}, + "outputs": [], + "source": [ + "h4_sma = vbt.talib(\"SMA\").run(h4_data.get(\"Close\"), skipna=True).real\n", + "d1_sma = vbt.talib(\"SMA\").run(d1_data.get(\"Close\"), skipna=True).real\n", + "\n", + "h4_sma = h4_sma.ffill()\n", + "d1_sma = d1_sma.ffill()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "13ace4e8-4647-4f29-a516-142a94460a76", + "metadata": {}, + "outputs": [], + "source": [ + "resampler = vbt.Resampler(\n", + " d1_sma.index,\n", + " h4_sma.index,\n", + " source_freq=\"1d\",\n", + " target_freq=\"4h\"\n", + ")\n", + "d1_h4_sma = d1_sma.vbt.realign_closing(resampler)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "837efa98-5a41-411f-91e4-7e07db205703", + "metadata": {}, + "outputs": [], + "source": [ + "d1_sma[\"2020-12-30\":]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6f4fab4d-05ec-4c96-be22-7d383ad493bb", + "metadata": {}, + "outputs": [], + "source": [ + "d1_h4_sma[\"2020-12-30\":]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1e4de9a1-e7e4-49f4-b2d4-f5afe1895cae", + "metadata": {}, + "outputs": [], + "source": [ + "entries = h4_sma.vbt.crossed_above(d1_h4_sma)\n", + "exits = h4_sma.vbt.crossed_below(d1_h4_sma)\n", + "\n", + "def plot_date_range(date_range):\n", + " fig = h4_sma[date_range].rename(\"H4\").vbt.plot()\n", + " d1_h4_sma[date_range].rename(\"D1_H4\").vbt.plot(fig=fig)\n", + " entries[date_range].rename(\"Entry\").vbt.signals.plot_as_entries(\n", + " y=h4_sma[date_range], fig=fig)\n", + " exits[date_range].rename(\"Exit\").vbt.signals.plot_as_exits(\n", + " y=h4_sma[date_range], fig=fig)\n", + " return fig\n", + "\n", + "plot_date_range(slice(\"2020-02-01\", \"2020-03-01\")).show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + 
"id": "dfa7eae9-caff-49c9-b6cd-49216374cda5", + "metadata": {}, + "outputs": [], + "source": [ + "d1_open_sma = vbt.talib(\"SMA\").run(\n", + " d1_data.get(\"Open\"), \n", + " skipna=True\n", + ").real\n", + "d1_open_sma = d1_open_sma.ffill()\n", + "\n", + "d1_h4_open_sma = d1_open_sma.vbt.realign(\n", + " resampler, \n", + " source_rbound=False,\n", + " target_rbound=True,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ab960235-1f29-4558-9e3b-c8f1d4ade177", + "metadata": {}, + "outputs": [], + "source": [ + "d1_open_sma[\"2020-12-30\":]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5cd93da8-2e9e-46a3-b2cc-576561baca66", + "metadata": {}, + "outputs": [], + "source": [ + "d1_h4_open_sma[\"2020-12-30\":]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9b7e7a74-9d26-486d-b1e9-47844c4c0d26", + "metadata": {}, + "outputs": [], + "source": [ + "def generate_bandwidths(freqs):\n", + " bandwidths = []\n", + " for freq in freqs:\n", + " close = h1_data.resample(freq).get(\"Close\")\n", + " bbands = vbt.talib(\"BBANDS\").run(close, skipna=True)\n", + " upperband = bbands.upperband.ffill()\n", + " middleband = bbands.middleband.ffill()\n", + " lowerband = bbands.lowerband.ffill()\n", + " bandwidth = (upperband - lowerband) / middleband\n", + " bandwidths.append(bandwidth.vbt.realign_closing(\"1h\"))\n", + " df = pd.concat(bandwidths, axis=1, keys=pd.Index(freqs, name=\"timeframe\"))\n", + " return df.ffill()\n", + "\n", + "bandwidths = generate_bandwidths([\"1h\", \"4h\", \"1d\", \"7d\"])\n", + "print(bandwidths)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b5349999-74d9-4106-924d-b8bfa44557b3", + "metadata": {}, + "outputs": [], + "source": [ + "bandwidths.loc[:, ::-1].vbt.ts_heatmap().show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5474bcdd-1096-46a4-97da-cb16ea310c26", + "metadata": {}, + "outputs": [], + "source": [ + ">>> 
bbands = vbt.talib(\"BBANDS\").run(\n", + "... h1_data.get(\"Close\"), \n", + "... skipna=True, \n", + "... timeframe=[\"1h\", \"4h\", \"1d\", \"7d\"],\n", + "... broadcast_kwargs=dict(wrapper_kwargs=dict(freq=\"1h\"))\n", + "... )\n", + ">>> bandwidth = (bbands.upperband - bbands.lowerband) / bbands.middleband\n", + ">>> print(bandwidths)" + ] + }, + { + "cell_type": "markdown", + "id": "d0ab34e7-6b65-420a-b7ad-41626267a852", + "metadata": {}, + "source": [ + "### Testing" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3d012722-e263-4361-a2be-f77fd57ee53c", + "metadata": {}, + "outputs": [], + "source": [ + "def generate_signals(data, freq, fast_window, slow_window):\n", + " open_price = data.get(\"Open\").resample(freq).first()\n", + " fast_sma = vbt.talib(\"SMA\")\\\n", + " .run(\n", + " open_price, \n", + " fast_window, \n", + " skipna=True, \n", + " short_name=\"fast_sma\"\n", + " )\\\n", + " .real.ffill()\\\n", + " .vbt.realign(data.wrapper.index)\n", + " slow_sma = vbt.talib(\"SMA\")\\\n", + " .run(\n", + " open_price, \n", + " slow_window, \n", + " skipna=True, \n", + " short_name=\"slow_sma\"\n", + " )\\\n", + " .real.ffill()\\\n", + " .vbt.realign(data.wrapper.index)\n", + " entries = fast_sma.vbt.crossed_above(slow_sma)\n", + " exits = fast_sma.vbt.crossed_below(slow_sma)\n", + " return entries, exits\n", + "\n", + "fast_window = [10, 20]\n", + "slow_window = [20, 30]\n", + "h1_entries, h1_exits = generate_signals(h1_data, \"1h\", fast_window, slow_window)\n", + "h4_entries, h4_exits = generate_signals(h1_data, \"4h\", fast_window, slow_window)\n", + "d1_entries, d1_exits = generate_signals(h1_data, \"1d\", fast_window, slow_window)\n", + "\n", + "entries = pd.concat(\n", + " (h1_entries, h4_entries, d1_entries), \n", + " axis=1, \n", + " keys=pd.Index([\"1h\", \"4h\", \"1d\"], name=\"timeframe\")\n", + ")\n", + "exits = pd.concat(\n", + " (h1_exits, h4_exits, d1_exits), \n", + " axis=1, \n", + " keys=pd.Index([\"1h\", \"4h\", 
\"1d\"], name=\"timeframe\")\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "be4a1fb7-231e-42a3-b113-a1e71173c96d", + "metadata": {}, + "outputs": [], + "source": [ + "(entries.astype(int) - exits.astype(int))\\\n", + " .resample(\"1d\").sum()\\\n", + " .vbt.ts_heatmap(\n", + " trace_kwargs=dict(\n", + " colorscale=[\"#ef553b\", \"rgba(0, 0, 0, 0)\", \"#17becf\"],\n", + " colorbar=dict(\n", + " tickvals=[-1, 0, 1], \n", + " ticktext=[\"Exit\", \"\", \"Entry\"]\n", + " )\n", + " )\n", + " ).show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8cbd506a-8911-400e-bbe3-fc439ae5e31d", + "metadata": {}, + "outputs": [], + "source": [ + "pf = vbt.Portfolio.from_signals(\n", + " h1_data,\n", + " entries,\n", + " exits,\n", + " sl_stop=0.1,\n", + " freq=\"1h\"\n", + ")\n", + "\n", + "pf.orders.count()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "79f8734e-cfd3-4bb3-836f-5e4c96af1078", + "metadata": {}, + "outputs": [], + "source": [ + "pf.sharpe_ratio" + ] + }, + { + "cell_type": "markdown", + "id": "594dd6b4-814a-459a-9a68-a004471cbf55", + "metadata": {}, + "source": [ + "## Aggregation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e1a727a9-be21-4859-81c7-f660932dbfef", + "metadata": {}, + "outputs": [], + "source": [ + "ms_data = h1_data.resample(\"M\")\n", + "ms_data.get(\"Low\") / ms_data.get(\"High\") - 1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ba41908f-77bb-41fd-92dc-3ddfb66660c7", + "metadata": {}, + "outputs": [], + "source": [ + "h1_high = h1_data.get(\"High\")\n", + "h1_low = h1_data.get(\"Low\")\n", + "ms_high = h1_high.resample(vbt.offset(\"M\")).max()\n", + "ms_low = h1_low.resample(vbt.offset(\"M\")).min()\n", + "ms_low / ms_high - 1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4054b7a6-002d-40f7-8d81-906491355f2c", + "metadata": {}, + "outputs": [], + "source": [ + "ms_high = 
h1_high.vbt.resample_apply(\"M\", vbt.nb.max_reduce_nb)\n", + "ms_low = h1_low.vbt.resample_apply(\"M\", vbt.nb.min_reduce_nb)\n", + "ms_low / ms_high - 1" + ] + }, + { + "cell_type": "markdown", + "id": "5b8bc23f-95a4-4dd9-92cd-87e997979eb9", + "metadata": {}, + "source": [ + "### Custom index" + ] + }, + { + "cell_type": "markdown", + "id": "49646098-0314-4c0d-8ed2-361dc5316346", + "metadata": {}, + "source": [ + "#### Using target index" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "286732d7-1ae7-4bd9-9a8e-8a7c21e7caab", + "metadata": {}, + "outputs": [], + "source": [ + "target_index = pd.Index([\n", + " \"2020-01-01\",\n", + " \"2020-02-01\",\n", + "])\n", + "h1_high.vbt.resample_to_index(\n", + " target_index, \n", + " vbt.nb.max_reduce_nb\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6c8f7530-3669-4b50-9cd8-933c638b47f9", + "metadata": {}, + "outputs": [], + "source": [ + "target_rbound_index = vbt.Resampler.get_rbound_index(\n", + " target_index, \n", + " pd.offsets.MonthBegin(1)\n", + ")\n", + "h1_high.vbt.resample_to_index(\n", + " target_index.append(target_rbound_index[[-1]]), \n", + " vbt.nb.max_reduce_nb\n", + ").iloc[:-1]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "41025b95-36f3-40cb-ac84-8fd0168ef481", + "metadata": {}, + "outputs": [], + "source": [ + "h1_high[:\"2020-03-01\"].resample(vbt.offset(\"M\")).max().iloc[:-1]" + ] + }, + { + "cell_type": "markdown", + "id": "a2f6f226-2cdd-48b6-9a5c-9379dd2c6ec9", + "metadata": {}, + "source": [ + "#### Using group-by" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6ae265d2-195c-4045-9dcd-4180bb7ddb7b", + "metadata": {}, + "outputs": [], + "source": [ + "pd_resampler = h1_high.resample(vbt.offset(\"M\"))\n", + "ms_high = h1_high.vbt.groupby_apply(pd_resampler, vbt.nb.max_reduce_nb)\n", + "ms_low = h1_low.vbt.groupby_apply(pd_resampler, vbt.nb.min_reduce_nb)\n", + "ms_low / ms_high - 1" + ] + 
}, + { + "cell_type": "code", + "execution_count": null, + "id": "b9a38ab9-18a0-4716-84cf-d276c945f2fd", + "metadata": {}, + "outputs": [], + "source": [ + "target_lbound_index = pd.Index([\n", + " \"2020-01-01\",\n", + " \"2020-02-01\",\n", + "])\n", + "target_rbound_index = pd.Index([\n", + " \"2020-02-01\",\n", + " \"2020-03-01\",\n", + "])\n", + "h1_high.vbt.resample_between_bounds(\n", + " target_lbound_index, \n", + " target_rbound_index,\n", + " vbt.nb.max_reduce_nb\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1b315822-a70c-4385-b0c4-2bc5c2b488eb", + "metadata": {}, + "outputs": [], + "source": [ + "h1_high.vbt.resample_between_bounds(\n", + " \"2020-01-01\", \n", + " vbt.date_range(\"2020-01-02\", \"2021-01-01\", freq=\"M\", inclusive=\"both\"),\n", + " vbt.nb.max_reduce_nb\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "170017dd-fb6d-47ca-b819-6498d018e9e8", + "metadata": {}, + "outputs": [], + "source": [ + "h1_high.expanding().max().resample(vbt.offset(\"M\")).max()" + ] + }, + { + "cell_type": "markdown", + "id": "24e72852-be2b-4b5b-bc60-d921491d1ab3", + "metadata": {}, + "source": [ + "### Meta methods" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "986fe4e4-f9f6-47d2-b415-a22e49723af4", + "metadata": {}, + "outputs": [], + "source": [ + "@njit\n", + "def mdd_nb(from_i, to_i, col, high, low):\n", + " highest = np.nanmax(high[from_i:to_i, col])\n", + " lowest = np.nanmin(low[from_i:to_i, col])\n", + " return lowest / highest - 1\n", + "\n", + "vbt.pd_acc.resample_apply(\n", + " \"M\",\n", + " mdd_nb,\n", + " vbt.Rep(\"high\"),\n", + " vbt.Rep(\"low\"),\n", + " broadcast_named_args=dict(\n", + " high=h1_high,\n", + " low=h1_low\n", + " )\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a83cb89f-90b9-4563-8124-79ea7938da70", + "metadata": {}, + "outputs": [], + "source": [ + "h1_high.iloc[0:744]" + ] + }, + { + "cell_type": 
"code", + "execution_count": null, + "id": "41414c25-d863-4f83-a140-c1dfef1e4226", + "metadata": {}, + "outputs": [], + "source": [ + "h1_low.iloc[0:744].min() / h1_high.iloc[0:744].max() - 1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "98949b71-5900-407f-b117-de2b7b50cd2e", + "metadata": {}, + "outputs": [], + "source": [ + ">>> target_lbound_index = vbt.date_range(\"2020-01-01\", \"2020-12-01\", freq=\"M\", tz=\"UTC\", inclusive=\"both\")\n", + ">>> target_rbound_index = vbt.date_range(\"2020-02-01\", \"2021-01-01\", freq=\"M\", tz=\"UTC\", inclusive=\"both\")\n", + ">>> vbt.pd_acc.resample_between_bounds(\n", + "... target_lbound_index,\n", + "... target_rbound_index,\n", + "... mdd_nb,\n", + "... vbt.Rep(\"high\"),\n", + "... vbt.Rep(\"low\"),\n", + "... broadcast_named_args=dict(\n", + "... high=h1_high,\n", + "... low=h1_low\n", + "... )\n", + "... )" + ] + }, + { + "cell_type": "markdown", + "id": "d039d5e0-2562-44e1-b572-0345064bacfe", + "metadata": {}, + "source": [ + "### Numba" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cf993314-d0dc-42ae-a97c-3f38c237d67a", + "metadata": {}, + "outputs": [], + "source": [ + ">>> from vectorbtpro.base.resampling.nb import map_bounds_to_source_ranges_nb\n", + "\n", + ">>> range_starts, range_ends = map_bounds_to_source_ranges_nb(\n", + "... source_index=h1_high.index.values,\n", + "... target_lbound_index=target_lbound_index.values,\n", + "... target_rbound_index=target_rbound_index.values,\n", + "... closed_lbound=True,\n", + "... closed_rbound=False,\n", + "... )\n", + ">>> np.column_stack((range_starts, range_ends))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ad3643dd-c3a4-4bc4-92b8-0c976fea9636", + "metadata": {}, + "outputs": [], + "source": [ + ">>> ms_mdd_arr = vbt.nb.reduce_index_ranges_meta_nb(\n", + "... 1,\n", + "... range_starts,\n", + "... range_ends,\n", + "... mdd_nb,\n", + "... vbt.to_2d_array(h1_high),\n", + "... 
vbt.to_2d_array(h1_low)\n", + "... )\n", + ">>> ms_mdd_arr" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "00aeb4d1-6f86-4470-9fe0-695823118b31", + "metadata": {}, + "outputs": [], + "source": [ + ">>> pd.Series(ms_mdd_arr[:, 0], index=target_lbound_index)" + ] + }, + { + "cell_type": "markdown", + "id": "e8df7bde-fef1-421d-b09e-b454eb2286d9", + "metadata": {}, + "source": [ + "### Caveats" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "911d934a-5e38-4a62-a7f1-9a538f7b09a2", + "metadata": {}, + "outputs": [], + "source": [ + "h4_close_2d = h4_close.iloc[:12]\n", + "h4_close_2d" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "57f8392d-8fa2-4927-923f-ceb3bf1e9349", + "metadata": {}, + "outputs": [], + "source": [ + "h4_close_2d.resample(\"1d\").last()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1e7e3b5d-e6e7-463c-ae65-3b0b8915afa0", + "metadata": {}, + "outputs": [], + "source": [ + "h5_close = h1_close.resample(\"5h\").last()\n", + "h5_close_2d = h5_close.iloc[:10]\n", + "h5_close_2d" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5fafb16-c6c3-4a6a-ab92-6f1d0122b603", + "metadata": {}, + "outputs": [], + "source": [ + "h5_close_2d.resample(\"1d\").last()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1a45bfe1-fdcf-4e0b-b208-78aa32cc88b9", + "metadata": {}, + "outputs": [], + "source": [ + "vbt.timedelta(\"1d\") % vbt.timedelta(\"1h\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ab874a51-5c3f-442d-9b2f-238d3fc810ab", + "metadata": {}, + "outputs": [], + "source": [ + "vbt.timedelta(\"1d\") % vbt.timedelta(\"4h\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7f8129ea-1f41-412c-a86b-bde7bc42af2b", + "metadata": {}, + "outputs": [], + "source": [ + "vbt.timedelta(\"1d\") % vbt.timedelta(\"5h\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + 
"id": "fcd15b36-44f9-4642-a976-9a341d12f2f5", + "metadata": {}, + "outputs": [], + "source": [ + "h5_close_time = h5_close_2d.index.shift() - pd.Timedelta(nanoseconds=1)\n", + "h5_close_time.name = \"Close time\"\n", + "h5_close_2d.index = h5_close_time\n", + "h5_close_2d" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "62befe68-5743-4901-b6eb-b654be33148b", + "metadata": {}, + "outputs": [], + "source": [ + "h5_close_2d.resample(\"1d\").last()" + ] + }, + { + "cell_type": "markdown", + "id": "6f5ad0b0-fe77-44c1-bcc2-4364c7ca15a3", + "metadata": {}, + "source": [ + "### Portfolio" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c8b2cf1-a1e2-4acf-8eee-5568d93005eb", + "metadata": {}, + "outputs": [], + "source": [ + "fast_sma = vbt.talib(\"SMA\").run(h1_close, timeperiod=vbt.Default(10))\n", + "slow_sma = vbt.talib(\"SMA\").run(h1_close, timeperiod=vbt.Default(20))\n", + "entries = fast_sma.real_crossed_above(slow_sma.real)\n", + "exits = fast_sma.real_crossed_below(slow_sma.real)\n", + "\n", + "pf = vbt.Portfolio.from_signals(h1_close, entries, exits)\n", + "pf.plot().show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8e50332b-8357-4b8d-9236-b2099eb13b62", + "metadata": {}, + "outputs": [], + "source": [ + "ms_pf = pf.resample(\"M\")\n", + "ms_pf.plot().show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "695b0602-710b-49ac-bf01-8eb981481dc7", + "metadata": {}, + "outputs": [], + "source": [ + "pf.total_return" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3c10ebcb-64ab-439d-b71e-0ab9e2645c6e", + "metadata": {}, + "outputs": [], + "source": [ + "ms_pf.total_return" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9821141f-89ad-465d-980c-632f0e8a4c17", + "metadata": {}, + "outputs": [], + "source": [ + "(1 + pf.returns).resample(vbt.offset(\"M\")).apply(lambda x: x.prod() - 1)" + ] + }, + { + "cell_type": 
"code", + "execution_count": null, + "id": "ac16b57c-2148-4e85-a2a9-21909209c463", + "metadata": {}, + "outputs": [], + "source": [ + "ms_pf.returns" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5725b28-494d-4748-8959-27906374a239", + "metadata": {}, + "outputs": [], + "source": [ + "ms_pf.trades.pnl.to_pd(reduce_func_nb=\"sum\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a175390c-359d-49d9-8892-cfcce10e464b", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/to_explore/notebooks/PQN_MTF.ipynb b/to_explore/notebooks/PQN_MTF.ipynb new file mode 100644 index 0000000..94a51e0 --- /dev/null +++ b/to_explore/notebooks/PQN_MTF.ipynb @@ -0,0 +1,372 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "2000b27d-9007-44ad-af8a-8a070d3d0e72", + "metadata": {}, + "source": [ + "# How to backtest a multi-timeframe strategy" + ] + }, + { + "cell_type": "markdown", + "id": "44334242-c4cd-4656-987b-7e407ab99c76", + "metadata": {}, + "source": [ + "Multi-timeframe (MTF) analysis is an essential trading approach that involves analyzing an asset's price in different timeframes." + ] + }, + { + "cell_type": "markdown", + "id": "208ebc3d-f05a-4f28-95cb-3a26c89583cd", + "metadata": {}, + "source": [ + "Despite its popularity, MTF analysis comes with several pitfalls when working with arrays, including look-ahead bias and information loss." 
+ ] + }, + { + "cell_type": "markdown", + "id": "0a915599-672d-4609-b800-c3b32c38bcdd", + "metadata": {}, + "source": [ + "Many native pandas implementations mistakenly assume that events, such as indicator calculations, take place at the same timestamp as the data provided by the exchange, which is typically the opening time of a bar." + ] + }, + { + "cell_type": "markdown", + "id": "b0a48b90-2dbe-4f1a-8924-6cddb8ba9e8c", + "metadata": {}, + "source": [ + "VBT operates under the assumption that the exact timing of most events is unknown and occurs at some point between the opening (best-case) and closing (worst-case) times of a bar. Consequently, VBT employs a set of features designed to resample data in the most sensitive way, without looking into the future." + ] + }, + { + "cell_type": "markdown", + "id": "e964afe5-96c3-4048-96b8-81ed67bdd138", + "metadata": {}, + "source": [ + "In today's newsletter, we'll use VectorBT PRO to backtest trading on multiple timeframes simultaneously." + ] + }, + { + "cell_type": "markdown", + "id": "f759738c-4e8e-471b-b668-65962de10b2f", + "metadata": {}, + "source": [ + "## Imports and set up" + ] + }, + { + "cell_type": "markdown", + "id": "4b057b37-ca17-4430-bec6-58181442610c", + "metadata": {}, + "source": [ + "In the newer versions of VBT PRO, the star-import (*) loads all the relevant stuff for us, such as `np` for NumPy." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "369e2d5c-1daa-4c54-ac86-ab91455b6f75", + "metadata": {}, + "outputs": [], + "source": [ + "from vectorbtpro import *" + ] + }, + { + "cell_type": "markdown", + "id": "efd90f21-1fa5-4c34-b903-73ec2fb5e163", + "metadata": {}, + "source": [ + "Configure our graphs to be dark and gap-free." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "765bee33-e30a-4dde-ae59-40c0ab5b5b3d", + "metadata": {}, + "outputs": [], + "source": [ + "vbt.settings.set_theme(\"dark\")\n", + "vbt.settings.plotting.auto_rangebreaks = True" + ] + }, + { + "cell_type": "markdown", + "id": "dacf0e6a-061c-4c45-9a2d-edb919cce63e", + "metadata": {}, + "source": [ + "Grab the data of a higher frequency for your favorite asset. We'll use hourly TSLA." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "16bf34cc-5386-44fa-9af1-7a8969d3f4b9", + "metadata": {}, + "outputs": [], + "source": [ + "data = vbt.YFData.pull(\"TSLA\", start=\"2023\", end=\"2024\", timeframe=\"hourly\")" + ] + }, + { + "cell_type": "markdown", + "id": "4681de1a-5101-4df7-90cf-ceffa53a6112", + "metadata": {}, + "source": [ + "## Multi-timeframe indicators" + ] + }, + { + "cell_type": "markdown", + "id": "8c7c88cb-43dd-459f-ac82-5a5614e26dd2", + "metadata": {}, + "source": [ + "Instruct VBT to calculate the fast and slow SMA indicators across multiple timeframes." + ] + }, + { + "cell_type": "markdown", + "id": "ad633ac3-6820-4d64-ba6f-8a480f3eaeb8", + "metadata": {}, + "source": [ + "Under the hood, data is first resampled to the target timeframe; then, the actual TA-Lib indicator is applied exclusively to non-missing values. Finally, the result is realigned back to the original timeframe in a manner that eliminates the possibility of look-ahead bias." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e6780b81-129f-40b3-8708-679c0fc91f29", + "metadata": {}, + "outputs": [], + "source": [ + "fast_sma = data.run(\n", + " \"talib:sma\", \n", + " timeframe=[\"1h\", \"4h\", \"1d\"], \n", + " timeperiod=vbt.Default(20),\n", + " skipna=True\n", + ")\n", + "slow_sma = data.run(\n", + " \"talib:sma\", \n", + " timeframe=[\"1h\", \"4h\", \"1d\"], \n", + " timeperiod=vbt.Default(50),\n", + " skipna=True\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "d6ad5284-3fc3-4340-b925-28ac252bde69", + "metadata": {}, + "source": [ + "The result of each call is a DataFrame with three columns, one for each timeframe." + ] + }, + { + "cell_type": "markdown", + "id": "f855ae03-5539-4d4d-8bd3-fe2709a33258", + "metadata": {}, + "source": [ + "If we plot the DataFrame, we'll observe that the line corresponding to the highest frequency is smooth, whereas the line representing the lowest frequency appears stepped since the indicator values are updated less frequently." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec9149ca-2c71-4b4a-97e5-6970cd3b0f44", + "metadata": {}, + "outputs": [], + "source": [ + "fast_sma.real.vbt.plot().show_svg()" + ] + }, + { + "cell_type": "markdown", + "id": "4ac4ea89-6cc9-48e6-91a6-522fd4922279", + "metadata": {}, + "source": [ + "## Unified portfolio" + ] + }, + { + "cell_type": "markdown", + "id": "015840e4-cd91-4fb3-97cc-389d39cf9e9d", + "metadata": {}, + "source": [ + "Next, we'll set up a portfolio in which we go long whenever the fast SMA crosses above the slow SMA and go short when the opposite occurs, across each timeframe." + ] + }, + { + "cell_type": "markdown", + "id": "6d998b85-66d7-4149-8160-ee5fff38dc6f", + "metadata": {}, + "source": [ + "However, since hourly signals occur more frequently than daily signals, we'll allocate less capital to more frequent signals. 
For instance, we'll allocate 5% of the equity to hourly signals, 10% to 4-hour signals, and 20% to daily signals." + ] + }, + { + "cell_type": "markdown", + "id": "9aa20269-769d-48ec-ba70-da1c73b508bb", + "metadata": {}, + "source": [ + "We'll begin with a cash balance of $10,000, shared across all timeframes. Additionally, we'll implement a 20% trailing stop loss (TSL)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8a8eb991-51de-46de-8650-06e1d1c141d2", + "metadata": {}, + "outputs": [], + "source": [ + "pf = vbt.PF.from_signals(\n", + " data, \n", + " long_entries=fast_sma.real_crossed_above(slow_sma), \n", + " short_entries=fast_sma.real_crossed_below(slow_sma), \n", + " size=[[0.05, 0.1, 0.2]],\n", + " size_type=\"valuepercent\",\n", + " init_cash=10_000,\n", + " group_by=[\"pf\", \"pf\", \"pf\"],\n", + " cash_sharing=True,\n", + " tsl_stop=0.2\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "d613367f-2eab-4b8e-8fb0-3ae9b4c278e6", + "metadata": {}, + "source": [ + "Plot the cumulative return for each timeframe and compare these to the cumulative return of the entire portfolio." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d39b31f1-3f88-458b-ac20-88ed7c383bdb", + "metadata": {}, + "outputs": [], + "source": [ + "fig = pf.get_cumulative_returns().vbt.plot(trace_kwargs=dict(line_color=\"gray\", line_dash=\"dot\"))\n", + "fig = pf.get_cumulative_returns(group_by=False).vbt.plot(fig=fig)\n", + "fig.show_svg()" + ] + }, + { + "cell_type": "markdown", + "id": "9ab33146-d1ac-48ad-bd0b-948c67342dd6", + "metadata": {}, + "source": [ + "To delve deeper into one of the timeframes, we can plot the indicators alongside the executed trade signals." + ] + }, + { + "cell_type": "markdown", + "id": "5c0f7a7c-8a40-4528-869a-a2ffdfd60735", + "metadata": {}, + "source": [ + "Here, we can observe that the majority of positions on the daily timeframe were closed out by the TSL." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c0dbd82e-1b70-4373-904f-a023d9983782", + "metadata": {}, + "outputs": [], + "source": [ + "fig = fast_sma.real.vbt.plot(column=\"1d\", trace_kwargs=dict(name=\"Fast\", line_color=\"limegreen\"))\n", + "fig = slow_sma.real.vbt.plot(column=\"1d\", trace_kwargs=dict(name=\"Slow\", line_color=\"orangered\"), fig=fig)\n", + "fig = pf.plot_trade_signals(column=\"1d\", fig=fig)\n", + "fig.show_svg()" + ] + }, + { + "cell_type": "markdown", + "id": "9f1fd30d-a43a-4435-a64c-35eb98c40b6c", + "metadata": {}, + "source": [ + "## Timeframe product" + ] + }, + { + "cell_type": "markdown", + "id": "989f08bb-14b2-4630-84a7-126920c69918", + "metadata": {}, + "source": [ + "Since our MTF indicators share the same index, we can combine one timeframe with another. For instance, we can generate signals from the crossover of two timeframes and identify the pair of timeframes that yield the highest expectancy." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "05d70299-4215-49bd-82ec-d52dbc29f678", + "metadata": {}, + "outputs": [], + "source": [ + "fast_sma_real = fast_sma.real.vbt.rename_levels({\"sma_timeframe\": \"fast_sma_timeframe\"})\n", + "slow_sma_real = slow_sma.real.vbt.rename_levels({\"sma_timeframe\": \"slow_sma_timeframe\"})\n", + "fast_sma_real, slow_sma_real = fast_sma_real.vbt.x(slow_sma_real)\n", + "long_entries = fast_sma_real.vbt.crossed_above(slow_sma_real)\n", + "short_entries = fast_sma_real.vbt.crossed_below(slow_sma_real)\n", + "pf = vbt.PF.from_signals(data, long_entries=long_entries, short_entries=short_entries)\n", + "pf.trades.expectancy.sort_values(ascending=False)" + ] + }, + { + "cell_type": "markdown", + "id": "2e573717-8f4c-402b-aef6-cab06f9df044", + "metadata": {}, + "source": [ + "## Next steps" + ] + }, + { + "cell_type": "markdown", + "id": "9b042ef0-9041-4722-bde7-bea35c9ca2ec", + "metadata": {}, + "source": [ + "Timeframe is yet another parameter of your 
strategy that can be tweaked. For example, you can go to uncharted territory and test more unconventional timeframes like \"1h 30min\" to discover potentially novel insights. Similar to other parameters, timeframes should also undergo cross-validation." + ] + }, + { + "cell_type": "markdown", + "id": "9233c24e-59f5-4947-b1d5-5c4b5c409ebe", + "metadata": {}, + "source": [ + "However, unlike regular parameters, timeframes should be regarded as a distinct dimension that provides a unique perspective on your strategy." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9395716f-2f53-4e0a-a48b-205bdbe6c1fd", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/to_explore/notebooks/PQN_ParamCV.ipynb b/to_explore/notebooks/PQN_ParamCV.ipynb new file mode 100644 index 0000000..656ea73 --- /dev/null +++ b/to_explore/notebooks/PQN_ParamCV.ipynb @@ -0,0 +1,316 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "b9d33f66-67f4-483f-9613-87b3298a8fc1", + "metadata": {}, + "source": [ + "# How to cross-validate a parameterized trading strategy" + ] + }, + { + "cell_type": "markdown", + "id": "23954d08-d7cf-43ec-84b2-cd6dbbfc5c15", + "metadata": {}, + "source": [ + "Trading strategies often rely on parameters. Enhancing and effectively cross-validating these parameters can provide a competitive advantage in the market. However, creating a reliable cross-validation schema is challenging due to risks like look-ahead bias and other pitfalls that can lead to overestimating a strategy's performance. 
With [VectorBT PRO](https://vectorbt.pro/), you can easily access and implement a variety of sophisticated cross-validation methods with just a few lines of code." + ] + }, + { + "cell_type": "markdown", + "id": "d522552e-dd90-46de-a579-6df68154f91b", + "metadata": {}, + "source": [ + "## Imports and data" + ] + }, + { + "cell_type": "markdown", + "id": "1d482486-ce8d-4474-8623-7a08dfba157c", + "metadata": {}, + "source": [ + "Let's import VBT PRO and the few libraries relevant for our analysis." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "039c1b36-bb5a-4272-8887-d2ff7770184e", + "metadata": {}, + "outputs": [], + "source": [ + "from vectorbtpro import *\n", + "# whats_imported()\n", + "\n", + "vbt.settings.set_theme(\"dark\")" + ] + }, + { + "cell_type": "markdown", + "id": "f0b6912a-09c7-4c72-b381-c35135ca627f", + "metadata": {}, + "source": [ + "The first step involves acquiring data." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1f432668-8698-4c21-a9b4-d8c4c40972e9", + "metadata": {}, + "outputs": [], + "source": [ + "SYMBOL = \"AAPL\"\n", + "START = \"2010\"\n", + "END = \"now\"\n", + "TIMEFRAME = \"day\"\n", + "\n", + "data = vbt.YFData.pull(SYMBOL, start=START, end=END, timeframe=TIMEFRAME)" + ] + }, + { + "cell_type": "markdown", + "id": "9058618c-6395-4bbc-8e7a-b0cdc6394a85", + "metadata": {}, + "source": [ + "## Cross-validation schema" + ] + }, + { + "cell_type": "markdown", + "id": "ea50af21-ff28-4d40-870b-7fcdf6e155e1", + "metadata": {}, + "source": [ + "Next, we'll set up a \"splitter,\" which divides a date range into smaller segments according to a chosen schema. For instance, let's allocate 12 months for training data and another 12 months for testing data, with this cycle repeating every 3 months." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cc405ebe-ae7d-4687-8470-7d1dbf91aad6", + "metadata": {}, + "outputs": [], + "source": [ + "TRAIN = 12\n", + "TEST = 12\n", + "EVERY = 3\n", + "OFFSET = vbt.offset(\"M\")\n", + "\n", + "splitter = vbt.Splitter.from_ranges(\n", + " data.index, \n", + " every=EVERY * OFFSET, \n", + " lookback_period=(TRAIN + TEST) * OFFSET,\n", + " split=(\n", + " vbt.RepFunc(lambda index: index < index[0] + TRAIN * OFFSET),\n", + " vbt.RepFunc(lambda index: index >= index[0] + TRAIN * OFFSET),\n", + " ),\n", + " set_labels=[\"train\", \"test\"]\n", + ")\n", + "splitter.plots().show_svg()" + ] + }, + { + "cell_type": "markdown", + "id": "7a749de2-6f58-4246-b308-ad64c6b03f90", + "metadata": {}, + "source": [ + "In the first subplot, we see that each split (or row) contains adjacent training and testing sets, progressively rolling from past to present. The second subplot illustrates the overlap of each data point across different ranges. Tip: For non-overlapping testing sets, use the setting `EVERY = TEST` (the same as `TRAIN` in this example, since both are 12 months)." + ] + }, + { + "cell_type": "markdown", + "id": "ab6ed776-f682-43c8-a6d7-489101f82c70", + "metadata": {}, + "source": [ + "## Objective function" + ] + }, + { + "cell_type": "markdown", + "id": "9ea17ccf-77e0-4d6c-af87-4c2cdb788d4f", + "metadata": {}, + "source": [ + "Next, we'll create a function to execute a trading strategy within a specified date range using a single parameter set, returning one key metric. Our strategy will be a simple EMA crossover combined with an ATR trailing stop."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b6a0ac65-5336-4600-870d-dd9202ef78ec", + "metadata": {}, + "outputs": [], + "source": [ + "def objective(data, fast_period=10, slow_period=20, atr_period=14, atr_mult=3):\n", + " fast_ema = data.run(\"talib:ema\", fast_period, short_name=\"fast_ema\", unpack=True)\n", + " slow_ema = data.run(\"talib:ema\", slow_period, short_name=\"slow_ema\", unpack=True)\n", + " atr = data.run(\"talib:atr\", atr_period, unpack=True)\n", + " pf = vbt.PF.from_signals(\n", + " data, \n", + " entries=fast_ema.vbt.crossed_above(slow_ema), \n", + " exits=fast_ema.vbt.crossed_below(slow_ema), \n", + " tsl_stop=atr * atr_mult, \n", + " save_returns=True,\n", + " freq=TIMEFRAME\n", + " )\n", + " return pf.sharpe_ratio\n", + "\n", + "print(objective(data))" + ] + }, + { + "cell_type": "markdown", + "id": "dc93d52c-a4e5-4122-a8e6-51ef5d52adbb", + "metadata": {}, + "source": [ + "## Parameter optimization" + ] + }, + { + "cell_type": "markdown", + "id": "41b0f21c-e99e-416a-b2da-dcdcbf7e7fed", + "metadata": {}, + "source": [ + "Let's harness the power of VBT PRO! By decorating (or wrapping) our function with `parameterized`, we enable `objective` to accept a list of parameters and execute them across all combinations. We'll then further enhance the function with another decorator, `split`, which runs the strategy on each date range specified by the splitter. This approach allows us to apply our strategy across every possible date range and parameter combination, compiling the outcomes into a single Pandas Series." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e150f68a-9ce3-432f-81ee-566acdfdd2dc", + "metadata": {}, + "outputs": [], + "source": [ + "param_objective = vbt.parameterized(\n", + " objective,\n", + " merge_func=\"concat\",\n", + " mono_n_chunks=\"auto\", # merge parameter combinations into chunks\n", + " execute_kwargs=dict(warmup=True, engine=\"pathos\") # run chunks in parallel using Pathos\n", + ")\n", + "cv_objective = vbt.split(\n", + " param_objective,\n", + " splitter=splitter, \n", + " takeable_args=[\"data\"], # select date range from data\n", + " merge_func=\"concat\", \n", + ")\n", + "\n", + "sharpe_ratio = cv_objective(\n", + " data,\n", + " vbt.Param(np.arange(10, 50), condition=\"slow_period - fast_period >= 5\"),\n", + " vbt.Param(np.arange(10, 50)),\n", + " vbt.Param(np.arange(10, 50), condition=\"fast_period <= atr_period <= slow_period\"),\n", + " vbt.Param(np.arange(2, 5))\n", + ")\n", + "print(sharpe_ratio)" + ] + }, + { + "cell_type": "markdown", + "id": "a5558952-d14d-49a3-9a2d-21152eb3bcdd", + "metadata": {}, + "source": [ + "We tested over 3 million combinations of date ranges and parameters in just a few minutes." + ] + }, + { + "cell_type": "markdown", + "id": "dbb436c2-6428-48d3-9c58-67e9293c7333", + "metadata": {}, + "source": [ + "## Analysis" + ] + }, + { + "cell_type": "markdown", + "id": "d92c269d-9641-4bf4-a86d-c415343615d5", + "metadata": {}, + "source": [ + "Let's find out if there's a correlation between the results of the training and testing sets." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9d52a439-cee9-4e6d-9ef6-48dd050e8c7a", + "metadata": {}, + "outputs": [], + "source": [ + "train_sharpe_ratio = sharpe_ratio.xs(\"train\", level=\"set\")\n", + "test_sharpe_ratio = sharpe_ratio.xs(\"test\", level=\"set\")\n", + "print(train_sharpe_ratio.corr(test_sharpe_ratio))" + ] + }, + { + "cell_type": "markdown", + "id": "2c2950f5-bd78-4bbc-b78d-bf3bb7f31f02", + "metadata": {}, + "source": [ + "The analysis indicates a weak negative correlation or no substantial correlation. This suggests that the strategy's performance in a testing period is largely unrelated, or even slightly opposite, to its performance in the preceding training months." + ] + }, + { + "cell_type": "markdown", + "id": "f565e4ac-2cac-4d81-8b82-8f375333e1ad", + "metadata": {}, + "source": [ + "And here's an analysis segmented by fast and slow EMA periods. It highlights the minimal variation in the Sharpe ratio from the training to the testing set across at least 50% of the splits, where blue indicates a positive change." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "58be0727-3e27-45bf-bca9-de24674eb5ea", + "metadata": {}, + "outputs": [], + "source": [ + "sharpe_ratio_diff = test_sharpe_ratio - train_sharpe_ratio\n", + "sharpe_ratio_diff_median = sharpe_ratio_diff.groupby([\"fast_period\", \"slow_period\"]).median()\n", + "sharpe_ratio_diff_median.vbt.heatmap(trace_kwargs=dict(colorscale=\"RdBu\")).show_svg()" + ] + }, + { + "cell_type": "markdown", + "id": "c5423a9e-289e-421f-886e-753577b4ba13", + "metadata": {}, + "source": [ + "## Conclusion" + ] + }, + { + "cell_type": "markdown", + "id": "cfdc909c-6559-4eba-801c-d0792068aba2", + "metadata": {}, + "source": [ + "Although you might have developed a promising strategy on paper, cross-validating it is essential to confirm its consistent performance over time and to ensure it's not merely a result of random fluctuations."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c1eb4e7a-4698-4c2c-896f-1c8d18a4c3ee", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/to_explore/notebooks/PQN_Patterns.ipynb b/to_explore/notebooks/PQN_Patterns.ipynb new file mode 100644 index 0000000..3c251c4 --- /dev/null +++ b/to_explore/notebooks/PQN_Patterns.ipynb @@ -0,0 +1,499 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "a7f13baa-bf3a-41e2-b4f4-bef957746b6a", + "metadata": {}, + "source": [ + "# How to backtest chart patterns with VectorBT PRO" + ] + }, + { + "cell_type": "markdown", + "id": "1e5237a6-cb1e-42b2-8b74-841af2e8859a", + "metadata": {}, + "source": [ + "VectorBT PRO (https://vectorbt.pro/) is a proprietary Python package designed for backtesting and analyzing quantitative trading strategies. It provides a comprehensive suite of tools for every stage of an algorithmic trading workflow, including data acquisition, signal generation and analysis, portfolio optimization, strategy simulation, hyperparameter tuning, and cross-validation. These modular components empower users to flexibly customize their analysis, setting it apart from monolithic backtesting frameworks." + ] + }, + { + "cell_type": "markdown", + "id": "51ad2b2b-3ffa-4600-9f03-547f83d8babb", + "metadata": {}, + "source": [ + "One of these components is a data pattern detector that efficiently scans data using variable-length windows, assessing their similarity to a specified pattern. 
This process, optimized with Numba (https://numba.pydata.org/), operates on any hardware without the need for machine learning. To showcase the detector's capabilities, we will conduct backtesting on a range of patterns and their combinations on a single dataset." + ] + }, + { + "cell_type": "markdown", + "id": "36f9e6a9-eedf-4595-b214-2d00f02d9c90", + "metadata": {}, + "source": [ + "## Imports and set up" + ] + }, + { + "cell_type": "markdown", + "id": "33459b0c-c21f-4251-b13b-6492c9171f6c", + "metadata": {}, + "source": [ + "Due to VectorBT PRO's self-contained design, only minimal imports are necessary." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bc8d53d7-0290-4e6c-b760-6c9ba8a6873e", + "metadata": {}, + "outputs": [], + "source": [ + "from vectorbtpro import *\n", + "# whats_imported()\n", + "\n", + "vbt.settings.set_theme(\"dark\")" + ] + }, + { + "cell_type": "markdown", + "id": "0aec0980-6ee2-41b1-a713-4a062a823fe5", + "metadata": {}, + "source": [ + "VectorBT PRO features built-in data downloading from sources such as Yahoo Finance, Alpaca, Polygon, TradingView, and many more. We will perform pattern detection on hourly price data pulled from TradingView." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d12bcb35-95ba-424e-8dfc-0e9edff8df99", + "metadata": {}, + "outputs": [], + "source": [ + "symbols = [\n", + " \"NASDAQ:META\",\n", + " \"NASDAQ:AMZN\",\n", + " \"NASDAQ:AAPL\",\n", + " \"NASDAQ:NFLX\",\n", + " \"NASDAQ:GOOG\",\n", + "]\n", + "\n", + "data = vbt.TVData.pull(symbols, timeframe=\"hourly\")" + ] + }, + { + "cell_type": "markdown", + "id": "77e48d78-436d-4a52-95d4-8ff8c1e8ff4c", + "metadata": {}, + "source": [ + "TradingView does not offer the option to specify a date range in advance, so we will need to select it afterward." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b96e37aa-3c00-4373-8030-ca3d97f872b1", + "metadata": {}, + "outputs": [], + "source": [ + "start_date = \"2020\"\n", + "end_date = None\n", + "\n", + "data = data.xloc[start_date:end_date]" + ] + }, + { + "cell_type": "markdown", + "id": "3c9c8009-3a78-4799-bc98-2bd191e22851", + "metadata": {}, + "source": [ + "Ensure that our data spans the correct date period and is free of NaN values." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "76b530eb-f42e-4bdf-b270-20298a66eb6b", + "metadata": {}, + "outputs": [], + "source": [ + "print(data.stats())" + ] + }, + { + "cell_type": "markdown", + "id": "4cf31468-ce25-4284-b0c6-dec873e62268", + "metadata": {}, + "source": [ + "As pattern detection requires only a single time series, we must choose the suitable feature. We'll utilize HLC/3, which effectively captures price fluctuations." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "650c3662-684c-4e57-b7fa-45ba8b2f7f1d", + "metadata": {}, + "outputs": [], + "source": [ + "price = data.hlc3" + ] + }, + { + "cell_type": "markdown", + "id": "74dcad43-dd64-435a-9a9d-591681514209", + "metadata": {}, + "source": [ + "## Define patterns" + ] + }, + { + "cell_type": "markdown", + "id": "f387c42a-1224-46d9-9397-b6479e6e21e7", + "metadata": {}, + "source": [ + "Numerous chart patterns can be translated into numerical sequences, like the \"Double Top\" pattern (https://www.investopedia.com/terms/d/doubletop.asp) represented as [1, 3, 2, 3, 1]. It's important to note that while the numbers themselves can be arbitrary, their relative spacing should mirror the relative distance between the pattern's chart points. For instance, in this sequence, 2 aligns with the midpoint between valley point 1 and peak point 3. The same principle applies to temporal distribution: points should be equidistant from one another." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "40827a2a-ee12-4feb-9f6a-4505ed24060d", + "metadata": {}, + "outputs": [], + "source": [ + "bullish_patterns = {\n", + " \"double_bottom\": [5, 1, 3, 1, 5],\n", + " \"exp_triangle\": [3, 4, 2, 5, 1, 6],\n", + " \"asc_triangle\": [1, 5, 2, 5, 3, 6],\n", + " \"symm_triangle\": [1, 6, 2, 5, 3, 6],\n", + " \"pennant\": [6, 1, 5, 2, 4, 3, 6]\n", + "}\n", + "bearish_patterns = {\n", + " \"head_and_shoulders\": [1, 4, 2, 6, 2, 4, 1],\n", + " \"double_top\": [1, 5, 3, 5, 1],\n", + " \"desc_triangle\": [6, 2, 5, 2, 4, 1],\n", + " \"symm_triangle\": [6, 1, 5, 2, 4, 1],\n", + " \"pennant\": [1, 6, 2, 5, 3, 4, 1]\n", + "}" + ] + }, + { + "cell_type": "markdown", + "id": "af76a114-d588-443a-8c62-19274c97c416", + "metadata": {}, + "source": [ + "Confirm the visual representation of a pattern by plotting its corresponding line graph." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0114e669-fff0-48b1-922b-412ad6941914", + "metadata": {}, + "outputs": [], + "source": [ + "pd.Series(bullish_patterns[\"double_bottom\"]).vbt.plot().show_svg()" + ] + }, + { + "cell_type": "markdown", + "id": "11172c01-2675-4c12-ab51-ae21137c097a", + "metadata": {}, + "source": [ + "Each generated sequence serves as a rough approximation of the desired chart pattern, and there's no need for precise adjustments: VectorBT PRO's similarity-based algorithm is flexible and can identify patterns, even if they are not perfectly consistent in their design." + ] + }, + { + "cell_type": "markdown", + "id": "4292665d-4168-436a-a59d-94b42bfd9482", + "metadata": {}, + "source": [ + "## Detect patterns in data" + ] + }, + { + "cell_type": "markdown", + "id": "0a355587-347a-4f4f-9f7a-fa041127f36a", + "metadata": {}, + "source": [ + "Iterate through each pattern, dataset, and timestamp within the dataset. 
Search for matches within windows spanning from 1 to 30 days, and create a record for each match that exceeds a pre-defined minimum similarity score, which is set by default to 85%." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d1a9af95-b7d1-4f29-9a6b-b40d57e5f597", + "metadata": {}, + "outputs": [], + "source": [ + "min_window = 24\n", + "max_window = 24 * 30\n", + "\n", + "def detect_patterns(patterns):\n", + " return vbt.PatternRanges.from_pattern_search(\n", + " price,\n", + " open=data.open, # OHLC for plotting\n", + " high=data.high,\n", + " low=data.low,\n", + " close=data.close,\n", + " pattern=patterns,\n", + " window=min_window,\n", + " max_window=max_window,\n", + " execute_kwargs=dict( # multithreading\n", + " engine=\"threadpool\", \n", + " chunk_len=\"auto\", \n", + " )\n", + " )\n", + "\n", + "bullish_matches = detect_patterns(vbt.Param(bullish_patterns, name=\"bullish_pattern\"))\n", + "bearish_matches = detect_patterns(vbt.Param(bearish_patterns, name=\"bearish_pattern\"))" + ] + }, + { + "cell_type": "markdown", + "id": "12733006-548c-4c28-a4ac-902aa066f0b3", + "metadata": {}, + "source": [ + "In just several minutes, VectorBT PRO seamlessly detected matches among all patterns. This process, involving around 230 million unique pattern and window combinations, was executed in parallel." + ] + }, + { + "cell_type": "markdown", + "id": "714ddd1f-f5a5-420e-9d4d-707e4b5e4685", + "metadata": {}, + "source": [ + "Get the number of matches for each pattern and dataset." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77c5957e-a906-4c0c-998a-5b2e92fd652d", + "metadata": {}, + "outputs": [], + "source": [ + "print(bullish_matches.count())" + ] + }, + { + "cell_type": "markdown", + "id": "88b7627b-f48c-4d51-986e-cc269abf9604", + "metadata": {}, + "source": [ + "Plot the pattern and dataset with the most matches." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "708b0d74-8c5e-4921-87e2-4704050ce7ed", + "metadata": {}, + "outputs": [], + "source": [ + "vbt.settings.plotting.auto_rangebreaks = True # for stocks\n", + "\n", + "display_column = bullish_matches.count().idxmax()\n", + "\n", + "bullish_matches.plot(column=display_column, fit_ranges=True).show_svg()" + ] + }, + { + "cell_type": "markdown", + "id": "af6a60cf-0d98-49e5-ad48-cc872f6d2ce9", + "metadata": {}, + "source": [ + "Zoom in on a match." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f22c6c82-cc7c-4d5d-94b9-e14753e82072", + "metadata": {}, + "outputs": [], + "source": [ + "display_match = 3\n", + "\n", + "bullish_matches.plot(column=display_column, fit_ranges=display_match).show_svg()" + ] + }, + { + "cell_type": "markdown", + "id": "45f49c51-bd6f-4952-8ff4-76f6ebc00f7f", + "metadata": {}, + "source": [ + "The window data closely aligns with the pattern. This functionality is highly comprehensive, offering the flexibility to adjust fitness levels, modify rescaling and interpolation algorithms, and more to suit specific requirements." + ] + }, + { + "cell_type": "markdown", + "id": "16779944-3cae-44e8-a63d-36194479217c", + "metadata": {}, + "source": [ + "## Transform matches to signals" + ] + }, + { + "cell_type": "markdown", + "id": "26c9e03d-95ff-44a3-bd56-2a581673aa27", + "metadata": {}, + "source": [ + "To conduct backtesting on the identified patterns, we will convert them into signals, triggering a signal once a pattern has fully developed." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ce91720c-dd56-496e-aadb-faad71e1a529", + "metadata": {}, + "outputs": [], + "source": [ + "entries = bullish_matches.last_pd_mask\n", + "exits = bearish_matches.last_pd_mask" + ] + }, + { + "cell_type": "markdown", + "id": "d049c224-03b7-42fa-8927-51a502812e54", + "metadata": {}, + "source": [ + "Generate a Cartesian product of bullish and bearish patterns to systematically test each bullish pattern against each bearish pattern." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a5f0a9db-632d-4705-af3f-1c33dfb6f884", + "metadata": {}, + "outputs": [], + "source": [ + "entries, exits = entries.vbt.x(exits)" + ] + }, + { + "cell_type": "markdown", + "id": "90a043fe-c990-4358-94be-b8f4b92dec4f", + "metadata": {}, + "source": [ + "Both arrays have been converted into equally-shaped DataFrames, each comprising 125 columns. Each column represents an individual backtest, encompassing three parameters: bullish pattern, bearish pattern, and symbol." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8b3a1466-246b-42b5-a61d-a0ae1137c54d", + "metadata": {}, + "outputs": [], + "source": [ + "print(entries.columns)" + ] + }, + { + "cell_type": "markdown", + "id": "05669332-15a4-4ac5-b376-bdc08006d952", + "metadata": {}, + "source": [ + "## Backtest signals" + ] + }, + { + "cell_type": "markdown", + "id": "a44e90d0-f172-445a-9f4b-865444ae0cb3", + "metadata": {}, + "source": [ + "Establish a portfolio by simulating signals." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5d3ec70d-73e4-407d-8ea7-2a6b0f4436ba", + "metadata": {}, + "outputs": [], + "source": [ + "pf = vbt.Portfolio.from_signals(data, entries, exits)" + ] + }, + { + "cell_type": "markdown", + "id": "ff7d821d-f20e-45c3-83d5-ad1aa2ba109b", + "metadata": {}, + "source": [ + "Get the mean total return for every combination of bullish and bearish patterns." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ad8b789d-c543-4e42-8df7-1a351cceda5f", + "metadata": {}, + "outputs": [], + "source": [ + "mean_total_return = pf.total_return.groupby([\"bullish_pattern\", \"bearish_pattern\"]).mean()\n", + "\n", + "print(mean_total_return)" + ] + }, + { + "cell_type": "markdown", + "id": "946aa00a-b183-496e-8a63-7f11485ad3dc", + "metadata": {}, + "source": [ + "As visual beings, let's represent these values as a heatmap." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d74ac965-0461-4563-813e-56b9cce979c8", + "metadata": {}, + "outputs": [], + "source": [ + "mean_total_return.vbt.heatmap(x_level=\"bearish_pattern\", y_level=\"bullish_pattern\").show_svg()" + ] + }, + { + "cell_type": "markdown", + "id": "5d9f9706-d3da-480e-8b65-5eaa47196049", + "metadata": {}, + "source": [ + "Although the displayed performance of each pattern combination does not guarantee future results, it provides insight into how the market responded to pattern events in the past. For instance, it's noteworthy that the \"Bearish Symmetrical Triangle\" exhibited a notably bullish trend. Cross-validation and robustness testing are essential next steps for a comprehensive assessment."
+ ] + }, + { + "cell_type": "markdown", + "id": "2b5b8516-8620-41aa-a11c-96b48798c343", + "metadata": {}, + "source": [ + "Read more at https://vectorbt.pro/tutorials/patterns-and-projections/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9efd6597-880f-4769-a486-65e17b1c5475", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/to_explore/notebooks/PQN_Projections.ipynb b/to_explore/notebooks/PQN_Projections.ipynb new file mode 100644 index 0000000..870f05b --- /dev/null +++ b/to_explore/notebooks/PQN_Projections.ipynb @@ -0,0 +1,266 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "9d9b5c91-f3a3-4709-a36f-40ecc86595d6", + "metadata": {}, + "source": [ + "# Forecasting future price trends by projecting historical price patterns" + ] + }, + { + "cell_type": "markdown", + "id": "2cffb873-e431-44f3-b243-e35969bbd2c1", + "metadata": {}, + "source": [ + "In our previous newsletter focusing on VectorBT PRO (VBT), we dived into the pattern detection capabilities of this powerful library. An additional key functionality is VBT's capacity to extrapolate identified price segments into the future and aggregate them for statistical analysis. This feature can be an invaluable tool for real-time decision-making in market analysis." 
+ ] + }, + { + "cell_type": "markdown", + "id": "c472968b-1863-4d79-a299-ec67c1757455", + "metadata": {}, + "source": [ + "## Imports and setup" + ] + }, + { + "cell_type": "markdown", + "id": "ddf68612-622b-4803-87fc-a1ad80341536", + "metadata": {}, + "source": [ + "Given the self-contained design of VBT, a single import suffices." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a42ccb91-bc73-4ad5-9327-18c7c22af598", + "metadata": {}, + "outputs": [], + "source": [ + "from vectorbtpro import *\n", + "# whats_imported()\n", + "\n", + "vbt.settings.set_theme(\"dark\")" + ] + }, + { + "cell_type": "markdown", + "id": "15412fda-c27f-4820-9273-17366164b2b3", + "metadata": {}, + "source": [ + "Let's define a set of variables for our analysis." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fc016fe0-5ae6-416f-bb4d-84a33a91fce8", + "metadata": {}, + "outputs": [], + "source": [ + "SYMBOL = \"BTCUSDT\"\n", + "TIMEFRAME = \"1 hour\"\n", + "START = \"one year ago\"\n", + "\n", + "LAST_N_BARS = 24\n", + "PRED_N_BARS = 12\n", + "\n", + "GIF_FNAME = \"projections.gif\"\n", + "GIF_N_BARS = 72\n", + "GIF_FPS = 4\n", + "GIF_PAD = 0.01" + ] + }, + { + "cell_type": "markdown", + "id": "e4667d70-f1d9-4f34-81ff-fdf8320477ae", + "metadata": {}, + "source": [ + "We will execute the analysis using price data retrieved from BinanceData, based on the parameters we previously defined."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b797e0ff-320d-456b-91df-1e0e369d83a9", + "metadata": {}, + "outputs": [], + "source": [ + "data = vbt.BinanceData.pull(SYMBOL, timeframe=TIMEFRAME, start=START)" + ] + }, + { + "cell_type": "markdown", + "id": "43fade8d-2d1f-492b-88bb-95facd21ceda", + "metadata": {}, + "source": [ + "## Find and plot projections" + ] + }, + { + "cell_type": "markdown", + "id": "0013fab2-d1fa-4777-99e9-2081a90444e3", + "metadata": {}, + "source": [ + "Let's write a function that analyzes the most recent price trend and employs it as a pattern to identify similar price movements in historical data. This pattern recognition function will focus exclusively on segments of price history having a comparable percentage change from their respective starting points." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f7f4ead3-c4db-47d5-8a30-3f7dbe4347dc", + "metadata": {}, + "outputs": [], + "source": [ + "def find_patterns(data):\n", + " price = data.hlc3\n", + " pattern = price.values[-LAST_N_BARS:]\n", + " pattern_ranges = price.vbt.find_pattern(\n", + " pattern=pattern,\n", + " rescale_mode=\"rebase\",\n", + " overlap_mode=\"allow\",\n", + " wrapper_kwargs=dict(freq=TIMEFRAME)\n", + " )\n", + " pattern_ranges = pattern_ranges.status_closed\n", + " return pattern_ranges\n", + "\n", + "pattern_ranges = find_patterns(data)\n", + "print(pattern_ranges.count())" + ] + }, + { + "cell_type": "markdown", + "id": "6dc1f00c-f0a2-4b74-831f-3043c14f1195", + "metadata": {}, + "source": [ + "We have identified a number of price segments that closely resemble the latest price trend. Now, we'll write a function that extracts the price data immediately succeeding each identified segment and plots these as extensions of the price trend. 
These subsequent segments are known as \"projections.\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9fb7b02c-190a-488e-bfa6-843db23c324e", + "metadata": {}, + "outputs": [], + "source": [ + "def plot_projections(data, pattern_ranges, **kwargs):\n", + " projection_ranges = pattern_ranges.with_delta(\n", + " PRED_N_BARS,\n", + " open=data.open,\n", + " high=data.high,\n", + " low=data.low,\n", + " close=data.close,\n", + " )\n", + " projection_ranges = projection_ranges.status_closed\n", + " return projection_ranges.plot_projections(\n", + " plot_past_period=LAST_N_BARS, \n", + " **kwargs,\n", + " )\n", + "\n", + "plot_projections(data, pattern_ranges, plot_bands=False).show_svg()" + ] + }, + { + "cell_type": "markdown", + "id": "8df73436-c6ae-411b-8c44-e5764f9c1812", + "metadata": {}, + "source": [ + "As we can see, similar price movements have historically branched into a diverse set of trajectories. For a visually compelling and statistically robust forecast, we will display the confidence bands encompassing all the projections, with 60% of these projections falling between the upper and lower bands." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b97458a5-7428-4877-80c6-a522aef4b5ce", + "metadata": {}, + "outputs": [], + "source": [ + "plot_projections(data, pattern_ranges, plot_bands=True).show_svg()" + ] + }, + { + "cell_type": "markdown", + "id": "9011e2c5-1745-480c-b9da-c031f6ba9ae2", + "metadata": {}, + "source": [ + "## Generate animation" + ] + }, + { + "cell_type": "markdown", + "id": "ac05a0ea-6883-4736-a815-619f76607966", + "metadata": {}, + "source": [ + "Lastly, we will compile a GIF animation that iterates through a specified range of bars, applying the aforementioned procedure to each bar within that range." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6238530e-9d06-4da4-a71d-3ae7489c2c9a", + "metadata": {}, + "outputs": [], + "source": [ + "def plot_frame(frame_index, **kwargs):\n", + " sub_data = data.loc[:frame_index[-1]]\n", + " pattern_ranges = find_patterns(sub_data)\n", + " if pattern_ranges.count() < 3:\n", + " return None\n", + " return plot_projections(sub_data, pattern_ranges, **kwargs)\n", + "\n", + "vbt.save_animation(\n", + " GIF_FNAME,\n", + " data.index[-GIF_N_BARS:],\n", + " plot_frame,\n", + " plot_projections=False,\n", + " delta=1,\n", + " fps=GIF_FPS,\n", + " writer_kwargs=dict(loop=0),\n", + " yaxis_range=[\n", + " data.low.iloc[-GIF_N_BARS:].min() * (1 - GIF_PAD), \n", + " data.high.iloc[-GIF_N_BARS:].max() * (1 + GIF_PAD)\n", + " ],\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "91b825fb-7e4c-4d48-ae73-bffe633a6f52", + "metadata": {}, + "source": [ + "Bear in mind that while the confidence bands describe past performance, they should not be used as guarantees of future results." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "319a24bb-e210-4d02-ab2c-0ce58b3dc82c", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/to_explore/notebooks/PairsTrading.ipynb b/to_explore/notebooks/PairsTrading.ipynb new file mode 100644 index 0000000..286e76c --- /dev/null +++ b/to_explore/notebooks/PairsTrading.ipynb @@ -0,0 +1,1519 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Pairs trading" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from vectorbtpro import *\n", + "# whats_imported()\n", + "\n", + "vbt.settings.set_theme(\"dark\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Selection" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "SYMBOLS = vbt.BinanceData.list_symbols(\"*USDT\")\n", + "POOL_FILE = \"temp/data_pool.h5\"\n", + "START = \"2018\"\n", + "END = \"2023\"\n", + "\n", + "# vbt.remove_dir(\"temp\", with_contents=True, missing_ok=True)\n", + "vbt.make_dir(\"temp\")\n", + "\n", + "if not vbt.file_exists(POOL_FILE):\n", + " with vbt.ProgressBar(total=len(SYMBOLS)) as pbar:\n", + " collected = 0\n", + " for symbol in SYMBOLS:\n", + " try:\n", + " data = vbt.BinanceData.pull(\n", + " symbol, \n", + " start=START,\n", + " end=END,\n", + " show_progress=False,\n", + " silence_warnings=True\n", + " )\n", + " data.to_hdf(POOL_FILE)\n", + " collected += 1\n", + " except Exception:\n", 
+ " pass\n", + " pbar.set_prefix(f\"{symbol} ({collected})\")\n", + " pbar.update()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "SELECT_START = \"2020\"\n", + "SELECT_END = \"2021\"\n", + "\n", + "data = vbt.HDFData.pull(\n", + " POOL_FILE, \n", + " start=SELECT_START, \n", + " end=SELECT_END, \n", + " silence_warnings=True\n", + ")\n", + "\n", + "print(len(data.symbols))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "data = data.select([\n", + " k \n", + " for k, v in data.data.items() \n", + " if not v.isnull().any().any()\n", + "])\n", + "\n", + "print(len(data.symbols))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "@vbt.parameterized(\n", + " merge_func=\"concat\", \n", + " engine=\"pathos\",\n", + " distribute=\"chunks\",\n", + " n_chunks=\"auto\"\n", + ")\n", + "def coint_pvalue(close, s1, s2):\n", + " import statsmodels.tsa.stattools as ts\n", + " import numpy as np\n", + " return ts.coint(np.log(close[s1]), np.log(close[s2]))[1]\n", + "\n", + "COINT_FILE = \"temp/coint_pvalues.pickle\"\n", + "\n", + "# vbt.remove_file(COINT_FILE, missing_ok=True)\n", + "if not vbt.file_exists(COINT_FILE):\n", + " coint_pvalues = coint_pvalue(\n", + " data.close,\n", + " vbt.Param(data.symbols, condition=\"s1 != s2\"),\n", + " vbt.Param(data.symbols)\n", + " )\n", + " vbt.save(coint_pvalues, COINT_FILE)\n", + "else:\n", + " coint_pvalues = vbt.load(COINT_FILE)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "coint_pvalues = coint_pvalues.sort_values()\n", + "\n", + "print(coint_pvalues)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "S1, S2 = \"ALGOUSDT\", \"QTUMUSDT\"\n", + "\n", + "data.plot(column=\"Close\", symbol=[S1, S2], 
base=1).show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "S1_log = np.log(data.get(\"Close\", S1))\n", + "S2_log = np.log(data.get(\"Close\", S2))\n", + "log_diff = (S2_log - S1_log).rename(\"Log diff\")\n", + "fig = log_diff.vbt.plot()\n", + "fig.add_hline(y=log_diff.mean(), line_color=\"yellow\", line_dash=\"dot\")\n", + "fig.show_svg()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Testing" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "DATA_FILE = \"temp/data.pickle\"\n", + "\n", + "# vbt.remove_file(DATA_FILE, missing_ok=True)\n", + "if not vbt.file_exists(DATA_FILE):\n", + " data = vbt.BinanceData.pull(\n", + " [S1, S2], \n", + " start=SELECT_END,\n", + " end=END, \n", + " timeframe=\"hourly\"\n", + " )\n", + " vbt.save(data, DATA_FILE)\n", + "else:\n", + " data = vbt.load(DATA_FILE)\n", + "\n", + "print(len(data.index))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Level: Researcher" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import scipy.stats as st\n", + "\n", + "WINDOW = 24 * 30\n", + "UPPER = st.norm.ppf(1 - 0.05 / 2)\n", + "LOWER = -st.norm.ppf(1 - 0.05 / 2)\n", + "\n", + "S1_close = data.get(\"Close\", S1)\n", + "S2_close = data.get(\"Close\", S2)\n", + "ols = vbt.OLS.run(S1_close, S2_close, window=vbt.Default(WINDOW))\n", + "spread = ols.error.rename(\"Spread\")\n", + "zscore = ols.zscore.rename(\"Z-score\")\n", + "print(pd.concat((spread, zscore), axis=1))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "upper_crossed = zscore.vbt.crossed_above(UPPER)\n", + "lower_crossed = zscore.vbt.crossed_below(LOWER)\n", + "\n", + "fig = zscore.vbt.plot()\n", + "fig.add_hline(y=UPPER, line_color=\"orangered\", 
line_dash=\"dot\")\n", + "fig.add_hline(y=0, line_color=\"yellow\", line_dash=\"dot\")\n", + "fig.add_hline(y=LOWER, line_color=\"limegreen\", line_dash=\"dot\")\n", + "upper_crossed.vbt.signals.plot_as_exits(zscore, fig=fig)\n", + "lower_crossed.vbt.signals.plot_as_entries(zscore, fig=fig)\n", + "fig.show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "long_entries = data.symbol_wrapper.fill(False)\n", + "short_entries = data.symbol_wrapper.fill(False)\n", + "\n", + "short_entries.loc[upper_crossed, S1] = True\n", + "long_entries.loc[upper_crossed, S2] = True\n", + "long_entries.loc[lower_crossed, S1] = True\n", + "short_entries.loc[lower_crossed, S2] = True\n", + "\n", + "print(long_entries.sum())\n", + "print(short_entries.sum())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf = vbt.Portfolio.from_signals(\n", + " data,\n", + " entries=long_entries,\n", + " short_entries=short_entries,\n", + " size=10,\n", + " size_type=\"valuepercent100\",\n", + " group_by=True,\n", + " cash_sharing=True,\n", + " call_seq=\"auto\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig = pf.plot_allocations()\n", + "rebalancing_dates = data.index[np.unique(pf.orders.idx.values)]\n", + "for date in rebalancing_dates:\n", + " fig.add_vline(x=date, line_color=\"teal\", line_dash=\"dot\")\n", + "fig.show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf.stats()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "allocations = data.symbol_wrapper.fill()\n", + "allocations.loc[upper_crossed, S1] = -0.1\n", + "allocations.loc[upper_crossed, S2] = 0.1\n", + "allocations.loc[lower_crossed, S1] = 0.1\n", + "allocations.loc[lower_crossed, 
S2] = -0.1\n", + "pfo = vbt.PortfolioOptimizer.from_filled_allocations(allocations)\n", + "pfo.plot().show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(pfo.allocations)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf = pfo.simulate(data, pf_method=\"from_signals\")\n", + "pf.total_return" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "PTS_expr = \"\"\"\n", + " PTS:\n", + " x = @in_close.iloc[:, 0]\n", + " y = @in_close.iloc[:, 1]\n", + " ols = vbt.OLS.run(x, y, window=@p_window, hide_params=True)\n", + " upper = st.norm.ppf(1 - @p_upper_alpha / 2)\n", + " lower = -st.norm.ppf(1 - @p_lower_alpha / 2)\n", + " upper_crossed = ols.zscore.vbt.crossed_above(upper)\n", + " lower_crossed = ols.zscore.vbt.crossed_below(lower)\n", + " long_entries = wrapper.fill(False)\n", + " short_entries = wrapper.fill(False)\n", + " short_entries.loc[upper_crossed, x.name] = True\n", + " long_entries.loc[upper_crossed, y.name] = True\n", + " long_entries.loc[lower_crossed, x.name] = True\n", + " short_entries.loc[lower_crossed, y.name] = True\n", + " long_entries, short_entries\n", + "\"\"\"\n", + "\n", + "PTS = vbt.IF.from_expr(PTS_expr, keep_pd=True, st=st)\n", + "vbt.phelp(PTS.run)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "WINDOW_SPACE = np.arange(5, 50).tolist()\n", + "ALPHA_SPACE = (np.arange(1, 100) / 1000).tolist()\n", + "\n", + "long_entries, short_entries = data.run(\n", + " PTS, \n", + " window=WINDOW_SPACE,\n", + " upper_alpha=ALPHA_SPACE,\n", + " lower_alpha=ALPHA_SPACE,\n", + " param_product=True,\n", + " random_subset=1000,\n", + " seed=42,\n", + " unpack=True\n", + ")\n", + "print(long_entries.columns)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, 
+ "outputs": [], + "source": [ + "pf = vbt.Portfolio.from_signals(\n", + " data,\n", + " entries=long_entries,\n", + " short_entries=short_entries,\n", + " size=10,\n", + " size_type=\"valuepercent100\",\n", + " group_by=vbt.ExceptLevel(\"symbol\"),\n", + " cash_sharing=True,\n", + " call_seq=\"auto\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "opt_results = pd.concat((\n", + " pf.total_return,\n", + " pf.trades.expectancy,\n", + "), axis=1)\n", + "print(opt_results.sort_values(by=\"total_return\", ascending=False))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "best_index = opt_results.idxmax()[\"expectancy\"]\n", + "best_long_entries = long_entries[best_index]\n", + "best_short_entries = short_entries[best_index]\n", + "STOP_SPACE = [np.nan] + np.arange(1, 100).tolist()\n", + "\n", + "pf = vbt.Portfolio.from_signals(\n", + " data,\n", + " entries=best_long_entries,\n", + " short_entries=best_short_entries,\n", + " size=10,\n", + " size_type=\"valuepercent100\",\n", + " group_by=vbt.ExceptLevel(\"symbol\"),\n", + " cash_sharing=True,\n", + " call_seq=\"auto\",\n", + " sl_stop=vbt.Param(STOP_SPACE),\n", + " tsl_stop=vbt.Param(STOP_SPACE),\n", + " tp_stop=vbt.Param(STOP_SPACE),\n", + " delta_format=\"percent100\",\n", + " stop_exit_price=\"close\",\n", + " broadcast_kwargs=dict(random_subset=1000, seed=42)\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "opt_results = pd.concat((\n", + " pf.total_return,\n", + " pf.trades.expectancy,\n", + "), axis=1)\n", + "print(opt_results.sort_values(by=\"total_return\", ascending=False))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def plot_metric_by_stop(stop_name, metric_name, stat_name, smooth):\n", + " from scipy.signal import 
savgol_filter\n", + "\n", + " values = pf.deep_getattr(metric_name)\n", + " values = values.vbt.select_levels(stop_name)\n", + " values = getattr(values.groupby(values.index), stat_name)()\n", + " smooth_values = savgol_filter(values, smooth, 1)\n", + " smooth_values = values.vbt.wrapper.wrap(smooth_values)\n", + " fig = values.rename(metric_name).vbt.plot()\n", + " smooth_values.rename(f\"{metric_name} (smoothed)\").vbt.plot(\n", + " trace_kwargs=dict(line=dict(dash=\"dot\", color=\"yellow\")),\n", + " fig=fig, \n", + " )\n", + " return fig\n", + "\n", + "plot_metric_by_stop(\n", + " \"sl_stop\", \n", + " \"trades.expectancy\", \n", + " \"median\",\n", + " 10\n", + ").show_svg()\n", + "plot_metric_by_stop(\n", + " \"tsl_stop\", \n", + " \"trades.expectancy\", \n", + " \"median\",\n", + " 10\n", + ").show_svg()\n", + "plot_metric_by_stop(\n", + " \"tp_stop\", \n", + " \"trades.expectancy\", \n", + " \"median\",\n", + " 10\n", + ").show_svg()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Level: Engineer" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "@njit(nogil=True)\n", + "def pt_signals_nb(close, window=WINDOW, upper=UPPER, lower=LOWER):\n", + " x = np.expand_dims(close[:, 0], 1)\n", + " y = np.expand_dims(close[:, 1], 1)\n", + " _, _, zscore = vbt.ind_nb.ols_nb(x, y, window)\n", + " zscore_1d = zscore[:, 0]\n", + " upper_ts = np.full_like(zscore_1d, upper, dtype=np.float_)\n", + " lower_ts = np.full_like(zscore_1d, lower, dtype=np.float_)\n", + " upper_crossed = vbt.nb.crossed_above_1d_nb(zscore_1d, upper_ts)\n", + " lower_crossed = vbt.nb.crossed_above_1d_nb(lower_ts, zscore_1d)\n", + " long_entries = np.full_like(close, False, dtype=np.bool_)\n", + " short_entries = np.full_like(close, False, dtype=np.bool_)\n", + " short_entries[upper_crossed, 0] = True\n", + " long_entries[upper_crossed, 1] = True\n", + " long_entries[lower_crossed, 0] = True\n", + " 
short_entries[lower_crossed, 1] = True\n", + " return long_entries, short_entries" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "long_entries, short_entries = pt_signals_nb(data.close.values)\n", + "long_entries = data.symbol_wrapper.wrap(long_entries)\n", + "short_entries = data.symbol_wrapper.wrap(short_entries)\n", + "\n", + "print(long_entries.sum())\n", + "print(short_entries.sum())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "@njit(nogil=True)\n", + "def pt_portfolio_nb(\n", + " open, \n", + " high, \n", + " low, \n", + " close,\n", + " long_entries,\n", + " short_entries,\n", + " sl_stop=np.nan,\n", + " tsl_stop=np.nan,\n", + " tp_stop=np.nan,\n", + "):\n", + " target_shape = close.shape\n", + " group_lens = np.array([2])\n", + " sim_out = vbt.pf_nb.from_signals_nb(\n", + " target_shape=target_shape,\n", + " group_lens=group_lens,\n", + " auto_call_seq=True,\n", + " open=open,\n", + " high=high,\n", + " low=low,\n", + " close=close,\n", + " long_entries=long_entries,\n", + " short_entries=short_entries,\n", + " size=10,\n", + " size_type=vbt.pf_enums.SizeType.ValuePercent100,\n", + " sl_stop=sl_stop,\n", + " tsl_stop=tsl_stop,\n", + " tp_stop=tp_stop,\n", + " delta_format=vbt.pf_enums.DeltaFormat.Percent100,\n", + " stop_exit_price=vbt.pf_enums.StopExitPrice.Close\n", + " )\n", + " return sim_out" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sim_out = pt_portfolio_nb(\n", + " data.open.values,\n", + " data.high.values,\n", + " data.low.values,\n", + " data.close.values,\n", + " long_entries.values,\n", + " short_entries.values\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf = vbt.Portfolio(\n", + " data.symbol_wrapper.regroup(group_by=True),\n", + " sim_out,\n", + " 
open=data.open,\n", + " high=data.high,\n", + " low=data.low,\n", + " close=data.close,\n", + " cash_sharing=True,\n", + " init_cash=100\n", + ")\n", + "\n", + "print(pf.total_return)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "@njit(nogil=True)\n", + "def pt_metrics_nb(close, sim_out):\n", + " target_shape = close.shape\n", + " group_lens = np.array([2])\n", + " filled_close = vbt.nb.fbfill_nb(close)\n", + " col_map = vbt.rec_nb.col_map_nb(\n", + " col_arr=sim_out.order_records[\"col\"], \n", + " n_cols=target_shape[1]\n", + " )\n", + " total_profit = vbt.pf_nb.total_profit_nb(\n", + " target_shape=target_shape,\n", + " close=filled_close,\n", + " order_records=sim_out.order_records,\n", + " col_map=col_map\n", + " )\n", + " total_profit_grouped = vbt.pf_nb.total_profit_grouped_nb(\n", + " total_profit=total_profit,\n", + " group_lens=group_lens,\n", + " )[0]\n", + " total_return = total_profit_grouped / 100\n", + " trade_records = vbt.pf_nb.get_exit_trades_nb(\n", + " order_records=sim_out.order_records, \n", + " close=filled_close, \n", + " col_map=col_map\n", + " )\n", + " trade_records = trade_records[\n", + " trade_records[\"status\"] == vbt.pf_enums.TradeStatus.Closed\n", + " ]\n", + " expectancy = vbt.pf_nb.expectancy_reduce_nb(\n", + " pnl_arr=trade_records[\"pnl\"]\n", + " )\n", + " return total_return, expectancy" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pt_metrics_nb(data.close.values, sim_out)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "@njit(nogil=True)\n", + "def pt_pipeline_nb(\n", + " open, \n", + " high, \n", + " low, \n", + " close,\n", + " window=WINDOW,\n", + " upper=UPPER,\n", + " lower=LOWER,\n", + " sl_stop=np.nan,\n", + " tsl_stop=np.nan,\n", + " tp_stop=np.nan,\n", + "):\n", + " long_entries, short_entries = 
pt_signals_nb(\n", + " close, \n", + " window=window, \n", + " upper=upper, \n", + " lower=lower\n", + " )\n", + " sim_out = pt_portfolio_nb(\n", + " open,\n", + " high,\n", + " low,\n", + " close,\n", + " long_entries,\n", + " short_entries,\n", + " sl_stop=sl_stop,\n", + " tsl_stop=tsl_stop,\n", + " tp_stop=tp_stop\n", + " )\n", + " return pt_metrics_nb(close, sim_out)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pt_pipeline_nb(\n", + " data.open.values,\n", + " data.high.values,\n", + " data.low.values,\n", + " data.close.values\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%timeit\n", + "pt_pipeline_nb(\n", + " data.open.values,\n", + " data.high.values,\n", + " data.low.values,\n", + " data.close.values\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "param_pt_pipeline = vbt.parameterized(\n", + " pt_pipeline_nb, \n", + " merge_func=\"concat\",\n", + " seed=42,\n", + " engine=\"threadpool\",\n", + " chunk_len=\"auto\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "UPPER_SPACE = [st.norm.ppf(1 - x / 2) for x in ALPHA_SPACE]\n", + "LOWER_SPACE = [-st.norm.ppf(1 - x / 2) for x in ALPHA_SPACE]\n", + "POPT_FILE = \"temp/param_opt.pickle\"\n", + "\n", + "# vbt.remove_file(POPT_FILE, missing_ok=True)\n", + "if not vbt.file_exists(POPT_FILE):\n", + " param_opt = param_pt_pipeline(\n", + " data.open.values,\n", + " data.high.values,\n", + " data.low.values,\n", + " data.close.values,\n", + " window=vbt.Param(WINDOW_SPACE),\n", + " upper=vbt.Param(UPPER_SPACE),\n", + " lower=vbt.Param(LOWER_SPACE)\n", + " )\n", + " vbt.save(param_opt, POPT_FILE)\n", + "else:\n", + " param_opt = vbt.load(POPT_FILE)\n", + "\n", + "total_return, expectancy = param_opt" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(total_return)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "grouped_metric = total_return.groupby(level=[\"upper\", \"lower\"]).mean()\n", + "grouped_metric.vbt.heatmap(\n", + " trace_kwargs=dict(colorscale=\"RdBu\", zmid=0),\n", + " yaxis=dict(autorange=\"reversed\")\n", + ").show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "@njit(nogil=True)\n", + "def pt_pipeline_mult_nb(\n", + " n_params: int,\n", + " open: tp.Array2d, \n", + " high: tp.Array2d, \n", + " low: tp.Array2d, \n", + " close: tp.Array2d,\n", + " window: tp.FlexArray1dLike = WINDOW,\n", + " upper: tp.FlexArray1dLike = UPPER,\n", + " lower: tp.FlexArray1dLike = LOWER,\n", + " sl_stop: tp.FlexArray1dLike = np.nan,\n", + " tsl_stop: tp.FlexArray1dLike = np.nan,\n", + " tp_stop: tp.FlexArray1dLike = np.nan,\n", + "):\n", + " window_ = vbt.to_1d_array_nb(np.asarray(window))\n", + " upper_ = vbt.to_1d_array_nb(np.asarray(upper))\n", + " lower_ = vbt.to_1d_array_nb(np.asarray(lower))\n", + " sl_stop_ = vbt.to_1d_array_nb(np.asarray(sl_stop))\n", + " tsl_stop_ = vbt.to_1d_array_nb(np.asarray(tsl_stop))\n", + " tp_stop_ = vbt.to_1d_array_nb(np.asarray(tp_stop))\n", + "\n", + " total_return = np.empty(n_params, dtype=np.float_)\n", + " expectancy = np.empty(n_params, dtype=np.float_)\n", + "\n", + " for i in range(n_params):\n", + " total_return[i], expectancy[i] = pt_pipeline_nb(\n", + " open,\n", + " high,\n", + " low,\n", + " close,\n", + " window=vbt.flex_select_1d_nb(window_, i),\n", + " upper=vbt.flex_select_1d_nb(upper_, i),\n", + " lower=vbt.flex_select_1d_nb(lower_, i),\n", + " sl_stop=vbt.flex_select_1d_nb(sl_stop_, i),\n", + " tsl_stop=vbt.flex_select_1d_nb(tsl_stop_, i),\n", + " tp_stop=vbt.flex_select_1d_nb(tp_stop_, i),\n", + " )\n", + " return 
total_return, expectancy" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pt_pipeline_mult_nb(\n", + " 3,\n", + " data.open.values,\n", + " data.high.values,\n", + " data.low.values,\n", + " data.close.values,\n", + " window=np.array([10, 20, 30])\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "chunked_pt_pipeline = vbt.chunked(\n", + " pt_pipeline_mult_nb,\n", + " size=vbt.ArgSizer(arg_query=\"n_params\"),\n", + " arg_take_spec=dict(\n", + " n_params=vbt.CountAdapter(),\n", + " open=None,\n", + " high=None,\n", + " low=None,\n", + " close=None,\n", + " window=vbt.FlexArraySlicer(),\n", + " upper=vbt.FlexArraySlicer(),\n", + " lower=vbt.FlexArraySlicer(),\n", + " sl_stop=vbt.FlexArraySlicer(),\n", + " tsl_stop=vbt.FlexArraySlicer(),\n", + " tp_stop=vbt.FlexArraySlicer()\n", + " ),\n", + " chunk_len=1000,\n", + " merge_func=\"concat\",\n", + " execute_kwargs=dict(\n", + " chunk_len=\"auto\",\n", + " engine=\"threadpool\",\n", + " )\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "param_product, param_index = vbt.combine_params(\n", + " dict(\n", + " window=vbt.Param(WINDOW_SPACE),\n", + " upper=vbt.Param(UPPER_SPACE),\n", + " lower=vbt.Param(LOWER_SPACE)\n", + " )\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "COPT_FILE = \"temp/chunked_opt.pickle\"\n", + "\n", + "# vbt.remove_file(COPT_FILE, missing_ok=True)\n", + "if not vbt.file_exists(COPT_FILE):\n", + " chunked_opt = chunked_pt_pipeline(\n", + " len(param_index),\n", + " data.open.values,\n", + " data.high.values,\n", + " data.low.values,\n", + " data.close.values,\n", + " window=param_product[\"window\"],\n", + " upper=param_product[\"upper\"],\n", + " lower=param_product[\"lower\"]\n", + " )\n", + " 
vbt.save(chunked_opt, COPT_FILE)\n", + "else:\n", + " chunked_opt = vbt.load(COPT_FILE)\n", + "\n", + "total_return, expectancy = chunked_opt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "total_return = pd.Series(total_return, index=param_index)\n", + "expectancy = pd.Series(expectancy, index=param_index)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "GRID_LEN = len(WINDOW_SPACE) * \\\n", + " len(UPPER_SPACE) * \\\n", + " len(LOWER_SPACE) * \\\n", + " len(STOP_SPACE) ** 3\n", + "print(GRID_LEN)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "GRID = dict(\n", + " window=WINDOW_SPACE,\n", + " upper=UPPER_SPACE,\n", + " lower=LOWER_SPACE,\n", + " sl_stop=STOP_SPACE,\n", + " tsl_stop=STOP_SPACE,\n", + " tp_stop=STOP_SPACE,\n", + ")\n", + "vbt.pprint(vbt.pick_from_param_grid(GRID, 123_456_789))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "FOUND_FILE = \"temp/found.pickle\"\n", + "BEST_N = 100\n", + "BEST_TH = 1.0\n", + "CHUNK_LEN = 10_000\n", + "\n", + "# vbt.remove_file(FOUND_FILE, missing_ok=True)\n", + "if vbt.file_exists(FOUND_FILE):\n", + " found = vbt.load(FOUND_FILE)\n", + "else:\n", + " found = None\n", + "with (\n", + " vbt.ProgressBar(\n", + " desc=\"Found\", \n", + " initial=0 if found is None else len(found),\n", + " total=BEST_N\n", + " ) as pbar1,\n", + " vbt.ProgressBar(\n", + " desc=\"Processed\"\n", + " ) as pbar2\n", + "):\n", + " while found is None or len(found) < BEST_N:\n", + " param_df = pd.DataFrame([\n", + " vbt.pick_from_param_grid(GRID) \n", + " for _ in range(CHUNK_LEN)\n", + " ])\n", + " param_index = pd.MultiIndex.from_frame(param_df)\n", + " _, expectancy = chunked_pt_pipeline(\n", + " CHUNK_LEN,\n", + " data.open.values,\n", + " data.high.values,\n", + " 
data.low.values,\n", + " data.close.values,\n", + " window=param_df[\"window\"],\n", + " upper=param_df[\"upper\"],\n", + " lower=param_df[\"lower\"],\n", + " sl_stop=param_df[\"sl_stop\"],\n", + " tsl_stop=param_df[\"tsl_stop\"],\n", + " tp_stop=param_df[\"tp_stop\"],\n", + " _chunk_len=None,\n", + " _execute_kwargs=dict(\n", + " chunk_len=None\n", + " )\n", + " )\n", + " expectancy = pd.Series(expectancy, index=param_index)\n", + " best_mask = expectancy >= BEST_TH\n", + " if best_mask.any():\n", + " best = expectancy[best_mask]\n", + " if found is None:\n", + " found = best\n", + " else:\n", + " found = pd.concat((found, best))\n", + " found = found[~found.index.duplicated(keep=\"first\")]\n", + " vbt.save(found, FOUND_FILE)\n", + " pbar1.update_to(len(found))\n", + " pbar1.refresh()\n", + " pbar2.update(len(expectancy))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def get_param_median(param):\n", + " return found.index.get_level_values(param).to_series().median()\n", + "\n", + "pt_pipeline_nb(\n", + " data.open.values, \n", + " data.high.values, \n", + " data.low.values, \n", + " data.close.values,\n", + " window=int(get_param_median(\"window\")),\n", + " upper=get_param_median(\"upper\"),\n", + " lower=get_param_median(\"lower\"),\n", + " sl_stop=get_param_median(\"sl_stop\"),\n", + " tsl_stop=get_param_median(\"tsl_stop\"),\n", + " tp_stop=get_param_median(\"tp_stop\")\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import optuna\n", + "\n", + "optuna.logging.disable_default_handler()\n", + "optuna.logging.set_verbosity(optuna.logging.WARNING)\n", + "\n", + "def objective(trial):\n", + " window = trial.suggest_categorical(\"window\", WINDOW_SPACE)\n", + " upper = trial.suggest_categorical(\"upper\", UPPER_SPACE)\n", + " lower = trial.suggest_categorical(\"lower\", LOWER_SPACE)\n", + " sl_stop = 
trial.suggest_categorical(\"sl_stop\", STOP_SPACE)\n", + " tsl_stop = trial.suggest_categorical(\"tsl_stop\", STOP_SPACE)\n", + " tp_stop = trial.suggest_categorical(\"tp_stop\", STOP_SPACE)\n", + " total_return, expectancy = pt_pipeline_nb(\n", + " data.open.values,\n", + " data.high.values,\n", + " data.low.values,\n", + " data.close.values,\n", + " window=window,\n", + " upper=upper,\n", + " lower=lower,\n", + " sl_stop=sl_stop,\n", + " tsl_stop=tsl_stop,\n", + " tp_stop=tp_stop\n", + " )\n", + " if np.isnan(total_return):\n", + " raise optuna.TrialPruned()\n", + " if np.isnan(expectancy):\n", + " raise optuna.TrialPruned()\n", + " return total_return, expectancy\n", + "\n", + "study = optuna.create_study(directions=[\"maximize\", \"maximize\"])\n", + "study.optimize(objective, n_trials=1000)\n", + "\n", + "trials_df = study.trials_dataframe(attrs=[\"params\", \"values\"])\n", + "trials_df.set_index([\n", + " \"params_window\", \n", + " \"params_upper\", \n", + " \"params_lower\",\n", + " \"params_sl_stop\",\n", + " \"params_tsl_stop\",\n", + " \"params_tp_stop\"\n", + "], inplace=True)\n", + "trials_df.index.rename([\n", + " \"window\", \n", + " \"upper\", \n", + " \"lower\",\n", + " \"sl_stop\",\n", + " \"tsl_stop\",\n", + " \"tp_stop\"\n", + "], inplace=True)\n", + "trials_df.columns = [\"total_return\", \"expectancy\"]\n", + "trials_df = trials_df[~trials_df.index.duplicated(keep=\"first\")]\n", + "print(trials_df.sort_values(by=\"total_return\", ascending=False))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Level: Architect" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "InOutputs = namedtuple(\"InOutputs\", [\"spread\", \"zscore\"])\n", + "\n", + "@njit(nogil=True, boundscheck=True)\n", + "def can_execute_nb(c, wait_days):\n", + " if c.order_counts[c.col] == 0:\n", + " return True\n", + " last_order = c.order_records[c.order_counts[c.col] - 1, c.col]\n", 
+ " ns_delta = c.index[c.i] - c.index[last_order.idx]\n", + " if ns_delta >= wait_days * vbt.dt_nb.d_ns:\n", + " return True\n", + " return False\n", + "\n", + "@njit(nogil=True, boundscheck=True)\n", + "def create_signals_nb(c, upper, lower, wait_days):\n", + " _upper = vbt.pf_nb.select_nb(c, upper)\n", + " _lower = vbt.pf_nb.select_nb(c, lower)\n", + " _wait_days = vbt.pf_nb.select_nb(c, wait_days)\n", + "\n", + " if c.i > 0:\n", + " prev_zscore = c.in_outputs.zscore[c.i - 1, c.group]\n", + " zscore = c.in_outputs.zscore[c.i, c.group]\n", + " if prev_zscore < _upper and zscore > _upper:\n", + " if can_execute_nb(c, _wait_days):\n", + " if c.col % 2 == 0:\n", + " return False, False, True, False\n", + " return True, False, False, False\n", + " if prev_zscore > _lower and zscore < _lower:\n", + " if can_execute_nb(c, _wait_days):\n", + " if c.col % 2 == 0:\n", + " return True, False, False, False\n", + " return False, False, True, False\n", + " return False, False, False, False\n", + "\n", + "@njit(nogil=True, boundscheck=True)\n", + "def signal_func_nb(c, window, upper, lower, wait_days):\n", + " _window = vbt.pf_nb.select_nb(c, window)\n", + " \n", + " if c.col % 2 == 0:\n", + " x = vbt.pf_nb.select_nb(c, c.close, col=c.col)\n", + " y = vbt.pf_nb.select_nb(c, c.close, col=c.col + 1)\n", + " c.in_outputs.spread[c.i, c.group] = np.log(y) - np.log(x)\n", + " \n", + " window_start = c.i - _window + 1\n", + " window_end = c.i + 1\n", + " if window_start >= 0:\n", + " s = c.in_outputs.spread[window_start : window_end, c.group]\n", + " s_mean = np.nanmean(s)\n", + " s_std = np.nanstd(s)\n", + " c.in_outputs.zscore[c.i, c.group] = (s[-1] - s_mean) / s_std\n", + " return create_signals_nb(c, upper, lower, wait_days)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "WAIT_DAYS = 30\n", + "\n", + "def iter_pt_portfolio(\n", + " window=WINDOW, \n", + " upper=UPPER, \n", + " lower=LOWER, \n", + " 
wait_days=WAIT_DAYS,\n", + " signal_func_nb=signal_func_nb,\n", + " more_signal_args=(),\n", + " **kwargs\n", + "):\n", + " return vbt.Portfolio.from_signals(\n", + " data,\n", + " broadcast_named_args=dict(\n", + " window=window,\n", + " upper=upper,\n", + " lower=lower,\n", + " wait_days=wait_days\n", + " ),\n", + " in_outputs=vbt.RepEval(\"\"\"\n", + " InOutputs(\n", + " np.full((target_shape[0], target_shape[1] // 2), np.nan), \n", + " np.full((target_shape[0], target_shape[1] // 2), np.nan)\n", + " )\n", + " \"\"\", context=dict(InOutputs=InOutputs)),\n", + " signal_func_nb=signal_func_nb,\n", + " signal_args=(\n", + " vbt.Rep(\"window\"),\n", + " vbt.Rep(\"upper\"),\n", + " vbt.Rep(\"lower\"),\n", + " vbt.Rep(\"wait_days\"),\n", + " *more_signal_args\n", + " ),\n", + " size=10,\n", + " size_type=\"valuepercent100\",\n", + " group_by=vbt.ExceptLevel(\"symbol\"),\n", + " cash_sharing=True,\n", + " call_seq=\"auto\",\n", + " delta_format=\"percent100\",\n", + " stop_exit_price=\"close\",\n", + " **kwargs\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf = iter_pt_portfolio()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig = vbt.make_subplots(\n", + " rows=2, \n", + " cols=1, \n", + " vertical_spacing=0,\n", + " shared_xaxes=True\n", + ")\n", + "zscore = pf.get_in_output(\"zscore\").rename(\"Z-score\")\n", + "zscore.vbt.plot(\n", + " add_trace_kwargs=dict(row=1, col=1),\n", + " fig=fig\n", + ")\n", + "fig.add_hline(row=1, y=UPPER, line_color=\"orangered\", line_dash=\"dot\")\n", + "fig.add_hline(row=1, y=0, line_color=\"yellow\", line_dash=\"dot\")\n", + "fig.add_hline(row=1, y=LOWER, line_color=\"limegreen\", line_dash=\"dot\")\n", + "orders = pf.orders.regroup(group_by=False).iloc[:, 0]\n", + "exit_mask = orders.side_sell.get_pd_mask(idx_arr=\"signal_idx\")\n", + "entry_mask = 
orders.side_buy.get_pd_mask(idx_arr=\"signal_idx\")\n", + "upper_crossed = zscore.vbt.crossed_above(UPPER)\n", + "lower_crossed = zscore.vbt.crossed_below(LOWER)\n", + "(upper_crossed & ~exit_mask).vbt.signals.plot_as_exits(\n", + " pf.get_in_output(\"zscore\"),\n", + " trace_kwargs=dict(\n", + " name=\"Exits (ignored)\", \n", + " marker=dict(color=\"lightgray\"), \n", + " opacity=0.5\n", + " ),\n", + " add_trace_kwargs=dict(row=1, col=1),\n", + " fig=fig\n", + ")\n", + "(lower_crossed & ~entry_mask).vbt.signals.plot_as_entries(\n", + " pf.get_in_output(\"zscore\"),\n", + " trace_kwargs=dict(\n", + " name=\"Entries (ignored)\", \n", + " marker=dict(color=\"lightgray\"), \n", + " opacity=0.5\n", + " ),\n", + " add_trace_kwargs=dict(row=1, col=1),\n", + " fig=fig\n", + ")\n", + "exit_mask.vbt.signals.plot_as_exits(\n", + " pf.get_in_output(\"zscore\"),\n", + " add_trace_kwargs=dict(row=1, col=1),\n", + " fig=fig\n", + ")\n", + "entry_mask.vbt.signals.plot_as_entries(\n", + " pf.get_in_output(\"zscore\"),\n", + " add_trace_kwargs=dict(row=1, col=1),\n", + " fig=fig\n", + ")\n", + "pf.plot_allocations(\n", + " add_trace_kwargs=dict(row=2, col=1),\n", + " fig=fig\n", + ")\n", + "rebalancing_dates = data.index[np.unique(orders.idx.values)]\n", + "for date in rebalancing_dates:\n", + " fig.add_vline(row=2, x=date, line_color=\"teal\", line_dash=\"dot\")\n", + "fig.update_layout(height=600)\n", + "fig.show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "WAIT_SPACE = np.arange(30, 370, 5).tolist()\n", + "\n", + "pf = iter_pt_portfolio(wait_days=vbt.Param(WAIT_SPACE))\n", + "pf.orders.count().vbt.scatterplot(\n", + " xaxis_title=\"Wait days\",\n", + " yaxis_title=\"Order count\"\n", + ").show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "with (vbt.Timer() as timer, vbt.MemTracer() as tracer):\n", + " 
iter_pt_portfolio(wait_days=vbt.Param(WAIT_SPACE))\n", + "print(timer.elapsed())\n", + "print(tracer.peak_usage())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "zscore_state_dt = np.dtype(\n", + " [\n", + " (\"cumsum\", np.float_),\n", + " (\"cumsum_sq\", np.float_),\n", + " (\"nancnt\", np.int_)\n", + " ],\n", + " align=True,\n", + ")\n", + "\n", + "@njit(nogil=True, boundscheck=True)\n", + "def stream_signal_func_nb(\n", + " c, \n", + " window, \n", + " upper, \n", + " lower, \n", + " wait_days, \n", + " zscore_state\n", + "):\n", + " _window = vbt.pf_nb.select_nb(c, window)\n", + " \n", + " if c.col % 2 == 0:\n", + " x = vbt.pf_nb.select_nb(c, c.close, col=c.col)\n", + " y = vbt.pf_nb.select_nb(c, c.close, col=c.col + 1)\n", + " c.in_outputs.spread[c.i, c.group] = np.log(y) - np.log(x)\n", + " \n", + " value = c.in_outputs.spread[c.i, c.group]\n", + " pre_i = c.i - _window\n", + " if pre_i >= 0:\n", + " pre_window_value = c.in_outputs.spread[pre_i, c.group]\n", + " else:\n", + " pre_window_value = np.nan\n", + " zscore_in_state = vbt.enums.RollZScoreAIS(\n", + " i=c.i,\n", + " value=value,\n", + " pre_window_value=pre_window_value,\n", + " cumsum=zscore_state[\"cumsum\"][c.group],\n", + " cumsum_sq=zscore_state[\"cumsum_sq\"][c.group],\n", + " nancnt=zscore_state[\"nancnt\"][c.group],\n", + " window=_window,\n", + " minp=_window,\n", + " ddof=0\n", + " )\n", + " zscore_out_state = vbt.nb.rolling_zscore_acc_nb(zscore_in_state)\n", + " c.in_outputs.zscore[c.i, c.group] = zscore_out_state.value\n", + " zscore_state[\"cumsum\"][c.group] = zscore_out_state.cumsum\n", + " zscore_state[\"cumsum_sq\"][c.group] = zscore_out_state.cumsum_sq\n", + " zscore_state[\"nancnt\"][c.group] = zscore_out_state.nancnt\n", + " \n", + " return create_signals_nb(c, upper, lower, wait_days)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from 
functools import partial\n", + "\n", + "stream_pt_portfolio = partial(\n", + " iter_pt_portfolio,\n", + " signal_func_nb=stream_signal_func_nb,\n", + " more_signal_args=(\n", + " vbt.RepEval(\n", + " \"\"\"\n", + " zscore_state = np.empty(target_shape[1] // 2, dtype=zscore_state_dt)\n", + " zscore_state[\"cumsum\"] = 0.0\n", + " zscore_state[\"cumsum_sq\"] = 0.0\n", + " zscore_state[\"nancnt\"] = 0\n", + " zscore_state\n", + " \"\"\", \n", + " context=dict(zscore_state_dt=zscore_state_dt)\n", + " ),\n", + " )\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "stream_pf = stream_pt_portfolio()\n", + "print(stream_pf.total_return)\n", + "\n", + "pf = iter_pt_portfolio()\n", + "print(pf.total_return)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "stream_pt_portfolio(wait_days=vbt.Param(WAIT_SPACE)) # compilation\n", + "with (vbt.Timer() as timer, vbt.MemTracer() as tracer):\n", + " stream_pt_portfolio(wait_days=vbt.Param(WAIT_SPACE))\n", + "print(timer.elapsed())\n", + "print(tracer.peak_usage())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "chunked_stream_pt_portfolio = partial(\n", + " stream_pt_portfolio,\n", + " chunked=dict(\n", + " engine=\"threadpool\",\n", + " arg_take_spec=dict(\n", + " signal_args=vbt.ArgsTaker(\n", + " vbt.flex_array_gl_slicer,\n", + " vbt.flex_array_gl_slicer,\n", + " vbt.flex_array_gl_slicer,\n", + " vbt.flex_array_gl_slicer,\n", + " vbt.ArraySlicer(axis=0)\n", + " ),\n", + " in_outputs=vbt.SequenceTaker([\n", + " vbt.ArraySlicer(axis=1),\n", + " vbt.ArraySlicer(axis=1)\n", + " ])\n", + " )\n", + " )\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "chunked_stream_pt_portfolio(wait_days=vbt.Param(WAIT_SPACE)) # compilation\n", + "with (vbt.Timer() as 
timer, vbt.MemTracer() as tracer):\n", + " chunked_stream_pt_portfolio(wait_days=vbt.Param(WAIT_SPACE))\n", + "\n", + "print(timer.elapsed())\n", + "print(tracer.peak_usage())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/to_explore/notebooks/PatternsProjections.ipynb b/to_explore/notebooks/PatternsProjections.ipynb new file mode 100644 index 0000000..a9c8a82 --- /dev/null +++ b/to_explore/notebooks/PatternsProjections.ipynb @@ -0,0 +1,2742 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "16f4ed9b-5a03-4225-b54c-9a8be3f32cfa", + "metadata": {}, + "source": [ + "# Patterns and projections" + ] + }, + { + "cell_type": "markdown", + "id": "09993824-ddc7-4e30-8bb6-a1e86f6bf78c", + "metadata": {}, + "source": [ + "## Patterns" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f28adb62-732c-4fdb-90eb-553650314736", + "metadata": {}, + "outputs": [], + "source": [ + "from vectorbtpro import *\n", + "# whats_imported()\n", + "\n", + "vbt.settings.set_theme(\"dark\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b2745757-3cce-42f9-ac4c-2efe41ec4d20", + "metadata": {}, + "outputs": [], + "source": [ + "data = vbt.BinanceData.pull(\n", + " \"BTCUSDT\", \n", + " start=\"2020-06-01 UTC\", \n", + " end=\"2022-06-01 UTC\"\n", + ")\n", + "data.plot().show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8d28352b-b24d-4d9e-b2ff-8b893dc09da7", + "metadata": {}, + "outputs": [], + 
"source": [ + "data_window = data.loc[\"2021-09-25\":\"2021-11-25\"]\n", + "data_window.plot(plot_volume=False).show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7dfcdf27-18b3-4fe1-bcc1-8ebf681827e7", + "metadata": {}, + "outputs": [], + "source": [ + "price_window = data_window.hlc3\n", + "price_window.vbt.plot().show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a65950da-0df5-4371-8b50-54ded8fbfa7f", + "metadata": {}, + "outputs": [], + "source": [ + "pattern = np.array([1, 2, 3, 2, 3, 2])\n", + "pd.Series(pattern).vbt.plot().show_svg()" + ] + }, + { + "cell_type": "markdown", + "id": "ec71f516-e9ca-4238-befd-c2a30b9018ce", + "metadata": {}, + "source": [ + "### Interpolation" + ] + }, + { + "cell_type": "markdown", + "id": "9b36945d-dc5b-4cc1-a53b-683ac294a657", + "metadata": {}, + "source": [ + "#### Linear" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3b2eceec-da77-4cf5-bb5b-2355fdff5db2", + "metadata": {}, + "outputs": [], + "source": [ + "resized_pattern = vbt.nb.interp_resize_1d_nb(\n", + " pattern, 10, vbt.enums.InterpMode.Linear\n", + ")\n", + "resized_pattern" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "56ed8dd0-0192-4242-9a86-906c1cdf9982", + "metadata": {}, + "outputs": [], + "source": [ + "def plot_linear(n):\n", + " resized_pattern = vbt.nb.interp_resize_1d_nb(\n", + " pattern, n, vbt.enums.InterpMode.Linear\n", + " )\n", + " return pd.Series(resized_pattern).vbt.plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2e4d8724-6fc6-436e-b9aa-abdf9d97b12b", + "metadata": {}, + "outputs": [], + "source": [ + "plot_linear(7).show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f377c765-e433-4059-a757-50ba4da8a9c0", + "metadata": {}, + "outputs": [], + "source": [ + "resized_pattern = vbt.nb.interp_resize_1d_nb(\n", + " pattern, 7, vbt.enums.InterpMode.Linear\n", + ")\n", + "ratio 
= (len(pattern) - 1) / (len(resized_pattern) - 1)\n", + "new_points = np.arange(len(resized_pattern)) * ratio\n", + "fig = pd.Series(pattern).vbt.plot()\n", + "pd.Series(resized_pattern, index=new_points).vbt.scatterplot(fig=fig).show_svg()" + ] + }, + { + "cell_type": "markdown", + "id": "f0090d25-94e2-4be3-a4ae-e86f6f766228", + "metadata": {}, + "source": [ + "#### Nearest" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7727b0c3-2d09-43c1-be29-f4c0b40a6e36", + "metadata": {}, + "outputs": [], + "source": [ + "resized_pattern = vbt.nb.interp_resize_1d_nb(\n", + " pattern, 10, vbt.enums.InterpMode.Nearest\n", + ")\n", + "resized_pattern" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "13433f4a-b921-4be1-9f5f-8d3cc8b07eb4", + "metadata": {}, + "outputs": [], + "source": [ + "def plot_nearest(n):\n", + " resized_pattern = vbt.nb.interp_resize_1d_nb(\n", + " pattern, n, vbt.enums.InterpMode.Nearest\n", + " )\n", + " return pd.Series(resized_pattern).vbt.plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c6a581f9-446f-4424-b6a1-535aa9bd6248", + "metadata": {}, + "outputs": [], + "source": [ + "plot_nearest(7).show_svg()" + ] + }, + { + "cell_type": "markdown", + "id": "07bcf923-36ac-41c3-b660-a70d78917584", + "metadata": {}, + "source": [ + "#### Discrete" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "40e606f5-9bf1-41ef-acf2-86e595e73407", + "metadata": {}, + "outputs": [], + "source": [ + "resized_pattern = vbt.nb.interp_resize_1d_nb(\n", + " pattern, 10, vbt.enums.InterpMode.Discrete\n", + ")\n", + "resized_pattern" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "be23bbd3-9ab7-458b-bfcd-a8b0d8625420", + "metadata": {}, + "outputs": [], + "source": [ + "def plot_discrete(n):\n", + " resized_pattern = vbt.nb.interp_resize_1d_nb(\n", + " pattern, n, vbt.enums.InterpMode.Discrete\n", + " )\n", + " return pd.Series(resized_pattern).vbt.plot(\n", 
+ " trace_kwargs=dict(\n", + " line=dict(dash=\"dot\"), \n", + " connectgaps=True\n", + " )\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1091a1ec-3be9-49a3-a600-d47a45536131", + "metadata": {}, + "outputs": [], + "source": [ + "plot_discrete(7).show_svg()" + ] + }, + { + "cell_type": "markdown", + "id": "8d730164-6099-42cf-8bd0-f4b8b531a221", + "metadata": {}, + "source": [ + "#### Mixed" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d9f7a8c5-0361-43b3-bcdd-d68173155404", + "metadata": {}, + "outputs": [], + "source": [ + "resized_pattern = vbt.nb.interp_resize_1d_nb(\n", + " pattern, 10, vbt.enums.InterpMode.Mixed\n", + ")\n", + "resized_pattern" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0c7bc0c0-8f09-4191-a4e4-644fd067b8e1", + "metadata": {}, + "outputs": [], + "source": [ + "def plot_mixed(n):\n", + " lin_resized_pattern = vbt.nb.interp_resize_1d_nb(\n", + " pattern, n, vbt.enums.InterpMode.Linear\n", + " )\n", + " mix_resized_pattern = vbt.nb.interp_resize_1d_nb(\n", + " pattern, n, vbt.enums.InterpMode.Mixed\n", + " )\n", + " fig = pd.Series(lin_resized_pattern, name=\"Linear\").vbt.plot()\n", + " return pd.Series(mix_resized_pattern, name=\"Mixed\").vbt.plot(fig=fig)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7f44cc40-77ee-4e1d-9c4c-acb926793cf0", + "metadata": {}, + "outputs": [], + "source": [ + "plot_mixed(7).show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1a3ce6d1-8a17-4320-9421-91b6baf63672", + "metadata": {}, + "outputs": [], + "source": [ + "resized_pattern = vbt.nb.interp_resize_1d_nb(\n", + " pattern, len(price_window), vbt.enums.InterpMode.Mixed\n", + ")\n", + "resized_pattern.shape" + ] + }, + { + "cell_type": "markdown", + "id": "dd584ad8-1021-440f-8560-05e5bb00773f", + "metadata": {}, + "source": [ + "### Rescaling" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": 
"71c86abd-dfa5-474c-bad9-d8027dc6f7f1", + "metadata": {}, + "outputs": [], + "source": [ + "pattern_scale = (resized_pattern.min(), resized_pattern.max())\n", + "price_window_scale = (price_window.min(), price_window.max())\n", + "rescaled_pattern = vbt.utils.array_.rescale_nb(\n", + " resized_pattern, pattern_scale, price_window_scale\n", + ")\n", + "rescaled_pattern = pd.Series(rescaled_pattern, index=price_window.index)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "118dc7e7-a701-4ca9-986a-f3d0ba840a0d", + "metadata": {}, + "outputs": [], + "source": [ + "fig = price_window.vbt.plot()\n", + "rescaled_pattern.vbt.plot(\n", + " trace_kwargs=dict(\n", + " fill=\"tonexty\", \n", + " fillcolor=\"rgba(255, 100, 0, 0.25)\"\n", + " ), \n", + " fig=fig\n", + ").show_svg()" + ] + }, + { + "cell_type": "markdown", + "id": "3f2b7762-2194-48c3-9c01-6b545edfa00f", + "metadata": {}, + "source": [ + "#### Rebasing" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3cd899eb-3a74-4a7d-ac80-f84f70d2c582", + "metadata": {}, + "outputs": [], + "source": [ + "pct_pattern = np.array([1, 1.3, 1.6, 1.3, 1.6, 1.3])\n", + "resized_pct_pattern = vbt.nb.interp_resize_1d_nb(\n", + " pct_pattern, len(price_window), vbt.enums.InterpMode.Mixed\n", + ")\n", + "rebased_pattern = resized_pct_pattern / resized_pct_pattern[0]\n", + "rebased_pattern *= price_window.values[0]\n", + "rebased_pattern = pd.Series(rebased_pattern, index=price_window.index)\n", + "fig = price_window.vbt.plot()\n", + "rebased_pattern.vbt.plot(\n", + " trace_kwargs=dict(\n", + " fill=\"tonexty\", \n", + " fillcolor=\"rgba(255, 100, 0, 0.25)\"\n", + " ), \n", + " fig=fig\n", + ").show_svg()" + ] + }, + { + "cell_type": "markdown", + "id": "04a1cec7-cbd2-4653-8908-04e8439f3079", + "metadata": {}, + "source": [ + "### Similarity" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a91fb91-2c80-49ec-a3ba-fd548952b2b2", + "metadata": {}, + "outputs": [], + 
"source": [ + "abs_distances = np.abs(rescaled_pattern - price_window.values)\n", + "mae = abs_distances.sum()\n", + "max_abs_distances = np.column_stack((\n", + " (price_window.max() - rescaled_pattern), \n", + " (rescaled_pattern - price_window.min())\n", + ")).max(axis=1)\n", + "max_mae = max_abs_distances.sum()\n", + "similarity = 1 - mae / max_mae\n", + "similarity" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71c95949-c983-46b5-9393-e8d842d9652b", + "metadata": {}, + "outputs": [], + "source": [ + "quad_distances = (rescaled_pattern - price_window.values) ** 2\n", + "rmse = np.sqrt(quad_distances.sum())\n", + "max_quad_distances = np.column_stack((\n", + " (price_window.max() - rescaled_pattern), \n", + " (rescaled_pattern - price_window.min())\n", + ")).max(axis=1) ** 2\n", + "max_rmse = np.sqrt(max_quad_distances.sum())\n", + "similarity = 1 - rmse / max_rmse\n", + "similarity" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1a6017cf-5a86-4b3d-9aa9-18d5729817cf", + "metadata": {}, + "outputs": [], + "source": [ + "quad_distances = (rescaled_pattern - price_window.values) ** 2\n", + "mse = quad_distances.sum()\n", + "max_quad_distances = np.column_stack((\n", + " (price_window.max() - rescaled_pattern), \n", + " (rescaled_pattern - price_window.min())\n", + ")).max(axis=1) ** 2\n", + "max_mse = max_quad_distances.sum()\n", + "similarity = 1 - mse / max_mse\n", + "similarity" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d8eb54f6-9562-4a95-b793-c9f661835d01", + "metadata": {}, + "outputs": [], + "source": [ + "vbt.nb.pattern_similarity_nb(price_window.values, pattern)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8ff3ef28-c13d-4eb2-804f-bb4a008c60a0", + "metadata": {}, + "outputs": [], + "source": [ + "vbt.nb.pattern_similarity_nb(\n", + " price_window.values, \n", + " pct_pattern, \n", + " rescale_mode=vbt.enums.RescaleMode.Rebase\n", + ")" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "id": "bbba899c-eb0a-4c10-ba0a-da3d92db43ac", + "metadata": {}, + "outputs": [], + "source": [ + "vbt.nb.pattern_similarity_nb(\n", + " price_window.values, \n", + " pct_pattern, \n", + " interp_mode=vbt.enums.InterpMode.Nearest,\n", + " rescale_mode=vbt.enums.RescaleMode.Rebase,\n", + " distance_measure=vbt.enums.DistanceMeasure.RMSE\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "58aca67c-c274-4d11-a6b8-c8d68d45dc44", + "metadata": {}, + "outputs": [], + "source": [ + "price_window.vbt.plot_pattern(\n", + " pct_pattern, \n", + " interp_mode=\"nearest\",\n", + " rescale_mode=\"rebase\",\n", + " fill_distance=True\n", + ").show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9dc007d9-1cdf-4783-9217-4f032fee6284", + "metadata": {}, + "outputs": [], + "source": [ + "adj_pct_pattern = np.array([1, 1.3, 1.6, 1.45, 1.6, 1.3])\n", + "vbt.nb.pattern_similarity_nb(\n", + " price_window.values, \n", + " adj_pct_pattern, \n", + " interp_mode=vbt.enums.InterpMode.Nearest,\n", + " rescale_mode=vbt.enums.RescaleMode.Rebase,\n", + " distance_measure=vbt.enums.DistanceMeasure.RMSE\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "21f3b1f9-d97e-4c34-8f36-a316a8a26221", + "metadata": {}, + "outputs": [], + "source": [ + "price_window.vbt.plot_pattern(\n", + " adj_pct_pattern, \n", + " interp_mode=\"discrete\",\n", + " rescale_mode=\"rebase\",\n", + ").show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "deb354ad-8971-4d44-bbc5-fabccb5afc2e", + "metadata": {}, + "outputs": [], + "source": [ + "vbt.nb.pattern_similarity_nb(\n", + " price_window.values, \n", + " adj_pct_pattern, \n", + " interp_mode=vbt.enums.InterpMode.Discrete,\n", + " rescale_mode=vbt.enums.RescaleMode.Rebase,\n", + " distance_measure=vbt.enums.DistanceMeasure.RMSE\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": 
"71532171-5624-4c6b-9dd0-1853590b31f2", + "metadata": {}, + "source": [ + "#### Relative" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f72cd3c8-a561-485e-9ab6-30edefc53143", + "metadata": {}, + "outputs": [], + "source": [ + "abs_pct_distances = abs_distances / rescaled_pattern\n", + "pct_mae = abs_pct_distances.sum()\n", + "max_abs_pct_distances = max_abs_distances / rescaled_pattern\n", + "max_pct_mae = max_abs_pct_distances.sum()\n", + "similarity = 1 - pct_mae / max_pct_mae\n", + "similarity" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d4b8a531-000f-4837-a069-388897e709e1", + "metadata": {}, + "outputs": [], + "source": [ + "vbt.nb.pattern_similarity_nb(\n", + " price_window.values, \n", + " pct_pattern, \n", + " error_type=vbt.enums.ErrorType.Relative\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7784e836-786a-4e92-a605-431beb635e12", + "metadata": {}, + "outputs": [], + "source": [ + "vbt.nb.pattern_similarity_nb(\n", + " np.array([10, 30, 100]),\n", + " np.array([1, 2, 3]),\n", + " error_type=vbt.enums.ErrorType.Absolute\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bfcec566-fb2e-42b6-b8f1-03dc347f2986", + "metadata": {}, + "outputs": [], + "source": [ + "vbt.nb.pattern_similarity_nb(\n", + " np.array([10, 30, 100]),\n", + " np.array([1, 2, 3]),\n", + " error_type=vbt.enums.ErrorType.Relative\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "e2a99c01-cafb-443e-9851-a54cd04114c7", + "metadata": {}, + "source": [ + "#### Inverse" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fc894117-9148-4e6f-a812-c01f15aa8d66", + "metadata": {}, + "outputs": [], + "source": [ + "vbt.nb.pattern_similarity_nb(price_window.values, pattern, invert=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f932cc4d-885f-46b0-b306-e05eb3329d6b", + "metadata": {}, + "outputs": [], + "source": [ + 
"price_window.vbt.plot_pattern(pattern, invert=True).show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a5faa78d-c7c7-47f3-8847-bf19c4b4ee04", + "metadata": {}, + "outputs": [], + "source": [ + "pattern.max() + pattern.min() - pattern" + ] + }, + { + "cell_type": "markdown", + "id": "5e8bd3c8-4112-4c57-9b37-05b79a0f483c", + "metadata": {}, + "source": [ + "#### Max error" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2f2e0182-3e5e-4809-bde4-ff4aeaf33ce2", + "metadata": {}, + "outputs": [], + "source": [ + "vbt.nb.pattern_similarity_nb(\n", + " price_window.values, \n", + " pattern,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cbd19ab7-16d6-4c94-9ca1-bbba7ea8f88d", + "metadata": {}, + "outputs": [], + "source": [ + "vbt.nb.pattern_similarity_nb(\n", + " price_window.values, \n", + " pattern, \n", + " max_error=np.array([0.5, 0.5, 0.5, 0.5, 0.5, 0.5]),\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bf4b4ec8-9665-4400-9fa5-bebf204eeb4a", + "metadata": {}, + "outputs": [], + "source": [ + "vbt.nb.pattern_similarity_nb(\n", + " price_window.values, \n", + " pattern, \n", + " max_error=np.array([0.5]),\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e562481d-6e22-4d6f-99af-4a6e763fdbcd", + "metadata": {}, + "outputs": [], + "source": [ + "price_window.vbt.plot_pattern(\n", + " pattern, \n", + " max_error=0.5\n", + ").show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2b697b1e-6806-4813-84c0-59fc58b7c1ab", + "metadata": {}, + "outputs": [], + "source": [ + "vbt.nb.pattern_similarity_nb(\n", + " price_window.values, \n", + " pattern, \n", + " max_error=np.array([0.5]),\n", + " max_error_strict=True\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a20ef9a-11fe-418b-a6c5-1334826176bd", + "metadata": {}, + "outputs": [], + "source": [ + 
"vbt.nb.pattern_similarity_nb(\n", + " price_window.values, \n", + " adj_pct_pattern, \n", + " rescale_mode=vbt.enums.RescaleMode.Rebase,\n", + " max_error=np.array([0.2, 0.1, 0.05, 0.1, 0.05, 0.1]),\n", + " max_error_strict=True\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ab8bbd05-b4f0-421f-8180-05ad7c84087e", + "metadata": {}, + "outputs": [], + "source": [ + "vbt.nb.pattern_similarity_nb(\n", + " price_window.values, \n", + " pattern, \n", + " max_error=np.array([0.1]),\n", + " error_type=vbt.enums.ErrorType.Relative\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b6ff62e3-ccee-40bb-9e03-ab1e78f12547", + "metadata": {}, + "outputs": [], + "source": [ + "price_window.vbt.plot_pattern(\n", + " pattern, \n", + " max_error=0.1,\n", + " error_type=\"relative\"\n", + ").show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a741780d-857d-4111-9731-00ac63e8bdcb", + "metadata": {}, + "outputs": [], + "source": [ + "price_window.vbt.plot_pattern(\n", + " adj_pct_pattern, \n", + " rescale_mode=\"rebase\",\n", + " max_error=np.array([0.2, 0.1, 0.05, 0.1, 0.05, 0.1])\n", + ").show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71b9ca1e-8074-45f2-b62a-9e8fddb560a5", + "metadata": {}, + "outputs": [], + "source": [ + "vbt.nb.pattern_similarity_nb(\n", + " price_window.values, \n", + " adj_pct_pattern, \n", + " rescale_mode=vbt.enums.RescaleMode.Rebase,\n", + " max_error=np.array([0.2, 0.1, 0.05, 0.1, 0.05, 0.1]) + 0.05,\n", + " max_error_strict=True\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "bacece86-cc55-4499-959b-49eb7c99a8c8", + "metadata": {}, + "source": [ + "##### Interpolation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f92c333c-fe6e-41bc-9f17-fcd9d8846b24", + "metadata": {}, + "outputs": [], + "source": [ + "vbt.nb.pattern_similarity_nb(\n", + " price_window.values, \n", + " adj_pct_pattern, 
\n", + " rescale_mode=vbt.enums.RescaleMode.Rebase,\n", + " max_error=np.array([np.nan, np.nan, 0.1, np.nan, 0.1, np.nan]),\n", + " max_error_interp_mode=vbt.enums.InterpMode.Discrete,\n", + " max_error_strict=True\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0b4ea56d-a9fd-4b5e-b8be-4212ae0a2815", + "metadata": {}, + "outputs": [], + "source": [ + "price_window.vbt.plot_pattern(\n", + " adj_pct_pattern, \n", + " rescale_mode=\"rebase\",\n", + " max_error=np.array([np.nan, np.nan, 0.1, np.nan, 0.1, np.nan]),\n", + " max_error_interp_mode=\"discrete\"\n", + ").show_svg()" + ] + }, + { + "cell_type": "markdown", + "id": "f42d2d89-09a2-469c-9490-84267a973202", + "metadata": {}, + "source": [ + "##### Max distance" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dc55ea4c-35b3-4fef-952a-ece601444a72", + "metadata": {}, + "outputs": [], + "source": [ + "vbt.nb.pattern_similarity_nb(price_window.values, pattern)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "29587bc3-998e-4096-be6b-e4e91902e4ae", + "metadata": {}, + "outputs": [], + "source": [ + "vbt.nb.pattern_similarity_nb(\n", + " price_window.values, \n", + " pattern, \n", + " max_error=np.array([0.5]),\n", + " max_error_as_maxdist=True\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "481524c4-d6c6-4f2e-a862-f60f65149a84", + "metadata": {}, + "source": [ + "#### Further filters" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "112660d1-e7be-4f51-9db4-93b79a22fca9", + "metadata": {}, + "outputs": [], + "source": [ + "vbt.nb.pattern_similarity_nb(price_window.values, pattern, max_pct_change=0.3)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a3fe67b2-0532-45d3-a757-e03a8f05f7f5", + "metadata": {}, + "outputs": [], + "source": [ + "vbt.nb.pattern_similarity_nb(price_window.values, pattern, min_similarity=0.9)" + ] + }, + { + "cell_type": "markdown", + "id": 
"df50b2a9-7bf7-47c8-8292-91043252f9cc", + "metadata": {}, + "source": [ + "### Rolling similarity" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "97dc4e9d-9285-4909-9499-6792d44379d9", + "metadata": {}, + "outputs": [], + "source": [ + "price = data.hlc3\n", + "\n", + "similarity = price.vbt.rolling_pattern_similarity(\n", + " pattern, \n", + " window=30,\n", + " error_type=\"relative\",\n", + " max_error=0.05,\n", + " max_error_interp_mode=\"discrete\"\n", + ")\n", + "similarity.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "27d7ae88-d60d-4be7-be44-d148d3dcef1b", + "metadata": {}, + "outputs": [], + "source": [ + "end_row = similarity.argmax() + 1\n", + "start_row = end_row - 30\n", + "fig = data.iloc[start_row:end_row].plot(plot_volume=False)\n", + "price.iloc[start_row:end_row].vbt.plot_pattern(\n", + " pattern, \n", + " error_type=\"relative\",\n", + " max_error=0.05,\n", + " max_error_interp_mode=\"discrete\",\n", + " plot_obj=False, \n", + " fig=fig\n", + ").show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0a018943-adff-4249-a2b4-d6a7cb34e62f", + "metadata": {}, + "outputs": [], + "source": [ + "end_row = similarity.argmin() + 1\n", + "start_row = end_row - 30\n", + "fig = data.iloc[start_row:end_row].plot(plot_volume=False)\n", + "price.iloc[start_row:end_row].vbt.plot_pattern(\n", + " pattern, \n", + " invert=True,\n", + " error_type=\"relative\",\n", + " max_error=0.05,\n", + " max_error_interp_mode=\"discrete\",\n", + " plot_obj=False, \n", + " fig=fig\n", + ").show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c98cfea9-5b10-4c9a-a0de-c526ba4cb7ab", + "metadata": {}, + "outputs": [], + "source": [ + "inv_similarity = price.vbt.rolling_pattern_similarity(\n", + " pattern, \n", + " window=30,\n", + " invert=True,\n", + " error_type=\"relative\",\n", + " max_error=0.05,\n", + " max_error_interp_mode=\"discrete\"\n", + ")\n", + 
"inv_similarity.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c76c75f-1eb8-44dd-a633-ec4574c02ce7", + "metadata": {}, + "outputs": [], + "source": [ + "end_row = inv_similarity.argmax() + 1\n", + "start_row = end_row - 30\n", + "fig = data.iloc[start_row:end_row].plot(plot_volume=False)\n", + "price.iloc[start_row:end_row].vbt.plot_pattern(\n", + " pattern, \n", + " invert=True,\n", + " error_type=\"relative\",\n", + " max_error=0.05,\n", + " max_error_interp_mode=\"discrete\",\n", + " plot_obj=False, \n", + " fig=fig\n", + ").show_svg()" + ] + }, + { + "cell_type": "markdown", + "id": "b0f936be-2cb1-483a-8388-36bcd2292c95", + "metadata": {}, + "source": [ + "#### Indicator" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2c66d09c-3d29-4656-bb90-6d3e60010e85", + "metadata": {}, + "outputs": [], + "source": [ + "patsim = vbt.PATSIM.run(\n", + " price, \n", + " vbt.Default(pattern),\n", + " error_type=vbt.Default(\"relative\"),\n", + " max_error=vbt.Default(0.05),\n", + " max_error_interp_mode=vbt.Default(\"discrete\"),\n", + " window=[30, 45, 60, 75, 90]\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "789f1c55-f448-4d74-9619-2e45c5d25ae7", + "metadata": {}, + "outputs": [], + "source": [ + "patsim.wrapper.columns" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e4f9c2a7-f251-49ea-90e2-1b8d3be2fdc2", + "metadata": {}, + "outputs": [], + "source": [ + "patsim.plot(column=60).show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d2341e8a-539c-4426-ba18-abda070b0c0f", + "metadata": {}, + "outputs": [], + "source": [ + "patsim.overlay_with_heatmap(column=60).show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "18d865d5-3942-47fd-b01c-0837d2bad35e", + "metadata": {}, + "outputs": [], + "source": [ + "exits = patsim.similarity >= 0.8\n", + "exits.sum()" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "id": "08cc861a-5d5a-4b43-80ce-00b18e7fd3e3", + "metadata": {}, + "outputs": [], + "source": [ + "patsim = vbt.PATSIM.run(\n", + " price, \n", + " vbt.Default(pattern),\n", + " error_type=vbt.Default(\"relative\"),\n", + " max_error=vbt.Default(0.05),\n", + " max_error_interp_mode=vbt.Default(\"discrete\"),\n", + " window=[30, 45, 60, 75, 90],\n", + " invert=[False, True],\n", + " min_similarity=[0.7, 0.8],\n", + " param_product=True\n", + ")\n", + "exits = ~patsim.similarity.isnull()\n", + "exits.sum()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5911f6c0-29dc-466e-be6d-367e69d4c507", + "metadata": {}, + "outputs": [], + "source": [ + "groupby = [\n", + " name for name in patsim.wrapper.columns.names \n", + " if name != \"patsim_window\"\n", + "]\n", + "max_sim = patsim.similarity.groupby(groupby, axis=1).max()\n", + "entries = ~max_sim.xs(True, level=\"patsim_invert\", axis=1).isnull()\n", + "exits = ~max_sim.xs(False, level=\"patsim_invert\", axis=1).isnull()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d4155041-905e-42d1-87d4-bcc09e06e266", + "metadata": {}, + "outputs": [], + "source": [ + "fig = data.plot(ohlc_trace_kwargs=dict(opacity=0.5))\n", + "entries[0.8].vbt.signals.plot_as_entries(price, fig=fig)\n", + "exits[0.8].vbt.signals.plot_as_exits(price, fig=fig).show_svg()" + ] + }, + { + "cell_type": "markdown", + "id": "f4cab670-88b0-4c00-9170-ca310b7624ab", + "metadata": {}, + "source": [ + "### Search" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3c039275-8a26-435f-ac58-ba3f418931cd", + "metadata": {}, + "outputs": [], + "source": [ + "pattern_range_records = vbt.nb.find_pattern_1d_nb(\n", + " price.values,\n", + " pattern,\n", + " window=30,\n", + " max_window=90,\n", + " error_type=vbt.enums.ErrorType.Relative,\n", + " max_error=np.array([0.05]),\n", + " max_error_interp_mode=vbt.enums.InterpMode.Discrete,\n", + " min_similarity=0.85\n", + 
")\n", + "pattern_range_records" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9dbed8af-829e-4ee3-ab21-416ce30b5356", + "metadata": {}, + "outputs": [], + "source": [ + "start_row = pattern_range_records[1][\"start_idx\"]\n", + "end_row = pattern_range_records[1][\"end_idx\"]\n", + "fig = data.iloc[start_row:end_row + 30].plot(plot_volume=False)\n", + "price.iloc[start_row:end_row].vbt.plot_pattern(\n", + " pattern, \n", + " error_type=\"relative\",\n", + " max_error=0.05,\n", + " max_error_interp_mode=\"discrete\",\n", + " plot_obj=False, \n", + " fig=fig\n", + ").show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d9b75568-2605-40cf-a303-203d0d4a99d0", + "metadata": {}, + "outputs": [], + "source": [ + "pattern_ranges = vbt.PatternRanges.from_pattern_search(\n", + " price,\n", + " pattern,\n", + " window=30,\n", + " max_window=120,\n", + " error_type=\"relative\",\n", + " max_error=0.05,\n", + " max_error_interp_mode=\"discrete\",\n", + " min_similarity=0.85\n", + ")\n", + "pattern_ranges" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4173fd82-22f4-4f7f-afac-64d17836a608", + "metadata": {}, + "outputs": [], + "source": [ + "pattern_ranges = price.vbt.find_pattern(\n", + " pattern,\n", + " window=30,\n", + " max_window=90,\n", + " error_type=\"relative\",\n", + " max_error=0.05,\n", + " max_error_interp_mode=\"discrete\",\n", + " min_similarity=0.85\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f58a06d1-9df6-4e26-9541-fa5c45e52368", + "metadata": {}, + "outputs": [], + "source": [ + "print(pattern_ranges.records_readable)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d83797be-ea9a-4f66-b27b-13a70e121735", + "metadata": {}, + "outputs": [], + "source": [ + "pattern_ranges.wrapper.columns" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b2d620cb-8deb-4fda-a62c-ccf8e0b126d1", + "metadata": {}, + 
"outputs": [], + "source": [ + "pattern_ranges.search_configs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "db1df1d9-69ae-4a84-8a45-57df9f16d9bb", + "metadata": {}, + "outputs": [], + "source": [ + "pattern_ranges.plot().show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1e006d13-e92d-4e72-999d-442ec8cd867a", + "metadata": {}, + "outputs": [], + "source": [ + "pattern_ranges.loc[\"2021-09-01\":\"2022-01-01\"].plot().show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "31895f9e-9dab-44b4-b810-02549c01a3b6", + "metadata": {}, + "outputs": [], + "source": [ + "pattern_ranges.stats()" + ] + }, + { + "cell_type": "markdown", + "id": "099ecd8c-d9dd-40fc-b766-53c619240991", + "metadata": {}, + "source": [ + "#### Overlapping" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5b33eb7c-2e4c-4a66-8ebf-6ff6fe82639c", + "metadata": {}, + "outputs": [], + "source": [ + "pattern_ranges = price.vbt.find_pattern(\n", + " pattern,\n", + " window=30,\n", + " max_window=120,\n", + " error_type=\"relative\",\n", + " max_error=0.05,\n", + " max_error_interp_mode=\"discrete\",\n", + " min_similarity=0.85,\n", + " overlap_mode=\"allow\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0c924f37-e0c3-4588-8515-3684ef4eb46c", + "metadata": {}, + "outputs": [], + "source": [ + "pattern_ranges.count()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "37de96f6-27f2-4538-88a5-2758c5442ea4", + "metadata": {}, + "outputs": [], + "source": [ + "pattern_ranges.overlap_coverage" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b9a5bc35-4f8d-4add-af11-c2ac583fe70a", + "metadata": {}, + "outputs": [], + "source": [ + "pattern_ranges.plot(plot_zones=False, plot_patterns=False).show_svg()" + ] + }, + { + "cell_type": "markdown", + "id": "532a3db0-a81e-46f5-945c-37a0774a17a7", + "metadata": {}, + "source": [ + "#### 
Random selection" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c163259-3d64-4d06-a824-834d21043434", + "metadata": {}, + "outputs": [], + "source": [ + "def run_prob_search(row_select_prob, window_select_prob):\n", + " return price.vbt.find_pattern(\n", + " pattern,\n", + " window=30,\n", + " max_window=120,\n", + " row_select_prob=row_select_prob,\n", + " window_select_prob=window_select_prob,\n", + " error_type=\"relative\",\n", + " max_error=0.05,\n", + " max_error_interp_mode=\"discrete\",\n", + " min_similarity=0.8,\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5f08fb98-3b7c-41b2-a8b9-ba9ba7f4d475", + "metadata": {}, + "outputs": [], + "source": [ + "%timeit run_prob_search(1.0, 1.0)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3cefffe3-5d70-4a3d-822a-d39ea0f3b30a", + "metadata": {}, + "outputs": [], + "source": [ + "%timeit run_prob_search(0.5, 0.25)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d3a04c29-284e-4840-a77d-f23638962cd9", + "metadata": {}, + "outputs": [], + "source": [ + "run_prob_search(1.0, 1.0).count()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d1ff96a9-7c06-4f27-9e88-90e8902aae8a", + "metadata": {}, + "outputs": [], + "source": [ + "pd.Series([\n", + " run_prob_search(0.5, 0.25).count() \n", + " for i in range(100)\n", + "]).vbt.plot().show_svg()" + ] + }, + { + "cell_type": "markdown", + "id": "292d8003-d997-4826-b2cf-827786c66d55", + "metadata": {}, + "source": [ + "#### Params" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9f47a2b9-3d72-4b4a-a544-17bb08600836", + "metadata": {}, + "outputs": [], + "source": [ + "pattern_ranges = price.vbt.find_pattern(\n", + " vbt.Param([\n", + " [1, 2, 1],\n", + " [2, 1, 2],\n", + " [1, 2, 3],\n", + " [3, 2, 1]\n", + " ]),\n", + " window=30,\n", + " max_window=120,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 
null, + "id": "e2dffbc9-e64b-4629-83ce-af6bc96c701b", + "metadata": {}, + "outputs": [], + "source": [ + "pattern_ranges.count()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "294ee0c1-74f1-4d08-9627-9d7f78bb7c1d", + "metadata": {}, + "outputs": [], + "source": [ + "pattern_ranges = price.vbt.find_pattern(\n", + " vbt.Param([\n", + " [1, 2, 1],\n", + " [2, 1, 2],\n", + " [1, 2, 3],\n", + " [3, 2, 1]\n", + " ], keys=[\"v-top\", \"v-bottom\", \"rising\", \"falling\"]),\n", + " window=30,\n", + " max_window=120,\n", + ")\n", + "pattern_ranges.count()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "596fe13e-d557-4115-871f-393d993e1922", + "metadata": {}, + "outputs": [], + "source": [ + "pattern_ranges.plot(column=\"falling\").show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e484ac50-c92c-4287-bd7a-f7ecdd3ca523", + "metadata": {}, + "outputs": [], + "source": [ + "pattern_ranges = price.vbt.find_pattern(\n", + " vbt.Param([\n", + " [1, 2, 1],\n", + " [2, 1, 2],\n", + " [1, 2, 3],\n", + " [3, 2, 1]\n", + " ], keys=[\"v-top\", \"v-bottom\", \"rising\", \"falling\"]),\n", + " window=30,\n", + " max_window=120,\n", + " min_similarity=vbt.Param([0.8, 0.85])\n", + ")\n", + "pattern_ranges.count()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bcf16c45-3021-49d6-9033-82ddd96a0f30", + "metadata": {}, + "outputs": [], + "source": [ + "pattern_ranges.plot(column=(\"v-bottom\", 0.8)).show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7da6b0a3-899b-41a7-a514-68ccedea422f", + "metadata": {}, + "outputs": [], + "source": [ + "pattern_ranges = price.vbt.find_pattern(\n", + " vbt.Param([\n", + " [1, 2, 1],\n", + " [2, 1, 2],\n", + " [1, 2, 3],\n", + " [3, 2, 1]\n", + " ], keys=[\"v-top\", \"v-bottom\", \"rising\", \"falling\"], level=0),\n", + " window=vbt.Param([30, 30, 7, 7], level=0),\n", + " max_window=vbt.Param([120, 120, 30, 30], 
level=0),\n", + " min_similarity=vbt.Param([0.8, 0.85], level=1)\n", + ")\n", + "pattern_ranges.count()" + ] + }, + { + "cell_type": "markdown", + "id": "c289b606-0c93-48d2-9eb4-4ee571bc4636", + "metadata": {}, + "source": [ + "#### Configs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0c3bbcfc-6336-4241-a259-4cbf45a37d72", + "metadata": {}, + "outputs": [], + "source": [ + "mult_data = vbt.BinanceData.pull(\n", + " [\"BTCUSDT\", \"ETHUSDT\"], \n", + " start=\"2020-06-01 UTC\", \n", + " end=\"2022-06-01 UTC\"\n", + ")\n", + "mult_price = mult_data.hlc3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0c15e655-b3be-4223-992d-a4b27d82a5eb", + "metadata": {}, + "outputs": [], + "source": [ + "pattern_ranges = mult_price.vbt.find_pattern(\n", + " search_configs=[\n", + " vbt.PSC(pattern=[1, 2, 3, 2, 3, 2], window=30),\n", + " [\n", + " vbt.PSC(pattern=mult_price.iloc[-30:, 0]),\n", + " vbt.PSC(pattern=mult_price.iloc[-30:, 1]),\n", + " ]\n", + " ],\n", + " min_similarity=0.8\n", + ")\n", + "pattern_ranges.count()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3c19e67e-70ab-4a0e-ad63-46e24b9121ed", + "metadata": {}, + "outputs": [], + "source": [ + "pattern_ranges = mult_price.vbt.find_pattern(\n", + " search_configs=[\n", + " vbt.PSC(pattern=[1, 2, 3, 2, 3, 2], window=30, name=\"double_top\"),\n", + " [\n", + " vbt.PSC(pattern=mult_price.iloc[-30:, 0], name=\"last\"),\n", + " vbt.PSC(pattern=mult_price.iloc[-30:, 1], name=\"last\"),\n", + " ]\n", + " ],\n", + " min_similarity=0.8\n", + ")\n", + "pattern_ranges.count()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "97bdb94f-ea9b-4321-b858-3b49e29dd04b", + "metadata": {}, + "outputs": [], + "source": [ + "pattern_ranges = mult_price.vbt.find_pattern(\n", + " search_configs=[\n", + " vbt.PSC(pattern=[1, 2, 3, 2, 3, 2], window=30, name=\"double_top\"),\n", + " [\n", + " vbt.PSC(pattern=mult_price.iloc[-30:, 0], 
name=\"last\"),\n", + " vbt.PSC(pattern=mult_price.iloc[-30:, 1], name=\"last\"),\n", + " ]\n", + " ],\n", + " rescale_mode=vbt.Param([\"minmax\", \"rebase\"]),\n", + " min_similarity=0.8,\n", + " open=mult_data.open,\n", + " high=mult_data.high,\n", + " low=mult_data.low,\n", + " close=mult_data.close,\n", + ")\n", + "pattern_ranges.count()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c5f0ee35-b0be-48f8-9087-1b81cc3867ce", + "metadata": {}, + "outputs": [], + "source": [ + "pattern_ranges.plot(column=(\"rebase\", \"last\", \"ETHUSDT\")).show_svg()" + ] + }, + { + "cell_type": "markdown", + "id": "c75c253d-e60f-4cd5-8cab-6a610f82bc9c", + "metadata": {}, + "source": [ + "#### Mask" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3fbd7240-7dee-4ee4-b371-445abe564fd8", + "metadata": {}, + "outputs": [], + "source": [ + "mask = pattern_ranges.last_pd_mask\n", + "mask.sum()" + ] + }, + { + "cell_type": "markdown", + "id": "9451fb73-03e4-4287-acce-df905eec8fa4", + "metadata": {}, + "source": [ + "#### Indicator" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "add9ccd5-966a-4262-bb18-ace8abbf9cba", + "metadata": {}, + "outputs": [], + "source": [ + "pattern_ranges = price.vbt.find_pattern(\n", + " pattern,\n", + " window=30,\n", + " max_window=120,\n", + " row_select_prob=0.5,\n", + " window_select_prob=0.5,\n", + " overlap_mode=\"allow\",\n", + " seed=42\n", + ")\n", + "pr_mask = pattern_ranges.map_field(\n", + " \"similarity\", \n", + " idx_arr=pattern_ranges.last_idx.values\n", + ").to_pd()\n", + "pr_mask[~pr_mask.isnull()]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "86147e26-3814-4f0c-915f-cff77c5d4c28", + "metadata": {}, + "outputs": [], + "source": [ + "patsim = vbt.PATSIM.run(\n", + " price,\n", + " vbt.Default(pattern),\n", + " window=vbt.Default(30),\n", + " max_window=vbt.Default(120),\n", + " row_select_prob=vbt.Default(0.5),\n", + " 
window_select_prob=vbt.Default(0.5),\n", + " min_similarity=vbt.Default(0.85),\n", + " seed=42\n", + ")\n", + "ind_mask = patsim.similarity\n", + "ind_mask[~ind_mask.isnull()]" + ] + }, + { + "cell_type": "markdown", + "id": "f89ddd9d-d8a4-4bd8-800e-3dd2e962c229", + "metadata": {}, + "source": [ + "### Combination" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "27387a58-44cc-4885-abfc-bb93e40931e0", + "metadata": {}, + "outputs": [], + "source": [ + "price_highs = vbt.PATSIM.run(\n", + " data.high, \n", + " pattern=np.array([1, 3, 2, 4]), \n", + " window=40,\n", + " max_window=50\n", + ")\n", + "macd = data.run(\"talib_macd\").macd\n", + "macd_lows = vbt.PATSIM.run(\n", + " macd, \n", + " pattern=np.array([4, 2, 3, 1]), \n", + " window=40,\n", + " max_window=50\n", + ")\n", + "\n", + "fig = vbt.make_subplots(\n", + " rows=3, cols=1, shared_xaxes=True, vertical_spacing=0.02\n", + ")\n", + "fig.update_layout(height=500)\n", + "data.high.rename(\"Price\").vbt.plot(\n", + " add_trace_kwargs=dict(row=1, col=1), fig=fig\n", + ")\n", + "macd.rename(\"MACD\").vbt.plot(\n", + " add_trace_kwargs=dict(row=2, col=1), fig=fig\n", + ")\n", + "price_highs.similarity.rename(\"Price Sim\").vbt.plot(\n", + " add_trace_kwargs=dict(row=3, col=1), fig=fig\n", + ")\n", + "macd_lows.similarity.rename(\"MACD Sim\").vbt.plot(\n", + " add_trace_kwargs=dict(row=3, col=1), fig=fig\n", + ")\n", + "fig.show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4aa7cb3d-4000-443c-830c-2f5034a46c2d", + "metadata": {}, + "outputs": [], + "source": [ + "cond1 = (price_highs.similarity >= 0.8).vbt.rolling_any(10)\n", + "cond2 = (macd_lows.similarity >= 0.8).vbt.rolling_any(10)\n", + "exits = cond1 & cond2\n", + "fig = data.plot(ohlc_trace_kwargs=dict(opacity=0.5))\n", + "exits.vbt.signals.plot_as_exits(data.close, fig=fig).show_svg()" + ] + }, + { + "cell_type": "markdown", + "id": "ea6d38dd-f8a7-4cfd-a0cd-0372ba13781f", + "metadata": {}, + "source": 
[ + "## Projections" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2454a1d5-d966-4511-a0d6-942eb479414b", + "metadata": {}, + "outputs": [], + "source": [ + "pattern_ranges = price.vbt.find_pattern(\n", + " [1, 1.2], \n", + " window=7, \n", + " rescale_mode=\"rebase\", \n", + " max_error=0.01,\n", + " max_error_interp_mode=\"discrete\",\n", + " max_error_strict=True\n", + ")\n", + "pattern_ranges.count()" + ] + }, + { + "cell_type": "markdown", + "id": "5a5e28f0-13c5-429e-a2c8-a6d249b5fbf5", + "metadata": {}, + "source": [ + "### Pattern projections" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "db8fb47e-92a1-4476-becb-6677d6bac612", + "metadata": {}, + "outputs": [], + "source": [ + "range_idxs, raw_projections = vbt.nb.map_ranges_to_projections_nb(\n", + " vbt.to_2d_array(price),\n", + " pattern_ranges.get_field_arr(\"col\"),\n", + " pattern_ranges.get_field_arr(\"start_idx\"),\n", + " pattern_ranges.get_field_arr(\"end_idx\"),\n", + " pattern_ranges.get_field_arr(\"status\")\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "42bc6ef9-55b8-4331-9b4e-acd42d6fa28d", + "metadata": {}, + "outputs": [], + "source": [ + "range_idxs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3ef238f3-59b3-44c9-9164-8770077516ff", + "metadata": {}, + "outputs": [], + "source": [ + "raw_projections" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cf6ec440-517b-4ec3-b5f2-88637429f270", + "metadata": {}, + "outputs": [], + "source": [ + "projections = pattern_ranges.get_projections()\n", + "print(projections)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e2ecac89-f4b1-4d11-af3e-cceebf01770c", + "metadata": {}, + "outputs": [], + "source": [ + "pattern_ranges.duration.values" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "350fc784-81ac-4d40-9a8e-81eb4d7e3f4d", + "metadata": {}, + "outputs": [], + "source": 
[ + "projections = pattern_ranges.get_projections(incl_end_idx=False)\n", + "print(projections)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3d68f485-f5db-4af2-be32-b547446aaade", + "metadata": {}, + "outputs": [], + "source": [ + "projections.iloc[-1] / projections.iloc[0] - 1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "37ff3101-9abb-44e9-81f8-7bba26fc7890", + "metadata": {}, + "outputs": [], + "source": [ + "projections.vbt.plot().show_svg()" + ] + }, + { + "cell_type": "markdown", + "id": "8107b799-c0cc-403f-9036-3fb69ce9d107", + "metadata": {}, + "source": [ + "### Delta projections" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5049cc27-b914-4a1d-b302-0bb1fd69ed21", + "metadata": {}, + "outputs": [], + "source": [ + "delta_ranges = pattern_ranges.with_delta(4)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "be87a7da-4d2d-4522-bb00-68c9a5bd2d8b", + "metadata": {}, + "outputs": [], + "source": [ + "fig = pattern_ranges.loc[\"2021-01\":\"2021-03\"].plot()\n", + "delta_ranges.loc[\"2021-01\":\"2021-03\"].plot(\n", + " plot_ohlc=False,\n", + " plot_close=False,\n", + " plot_markers=False,\n", + " closed_shape_kwargs=dict(fillcolor=\"DeepSkyBlue\"),\n", + " fig=fig\n", + ").show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "eafc7432-212f-4029-9863-ce77680876a4", + "metadata": {}, + "outputs": [], + "source": [ + "projections = delta_ranges.get_projections()\n", + "print(projections)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71564e43-47b6-41fa-a00b-d5dfbbc597c9", + "metadata": {}, + "outputs": [], + "source": [ + "np.mean(projections.iloc[-1] / projections.iloc[0] - 1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "458e3eb1-d963-4b63-b143-8e83a311b7e8", + "metadata": {}, + "outputs": [], + "source": [ + "pattern_ranges = mult_price.vbt.find_pattern(\n", + " [1, 1.2], \n", 
+ " window=7, \n", + " max_window=30,\n", + " rescale_mode=\"rebase\",\n", + " max_error=0.01,\n", + " max_error_interp_mode=\"discrete\",\n", + " max_error_strict=True,\n", + " overlap_mode=\"allow\"\n", + ")\n", + "pattern_ranges.count()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "962f00c1-d8d2-4f36-b9d0-b3f22d0e9b85", + "metadata": {}, + "outputs": [], + "source": [ + "delta_ranges = pattern_ranges.with_delta(4)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6342f033-5867-4c10-aa52-572cc6fa4b1c", + "metadata": {}, + "outputs": [], + "source": [ + "projections = delta_ranges.get_projections()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5d852b78-9631-471a-936b-e4ecde58306d", + "metadata": {}, + "outputs": [], + "source": [ + "(projections.iloc[-1] / projections.iloc[0] - 1).describe()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "83317097-d330-42bc-9914-e1771223d87d", + "metadata": {}, + "outputs": [], + "source": [ + "projections = delta_ranges.get_projections(id_level=\"end_idx\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b2826f67-9bca-4fb0-869e-a82cdfd2241d", + "metadata": {}, + "outputs": [], + "source": [ + "print(projections.columns)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "81af5933-1c93-4d78-9fc1-caf11fbcebfa", + "metadata": {}, + "outputs": [], + "source": [ + "btc_projections = projections.xs(\"BTCUSDT\", level=\"symbol\", axis=1)\n", + "total_proj_return = btc_projections.iloc[-1] / btc_projections.iloc[0] - 1\n", + "total_proj_return.vbt.scatterplot(\n", + " trace_kwargs=dict(\n", + " marker=dict(\n", + " color=total_proj_return.values,\n", + " colorscale=\"Temps_r\",\n", + " cmid=0\n", + " )\n", + " )\n", + ").show_svg()" + ] + }, + { + "cell_type": "markdown", + "id": "3052612d-739e-4850-98c0-c95339abab89", + "metadata": {}, + "source": [ + "### Plotting" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "id": "80393e11-1f0b-4677-8449-704c608fc663", + "metadata": {}, + "outputs": [], + "source": [ + "btc_projections.vbt.plot_projections(plot_bands=False).show_svg()" + ] + }, + { + "cell_type": "markdown", + "id": "36245487-378f-485c-a8b5-943882c67d4a", + "metadata": {}, + "source": [ + "#### Colorization" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7899b684-bb9f-466b-ab32-c8a458dcaaf3", + "metadata": {}, + "outputs": [], + "source": [ + "btc_projections[\"2020-08-03\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bbae99b6-bd79-4761-a841-2c4332b40e89", + "metadata": {}, + "outputs": [], + "source": [ + "btc_projections[\"2020-08-03\"].median()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3dc3ff43-6652-43e6-8846-4395f4961778", + "metadata": {}, + "outputs": [], + "source": [ + "btc_projections.vbt.plot_projections(\n", + " plot_bands=False, colorize=np.std\n", + ").show_svg()" + ] + }, + { + "cell_type": "markdown", + "id": "cd341c89-03e5-49c0-bf5f-54e400e1006e", + "metadata": {}, + "source": [ + "#### Bands" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8289d408-7647-42e3-b031-4fca40126501", + "metadata": {}, + "outputs": [], + "source": [ + "projections.xs(\"ETHUSDT\", level=\"symbol\", axis=1).median(axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "43533726-0d4a-4ff1-8b6d-dcfd1cc47e6b", + "metadata": {}, + "outputs": [], + "source": [ + "print(projections.groupby(\"symbol\", axis=1).median())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f4059411-7aec-4ca6-ba4e-16fec33303f3", + "metadata": {}, + "outputs": [], + "source": [ + "projections.median(axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "57d904bb-d80a-48b9-a518-c1302b16ea51", + "metadata": {}, + "outputs": [], + "source": [ + 
"btc_projections.vbt.plot_projections().show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3ac38bd4-62a9-4f47-ab1b-9387b5aef98d", + "metadata": {}, + "outputs": [], + "source": [ + "btc_projections.iloc[-1].quantile(0.8)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e66e93d8-8ff7-4f34-ae0d-ccadd66cebcc", + "metadata": {}, + "outputs": [], + "source": [ + "btc_projections.vbt.plot_projections(\n", + " plot_lower=False,\n", + " plot_middle=\"30%\", \n", + " plot_upper=False, \n", + " plot_aux_middle=False, \n", + ").show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f949a129-4ec8-4ea8-923f-34ab2c3a1927", + "metadata": {}, + "outputs": [], + "source": [ + "btc_projections.iloc[-1].vbt.qqplot().show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4d0e1932-7d85-4b5f-8b07-a720b50033cb", + "metadata": {}, + "outputs": [], + "source": [ + "btc_projections.vbt.plot_projections(\n", + " plot_lower=\"P=20%\",\n", + " plot_middle=\"mean\", \n", + " plot_upper=\"P=80%\", \n", + " plot_aux_middle=False, \n", + ").show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c6759119-3477-4a13-9127-3fca78fbf827", + "metadata": {}, + "outputs": [], + "source": [ + "def finishes_at_quantile(df, q):\n", + " nth_element = int(np.ceil(q * (df.shape[1] - 1)))\n", + " nth_index = np.argsort(df.iloc[-1])[nth_element]\n", + " return df.iloc[:, nth_index]\n", + "\n", + "btc_projections.vbt.plot_projections(\n", + " plot_lower=partial(finishes_at_quantile, q=0.2),\n", + " plot_middle=False, \n", + " plot_upper=partial(finishes_at_quantile, q=0.8), \n", + ").show_svg()" + ] + }, + { + "cell_type": "markdown", + "id": "94e7b8cb-9143-4fce-bd98-5a7f4cf45a88", + "metadata": {}, + "source": [ + "### Filtering" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "de11af10-ca2e-43e9-a4ac-5264b7ef0bb0", + "metadata": {}, + "outputs": [], 
+ "source": [ + "crossed_mask = projections.expanding().max().iloc[1] >= 1.05\n", + "filt_projections = projections.loc[:, crossed_mask]\n", + "filt_projections.iloc[-1].describe()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09a8a128-dbbc-441a-b0b9-4c38728e4b66", + "metadata": {}, + "outputs": [], + "source": [ + "filt_projections.vbt.plot_projections().show_svg()" + ] + }, + { + "cell_type": "markdown", + "id": "2baaec17-f214-45e1-9056-24efc36e8eb2", + "metadata": {}, + "source": [ + "### Latest projections" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30883240-2b3f-40bb-bfd0-3ec6a76cf638", + "metadata": {}, + "outputs": [], + "source": [ + "pattern_ranges = price.vbt.find_pattern(\n", + " pattern=data.close.iloc[-7:],\n", + " rescale_mode=\"rebase\",\n", + " overlap_mode=\"allow\"\n", + ")\n", + "pattern_ranges.count()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a82ef541-7b12-4f5b-af56-39ca78ea6c69", + "metadata": {}, + "outputs": [], + "source": [ + "pattern_ranges = pattern_ranges.status_closed\n", + "pattern_ranges.count()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "284df49d-5235-4450-af49-f31475eedab2", + "metadata": {}, + "outputs": [], + "source": [ + "projections = pattern_ranges.get_projections()\n", + "projections.vbt.plot_projections(plot_bands=False).show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "463036d2-3b03-4aaf-ba51-17bbe30902db", + "metadata": {}, + "outputs": [], + "source": [ + "delta_ranges = pattern_ranges.with_delta(7)\n", + "projections = delta_ranges.get_projections(start_value=-1)\n", + "fig = data.iloc[-7:].plot(plot_volume=False)\n", + "projections.vbt.plot_projections(fig=fig).show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "40cd39a7-eee3-468f-953e-2e3e248a0a57", + "metadata": {}, + "outputs": [], + "source": [ + "projections.mean(axis=1)" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "id": "ea7b7996-6040-4e22-a8a7-1b3158f3d3ae", + "metadata": {}, + "outputs": [], + "source": [ + "next_data = vbt.BinanceData.pull(\n", + " \"BTCUSDT\", \n", + " start=\"2022-05-31\", \n", + " end=\"2022-06-08\"\n", + ")\n", + "next_data.close" + ] + }, + { + "cell_type": "markdown", + "id": "17e50e87-4c45-4226-9c6e-d6f8c4fcc1ad", + "metadata": {}, + "source": [ + "#### Quick plotting" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ab44b469-25f6-4521-8597-508b0e90dcd0", + "metadata": {}, + "outputs": [], + "source": [ + "delta_ranges.plot_projections().show_svg()" + ] + }, + { + "cell_type": "markdown", + "id": "0998e94d-1235-492f-8a43-7d5447c62ad0", + "metadata": {}, + "source": [ + "### Non-uniform projections" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d89bff85-7fb4-4199-acff-d3933e7a5bc1", + "metadata": {}, + "outputs": [], + "source": [ + "windows = np.arange(10, 31)\n", + "window_tuples = combinations(windows, 2)\n", + "window_tuples = filter(lambda x: abs(x[0] - x[1]) >= 5, window_tuples)\n", + "fast_windows, slow_windows = zip(*window_tuples)\n", + "fast_sma = data.run(\"sma\", fast_windows, short_name=\"fast_sma\")\n", + "slow_sma = data.run(\"sma\", slow_windows, short_name=\"slow_sma\")\n", + "entries = fast_sma.real_crossed_above(slow_sma.real)\n", + "exits = fast_sma.real_crossed_below(slow_sma.real)\n", + "\n", + "entries.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5b467365-1c39-4df8-82f9-e187695d66e8", + "metadata": {}, + "outputs": [], + "source": [ + "entry_ranges = entries.vbt.signals.delta_ranges(30, close=data.close)\n", + "entry_ranges = entry_ranges.status_closed\n", + "entry_ranges.count().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9dbd119c-1ca9-4baf-9464-247c375da1bd", + "metadata": {}, + "outputs": [], + "source": [ + "exit_ranges = exits.vbt.signals.delta_ranges(30, 
close=data.close)\n", + "exit_ranges = exit_ranges.status_closed\n", + "exit_ranges.count().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "20a19b6e-3c65-4fba-8cbb-58fd37422420", + "metadata": {}, + "outputs": [], + "source": [ + "entry_projections = entry_ranges.get_projections()\n", + "entry_projections.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d0ed13b1-f9b0-4b8d-a1a1-1f45e9384387", + "metadata": {}, + "outputs": [], + "source": [ + "exit_projections = exit_ranges.get_projections()\n", + "exit_projections.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d390a0d6-8371-4b45-a859-7c2d75cc3e1f", + "metadata": {}, + "outputs": [], + "source": [ + "fig = entry_projections.vbt.plot_projections(\n", + " plot_projections=False,\n", + " lower_trace_kwargs=dict(name=\"Lower (entry)\", line_color=\"green\"),\n", + " middle_trace_kwargs=dict(name=\"Middle (entry)\", line_color=\"green\"),\n", + " upper_trace_kwargs=dict(name=\"Upper (entry)\", line_color=\"green\"),\n", + " plot_aux_middle=False,\n", + " plot_fill=False\n", + ")\n", + "fig = exit_projections.vbt.plot_projections(\n", + " plot_projections=False,\n", + " lower_trace_kwargs=dict(name=\"Lower (exit)\", line_color=\"orangered\"),\n", + " middle_trace_kwargs=dict(name=\"Middle (exit)\", line_color=\"orangered\"),\n", + " upper_trace_kwargs=dict(name=\"Upper (exit)\", line_color=\"orangered\"),\n", + " plot_aux_middle=False,\n", + " plot_fill=False,\n", + " fig=fig\n", + ")\n", + "fig.show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b9e3ff1d-8278-4715-9daf-c1bd779796fe", + "metadata": {}, + "outputs": [], + "source": [ + "entry_ranges = entries.vbt.signals.between_ranges(exits, close=data.close)\n", + "entry_ranges = entry_ranges.status_closed\n", + "entry_ranges.count().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": 
"cd4ff156-429b-4815-a15d-2582ea0ce8c3", + "metadata": {}, + "outputs": [], + "source": [ + "exit_ranges = exits.vbt.signals.between_ranges(entries, close=data.close)\n", + "exit_ranges = exit_ranges.status_closed\n", + "exit_ranges.count().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "19678630-7b6c-41df-8ff5-10987936cdbe", + "metadata": {}, + "outputs": [], + "source": [ + "entry_projections = entry_ranges.get_projections()\n", + "entry_projections.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f1edfedd-8899-482d-8eae-fd452ea039b3", + "metadata": {}, + "outputs": [], + "source": [ + "exit_projections = exit_ranges.get_projections()\n", + "exit_projections.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "efe053ae-3fc2-4011-b6dd-073f1898b261", + "metadata": {}, + "outputs": [], + "source": [ + "rand_cols = np.random.choice(entry_projections.shape[1], 100)\n", + "entry_projections.iloc[:, rand_cols].vbt.plot_projections(plot_bands=False).show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0307a36b-6cea-41b5-97fc-9d580c6e16fc", + "metadata": {}, + "outputs": [], + "source": [ + "rand_cols = np.random.choice(exit_projections.shape[1], 100)\n", + "exit_projections.iloc[:, rand_cols].vbt.plot_projections(plot_bands=False).show_svg()" + ] + }, + { + "cell_type": "markdown", + "id": "efb5dbd7-92dc-4b11-9e17-53149dc69094", + "metadata": {}, + "source": [ + "#### Shrinking" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4e8bc5c4-6468-4d22-83ad-5eba09672b54", + "metadata": {}, + "outputs": [], + "source": [ + "entry_projections = entry_ranges.get_projections(proj_period=\"30d\")\n", + "entry_projections.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "255f0a7f-b55c-4c66-bf7c-45cc0285fe84", + "metadata": {}, + "outputs": [], + "source": [ + "exit_projections = 
exit_ranges.get_projections(proj_period=\"30d\")\n", + "exit_projections.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "31b7b35d-a419-4077-99a7-7138b0fdbfe2", + "metadata": {}, + "outputs": [], + "source": [ + "rand_cols = np.random.choice(entry_projections.shape[1], 100)\n", + "entry_projections.iloc[:, rand_cols].vbt.plot_projections().show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b6de42c1-2359-44d9-b7b4-f301dcb82877", + "metadata": {}, + "outputs": [], + "source": [ + "rand_cols = np.random.choice(exit_projections.shape[1], 100)\n", + "exit_projections.iloc[:, rand_cols].vbt.plot_projections().show_svg()" + ] + }, + { + "cell_type": "markdown", + "id": "bc947af3-c7fe-4c91-925b-2cb5c8062217", + "metadata": {}, + "source": [ + "#### Stretching" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9cd9fc97-d3be-44b8-a907-99b9db01faa4", + "metadata": {}, + "outputs": [], + "source": [ + "entry_projections = entry_ranges.get_projections(\n", + " proj_period=\"30d\", extend=True\n", + ")\n", + "entry_projections.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0584041b-86cc-4ce5-a127-e0cfd80e9c4a", + "metadata": {}, + "outputs": [], + "source": [ + "exit_projections = exit_ranges.get_projections(\n", + " proj_period=\"30d\", extend=True\n", + ")\n", + "exit_projections.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "307b1297-555e-4db5-a739-ca5ddf4a7157", + "metadata": {}, + "outputs": [], + "source": [ + "rand_cols = np.random.choice(entry_projections.shape[1], 100)\n", + "entry_projections.iloc[:, rand_cols].vbt.plot_projections().show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "14a8234f-11e1-4841-b55e-a63483a3fce3", + "metadata": {}, + "outputs": [], + "source": [ + "rand_cols = np.random.choice(exit_projections.shape[1], 100)\n", + "exit_projections.iloc[:, 
rand_cols].vbt.plot_projections().show_svg()" + ] + }, + { + "cell_type": "markdown", + "id": "ceac936f-10c8-41c3-8b42-fcac372e8b0b", + "metadata": {}, + "source": [ + "#### Quick plotting" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f53ea678-3707-41af-807d-4690abde298e", + "metadata": {}, + "outputs": [], + "source": [ + "entry_ranges.wrapper.columns" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c128064-5e25-4949-9ab6-d01867d7523d", + "metadata": {}, + "outputs": [], + "source": [ + "entry_ranges.plot_projections(\n", + " column=(25, 30),\n", + " last_n=10,\n", + " proj_period=\"30d\", \n", + " extend=True,\n", + " plot_lower=False,\n", + " plot_upper=False,\n", + " plot_aux_middle=False,\n", + " projection_trace_kwargs=dict(opacity=0.3)\n", + ").show_svg()" + ] + }, + { + "cell_type": "markdown", + "id": "86fa4fc5-010e-4102-979a-f5a0f39af5c7", + "metadata": {}, + "source": [ + "### Open projections" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "55fd0811-5970-4fe4-88d4-852cb352430e", + "metadata": {}, + "outputs": [], + "source": [ + "exit_ranges = exits.vbt.signals.between_ranges(\n", + " entries, \n", + " incl_open=True, \n", + " close=data.close\n", + ")\n", + "exit_ranges.count().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a6ee4056-46c3-48c3-a0c8-dd6c7fd96331", + "metadata": {}, + "outputs": [], + "source": [ + "exit_ranges.wrapper.columns[exit_ranges.status_open.col_arr]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6012e861-9761-4244-a94c-1c19726cad31", + "metadata": {}, + "outputs": [], + "source": [ + "exit_ranges.status_closed.plot_projections(\n", + " column=(20, 30), plot_bands=False\n", + ").show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d98c6dfb-80d6-4bab-89e6-9526397b9a39", + "metadata": {}, + "outputs": [], + "source": [ + "exit_ranges.plot_projections(\n", + " 
column=(20, 30), plot_bands=False\n", + ").show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "93a55365-7167-408e-b5eb-b3260ce186e4", + "metadata": {}, + "outputs": [], + "source": [ + "column = (20, 30)\n", + "signal_index = data.wrapper.index[np.flatnonzero(exits[column])[-1]]\n", + "plot_start_index = signal_index - pd.Timedelta(days=10)\n", + "sub_close = data.close[plot_start_index:]\n", + "sub_exits = exits.loc[plot_start_index:, column]\n", + "\n", + "fig = sub_close.vbt.plot()\n", + "sub_exits.vbt.signals.plot_as_exits(sub_close, fig=fig)\n", + "projections = exit_ranges[column].status_closed.get_projections(\n", + " start_value=sub_close.loc[signal_index],\n", + " start_index=signal_index\n", + ")\n", + "projections.vbt.plot_projections(plot_bands=False, fig=fig).show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "06010e28-2a21-46df-a921-0b0533137762", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/to_explore/notebooks/PortfolioOptimization.ipynb b/to_explore/notebooks/PortfolioOptimization.ipynb new file mode 100644 index 0000000..6bcbab4 --- /dev/null +++ b/to_explore/notebooks/PortfolioOptimization.ipynb @@ -0,0 +1,3178 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "9b76874f-1446-4198-8695-386bbb22060f", + "metadata": {}, + "outputs": [], + "source": [ + "from vectorbtpro import *\n", + "# whats_imported()\n", + "\n", + "vbt.settings.set_theme(\"dark\")" + ] + }, + { + "cell_type": "code", + "execution_count": 
null, + "id": "db16b4c1-1ffb-41fe-a04b-249161b80b3f", + "metadata": {}, + "outputs": [], + "source": [ + "# data = vbt.BinanceData.pull(\n", + "# [\"BTCUSDT\", \"ETHUSDT\", \"BNBUSDT\", \"XRPUSDT\", \"ADAUSDT\"], \n", + "# start=\"2020-01-01 UTC\", \n", + "# end=\"2021-01-01 UTC\",\n", + "# timeframe=\"1h\"\n", + "# )\n", + "\n", + "# data.to_hdf()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cd78fe86-080e-49ab-bfd3-87634465cc86", + "metadata": {}, + "outputs": [], + "source": [ + "data = vbt.HDFData.pull(\"BinanceData.h5\")" + ] + }, + { + "cell_type": "markdown", + "id": "69590f7f-bde5-428c-a036-52d6bd207a72", + "metadata": {}, + "source": [ + "## Allocation" + ] + }, + { + "cell_type": "markdown", + "id": "4bec503b-c470-4255-aaf9-22cb67037586", + "metadata": {}, + "source": [ + "### Manually" + ] + }, + { + "cell_type": "markdown", + "id": "1ca8b280-a993-4501-b00e-01dbc86ceb21", + "metadata": {}, + "source": [ + "#### Index points" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fc61be80-bc27-42be-80bf-8335285eef96", + "metadata": {}, + "outputs": [], + "source": [ + "ms_points = data.wrapper.get_index_points(every=\"M\")\n", + "ms_points" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7ae7a4d6-ea1f-45ef-9653-3e0def80e242", + "metadata": {}, + "outputs": [], + "source": [ + "data.wrapper.index.get_indexer(\n", + " pd.Series(index=data.wrapper.index).resample(vbt.offset(\"M\")).asfreq().index, \n", + " method=\"bfill\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b7a1475a-7cf1-42b9-aa29-454591557ce3", + "metadata": {}, + "outputs": [], + "source": [ + "data.wrapper.index[ms_points]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fc620982-9a4b-4176-9d13-6088acac0ce6", + "metadata": {}, + "outputs": [], + "source": [ + "example_points = data.wrapper.get_index_points(every=24 * 30)\n", + "data.wrapper.index[example_points]" + ] + 
}, + { + "cell_type": "code", + "execution_count": null, + "id": "a43d5382-fe93-444d-ab7a-04736ab4b8e1", + "metadata": {}, + "outputs": [], + "source": [ + "date_offset = pd.offsets.WeekOfMonth(week=3, weekday=4)\n", + "example_points = data.wrapper.get_index_points(\n", + " every=date_offset, \n", + " add_delta=pd.Timedelta(hours=17)\n", + ")\n", + "data.wrapper.index[example_points]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c6f534de-68f6-4e3c-b55c-2d80ac197d8e", + "metadata": {}, + "outputs": [], + "source": [ + "example_points = data.wrapper.get_index_points(\n", + " start=\"April 1st 2020\",\n", + " every=\"M\"\n", + ")\n", + "data.wrapper.index[example_points]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "85ab2969-6a78-4869-8fcb-f498cd6430b2", + "metadata": {}, + "outputs": [], + "source": [ + "example_points = data.wrapper.get_index_points(\n", + " on=[\"April 1st 2020 19:45\", \"17 September 2020 00:01\"]\n", + ")\n", + "data.wrapper.index[example_points]" + ] + }, + { + "cell_type": "markdown", + "id": "5681f6ee-bb4b-473a-aaec-92af48e290ee", + "metadata": {}, + "source": [ + "#### Filling" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4c86c70-3611-43c7-8536-2c2b4a903d35", + "metadata": {}, + "outputs": [], + "source": [ + "symbol_wrapper = data.get_symbol_wrapper(freq=\"1h\")\n", + "filled_allocations = symbol_wrapper.fill()\n", + "print(filled_allocations)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e81dcdf2-0079-4adb-9b15-99486a56fc4f", + "metadata": {}, + "outputs": [], + "source": [ + "np.random.seed(42)\n", + "\n", + "def random_allocate_func():\n", + " weights = np.random.uniform(size=symbol_wrapper.shape[1])\n", + " return weights / weights.sum()\n", + "\n", + "for idx in ms_points:\n", + " filled_allocations.iloc[idx] = random_allocate_func()\n", + "\n", + "allocations = filled_allocations[~filled_allocations.isnull().any(axis=1)]\n", + 
"allocations" + ] + }, + { + "cell_type": "markdown", + "id": "cb975a93-4739-4f3a-b3ba-068cdf82d27e", + "metadata": {}, + "source": [ + "#### Simulation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "24345fec-cc7a-4bc1-a822-4794b5bd8995", + "metadata": {}, + "outputs": [], + "source": [ + "pf = vbt.Portfolio.from_orders(\n", + " close=data.get(\"Close\"),\n", + " size=filled_allocations,\n", + " size_type=\"targetpercent\",\n", + " group_by=True,\n", + " cash_sharing=True,\n", + " call_seq=\"auto\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "40e232aa-6f52-41ff-b7bb-c8f073ced4fa", + "metadata": {}, + "outputs": [], + "source": [ + "sim_alloc = pf.get_asset_value(group_by=False).vbt / pf.value\n", + "print(sim_alloc)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5817b120-0903-46a8-b96a-9e40b46e05be", + "metadata": {}, + "outputs": [], + "source": [ + "sim_alloc.vbt.plot(\n", + " trace_kwargs=dict(stackgroup=\"one\"),\n", + " use_gl=False\n", + ").show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a2b2fa9e-4261-4f3e-b0fc-621567101a86", + "metadata": {}, + "outputs": [], + "source": [ + "pf.plot_allocations().show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3382d027-387f-4c6d-bd1c-e1e83ebe97e4", + "metadata": {}, + "outputs": [], + "source": [ + "np.isclose(allocations, sim_alloc.iloc[ms_points])" + ] + }, + { + "cell_type": "markdown", + "id": "6aa7e781-88cd-4fb9-b492-f249207d1d97", + "metadata": {}, + "source": [ + "### Allocation method" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d38deafd-ca0a-487a-8109-cf99c6eb63fe", + "metadata": {}, + "outputs": [], + "source": [ + "np.random.seed(42)\n", + "\n", + "pfo = vbt.PortfolioOptimizer.from_allocate_func(\n", + " symbol_wrapper,\n", + " random_allocate_func,\n", + " every=\"M\"\n", + ")" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "id": "cd07c105-1448-41b7-918d-209129790757", + "metadata": {}, + "outputs": [], + "source": [ + "print(pfo.allocations)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e1f5f65c-efd5-45e8-869b-f0f208054ec7", + "metadata": {}, + "outputs": [], + "source": [ + "print(pfo.filled_allocations)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9309c8f5-79a7-477b-8e19-7953a86df612", + "metadata": {}, + "outputs": [], + "source": [ + "print(pfo.alloc_records.records_readable)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77ca4765-ea6e-40d5-8f51-25ded1e76168", + "metadata": {}, + "outputs": [], + "source": [ + "pfo.plot().show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "18d794b3-f011-42e7-a7e2-300cf9f3b195", + "metadata": {}, + "outputs": [], + "source": [ + "pfo.stats()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "185132cb-120f-43c1-bf07-72b382e273ce", + "metadata": {}, + "outputs": [], + "source": [ + "pf = vbt.Portfolio.from_optimizer(data, pfo, freq=\"1h\")\n", + "\n", + "pf.sharpe_ratio" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a398f1b4-71b8-4f67-8ce1-fd9b5cc56d56", + "metadata": {}, + "outputs": [], + "source": [ + "pf = pfo.simulate(data, freq=\"1h\")\n", + "\n", + "pf.sharpe_ratio" + ] + }, + { + "cell_type": "markdown", + "id": "82efb592-393e-4c9e-9d7a-2c50c8d3fb44", + "metadata": {}, + "source": [ + "#### Once" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2db279f6-4ebf-4f8c-82a2-a2cf820b740d", + "metadata": {}, + "outputs": [], + "source": [ + "def const_allocate_func(target_alloc):\n", + " return target_alloc\n", + "\n", + "pfo = vbt.PortfolioOptimizer.from_allocate_func(\n", + " symbol_wrapper,\n", + " const_allocate_func,\n", + " [0.5, 0.2, 0.1, 0.1, 0.1]\n", + ")\n", + "\n", + "pfo.plot().show_svg()" + ] + }, + { + "cell_type": "markdown", + 
"id": "a8fef0f6-976a-468a-8337-a9fc59847f96", + "metadata": {}, + "source": [ + "#### Parsing" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5dbd26d9-8953-4f3d-9844-68c035a3d264", + "metadata": {}, + "outputs": [], + "source": [ + "custom_index = vbt.date_range(\"2020-01-01\", \"2021-01-01\", freq=\"Q\")\n", + "custom_allocations = pd.DataFrame(\n", + " [\n", + " [0.5, 0.2, 0.1, 0.1, 0.1],\n", + " [0.1, 0.5, 0.2, 0.1, 0.1],\n", + " [0.1, 0.1, 0.5, 0.2, 0.1],\n", + " [0.1, 0.1, 0.1, 0.5, 0.2]\n", + " ],\n", + " index=custom_index, \n", + " columns=symbol_wrapper.columns\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4c2e9dc9-60c4-40ee-a725-c6d5634a7c4a", + "metadata": {}, + "outputs": [], + "source": [ + "pfo = vbt.PortfolioOptimizer.from_allocations(\n", + " symbol_wrapper,\n", + " custom_allocations\n", + ")\n", + "print(pfo.allocations)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fd3f29d5-ddab-4ba9-abd3-c09807c072f4", + "metadata": {}, + "outputs": [], + "source": [ + "pfo = vbt.PortfolioOptimizer.from_allocations(\n", + " symbol_wrapper,\n", + " custom_allocations.values,\n", + " start=\"2020-01-01\",\n", + " end=\"2021-01-01\",\n", + " every=\"Q\"\n", + ")\n", + "print(pfo.allocations)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "eeab75a5-6e4f-4751-b9e5-3fbaaf6717fe", + "metadata": {}, + "outputs": [], + "source": [ + "pfo = vbt.PortfolioOptimizer.from_filled_allocations(\n", + " pfo.fill_allocations()\n", + ")\n", + "print(pfo.allocations)" + ] + }, + { + "cell_type": "markdown", + "id": "c254a91a-85d7-4502-a208-606a60f3d8a4", + "metadata": {}, + "source": [ + "#### Templates" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "128d6a58-e133-476a-a78a-2ab2eede62e7", + "metadata": {}, + "outputs": [], + "source": [ + "def rotation_allocate_func(wrapper, i):\n", + " weights = np.full(len(wrapper.columns), 0)\n", + " weights[i 
% len(wrapper.columns)] = 1\n", + " return weights\n", + "\n", + "pfo = vbt.PortfolioOptimizer.from_allocate_func(\n", + " symbol_wrapper,\n", + " rotation_allocate_func,\n", + " vbt.Rep(\"wrapper\"),\n", + " vbt.Rep(\"i\"),\n", + " every=\"M\"\n", + ")\n", + "\n", + "pfo.plot().show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d0fc4975-fb15-4d56-a42e-f9a3686441fe", + "metadata": {}, + "outputs": [], + "source": [ + "def rotation_allocate_func(symbols, chosen_symbol):\n", + " return {s: 1 if s == chosen_symbol else 0 for s in symbols}\n", + "\n", + "pfo = vbt.PortfolioOptimizer.from_allocate_func(\n", + " symbol_wrapper,\n", + " rotation_allocate_func,\n", + " vbt.RepEval(\"wrapper.columns\"),\n", + " vbt.RepEval(\"wrapper.columns[i % len(wrapper.columns)]\"),\n", + " every=\"M\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0cac3042-ab4b-4f88-b361-5381b619e91e", + "metadata": {}, + "outputs": [], + "source": [ + "print(pfo.allocations)" + ] + }, + { + "cell_type": "markdown", + "id": "48324866-77d0-4743-9778-4468c6933941", + "metadata": {}, + "source": [ + "#### Groups" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "849f130f-aaaa-4b84-9e3d-002be887c103", + "metadata": {}, + "outputs": [], + "source": [ + "pfo = vbt.PortfolioOptimizer.from_allocate_func(\n", + " symbol_wrapper,\n", + " const_allocate_func,\n", + " [0.5, 0.2, 0.1, 0.1, 0.1],\n", + " every=vbt.Param([\"1M\", \"2M\", \"3M\"])\n", + ")\n", + "\n", + "pf = pfo.simulate(data, freq=\"1h\")\n", + "pf.total_return" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "52154cba-8a20-43bf-934e-efcf8a07d14a", + "metadata": {}, + "outputs": [], + "source": [ + "pfo = vbt.PortfolioOptimizer.from_allocate_func(\n", + " symbol_wrapper,\n", + " const_allocate_func,\n", + " vbt.Param([\n", + " [0.5, 0.2, 0.1, 0.1, 0.1],\n", + " [0.2, 0.1, 0.1, 0.1, 0.5]\n", + " ], keys=pd.Index([\"w1\", \"w2\"], 
name=\"weights\")),\n", + " every=vbt.Param([\"1M\", \"2M\", \"3M\"])\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3fcfe8e6-a6b3-4904-9e9a-44612af115ac", + "metadata": {}, + "outputs": [], + "source": [ + "pfo.wrapper.grouper.get_index()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "502b18b1-498a-459e-a11e-30b5e895e2ad", + "metadata": {}, + "outputs": [], + "source": [ + "pfo.wrapper.columns" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "64d0fa79-4c39-4b30-99e9-afeb05f496e0", + "metadata": {}, + "outputs": [], + "source": [ + "pfo[(\"3M\", \"w2\")].stats()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1b03be1e-b989-4821-bd90-e4a4f9b9fd14", + "metadata": {}, + "outputs": [], + "source": [ + "pfo = vbt.PortfolioOptimizer.from_allocate_func(\n", + " symbol_wrapper,\n", + " const_allocate_func,\n", + " group_configs=[\n", + " dict(args=([0.5, 0.2, 0.1, 0.1, 0.1],), every=\"1M\"),\n", + " dict(args=([0.2, 0.1, 0.1, 0.1, 0.5],), every=\"2M\"),\n", + " dict(args=([0.1, 0.1, 0.1, 0.5, 0.2],), every=\"3M\"),\n", + " dict(args=([0.1, 0.1, 0.5, 0.2, 0.1],), every=\"1M\"),\n", + " dict(args=([0.1, 0.5, 0.2, 0.1, 0.1],), every=\"2M\"),\n", + " dict(args=([0.5, 0.2, 0.1, 0.1, 0.1],), every=\"3M\"),\n", + " ]\n", + ")\n", + "pfo.wrapper.grouper.get_index()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "286c19a7-487d-4461-9ac7-505408df2d00", + "metadata": {}, + "outputs": [], + "source": [ + "pfo = vbt.PortfolioOptimizer.from_allocate_func(\n", + " symbol_wrapper,\n", + " const_allocate_func,\n", + " group_configs=[\n", + " dict(\n", + " allocate_func=const_allocate_func, \n", + " args=([0.5, 0.2, 0.1, 0.1, 0.1],),\n", + " _name=\"const\"\n", + " ),\n", + " dict(\n", + " allocate_func=random_allocate_func,\n", + " every=\"M\",\n", + " _name=\"random\"\n", + " ),\n", + " ]\n", + ")\n", + "pfo.wrapper.grouper.get_index()" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "id": "d1370874-9993-4eb6-9561-8d4bdedcb93a", + "metadata": {}, + "outputs": [], + "source": [ + "pfo = vbt.PortfolioOptimizer.from_allocate_func(\n", + " symbol_wrapper,\n", + " const_allocate_func,\n", + " group_configs={\n", + " \"const\": dict(\n", + " allocate_func=const_allocate_func, \n", + " args=([0.5, 0.2, 0.1, 0.1, 0.1],)\n", + " ),\n", + " \"random\": dict(\n", + " allocate_func=random_allocate_func,\n", + " ),\n", + " },\n", + " every=vbt.Param([\"1M\", \"2M\", \"3M\"])\n", + ")\n", + "pfo.wrapper.grouper.get_index()" + ] + }, + { + "cell_type": "markdown", + "id": "084666b9-43f5-459a-950c-12092771572e", + "metadata": {}, + "source": [ + "#### Numba" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d1540dfe-a610-4cec-9acc-6f8f88585f76", + "metadata": {}, + "outputs": [], + "source": [ + "@njit\n", + "def rotation_allocate_func_nb(i, idx, n_cols):\n", + " weights = np.full(n_cols, 0)\n", + " weights[i % n_cols] = 1\n", + " return weights\n", + "\n", + "pfo = vbt.PortfolioOptimizer.from_allocate_func(\n", + " symbol_wrapper,\n", + " rotation_allocate_func_nb,\n", + " vbt.RepEval(\"len(wrapper.columns)\"),\n", + " every=\"W\",\n", + " jitted_loop=True\n", + ")\n", + "\n", + "print(pfo.allocations.head())" + ] + }, + { + "cell_type": "markdown", + "id": "d6d5353d-99bd-4d8e-8bdd-fa8f32759b87", + "metadata": {}, + "source": [ + "#### Distribution" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fdc416bb-0457-4cb6-8838-9d9fce2e7551", + "metadata": {}, + "outputs": [], + "source": [ + "pfo = vbt.PortfolioOptimizer.from_allocate_func(\n", + " symbol_wrapper,\n", + " rotation_allocate_func_nb,\n", + " vbt.Rep(\"i\"),\n", + " vbt.Rep(\"index_point\"),\n", + " vbt.RepEval(\"len(wrapper.columns)\"),\n", + " every=\"D\",\n", + " execute_kwargs=dict(engine=\"dask\")\n", + ")\n", + "\n", + "print(pfo.allocations.head())" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "id": "88153e42-fbfa-47b9-bdc0-31c786948fa2", + "metadata": {}, + "outputs": [], + "source": [ + "pfo = vbt.PortfolioOptimizer.from_allocate_func(\n", + " symbol_wrapper,\n", + " rotation_allocate_func_nb,\n", + " vbt.RepEval(\"len(wrapper.columns)\"),\n", + " every=\"D\",\n", + " jitted_loop=True,\n", + " chunked=dict(\n", + " arg_take_spec=dict(args=vbt.ArgsTaker(None)),\n", + " engine=\"dask\"\n", + " )\n", + ")\n", + "\n", + "print(pfo.allocations.head())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "53abd71a-db25-46d9-b826-e4424bd713a9", + "metadata": {}, + "outputs": [], + "source": [ + "pfo = vbt.PortfolioOptimizer.from_allocate_func(\n", + " symbol_wrapper,\n", + " rotation_allocate_func_nb,\n", + " vbt.RepEval(\"len(wrapper.columns)\"),\n", + " every=\"D\",\n", + " jitted_loop=True,\n", + " jitted=dict(parallel=True)\n", + ")\n", + "\n", + "print(pfo.allocations.head())" + ] + }, + { + "cell_type": "markdown", + "id": "9c40824e-6c4c-4ba9-a01e-0288132fbccd", + "metadata": {}, + "source": [ + "## Optimization" + ] + }, + { + "cell_type": "markdown", + "id": "4fc8fe78-7c3a-458d-8c71-fcb0697ab748", + "metadata": {}, + "source": [ + "### Index ranges" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "37bd154b-352e-431c-9958-276193afac4f", + "metadata": {}, + "outputs": [], + "source": [ + "example_ranges = data.wrapper.get_index_ranges(every=\"M\")\n", + "example_ranges[0]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8ce4b815-90d1-4c66-a257-c7ca854375e1", + "metadata": {}, + "outputs": [], + "source": [ + "example_ranges[1]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ca7a3e1a-c959-4ec1-a39c-24b03bc77978", + "metadata": {}, + "outputs": [], + "source": [ + "data.wrapper.index[example_ranges[0][0]:example_ranges[1][0]]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f655f008-a2b6-4861-b0b9-6eacc712f3a9", + 
"metadata": {}, + "outputs": [], + "source": [ + "example_ranges = data.wrapper.get_index_ranges(\n", + " every=\"M\", \n", + " lookback_period=\"3M\"\n", + ")\n", + "\n", + "def get_index_bounds(range_starts, range_ends):\n", + " for i in range(len(range_starts)):\n", + " range_index = data.wrapper.index[range_starts[i]:range_ends[i]]\n", + " yield range_index[0], range_index[-1]\n", + "\n", + "list(get_index_bounds(*example_ranges))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "533a7760-bf7a-4e85-92c6-5905ee4e7402", + "metadata": {}, + "outputs": [], + "source": [ + "example_ranges = data.wrapper.get_index_ranges(\n", + " start=[\"2020-01-01\", \"2020-04-01\", \"2020-08-01\"],\n", + " end=[\"2020-04-01\", \"2020-08-01\", \"2020-12-01\"]\n", + ")\n", + "\n", + "list(get_index_bounds(*example_ranges))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d990f69b-57e3-467a-96f7-4629e2efc3be", + "metadata": {}, + "outputs": [], + "source": [ + "example_ranges = data.wrapper.get_index_ranges(\n", + " start=\"2020-01-01\",\n", + " end=[\"2020-04-01\", \"2020-08-01\", \"2020-12-01\"]\n", + ")\n", + "\n", + "list(get_index_bounds(*example_ranges))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bf3e9132-1ad9-419c-a947-28d76605f0a1", + "metadata": {}, + "outputs": [], + "source": [ + "example_ranges = data.wrapper.get_index_ranges(\n", + " every=\"Q\",\n", + " exact_start=True,\n", + " fixed_start=True\n", + ")\n", + "\n", + "list(get_index_bounds(*example_ranges))" + ] + }, + { + "cell_type": "markdown", + "id": "4eaa14c9-abd5-47c5-ab3e-4a567798f4d6", + "metadata": {}, + "source": [ + "### Optimization method" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "65f35ec4-46c1-4402-a74c-39a7ecf44d50", + "metadata": {}, + "outputs": [], + "source": [ + "def inv_rank_optimize_func(price, index_slice):\n", + " price_period = price.iloc[index_slice]\n", + " first_price = 
price_period.iloc[0]\n", + " last_price = price_period.iloc[-1]\n", + " ret = (last_price - first_price) / first_price\n", + " ranks = ret.rank(ascending=False)\n", + " return ranks / ranks.sum()\n", + "\n", + "pfo = vbt.PortfolioOptimizer.from_optimize_func(\n", + " symbol_wrapper,\n", + " inv_rank_optimize_func,\n", + " data.get(\"Close\"),\n", + " vbt.Rep(\"index_slice\"),\n", + " every=\"M\"\n", + ")\n", + "\n", + "print(pfo.allocations)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "da8c39d5-31da-44cc-a384-0e270e8faea9", + "metadata": {}, + "outputs": [], + "source": [ + "print(pfo.alloc_records.records_readable)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7d2e0254-6445-4bde-a162-99f77ed07e06", + "metadata": {}, + "outputs": [], + "source": [ + "start_idx = pfo.alloc_records.values[0][\"start_idx\"]\n", + "end_idx = pfo.alloc_records.values[0][\"end_idx\"]\n", + "close_period = data.get(\"Close\").iloc[start_idx:end_idx]\n", + "close_period.vbt.rebase(1).vbt.plot().show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7f8f395a-a4bc-4d82-bb1e-ceb838c1dbd9", + "metadata": {}, + "outputs": [], + "source": [ + "pfo.stats()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1bec9bf7-116d-4ee0-8f22-f5d85abb02be", + "metadata": {}, + "outputs": [], + "source": [ + "pfo.plots().show_svg()" + ] + }, + { + "cell_type": "markdown", + "id": "4cd5f118-7eff-444f-bcad-b0caed5563ac", + "metadata": {}, + "source": [ + "#### Numba" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9e7d1b93-cbe0-489a-833e-2421cdddf5cd", + "metadata": {}, + "outputs": [], + "source": [ + "@njit\n", + "def inv_rank_optimize_func_nb(i, start_idx, end_idx, price):\n", + " price_period = price[start_idx:end_idx]\n", + " first_price = price_period[0]\n", + " last_price = price_period[-1]\n", + " ret = (last_price - first_price) / first_price\n", + " ranks = 
vbt.nb.rank_1d_nb(-ret)\n", + " return ranks / ranks.sum()\n", + "\n", + "pfo = vbt.PortfolioOptimizer.from_optimize_func(\n", + " symbol_wrapper,\n", + " inv_rank_optimize_func_nb,\n", + " data.get(\"Close\").values,\n", + " every=\"M\",\n", + " jitted_loop=True\n", + ")\n", + "\n", + "print(pfo.allocations)" + ] + }, + { + "cell_type": "markdown", + "id": "0f1cd1a0-beda-4a1d-ba6e-be353116c2a5", + "metadata": {}, + "source": [ + "## Integrations" + ] + }, + { + "cell_type": "markdown", + "id": "5b5eeddd-d48c-435e-afcd-032d56028605", + "metadata": {}, + "source": [ + "### PyPortfolioOpt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "eddbb356-a684-43cf-bfaf-3a024b297532", + "metadata": {}, + "outputs": [], + "source": [ + "from pypfopt.expected_returns import mean_historical_return\n", + "from pypfopt.risk_models import CovarianceShrinkage\n", + "from pypfopt.efficient_frontier import EfficientFrontier\n", + "\n", + "expected_returns = mean_historical_return(data.get(\"Close\"))\n", + "cov_matrix = CovarianceShrinkage(data.get(\"Close\")).ledoit_wolf()\n", + "optimizer = EfficientFrontier(expected_returns, cov_matrix)\n", + "weights = optimizer.max_sharpe()\n", + "weights" + ] + }, + { + "cell_type": "markdown", + "id": "9f718768-cf18-44fc-af63-ac18b28312c7", + "metadata": {}, + "source": [ + "#### Parsing" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1d01cc1c-99f3-47b6-91ef-5781d5ce2de0", + "metadata": {}, + "outputs": [], + "source": [ + "from vectorbtpro.portfolio.pfopt.base import resolve_pypfopt_func_kwargs\n", + "\n", + "print(vbt.format_func(mean_historical_return))\n", + "\n", + "print(vbt.prettify(resolve_pypfopt_func_kwargs(\n", + " mean_historical_return, \n", + " prices=data.get(\"Close\"),\n", + " freq=\"1h\",\n", + " year_freq=\"365d\",\n", + " other_arg=100\n", + ")))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "42341016-70d0-4ba1-b386-97b0ca8a3422", + "metadata": {}, + 
"outputs": [], + "source": [ + "print(vbt.prettify(resolve_pypfopt_func_kwargs(\n", + " EfficientFrontier, \n", + " prices=data.get(\"Close\")\n", + ")))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dd9d9605-e5db-4466-af74-25a4f8fe7eae", + "metadata": {}, + "outputs": [], + "source": [ + "print(vbt.prettify(resolve_pypfopt_func_kwargs(\n", + " EfficientFrontier, \n", + " prices=data.get(\"Close\"),\n", + " expected_returns=\"ema_historical_return\",\n", + " cov_matrix=\"sample_cov\"\n", + ")))" + ] + }, + { + "cell_type": "markdown", + "id": "24a6eef0-9743-4a77-9966-9f049ecf3bc1", + "metadata": {}, + "source": [ + "#### Auto-optimization" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2c5a41b5-0c06-445e-8279-fab28bd64ee2", + "metadata": {}, + "outputs": [], + "source": [ + "vbt.pypfopt_optimize(prices=data.get(\"Close\"))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "999ff2a1-57db-4f25-817f-296e3323b65e", + "metadata": {}, + "outputs": [], + "source": [ + "S = CovarianceShrinkage(data.get(\"Close\")).ledoit_wolf()\n", + "ef = EfficientFrontier(None, S, weight_bounds=(-1, 1))\n", + "ef.min_volatility()\n", + "weights = ef.clean_weights()\n", + "weights" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "45ed42f6-34e7-481a-b1bd-3b3d8e45ca9c", + "metadata": {}, + "outputs": [], + "source": [ + "vbt.pypfopt_optimize(\n", + " prices=data.get(\"Close\"),\n", + " expected_returns=None,\n", + " weight_bounds=(-1, 1),\n", + " target=\"min_volatility\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a6ed4f49-2c33-4a12-9c6b-f91dfa7d381b", + "metadata": {}, + "outputs": [], + "source": [ + "from pypfopt.expected_returns import capm_return\n", + "\n", + "sector_mapper = {\n", + " \"ADAUSDT\": \"DeFi\",\n", + " \"BNBUSDT\": \"DeFi\",\n", + " \"BTCUSDT\": \"Payment\",\n", + " \"ETHUSDT\": \"DeFi\",\n", + " \"XRPUSDT\": \"Payment\"\n", + "}\n", + 
"sector_lower = {\n", + " \"DeFi\": 0.75\n", + "}\n", + "sector_upper = {}\n", + "\n", + "mu = capm_return(data.get(\"Close\"))\n", + "S = CovarianceShrinkage(data.get(\"Close\")).ledoit_wolf()\n", + "ef = EfficientFrontier(mu, S)\n", + "ef.add_sector_constraints(sector_mapper, sector_lower, sector_upper)\n", + "adausdt_index = ef.tickers.index(\"ADAUSDT\")\n", + "ef.add_constraint(lambda w: w[adausdt_index] == 0.10)\n", + "ef.max_sharpe()\n", + "weights = ef.clean_weights()\n", + "weights" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e0920cb2-fcec-4d60-bbd1-860ff7e9f3e0", + "metadata": {}, + "outputs": [], + "source": [ + "adausdt_index = list(sector_mapper.keys()).index(\"ADAUSDT\")\n", + "vbt.pypfopt_optimize(\n", + " prices=data.get(\"Close\"),\n", + " sector_mapper=sector_mapper,\n", + " sector_lower=sector_lower,\n", + " sector_upper=sector_upper,\n", + " constraints=[lambda w: w[adausdt_index] == 0.10],\n", + " expected_returns=\"capm_return\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3baebc8f-b29e-4b99-924d-be1e30c41819", + "metadata": {}, + "outputs": [], + "source": [ + "from pypfopt.objective_functions import L2_reg\n", + "\n", + "mu = capm_return(data.get(\"Close\"))\n", + "S = CovarianceShrinkage(data.get(\"Close\")).ledoit_wolf()\n", + "ef = EfficientFrontier(mu, S)\n", + "ef.add_sector_constraints(sector_mapper, sector_lower, sector_upper)\n", + "ef.add_objective(L2_reg, gamma=0.1) # gamma is the tuning parameter\n", + "ef.efficient_risk(0.15)\n", + "weights = ef.clean_weights()\n", + "weights" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "962886c1-70ed-4537-bce5-4329f47eeb66", + "metadata": {}, + "outputs": [], + "source": [ + "vbt.pypfopt_optimize(\n", + " prices=data.get(\"Close\"),\n", + " expected_returns=\"capm_return\",\n", + " sector_mapper=sector_mapper,\n", + " sector_lower=sector_lower,\n", + " sector_upper=sector_upper,\n", + "
objectives=[\"L2_reg\"],\n", + " gamma=0.1,\n", + " target=\"efficient_risk\",\n", + " target_volatility=0.15\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7d0be75d-a98c-476d-8355-d4d9682ca44c", + "metadata": {}, + "outputs": [], + "source": [ + "from pypfopt import EfficientSemivariance\n", + "from pypfopt.expected_returns import returns_from_prices\n", + "\n", + "mu = capm_return(data.get(\"Close\"))\n", + "returns = returns_from_prices(data.get(\"Close\"))\n", + "returns = returns.dropna()\n", + "es = EfficientSemivariance(mu, returns)\n", + "es.efficient_return(0.01)\n", + "weights = es.clean_weights()\n", + "weights" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7ec40502-5d85-477f-a719-1d81626b308f", + "metadata": {}, + "outputs": [], + "source": [ + "vbt.pypfopt_optimize(\n", + " prices=data.get(\"Close\"),\n", + " expected_returns=\"capm_return\",\n", + " optimizer=\"efficient_semivariance\",\n", + " target=\"efficient_return\",\n", + " target_return=0.01\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fc19d54f-e653-46b7-9dc1-e9164a6cafb6", + "metadata": {}, + "outputs": [], + "source": [ + "initial_weights = np.array([1 / len(data.symbols)] * len(data.symbols))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c1ca5f3d-717b-48de-8ca6-7365785314ad", + "metadata": {}, + "outputs": [], + "source": [ + "from pypfopt.objective_functions import transaction_cost\n", + "\n", + "mu = mean_historical_return(data.get(\"Close\"))\n", + "S = CovarianceShrinkage(data.get(\"Close\")).ledoit_wolf()\n", + "ef = EfficientFrontier(mu, S)\n", + "ef.add_objective(transaction_cost, w_prev=initial_weights, k=0.001)\n", + "ef.add_objective(L2_reg, gamma=0.05)\n", + "ef.min_volatility()\n", + "weights = ef.clean_weights()\n", + "weights" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2872ca4e-3e6c-4013-9f9e-6a78210da576", + "metadata": {}, 
+ "outputs": [], + "source": [ + "vbt.pypfopt_optimize(\n", + " prices=data.get(\"Close\"),\n", + " objectives=[\"transaction_cost\", \"L2_reg\"],\n", + " w_prev=initial_weights, \n", + " k=0.001,\n", + " gamma=0.05,\n", + " target=\"min_volatility\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "93a2b1f9-3b76-4b73-8875-2f8148aed53e", + "metadata": {}, + "outputs": [], + "source": [ + "import cvxpy as cp\n", + "\n", + "def logarithmic_barrier_objective(w, cov_matrix, k=0.1):\n", + " log_sum = cp.sum(cp.log(w))\n", + " var = cp.quad_form(w, cov_matrix)\n", + " return var - k * log_sum" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "618445c6-b151-4e48-ab97-09e73ea454ab", + "metadata": {}, + "outputs": [], + "source": [ + "mu = mean_historical_return(data.get(\"Close\"))\n", + "S = CovarianceShrinkage(data.get(\"Close\")).ledoit_wolf()\n", + "ef = EfficientFrontier(mu, S, weight_bounds=(0.01, 0.3))\n", + "ef.convex_objective(logarithmic_barrier_objective, cov_matrix=S, k=0.001)\n", + "weights = ef.clean_weights()\n", + "weights" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "363b4772-b8f7-4780-8dd6-0879ca0be1e4", + "metadata": {}, + "outputs": [], + "source": [ + "vbt.pypfopt_optimize(\n", + " prices=data.get(\"Close\"),\n", + " weight_bounds=(0.01, 0.3),\n", + " k=0.001,\n", + " target=logarithmic_barrier_objective\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d315ee2a-a300-4b2f-aabf-8dfd8e8a71ea", + "metadata": {}, + "outputs": [], + "source": [ + "def deviation_risk_parity(w, cov_matrix):\n", + " cov_matrix = np.asarray(cov_matrix)\n", + " n = cov_matrix.shape[0]\n", + " rp = (w * (cov_matrix @ w)) / cp.quad_form(w, cov_matrix)\n", + " return cp.sum_squares(rp - 1 / n).value" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "becea885-c1a8-4346-aa21-49e426fd6a30", + "metadata": {}, + "outputs": [], + "source": [ + "mu = 
mean_historical_return(data.get(\"Close\"))\n", + "S = CovarianceShrinkage(data.get(\"Close\")).ledoit_wolf()\n", + "ef = EfficientFrontier(mu, S)\n", + "ef.nonconvex_objective(deviation_risk_parity, ef.cov_matrix)\n", + "weights = ef.clean_weights()\n", + "weights" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d23a8f29-5fcb-489f-a566-d5e303009009", + "metadata": {}, + "outputs": [], + "source": [ + "vbt.pypfopt_optimize(\n", + " prices=data.get(\"Close\"),\n", + " target=deviation_risk_parity,\n", + " target_is_convex=False\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "52331cdc-4078-4d33-b6a5-c65f85afc1c1", + "metadata": {}, + "outputs": [], + "source": [ + "sp500_data = vbt.YFData.pull(\n", + " \"^GSPC\", \n", + " start=data.wrapper.index[0], \n", + " end=data.wrapper.index[-1]\n", + ")\n", + "market_caps = data.get(\"Close\") * data.get(\"Volume\")\n", + "viewdict = {\n", + " \"ADAUSDT\": 0.20, \n", + " \"BNBUSDT\": -0.30, \n", + " \"BTCUSDT\": 0, \n", + " \"ETHUSDT\": -0.2, \n", + " \"XRPUSDT\": 0.15\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "953df670-d6bc-4530-a5a7-4e6120fc64d8", + "metadata": {}, + "outputs": [], + "source": [ + "from pypfopt.black_litterman import (\n", + " market_implied_risk_aversion,\n", + " market_implied_prior_returns,\n", + " BlackLittermanModel\n", + ")\n", + "\n", + "S = CovarianceShrinkage(data.get(\"Close\")).ledoit_wolf()\n", + "delta = market_implied_risk_aversion(sp500_data.get(\"Close\"))\n", + "prior = market_implied_prior_returns(market_caps.iloc[-1], delta, S)\n", + "bl = BlackLittermanModel(S, pi=prior, absolute_views=viewdict)\n", + "rets = bl.bl_returns()\n", + "ef = EfficientFrontier(rets, S)\n", + "ef.min_volatility()\n", + "weights = ef.clean_weights()\n", + "weights" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "48480f30-3641-463f-8a8e-5ce0485d1795", + "metadata": {}, + "outputs": [], + 
"source": [ + "vbt.pypfopt_optimize(\n", + " prices=data.get(\"Close\"),\n", + " expected_returns=\"bl_returns\",\n", + " market_prices=sp500_data.get(\"Close\"),\n", + " market_caps=market_caps.iloc[-1],\n", + " absolute_views=viewdict,\n", + " target=\"min_volatility\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "597a8769-bf4c-49ee-8af6-e15db567d9e2", + "metadata": {}, + "outputs": [], + "source": [ + "from pypfopt import HRPOpt\n", + "\n", + "rets = returns_from_prices(data.get(\"Close\"))\n", + "hrp = HRPOpt(rets)\n", + "hrp.optimize()\n", + "weights = hrp.clean_weights()\n", + "weights" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "04648737-d27f-4c57-96a7-7a898b7bbf30", + "metadata": {}, + "outputs": [], + "source": [ + "vbt.pypfopt_optimize(\n", + " prices=data.get(\"Close\"),\n", + " optimizer=\"hrp\",\n", + " target=\"optimize\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "7ed59e17-aff0-4d23-b896-bce0a4d45fef", + "metadata": {}, + "source": [ + "#### Argument groups" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ddbad7dd-8655-40fa-8b0a-d85deb808cec", + "metadata": {}, + "outputs": [], + "source": [ + "vbt.pypfopt_optimize( \n", + " prices=data.get(\"Close\"),\n", + " expected_returns=\"bl_returns\", \n", + " market_prices=sp500_data.get(\"Close\"),\n", + " market_caps=market_caps.iloc[-1],\n", + " absolute_views=viewdict,\n", + " target=\"min_volatility\",\n", + " cov_matrix=vbt.pfopt_func_dict({\n", + " \"EfficientFrontier\": \"sample_cov\",\n", + " \"_def\": \"ledoit_wolf\"\n", + " })\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "5afd87a6-eaa1-4656-9e09-f988f811c139", + "metadata": {}, + "source": [ + "#### Periodically" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "69629e9e-dcff-48a0-9af6-6ccef81d1dce", + "metadata": {}, + "outputs": [], + "source": [ + "pfo = vbt.PortfolioOptimizer.from_pypfopt(\n", + " 
prices=data.get(\"Close\"),\n", + " every=\"W\"\n", + ")\n", + "\n", + "pfo.plot().show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "719a764c-a500-41ad-b93b-2161b618373b", + "metadata": {}, + "outputs": [], + "source": [ + "pfo = vbt.PortfolioOptimizer.from_pypfopt(\n", + " prices=data.get(\"Close\"),\n", + " every=\"W\",\n", + " target=vbt.Param([\n", + " \"max_sharpe\", \n", + " \"min_volatility\", \n", + " \"max_quadratic_utility\"\n", + " ])\n", + ")\n", + "\n", + "pfo.plot(column=\"min_volatility\").show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "91d13211-9be7-4bd6-9357-23fa3320ffa4", + "metadata": {}, + "outputs": [], + "source": [ + "pf = pfo.simulate(data, freq=\"1h\")\n", + "\n", + "pf.sharpe_ratio" + ] + }, + { + "cell_type": "markdown", + "id": "b1eb1e34-c040-49ab-96c5-2fe596444b50", + "metadata": {}, + "source": [ + "### Riskfolio-Lib" + ] + }, + { + "cell_type": "markdown", + "id": "2177d3d7-963e-4e88-a20d-ff649699ff00", + "metadata": {}, + "source": [ + "#### Parsing" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bcbe3a9a-1473-4571-8a8f-ca32815ede4f", + "metadata": {}, + "outputs": [], + "source": [ + "import riskfolio as rp\n", + "\n", + "returns = data.get(\"Close\").vbt.to_returns()\n", + "\n", + "port = rp.Portfolio(returns=returns)\n", + "port.assets_stats(\n", + " method_mu=\"hist\", \n", + " method_cov=\"hist\",\n", + " d=0.94\n", + ")\n", + "w = port.optimization(\n", + " model=\"Classic\",\n", + " rm=\"MV\",\n", + " obj=\"Sharpe\",\n", + " rf=0,\n", + " l=0,\n", + " hist=True\n", + ")\n", + "print(w.T)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5d0c602c-0ec9-45ac-b663-9cff902154bf", + "metadata": {}, + "outputs": [], + "source": [ + "from vectorbtpro.utils.parsing import get_func_arg_names\n", + "\n", + "get_func_arg_names(port.assets_stats)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": 
"44b61c1c-d5b8-4fed-a994-df2cb018c820", + "metadata": {}, + "outputs": [], + "source": [ + "from vectorbtpro.portfolio.pfopt.base import resolve_riskfolio_func_kwargs\n", + "\n", + "resolve_riskfolio_func_kwargs(\n", + " port.assets_stats,\n", + " method_mu=\"hist\",\n", + " method_cov=\"hist\",\n", + " model=\"Classic\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d9b0e924-0a7e-4241-9fb1-cf4ae622c718", + "metadata": {}, + "outputs": [], + "source": [ + "resolve_riskfolio_func_kwargs(\n", + " port.assets_stats,\n", + " method_mu=\"hist\",\n", + " method_cov=\"hist\",\n", + " model=\"Classic\",\n", + " func_kwargs=dict(\n", + " assets_stats=dict(method_mu=\"ewma1\"),\n", + " optimization=dict(model=\"BL\")\n", + " )\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "acc762cc-a303-4a17-8056-d6d387ce7949", + "metadata": {}, + "source": [ + "#### Auto-optimization" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d77e4539-e19d-4496-9694-8fd67ee3f753", + "metadata": {}, + "outputs": [], + "source": [ + "vbt.riskfolio_optimize(returns)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a6a920e0-080a-4663-ad16-fd2e3de42e39", + "metadata": {}, + "outputs": [], + "source": [ + "port = rp.Portfolio(returns=returns)\n", + "port.assets_stats(\n", + " method_mu=\"hist\", \n", + " method_cov=\"hist\", \n", + " d=0.94\n", + ")\n", + "w = port.optimization(\n", + " model=\"Classic\", \n", + " rm=\"UCI\", \n", + " obj=\"Sharpe\", \n", + " rf=0, \n", + " l=0, \n", + " hist=True\n", + ")\n", + "print(w.T)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0cc3ab5a-e304-4f44-9cc3-05f458d7818b", + "metadata": {}, + "outputs": [], + "source": [ + "vbt.riskfolio_optimize(\n", + " returns,\n", + " method_mu=\"hist\", \n", + " method_cov=\"hist\", \n", + " d=0.94,\n", + " rm=\"UCI\", \n", + " obj=\"Sharpe\", \n", + " rf=0, \n", + " l=0, \n", + " hist=True\n", + ")" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "id": "5cafb960-87ab-4aaf-818a-085270ade0db", + "metadata": {}, + "outputs": [], + "source": [ + "port = rp.Portfolio(returns=returns)\n", + "port.assets_stats(\n", + " method_mu=\"hist\", \n", + " method_cov=\"hist\", \n", + " d=0.94\n", + ")\n", + "port.wc_stats(\n", + " box=\"s\", \n", + " ellip=\"s\", \n", + " q=0.05, \n", + " n_sim=3000, \n", + " window=3, \n", + " dmu=0.1, \n", + " dcov=0.1, \n", + " seed=0\n", + ")\n", + "w = port.wc_optimization(\n", + " obj=\"Sharpe\", \n", + " rf=0, \n", + " l=0, \n", + " Umu=\"box\", \n", + " Ucov=\"box\"\n", + ")\n", + "print(w.T)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "edf7699f-b286-4b00-b3a9-244641840cf1", + "metadata": {}, + "outputs": [], + "source": [ + "vbt.riskfolio_optimize(\n", + " returns,\n", + " opt_method=\"wc\",\n", + " method_mu=\"hist\", \n", + " method_cov=\"hist\", \n", + " box=\"s\", \n", + " ellip=\"s\", \n", + " q=0.05, \n", + " n_sim=3000, \n", + " window=3, \n", + " dmu=0.1, \n", + " dcov=0.1, \n", + " seed=0,\n", + " obj=\"Sharpe\", \n", + " rf=0, \n", + " l=0, \n", + " Umu=\"box\", \n", + " Ucov=\"box\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c175c0b-d47b-4039-9918-a4d496b941be", + "metadata": {}, + "outputs": [], + "source": [ + "vbt.riskfolio_optimize(\n", + " returns,\n", + " func_kwargs=dict(\n", + " assets_stats=dict(\n", + " opt_method=\"wc\",\n", + " method_mu=\"hist\", \n", + " method_cov=\"hist\"\n", + " ),\n", + " wc_stats=dict(\n", + " box=\"s\", \n", + " ellip=\"s\", \n", + " q=0.05, \n", + " n_sim=3000, \n", + " window=3, \n", + " dmu=0.1, \n", + " dcov=0.1, \n", + " seed=0\n", + " ),\n", + " wc_optimization=dict(\n", + " obj=\"Sharpe\", \n", + " rf=0, \n", + " l=0, \n", + " Umu=\"box\", \n", + " Ucov=\"box\"\n", + " )\n", + " )\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "558d4ffc-9297-497a-bf76-0c5114696d00", + 
"metadata": {}, + "outputs": [], + "source": [ + "port = rp.Portfolio(returns=returns)\n", + "port.sht = True # Allows the use of short weights\n", + "port.uppersht = 0.3 # Maximum value of sum of short weights in absolute value\n", + "port.upperlng = 1.3 # Maximum value of sum of positive weights\n", + "port.budget = 1.0 # No leverage\n", + "port.assets_stats(\n", + " method_mu=\"hist\", \n", + " method_cov=\"hist\", \n", + " d=0.94\n", + ")\n", + "w = port.optimization(\n", + " model=\"Classic\", \n", + " rm=\"MV\", \n", + " obj=\"Sharpe\", \n", + " rf=0, \n", + " l=0, \n", + " hist=True\n", + ")\n", + "print(w.T)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0343022a-98e1-44e1-82c6-61c7d2175cfc", + "metadata": {}, + "outputs": [], + "source": [ + "vbt.riskfolio_optimize(\n", + " returns,\n", + " sht=True,\n", + " uppersht=0.3,\n", + " upperlng=1.3,\n", + " budget=1.0,\n", + " method_mu=\"hist\", \n", + " method_cov=\"hist\", \n", + " d=0.94,\n", + " rm=\"MV\", \n", + " obj=\"Sharpe\", \n", + " rf=0, \n", + " l=0, \n", + " hist=True\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "edd1993f-27de-4b36-9c2d-fb801f145e3e", + "metadata": {}, + "outputs": [], + "source": [ + "port = rp.Portfolio(returns=returns)\n", + "port.assets_stats(\n", + " method_mu=\"hist\", \n", + " method_cov=\"hist\",\n", + " d=0.94\n", + ")\n", + "asset_classes = {\"Assets\": returns.columns.tolist()}\n", + "asset_classes = pd.DataFrame(asset_classes)\n", + "constraints = {\n", + " \"Disabled\": [False, False],\n", + " \"Type\": [\"All Assets\", \"Assets\"],\n", + " \"Set\": [\"\", \"\"],\n", + " \"Position\": [\"\", \"BTCUSDT\"],\n", + " \"Sign\": [\">=\", \"<=\"],\n", + " 'Weight': [0.1, 0.15],\n", + " \"Type Relative\": [\"\", \"\"],\n", + " \"Relative Set\": [\"\", \"\"],\n", + " \"Relative\": [\"\", \"\"],\n", + " \"Factor\": [\"\", \"\"],\n", + "}\n", + "constraints = pd.DataFrame(constraints)\n", + "A, B =
rp.assets_constraints(constraints, asset_classes)\n", + "port.ainequality = A\n", + "port.binequality = B\n", + "w = port.optimization(\n", + " model=\"Classic\",\n", + " rm=\"MV\",\n", + " obj=\"Sharpe\",\n", + " rf=0,\n", + " l=0,\n", + " hist=True\n", + ")\n", + "print(w.T)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9d782230-ca6c-4816-b398-0a42510bf4f5", + "metadata": {}, + "outputs": [], + "source": [ + "vbt.riskfolio_optimize(\n", + " returns,\n", + " method_mu=\"hist\", \n", + " method_cov=\"hist\", \n", + " constraints=[{\n", + " \"Type\": \"All Assets\",\n", + " \"Sign\": \">=\",\n", + " \"Weight\": 0.1\n", + " }, {\n", + " \"Type\": \"Assets\",\n", + " \"Position\": \"BTCUSDT\",\n", + " \"Sign\": \"<=\",\n", + " \"Weight\": 0.15\n", + " }],\n", + " d=0.94,\n", + " rm=\"MV\", \n", + " obj=\"Sharpe\", \n", + " rf=0, \n", + " l=0, \n", + " hist=True\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "37b200ad-f197-4b83-ae03-2bb7ff1fc21b", + "metadata": {}, + "outputs": [], + "source": [ + "tags = [\n", + " \"Smart contracts\",\n", + " \"Smart contracts\",\n", + " \"Payments\",\n", + " \"Smart contracts\",\n", + " \"Payments\"\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7db1f74f-4fc2-455e-bd4b-e42f8e0f733b", + "metadata": {}, + "outputs": [], + "source": [ + "port = rp.Portfolio(returns=returns)\n", + "port.assets_stats(\n", + " method_mu=\"hist\", \n", + " method_cov=\"hist\",\n", + " d=0.94\n", + ")\n", + "asset_classes = {\n", + " \"Assets\": returns.columns.tolist(),\n", + " \"Tags\": tags\n", + "}\n", + "asset_classes = pd.DataFrame(asset_classes)\n", + "constraints = {\n", + " \"Disabled\": [False],\n", + " \"Type\": [\"Classes\"],\n", + " \"Set\": [\"Tags\"],\n", + " \"Position\": [\"Smart contracts\"],\n", + " \"Sign\": [\">=\"],\n", + " 'Weight': [0.8],\n", + " \"Type Relative\": [\"\"],\n", + " \"Relative Set\": [\"\"],\n", + " \"Relative\": [\"\"],\n", 
+ " \"Factor\": [\"\"],\n", + "}\n", + "constraints = pd.DataFrame(constraints)\n", + "A, B = rp.assets_constraints(constraints, asset_classes)\n", + "port.ainequality = A\n", + "port.binequality = B\n", + "w = port.optimization(\n", + " model=\"Classic\",\n", + " rm=\"MV\",\n", + " obj=\"Sharpe\",\n", + " rf=0,\n", + " l=0,\n", + " hist=True\n", + ")\n", + "print(w.T)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7403aeb2-fae6-487d-ae11-f50071dd0e1a", + "metadata": {}, + "outputs": [], + "source": [ + "vbt.riskfolio_optimize(\n", + " returns,\n", + " method_mu=\"hist\", \n", + " method_cov=\"hist\", \n", + " asset_classes={\"Tags\": tags},\n", + " constraints=[{\n", + " \"Type\": \"Classes\",\n", + " \"Set\": \"Tags\",\n", + " \"Position\": \"Smart contracts\",\n", + " \"Sign\": \">=\",\n", + " \"Weight\": 0.8\n", + " }],\n", + " d=0.94,\n", + " rm=\"MV\", \n", + " obj=\"Sharpe\", \n", + " rf=0, \n", + " l=0, \n", + " hist=True\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2ee363b9-7241-4569-8c90-f1f0feb98644", + "metadata": {}, + "outputs": [], + "source": [ + "port = rp.HCPortfolio(returns=returns)\n", + "w = port.optimization(\n", + " model=\"NCO\",\n", + " codependence=\"pearson\",\n", + " covariance=\"hist\",\n", + " obj=\"MinRisk\",\n", + " rm=\"MV\",\n", + " rf=0,\n", + " l=2,\n", + " linkage=\"ward\",\n", + " max_k=10,\n", + " leaf_order=True\n", + ")\n", + "print(w.T)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "02c18d54-6721-49d8-a7de-fb3bd44a2dd9", + "metadata": {}, + "outputs": [], + "source": [ + "vbt.riskfolio_optimize(\n", + " returns,\n", + " port_cls=\"HCPortfolio\",\n", + " model=\"NCO\",\n", + " codependence=\"pearson\",\n", + " covariance=\"hist\",\n", + " obj=\"MinRisk\",\n", + " rm=\"MV\",\n", + " rf=0,\n", + " l=2,\n", + " linkage=\"ward\",\n", + " max_k=10,\n", + " leaf_order=True\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": 
"c329e926-3cea-4610-b214-0d01c2001a7f", + "metadata": {}, + "source": [ + "#### Periodically" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25c31c61-5735-4312-b3a0-f0d1513bcd4d", + "metadata": {}, + "outputs": [], + "source": [ + "pfo = vbt.PortfolioOptimizer.from_riskfolio(\n", + " returns=returns,\n", + " every=\"W\"\n", + ")\n", + "\n", + "pfo.plot().show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a538f664-dabe-4820-ae5b-0984df69f81c", + "metadata": {}, + "outputs": [], + "source": [ + "pfo = vbt.PortfolioOptimizer.from_riskfolio(\n", + " returns=returns,\n", + " constraints=[{\n", + " \"Type\": \"Assets\",\n", + " \"Position\": \"BTCUSDT\",\n", + " \"Sign\": \"<=\",\n", + " \"Weight\": vbt.Param([0.1, 0.2, 0.3], name=\"BTCUSDT_maxw\")\n", + " }],\n", + " every=\"W\",\n", + " param_search_kwargs=dict(incl_types=list)\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0835ab3e-b051-4a26-aa2b-a69ca9000d18", + "metadata": {}, + "outputs": [], + "source": [ + "print(pfo.allocations.groupby(\"BTCUSDT_maxw\").max())" + ] + }, + { + "cell_type": "markdown", + "id": "bf1c570e-764f-4c9a-be82-fdf1d0e2ae51", + "metadata": {}, + "source": [ + "### Universal portfolios" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8dcf11ca-dc92-4637-aab4-cffc2aa7b8cb", + "metadata": {}, + "outputs": [], + "source": [ + "from universal import tools, algos\n", + "\n", + "with vbt.WarningsFiltered():\n", + " algo = algos.CRP()\n", + " algo_result = algo.run(data.get(\"Close\"))\n", + " \n", + "print(algo_result.weights)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d73a4383-09cd-43a1-9306-9585cf05d3ce", + "metadata": {}, + "outputs": [], + "source": [ + "with vbt.WarningsFiltered():\n", + " algo = algos.DynamicCRP(\n", + " n=30, \n", + " min_history=7, \n", + " metric='sharpe', \n", + " alpha=0.01\n", + " )\n", + " algo_result = 
algo.run(data.get(\"Close\").resample(\"D\").last())\n", + " down_weights = algo_result.weights\n", + " \n", + "print(down_weights)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "933da56f-6797-4d05-91cb-f5071a262058", + "metadata": {}, + "outputs": [], + "source": [ + "weights = down_weights.vbt.realign(\n", + " data.wrapper.index,\n", + " freq=\"1h\",\n", + " source_rbound=True,\n", + " target_rbound=True,\n", + " ffill=False\n", + ")\n", + "print(weights)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d40174db-9aa1-4397-b76e-f3dfd1f5db5a", + "metadata": {}, + "outputs": [], + "source": [ + "with vbt.WarningsFiltered():\n", + " down_pfo = vbt.PortfolioOptimizer.from_universal_algo(\n", + " \"DynamicCRP\",\n", + " data.get(\"Close\").resample(\"D\").last(),\n", + " n=vbt.Param([7, 14, 30, 90]), \n", + " min_history=7, \n", + " metric='sharpe', \n", + " alpha=0.01\n", + " )\n", + "\n", + "down_pfo.plot(column=90).show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "248bb47a-28d1-47e5-8c25-a4807c7d6c8b", + "metadata": {}, + "outputs": [], + "source": [ + "resampler = vbt.Resampler(\n", + " down_pfo.wrapper.index, \n", + " data.wrapper.index, \n", + " target_freq=\"1h\"\n", + ")\n", + "pfo = down_pfo.resample(resampler)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "251b6943-4f95-4de9-914c-f3b5c3d326d6", + "metadata": {}, + "outputs": [], + "source": [ + "pf = pfo.simulate(data, freq=\"1h\")\n", + "\n", + "pf.sharpe_ratio" + ] + }, + { + "cell_type": "markdown", + "id": "447827ee-687d-4b7b-ae6b-c4b7e2900b22", + "metadata": {}, + "source": [ + "#### Custom algorithm" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4e4aa1ef-0306-4d62-8ec8-24f02a72509e", + "metadata": {}, + "outputs": [], + "source": [ + "from universal.algo import Algo\n", + "\n", + "class MeanReversion(Algo):\n", + " PRICE_TYPE = 'log'\n", + " \n", + " def __init__(self, 
n):\n", + " self.n = n\n", + " super().__init__(min_history=n)\n", + " \n", + " def init_weights(self, cols):\n", + " return pd.Series(np.zeros(len(cols)), cols)\n", + " \n", + " def step(self, x, last_b, history):\n", + " ma = history.iloc[-self.n:].mean()\n", + " delta = x - ma\n", + " w = np.maximum(-delta, 0.)\n", + " return w / sum(w)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "32919021-27fb-4121-80c5-640bab1811f4", + "metadata": {}, + "outputs": [], + "source": [ + "with vbt.WarningsFiltered():\n", + " pfo = vbt.PortfolioOptimizer.from_universal_algo(\n", + " MeanReversion,\n", + " data.get(\"Close\").resample(\"D\").last(),\n", + " n=30,\n", + " every=\"W\"\n", + " )\n", + "\n", + "pfo.plot().show_svg()" + ] + }, + { + "cell_type": "markdown", + "id": "f5a0e81b-4b60-49d2-8a97-f61f3f937a76", + "metadata": {}, + "source": [ + "## Dynamic" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6939f3bf-7430-4e13-8194-dc687fc17f95", + "metadata": {}, + "outputs": [], + "source": [ + "GroupMemory = namedtuple(\"GroupMemory\", [\n", + " \"target_alloc\", \n", + " \"size_type\",\n", + " \"direction\",\n", + " \"order_value_out\"\n", + "])\n", + "\n", + "@njit\n", + "def pre_group_func_nb(c):\n", + " group_memory = GroupMemory(\n", + " target_alloc=np.full(c.group_len, np.nan),\n", + " size_type=np.full(c.group_len, vbt.pf_enums.SizeType.TargetPercent),\n", + " direction=np.full(c.group_len, vbt.pf_enums.Direction.Both),\n", + " order_value_out=np.full(c.group_len, np.nan)\n", + " )\n", + " return group_memory,\n", + "\n", + "@njit\n", + "def pre_segment_func_nb(\n", + " c, \n", + " group_memory,\n", + " min_history,\n", + " threshold,\n", + " allocate_func_nb,\n", + " *args\n", + "):\n", + " should_rebalance = False\n", + " \n", + " if c.i >= min_history:\n", + " in_position = False\n", + " for col in range(c.from_col, c.to_col):\n", + " if c.last_position[col] != 0:\n", + " in_position = True\n", + " break\n", + " \n", 
+ " if not in_position:\n", + " should_rebalance = True\n", + " else:\n", + " curr_value = c.last_value[c.group]\n", + " for group_col in range(c.group_len):\n", + " col = c.from_col + group_col\n", + " curr_position = c.last_position[col]\n", + " curr_price = c.last_val_price[col]\n", + " curr_alloc = curr_position * curr_price / curr_value\n", + " curr_threshold = vbt.pf_nb.select_from_col_nb(c, col, threshold)\n", + " alloc_diff = curr_alloc - group_memory.target_alloc[group_col]\n", + " \n", + " if abs(alloc_diff) >= curr_threshold:\n", + " should_rebalance = True\n", + " break\n", + " \n", + " if should_rebalance:\n", + " allocate_func_nb(c, group_memory, *args)\n", + " vbt.pf_nb.sort_call_seq_1d_nb(\n", + " c, \n", + " group_memory.target_alloc, \n", + " group_memory.size_type, \n", + " group_memory.direction, \n", + " group_memory.order_value_out\n", + " )\n", + " \n", + " return group_memory, should_rebalance\n", + "\n", + "@njit\n", + "def order_func_nb(\n", + " c, \n", + " group_memory,\n", + " should_rebalance, \n", + " price,\n", + " fees\n", + "):\n", + " if not should_rebalance:\n", + " return vbt.pf_nb.order_nothing_nb()\n", + " \n", + " group_col = c.col - c.from_col\n", + " return vbt.pf_nb.order_nb(\n", + " size=group_memory.target_alloc[group_col], \n", + " price=vbt.pf_nb.select_nb(c, price),\n", + " size_type=group_memory.size_type[group_col],\n", + " direction=group_memory.direction[group_col],\n", + " fees=vbt.pf_nb.select_nb(c, fees)\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e55a608f-9ea4-46b3-bde3-188f20728530", + "metadata": {}, + "outputs": [], + "source": [ + "@njit\n", + "def uniform_allocate_func_nb(c, group_memory):\n", + " for group_col in range(c.group_len):\n", + " group_memory.target_alloc[group_col] = 1 / c.group_len" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2c90956d-24e0-4d42-93ff-19a3c3688288", + "metadata": {}, + "outputs": [], + "source": [ + "def 
simulate_threshold_rebalancing(threshold, allocate_func_nb, *args, **kwargs):\n", + " return vbt.Portfolio.from_order_func(\n", + " data.get(\"Close\"),\n", + " open=data.get(\"Open\"),\n", + " pre_group_func_nb=pre_group_func_nb, \n", + " pre_group_args=(),\n", + " pre_segment_func_nb=pre_segment_func_nb, \n", + " pre_segment_args=(\n", + " 0,\n", + " vbt.Rep(\"threshold\"),\n", + " allocate_func_nb,\n", + " *args\n", + " ),\n", + " order_func_nb=order_func_nb, \n", + " order_args=(vbt.Rep('price'), vbt.Rep('fees')),\n", + " broadcast_named_args=dict(\n", + " price=data.get(\"Close\"),\n", + " fees=0.005,\n", + " threshold=threshold\n", + " ),\n", + " cash_sharing=True,\n", + " group_by=vbt.ExceptLevel(\"symbol\"),\n", + " freq='1h', \n", + " **kwargs\n", + " )\n", + "\n", + "pf = simulate_threshold_rebalancing(0.05, uniform_allocate_func_nb)\n", + "pf.plot_allocations().show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7ccbf4e6-b3b6-4b9d-b036-03483440d176", + "metadata": {}, + "outputs": [], + "source": [ + "pf = simulate_threshold_rebalancing(\n", + " vbt.Param(np.arange(1, 16) / 100, name=\"threshold\"), \n", + " uniform_allocate_func_nb\n", + ")\n", + "\n", + "pf.sharpe_ratio" + ] + }, + { + "cell_type": "markdown", + "id": "d0559a9a-d45a-4760-9d8d-4ee2cb8a3449", + "metadata": {}, + "source": [ + "### Post-analysis" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "588f1e8b-5c2c-41b2-b50a-1d6b5d912963", + "metadata": {}, + "outputs": [], + "source": [ + "@njit\n", + "def track_uniform_allocate_func_nb(c, group_memory, index_points, alloc_counter):\n", + " for group_col in range(c.group_len):\n", + " group_memory.target_alloc[group_col] = 1 / c.group_len\n", + " index_points[alloc_counter[0]] = c.i\n", + " alloc_counter[0] += 1\n", + "\n", + "index_points = np.empty(data.wrapper.shape[0], dtype=np.int_)\n", + "alloc_counter = np.full(1, 0)\n", + "pf = simulate_threshold_rebalancing(\n", + " 0.05,\n", + " 
track_uniform_allocate_func_nb, \n", + " index_points, \n", + " alloc_counter\n", + ")\n", + "index_points = index_points[:alloc_counter[0]]\n", + "\n", + "data.wrapper.index[index_points]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2794df2a-d6b5-4b69-bfe4-765a06fe6200", + "metadata": {}, + "outputs": [], + "source": [ + "@njit\n", + "def random_allocate_func_nb(\n", + " c, \n", + " group_memory, \n", + " alloc_points, \n", + " alloc_weights, \n", + " alloc_counter\n", + "):\n", + " weights = np.random.uniform(0, 1, c.group_len)\n", + " group_memory.target_alloc[:] = weights / weights.sum()\n", + " \n", + " group_count = alloc_counter[c.group]\n", + " count = alloc_counter.sum()\n", + " alloc_points[\"id\"][count] = group_count\n", + " alloc_points[\"col\"][count] = c.group\n", + " alloc_points[\"alloc_idx\"][count] = c.i\n", + " alloc_weights[count] = group_memory.target_alloc\n", + " alloc_counter[c.group] += 1\n", + "\n", + "thresholds = pd.Index(np.arange(1, 16) / 100, name=\"threshold\")\n", + "max_entries = data.wrapper.shape[0] * len(thresholds)\n", + "alloc_points = np.empty(max_entries, dtype=vbt.pf_enums.alloc_point_dt)\n", + "alloc_weights = np.empty((max_entries, len(data.symbols)), dtype=np.float_)\n", + "alloc_counter = np.full(len(thresholds), 0)\n", + "\n", + "pf = simulate_threshold_rebalancing(\n", + " vbt.Param(thresholds),\n", + " random_allocate_func_nb, \n", + " alloc_points, \n", + " alloc_weights,\n", + " alloc_counter,\n", + " seed=42\n", + ")\n", + "alloc_points = alloc_points[:alloc_counter.sum()]\n", + "alloc_weights = alloc_weights[:alloc_counter.sum()]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d940d355-bce3-4443-857f-4ab62ed7aeb9", + "metadata": {}, + "outputs": [], + "source": [ + "@njit\n", + "def random_allocate_func_nb(c, group_memory):\n", + " weights = np.random.uniform(0, 1, c.group_len)\n", + " group_memory.target_alloc[:] = weights / weights.sum()\n", + " \n", + " 
group_count = c.in_outputs.alloc_counter[c.group]\n", + " count = c.in_outputs.alloc_counter.sum()\n", + " c.in_outputs.alloc_points[\"id\"][count] = group_count\n", + " c.in_outputs.alloc_points[\"col\"][count] = c.group\n", + " c.in_outputs.alloc_points[\"alloc_idx\"][count] = c.i\n", + " c.in_outputs.alloc_weights[count] = group_memory.target_alloc\n", + " c.in_outputs.alloc_counter[c.group] += 1\n", + "\n", + "alloc_points = vbt.RepEval(\"\"\"\n", + " max_entries = target_shape[0] * len(group_lens)\n", + " np.empty(max_entries, dtype=alloc_point_dt)\n", + "\"\"\", context=dict(alloc_point_dt=vbt.pf_enums.alloc_point_dt))\n", + "alloc_weights = vbt.RepEval(\"\"\"\n", + " max_entries = target_shape[0] * len(group_lens)\n", + " np.empty((max_entries, n_cols), dtype=np.float_)\n", + "\"\"\", context=dict(n_cols=len(data.symbols)))\n", + "alloc_counter = vbt.RepEval(\"np.full(len(group_lens), 0)\")\n", + "\n", + "InOutputs = namedtuple(\"InOutputs\", [\n", + " \"alloc_points\",\n", + " \"alloc_weights\",\n", + " \"alloc_counter\"\n", + "])\n", + "in_outputs = InOutputs(\n", + " alloc_points=alloc_points, \n", + " alloc_weights=alloc_weights,\n", + " alloc_counter=alloc_counter,\n", + ")\n", + "\n", + "pf = simulate_threshold_rebalancing(\n", + " vbt.Param(np.arange(1, 16) / 100, name=\"threshold\"),\n", + " random_allocate_func_nb, \n", + " in_outputs=in_outputs,\n", + " seed=42\n", + ")\n", + "alloc_points = pf.in_outputs.alloc_points[:pf.in_outputs.alloc_counter.sum()]\n", + "alloc_weights = pf.in_outputs.alloc_weights[:pf.in_outputs.alloc_counter.sum()]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b1420356-030b-4815-a522-14ed1fe71642", + "metadata": {}, + "outputs": [], + "source": [ + "pfo = vbt.PortfolioOptimizer(\n", + " wrapper=pf.wrapper,\n", + " alloc_records=vbt.AllocPoints(\n", + " pf.wrapper.resolve(), \n", + " alloc_points\n", + " ),\n", + " allocations=alloc_weights\n", + ")" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "id": "6fdf238e-e38b-4d6c-b36c-33ed342adccb", + "metadata": {}, + "outputs": [], + "source": [ + "print(pfo[0.1].allocations.describe())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "515f6baa-7b46-4f19-b84a-c9660ae95362", + "metadata": {}, + "outputs": [], + "source": [ + "pfo.plot(column=0.1).show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7466d4f5-c1c9-416e-a47d-802b3f6813ca", + "metadata": {}, + "outputs": [], + "source": [ + "pfo.plot(column=0.03).show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f8671183-8dc7-4a33-8d0a-559ac876f0bd", + "metadata": {}, + "outputs": [], + "source": [ + "pf[0.03].plot_allocations().show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "291d453b-1a17-436b-afbe-d3a8dfd8295c", + "metadata": {}, + "outputs": [], + "source": [ + "pf.sharpe_ratio" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0b09c8a6-1f02-4380-b972-f4fe4660a261", + "metadata": {}, + "outputs": [], + "source": [ + "pf_new = vbt.Portfolio.from_optimizer(\n", + " data,\n", + " pfo, \n", + " val_price=data.get(\"Open\"), \n", + " freq=\"1h\", \n", + " fees=0.005\n", + ")\n", + "\n", + "pf_new.sharpe_ratio" + ] + }, + { + "cell_type": "markdown", + "id": "f4aabcbe-8baa-4507-a38a-a4b937d6a5c7", + "metadata": {}, + "source": [ + "### Bonus 1: Own optimizer" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "de4de790-9fd4-4030-8dfb-dae986de9012", + "metadata": {}, + "outputs": [], + "source": [ + "@njit(nogil=True)\n", + "def optimize_portfolio_nb(\n", + " close, \n", + " val_price,\n", + " range_starts,\n", + " range_ends,\n", + " optimize_func_nb,\n", + " optimize_args=(),\n", + " price=np.inf,\n", + " fees=0.,\n", + " init_cash=100.,\n", + " group=0\n", + "):\n", + " val_price_ = vbt.to_2d_array_nb(np.asarray(val_price))\n", + " price_ = 
vbt.to_2d_array_nb(np.asarray(price))\n", + " fees_ = vbt.to_2d_array_nb(np.asarray(fees))\n", + "\n", + " order_records = np.empty(close.shape, dtype=vbt.pf_enums.order_dt)\n", + " order_counts = np.full(close.shape[1], 0, dtype=np.int_)\n", + " \n", + " order_value = np.empty(close.shape[1], dtype=np.float_)\n", + " call_seq = np.empty(close.shape[1], dtype=np.int_)\n", + " \n", + " last_position = np.full(close.shape[1], 0.0, dtype=np.float_)\n", + " last_debt = np.full(close.shape[1], 0.0, dtype=np.float_)\n", + " last_locked_cash = np.full(close.shape[1], 0.0, dtype=np.float_)\n", + " cash_now = float(init_cash)\n", + " free_cash_now = float(init_cash)\n", + " value_now = float(init_cash)\n", + "\n", + " for k in range(len(range_starts)):\n", + " i = range_ends[k]\n", + " size = optimize_func_nb(\n", + " range_starts[k], \n", + " range_ends[k], \n", + " *optimize_args\n", + " )\n", + " \n", + " value_now = cash_now\n", + " for col in range(close.shape[1]):\n", + " val_price_now = vbt.flex_select_nb(val_price_, i, col)\n", + " value_now += last_position[col] * val_price_now\n", + " \n", + " for col in range(close.shape[1]):\n", + " val_price_now = vbt.flex_select_nb(val_price_, i, col)\n", + " exec_state = vbt.pf_enums.ExecState(\n", + " cash=cash_now,\n", + " position=last_position[col],\n", + " debt=last_debt[col],\n", + " locked_cash=last_locked_cash[col],\n", + " free_cash=free_cash_now,\n", + " val_price=val_price_now,\n", + " value=value_now,\n", + " )\n", + " order_value[col] = vbt.pf_nb.approx_order_value_nb(\n", + " exec_state,\n", + " size[col],\n", + " vbt.pf_enums.SizeType.TargetPercent,\n", + " vbt.pf_enums.Direction.Both,\n", + " )\n", + " call_seq[col] = col\n", + "\n", + " vbt.pf_nb.insert_argsort_nb(order_value, call_seq)\n", + "\n", + " for c in range(close.shape[1]):\n", + " col = call_seq[c]\n", + " \n", + " order = vbt.pf_nb.order_nb(\n", + " size=size[col],\n", + " price=vbt.flex_select_nb(price_, i, col),\n", + " 
size_type=vbt.pf_enums.SizeType.TargetPercent,\n", + " direction=vbt.pf_enums.Direction.Both,\n", + " fees=vbt.flex_select_nb(fees_, i, col),\n", + " )\n", + "\n", + " price_area = vbt.pf_enums.PriceArea(\n", + " open=np.nan,\n", + " high=np.nan,\n", + " low=np.nan,\n", + " close=vbt.flex_select_nb(close, i, col),\n", + " )\n", + " val_price_now = vbt.flex_select_nb(val_price_, i, col)\n", + " exec_state = vbt.pf_enums.ExecState(\n", + " cash=cash_now,\n", + " position=last_position[col],\n", + " debt=last_debt[col],\n", + " locked_cash=last_locked_cash[col],\n", + " free_cash=free_cash_now,\n", + " val_price=val_price_now,\n", + " value=value_now,\n", + " )\n", + " _, new_exec_state = vbt.pf_nb.process_order_nb(\n", + " group=group,\n", + " col=col,\n", + " i=i,\n", + " exec_state=exec_state,\n", + " order=order,\n", + " price_area=price_area,\n", + " order_records=order_records,\n", + " order_counts=order_counts\n", + " )\n", + "\n", + " cash_now = new_exec_state.cash\n", + " free_cash_now = new_exec_state.free_cash\n", + " value_now = new_exec_state.value\n", + " last_position[col] = new_exec_state.position\n", + " last_debt[col] = new_exec_state.debt\n", + " last_locked_cash[col] = new_exec_state.locked_cash\n", + "\n", + " return vbt.nb.repartition_nb(order_records, order_counts)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e345590b-3a5a-43ec-b325-3db9766d3ea1", + "metadata": {}, + "outputs": [], + "source": [ + "@njit(nogil=True)\n", + "def sharpe_optimize_func_nb(\n", + " start_idx, \n", + " end_idx, \n", + " close, \n", + " num_tests, \n", + " ann_factor\n", + "):\n", + " close_period = close[start_idx:end_idx]\n", + " returns = (close_period[1:] - close_period[:-1]) / close_period[:-1]\n", + " mean = vbt.nb.nanmean_nb(returns)\n", + " cov = np.cov(returns, rowvar=False)\n", + " best_sharpe_ratio = -np.inf\n", + " weights = np.full(close.shape[1], np.nan, dtype=np.float_)\n", + " \n", + " for i in range(num_tests):\n", + " w = 
np.random.random_sample(close.shape[1])\n", + " w = w / np.sum(w)\n", + " p_return = np.sum(mean * w) * ann_factor\n", + " p_std = np.sqrt(np.dot(w.T, np.dot(cov, w))) * np.sqrt(ann_factor)\n", + " sharpe_ratio = p_return / p_std\n", + " if sharpe_ratio > best_sharpe_ratio:\n", + " best_sharpe_ratio = sharpe_ratio\n", + " weights = w\n", + " \n", + " return weights" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0b16d9d-688a-49b5-8738-2191b1ad966a", + "metadata": {}, + "outputs": [], + "source": [ + "range_starts, range_ends = data.wrapper.get_index_ranges(every=\"W\")\n", + "ann_factor = vbt.timedelta(\"365d\") / vbt.timedelta(\"1h\")\n", + "init_cash = 100\n", + "num_tests = 30\n", + "fees = 0.005\n", + "\n", + "order_records = optimize_portfolio_nb(\n", + " data.get(\"Close\").values,\n", + " data.get(\"Open\").values,\n", + " range_starts,\n", + " range_ends,\n", + " sharpe_optimize_func_nb,\n", + " optimize_args=(data.get(\"Close\").values, num_tests, ann_factor),\n", + " fees=fees,\n", + " init_cash=init_cash\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a7c2e668-d30b-4bbf-ab45-e7287eba39d8", + "metadata": {}, + "outputs": [], + "source": [ + "pf = vbt.Portfolio(\n", + " wrapper=symbol_wrapper.regroup(True), \n", + " close=data.get(\"Close\"), \n", + " order_records=order_records, \n", + " log_records=np.array([]), \n", + " cash_sharing=True, \n", + " init_cash=init_cash\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "aa378c28-5ec9-4999-88cf-edd11df53afa", + "metadata": {}, + "outputs": [], + "source": [ + "pf.plot_allocations().show_svg()" + ] + }, + { + "cell_type": "markdown", + "id": "89431545-729d-4b7f-b465-afed7168ec53", + "metadata": {}, + "source": [ + "### Bonus 2: Parameterization" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7ee3cc01-379c-4ee0-a188-78cb1fbdd526", + "metadata": {}, + "outputs": [], + "source": [ + "def 
merge_func(order_records_list, param_index):\n", + " sharpe_ratios = pd.Series(index=param_index, dtype=np.float_)\n", + " for i, order_records in enumerate(order_records_list):\n", + " pf = vbt.Portfolio(\n", + " wrapper=symbol_wrapper.regroup(True), \n", + " close=data.get(\"Close\"), \n", + " order_records=order_records, \n", + " cash_sharing=True, \n", + " init_cash=init_cash\n", + " )\n", + " sharpe_ratios.iloc[i] = pf.sharpe_ratio\n", + " return sharpe_ratios" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ffd14c31-25fd-415c-9b2f-295e31732a1d", + "metadata": {}, + "outputs": [], + "source": [ + "param_optimize_portfolio_nb = vbt.parameterized(\n", + " optimize_portfolio_nb, \n", + " merge_func=merge_func,\n", + " merge_kwargs=dict(param_index=vbt.Rep(\"param_index\")),\n", + " engine=\"dask\",\n", + " chunk_len=4\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f1ec6941-a6df-462c-bd2d-043f1dee498b", + "metadata": {}, + "outputs": [], + "source": [ + "every_index = pd.Index([\"D\", \"W\", \"M\"], name=\"every\")\n", + "num_tests_index = pd.Index([30, 50, 100], name=\"num_tests\")\n", + "fees_index = pd.Index([0.0, 0.005, 0.01], name=\"fees\")\n", + "\n", + "range_starts = []\n", + "range_ends = []\n", + "for every in every_index:\n", + " index_ranges = symbol_wrapper.get_index_ranges(every=every)\n", + " range_starts.append(index_ranges[0])\n", + " range_ends.append(index_ranges[1])\n", + "num_tests = num_tests_index.tolist()\n", + "\n", + "range_starts = vbt.Param(range_starts, level=0, keys=every_index)\n", + "range_ends = vbt.Param(range_ends, level=0, keys=every_index)\n", + "num_tests = vbt.Param(num_tests, level=1, keys=num_tests_index)\n", + "fees = vbt.Param(fees_index.values, level=2, keys=fees_index)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5534b797-2c85-4c33-8143-d41b9542c1df", + "metadata": {}, + "outputs": [], + "source": [ + "sharpe_ratios = 
param_optimize_portfolio_nb(\n", + " data.get(\"Close\").values,\n", + " data.get(\"Open\").values,\n", + " range_starts,\n", + " range_ends,\n", + " sharpe_optimize_func_nb,\n", + " optimize_args=(\n", + " data.get(\"Close\").values, \n", + " num_tests, \n", + " ann_factor\n", + " ),\n", + " fees=fees,\n", + " init_cash=init_cash,\n", + " group=vbt.Rep(\"config_idx\")\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "508c1fc7-c141-4428-9844-6bef69ea64cd", + "metadata": {}, + "outputs": [], + "source": [ + "sharpe_ratios" + ] + }, + { + "cell_type": "markdown", + "id": "d87c6506-c84f-49bc-9236-3804cd22dd22", + "metadata": {}, + "source": [ + "### Bonus 3: Hyperopt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "111953f4-6b42-4023-b204-4b4480bf1ed8", + "metadata": {}, + "outputs": [], + "source": [ + "def objective(kwargs):\n", + " close_values = data.get(\"Close\").values\n", + " open_values = data.get(\"Open\").values\n", + " index_ranges = symbol_wrapper.get_index_ranges(every=kwargs[\"every\"])\n", + " order_records = optimize_portfolio_nb(\n", + " close_values,\n", + " open_values,\n", + " index_ranges[0],\n", + " index_ranges[1],\n", + " sharpe_optimize_func_nb,\n", + " optimize_args=(close_values, kwargs[\"num_tests\"], ann_factor),\n", + " fees=vbt.to_2d_array(kwargs[\"fees\"]),\n", + " init_cash=init_cash\n", + " )\n", + " pf = vbt.Portfolio(\n", + " wrapper=symbol_wrapper.regroup(True), \n", + " close=data.get(\"Close\"), \n", + " order_records=order_records, \n", + " log_records=np.array([]), \n", + " cash_sharing=True, \n", + " init_cash=init_cash\n", + " )\n", + " return -pf.sharpe_ratio" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7de2667c-977e-4595-808d-17afc7279796", + "metadata": {}, + "outputs": [], + "source": [ + "from hyperopt import fmin, tpe, hp\n", + "\n", + "space = {\n", + " \"every\": hp.choice(\"every\", [\"%dD\" % n for n in range(1, 100)]),\n", + " 
\"num_tests\": hp.quniform(\"num_tests\", 5, 100, 1),\n", + " \"fees\": hp.uniform('fees', 0, 0.05)\n", + "}\n", + "\n", + "best = fmin(\n", + " fn=objective,\n", + " space=space,\n", + " algo=tpe.suggest,\n", + " max_evals=30\n", + ")\n", + "best" + ] + }, + { + "cell_type": "markdown", + "id": "a19b8f97-e160-4238-bafa-6701c5a7d3d7", + "metadata": {}, + "source": [ + "### Bonus 4: Hybrid" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0d0c221b-4c46-4875-ac11-248518e025f8", + "metadata": {}, + "outputs": [], + "source": [ + "def optimize_func(\n", + " data, \n", + " index_slice, \n", + " temp_allocations, \n", + " temp_pfs, \n", + " threshold\n", + "):\n", + " sub_data = data.iloc[index_slice]\n", + " if len(temp_allocations) > 0:\n", + " prev_allocation = sub_data.symbol_wrapper.wrap(\n", + " [temp_allocations[-1]], \n", + " index=sub_data.wrapper.index[[0]]\n", + " )\n", + " prev_pfo = vbt.PortfolioOptimizer.from_allocations(\n", + " sub_data.symbol_wrapper,\n", + " prev_allocation\n", + " )\n", + " if len(temp_pfs) > 0:\n", + " init_cash = temp_pfs[-1].cash.iloc[-1]\n", + " init_position = temp_pfs[-1].assets.iloc[-1]\n", + " init_price = temp_pfs[-1].close.iloc[-1]\n", + " else:\n", + " init_cash = 100.\n", + " init_position = 0.\n", + " init_price = np.nan\n", + " prev_pf = prev_pfo.simulate(\n", + " sub_data,\n", + " init_cash=init_cash, \n", + " init_position=init_position,\n", + " init_price=init_price\n", + " )\n", + " temp_pfs.append(prev_pf)\n", + " should_rebalance = False\n", + " curr_alloc = prev_pf.allocations.iloc[-1].values\n", + " if (np.abs(curr_alloc - temp_allocations[-1]) >= threshold).any():\n", + " should_rebalance = True\n", + " else:\n", + " should_rebalance = True\n", + " n_symbols = len(sub_data.symbols)\n", + " if should_rebalance:\n", + " new_alloc = np.full(n_symbols, 1 / n_symbols)\n", + " else:\n", + " new_alloc = np.full(n_symbols, np.nan)\n", + " temp_allocations.append(new_alloc)\n", + " return 
new_alloc\n", + "\n", + "pfs = []\n", + "allocations = []\n", + "pfopt = vbt.PortfolioOptimizer.from_optimize_func(\n", + " data.symbol_wrapper,\n", + " optimize_func,\n", + " data,\n", + " vbt.Rep(\"index_slice\"),\n", + " allocations,\n", + " pfs,\n", + " 0.03,\n", + " every=\"W\"\n", + ")\n", + "pf = pfopt.simulate(data)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e1cc1c7c-e487-45a7-b36b-dbd9d6de9ee8", + "metadata": {}, + "outputs": [], + "source": [ + "final_values = pd.concat(map(lambda x: x.value[[-1]], pfs))\n", + "final_values" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ae5aeee2-7b53-4c77-b848-abe9c23b3c69", + "metadata": {}, + "outputs": [], + "source": [ + "pd.testing.assert_series_equal(\n", + " final_values,\n", + " pf.value.loc[final_values.index],\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3ed00107-34bd-4cb8-abfd-6f5432a56f2c", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/to_explore/notebooks/QQ_TelegramSignals.ipynb b/to_explore/notebooks/QQ_TelegramSignals.ipynb new file mode 100644 index 0000000..851f7dc --- /dev/null +++ b/to_explore/notebooks/QQ_TelegramSignals.ipynb @@ -0,0 +1,1107 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "8b6c912e-3c56-4c04-b9ce-2ba70342aade", + "metadata": {}, + "outputs": [], + "source": [ + "from vectorbtpro import *\n", + "# whats_imported()\n", + "\n", + "vbt.settings.set_theme(\"dark\")" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "id": "725278d1-9e9e-48ed-ac9d-9ddf1bcc59b7", + "metadata": {}, + "outputs": [], + "source": [ + "# Pull data\n", + "\n", + "def date_parser(timestamps):\n", + " # The first column contains integer timestamps, parse them into DatetimeIndex\n", + " return pd.to_datetime(timestamps, utc=True, unit=\"ms\")\n", + "\n", + "data = vbt.CSVData.pull(\"download/xauusd-m1-bid-2021-09-01-2023-03-14.csv\", date_parser=date_parser)\n", + "\n", + "print(data.wrapper.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e149c879-62a5-4924-83bb-262c00591ed9", + "metadata": {}, + "outputs": [], + "source": [ + "# Pull signals\n", + "signal_data = vbt.CSVData.pull(\"download/TG_Extracted_Signals.csv\", index_col=1)\n", + "\n", + "print(signal_data.wrapper.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "59ba2f9b-de8f-4a9c-9313-77089b696caa", + "metadata": {}, + "outputs": [], + "source": [ + "# Numba doesn't understand strings, thus create an enumerated type for order types\n", + "\n", + "# Create a type first\n", + "OrderTypeT = namedtuple(\"OrderTypeT\", [\"BUY\", \"SELL\", \"BUYSTOP\", \"SELLSTOP\"])\n", + "\n", + "# Then create a tuple\n", + "OrderType = OrderTypeT(*range(len(OrderTypeT._fields)))\n", + "\n", + "print(OrderType)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "48e1efd4-cd32-4cb5-9ee7-3e21c52d8956", + "metadata": {}, + "outputs": [], + "source": [ + "# Prepare signals\n", + "\n", + "def transform_signal_data(df):\n", + " # Select only one symbol, the one we pulled the data for\n", + " df = df[df[\"Symbol\"] == \"XAUUSD\"]\n", + " \n", + " # Select columns of interest\n", + " df = df.iloc[:, -7:]\n", + " \n", + " # Map order types using OrderType\n", + " df[\"OrderType\"] = df[\"OrderType\"].map(lambda x: OrderType._fields.index(x.replace(\" \", \"\")))\n", + " \n", + " # Some entry prices are zero\n", + " df = df[df[\"EntryPrice\"] > 0]\n", + " \n", + " return df\n", 
+ "\n", + "signal_data = signal_data.transform(transform_signal_data)\n", + "\n", + "print(signal_data.wrapper.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "02543169-e8c1-4813-ac6c-482a79d9e32d", + "metadata": {}, + "outputs": [], + "source": [ + "# Create named tuples which will act as containers for various arrays\n", + "\n", + "# SignalInfo will contain signal information in a vbt-friendly format\n", + "# Rows in each array correspond to signals\n", + "SignalInfo = namedtuple(\"SignalInfo\", [\n", + " \"timestamp\", # 1d array with timestamps in nanosecond format (int64)\n", + " \"order_type\", # 1d array with order types in integer format (int64, see OrderType)\n", + " \"entry_price\", # 1d array with entry price (float64)\n", + " \"sl\", # 2d array where columns are SL levels (float64)\n", + " \"tp\", # 2d array where columns are TP levels (float64)\n", + "])\n", + "\n", + "# TempInfo will contain temporary information that will be written during backtesting\n", + "# You can think of it as a buffer that we write to and then read from at a later time\n", + "# Rows in each array correspond to signals\n", + "TempInfo = namedtuple(\"TempInfo\", [\n", + " \"ts_bar\", # 1d array with row indices where signal was hit (int64)\n", + " \"entry_price_bar\", # 1d array with row indices where entry price was hit (int64)\n", + " \"sl_bar\", # 2d array with row indices where each SL level was hit, same shape as SignalInfo.sl (int64)\n", + " \"tp_bar\", # 2d array with row indices where each TP level was hit, same shape as SignalInfo.tp (int64)\n", + "])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "521c1736-57a9-4d9b-86c7-566814e30aba", + "metadata": {}, + "outputs": [], + "source": [ + "# Here's what we will do:\n", + "# Represent each signal as a separate column with its own starting capital\n", + "# Run an order function using Portfolio.from_order_func\n", + "# The order function is executed at each bar and column 
(signal in our case)\n", + "# If the current bar contains a signal, execute the signal logic\n", + "# Order functions can issue only one order per bar, thus if multiple stops were hit, we will aggregate them\n", + "# We will go all in and then gradually reduce the position based on the number of stops\n", + "\n", + "@njit\n", + "def has_data_nb(c):\n", + " # Numba function to check whether OHLC is not NaN\n", + " if np.isnan(vbt.pf_nb.select_nb(c, c.open)):\n", + " return False\n", + " if np.isnan(vbt.pf_nb.select_nb(c, c.high)):\n", + " return False\n", + " if np.isnan(vbt.pf_nb.select_nb(c, c.low)):\n", + " return False\n", + " if np.isnan(vbt.pf_nb.select_nb(c, c.close)):\n", + " return False\n", + " return True\n", + "\n", + "@njit\n", + "def check_price_hit_nb(c, price, hit_below, can_use_ohlc):\n", + " # Numba function to check whether a price level was hit during this bar\n", + " # Use hit_below=True to check against low and hit_below=False to check against high\n", + " # If can_use_ohlc is False, will check only against the close price\n", + " \n", + " order_price, hit_on_open, hit = vbt.pf_nb.check_price_hit_nb(\n", + " open=vbt.pf_nb.select_nb(c, c.open), # OHLC are flexible arrays, always use select_nb!\n", + " high=vbt.pf_nb.select_nb(c, c.high),\n", + " low=vbt.pf_nb.select_nb(c, c.low),\n", + " close=vbt.pf_nb.select_nb(c, c.close),\n", + " price=price,\n", + " hit_below=hit_below,\n", + " can_use_ohlc=can_use_ohlc\n", + " )\n", + " # Order price here isn't necessarily the price that has been hit\n", + " # For example, if the price was hit before open, order price is set to the open price\n", + " return order_price, hit\n", + "\n", + "@njit(boundscheck=True)\n", + "def order_func_nb(c, signal_info, temp_info): # first argument is context, other are our containers\n", + " if not has_data_nb(c):\n", + " # If this bar contains no data, skip it\n", + " return vbt.pf_nb.order_nothing_nb()\n", + " \n", + " # Each column corresponds to a signal\n", + " 
signal = c.col\n", + " \n", + " # Each row corresponds to a bar\n", + " bar = c.i\n", + " \n", + " # Define various flags for pure convenience\n", + " buy_market = signal_info.order_type[signal] == OrderType.BUY\n", + " sell_market = signal_info.order_type[signal] == OrderType.SELL\n", + " buy_stop = signal_info.order_type[signal] == OrderType.BUYSTOP\n", + " sell_stop = signal_info.order_type[signal] == OrderType.SELLSTOP\n", + " buy = buy_market or buy_stop\n", + " \n", + " # First, we need to check whether the current bar contains a signal\n", + " can_use_ohlc = True\n", + " if temp_info.ts_bar[signal] == -1:\n", + " if c.index[bar] == signal_info.timestamp[signal]:\n", + " # If so, store the current row index in a temporary array\n", + " # such that later we know that we already discovered a signal\n", + " temp_info.ts_bar[signal] = bar\n", + "\n", + " # The signal has the granularity of seconds, thus it belongs somewhere in the bar\n", + " # We need to notify the functions below that they cannot use full OHLC information, only close\n", + " # This is to avoid using prices that technically happened before the signal\n", + " can_use_ohlc = False\n", + " \n", + " # Here comes the entry order\n", + " # Check whether the signal has been discovered\n", + " # -1 means hasn't been discovered yet\n", + " if temp_info.ts_bar[signal] != -1:\n", + " \n", + " # Then, check whether the entry order hasn't been executed\n", + " if temp_info.entry_price_bar[signal] == -1:\n", + " \n", + " # If so, execute the entry order\n", + " if buy_market:\n", + " # Buy market order (using closing price)\n", + " \n", + " # Store the current row index in a temporary array such that future bars know\n", + " # that the order has already been executed\n", + " temp_info.entry_price_bar[signal] = bar\n", + " order_price = signal_info.entry_price[signal]\n", + " return vbt.pf_nb.order_nb(np.inf, np.inf) # size, price\n", + " \n", + " if sell_market:\n", + " # Sell market order (using closing 
price)\n", + " temp_info.entry_price_bar[signal] = bar\n", + " order_price = signal_info.entry_price[signal]\n", + " return vbt.pf_nb.order_nb(-np.inf, np.inf)\n", + " \n", + " if buy_stop:\n", + " # Buy stop order\n", + " # A buy stop order is entered at a stop price above the current market price\n", + " \n", + " # Since it's a pending order, we first need to check whether the entry price has been hit\n", + " order_price, hit = check_price_hit_nb(\n", + " c,\n", + " price=signal_info.entry_price[signal],\n", + " hit_below=False,\n", + " can_use_ohlc=can_use_ohlc,\n", + " )\n", + " if hit:\n", + " # If so, execute the order\n", + " temp_info.entry_price_bar[signal] = bar\n", + " return vbt.pf_nb.order_nb(np.inf, order_price)\n", + " \n", + " if sell_stop:\n", + " # Sell stop order\n", + " # A sell stop order is entered at a stop price below the current market price\n", + " order_price, hit = check_price_hit_nb(\n", + " c,\n", + " price=signal_info.entry_price[signal],\n", + " hit_below=True,\n", + " can_use_ohlc=can_use_ohlc,\n", + " )\n", + " if hit:\n", + " temp_info.entry_price_bar[signal] = bar\n", + " return vbt.pf_nb.order_nb(-np.inf, order_price)\n", + " \n", + " # Here comes the stop order\n", + " # Check whether the entry order has been executed\n", + " if temp_info.entry_price_bar[signal] != -1:\n", + " \n", + " # We also need to check whether we're still in a position\n", + " # in case stops have already closed out the position\n", + " if c.last_position[signal] != 0:\n", + " \n", + " # If so, start with checking for potential SL orders\n", + " # (remember that SL pessimistically comes before TP)\n", + " # First, we need to know the number of potential and already executed SL levels\n", + " # since we want to gradually reduce the position proportionally to the number of levels\n", + " # For example, one signal may define [12.35, 12.29] and another [17.53, nan]\n", + " n_sl_levels = 0\n", + " n_sl_hits = 0\n", + " sl_levels = signal_info.sl[signal] # 
select 1d array from 2d array\n", + " sl_bar = temp_info.sl_bar[signal] # same here\n", + " for k in range(len(sl_levels)):\n", + " if not np.isnan(sl_levels[k]):\n", + " n_sl_levels += 1\n", + " if sl_bar[k] != -1:\n", + " n_sl_hits += 1\n", + " \n", + " # We can execute only one order at the current bar\n", + " # Thus, if the price crossed multiple SL levels, we need to pack them into one order\n", + " # Since SL levels are guaranteed to be sorted, we will check the most distant levels first\n", + " # because if a distant stop has been hit, the closer stops are automatically hit too\n", + " for k in range(n_sl_levels - 1, n_sl_hits - 1, -1):\n", + " if not np.isnan(sl_levels[k]) and sl_bar[k] == -1:\n", + " # Check against low for buy orders and against high for sell orders\n", + " order_price, hit = check_price_hit_nb(\n", + " c,\n", + " price=sl_levels[k],\n", + " hit_below=buy,\n", + " can_use_ohlc=can_use_ohlc,\n", + " )\n", + " if hit:\n", + " sl_bar[k] = bar\n", + " # The further away the stop is, the more of the position needs to be closed\n", + " # We will specify a target percentage\n", + " # For example, for two stops it would be 0.5 (SL1) and 0.0 (SL2)\n", + " # while for three stops it would be 0.66 (SL1), 0.33 (SL2), and 0.0 (SL3)\n", + " # This works only if we went all in before (size=np.inf)!\n", + " size = 1 - (k + 1) / n_sl_levels\n", + " size_type = vbt.pf_enums.SizeType.TargetPercent\n", + " if buy:\n", + " return vbt.pf_nb.order_nb(size, order_price, size_type)\n", + " else:\n", + " # Size must be negative for short positions\n", + " return vbt.pf_nb.order_nb(-size, order_price, size_type)\n", + " \n", + " # Same for potential TP orders\n", + " n_tp_levels = 0\n", + " n_tp_hits = 0\n", + " tp_levels = signal_info.tp[signal]\n", + " tp_bar = temp_info.tp_bar[signal]\n", + " for k in range(len(tp_levels)):\n", + " if not np.isnan(tp_levels[k]):\n", + " n_tp_levels += 1\n", + " if tp_bar[k] != -1:\n", + " n_tp_hits += 1\n", + " \n", + " for k in 
range(n_tp_levels - 1, n_tp_hits - 1, -1):\n", + " if not np.isnan(tp_levels[k]) and tp_bar[k] == -1:\n", + " # Check against high for buy orders and against low for sell orders\n", + " order_price, hit = check_price_hit_nb(\n", + " c,\n", + " price=tp_levels[k],\n", + " hit_below=not buy,\n", + " can_use_ohlc=can_use_ohlc,\n", + " )\n", + " if hit:\n", + " tp_bar[k] = bar\n", + " size = 1 - (k + 1) / n_tp_levels\n", + " size_type = vbt.pf_enums.SizeType.TargetPercent\n", + " if buy:\n", + " return vbt.pf_nb.order_nb(size, order_price, size_type)\n", + " else:\n", + " return vbt.pf_nb.order_nb(-size, order_price, size_type)\n", + " \n", + " # If neither of orders has been executed, order nothing\n", + " return vbt.pf_nb.order_nothing_nb()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c543610e-e215-4bee-afb9-2278a98b354a", + "metadata": {}, + "outputs": [], + "source": [ + "# Prepare signal information\n", + "\n", + "timestamp = vbt.dt.to_ns(signal_data.index) # nanoseconds\n", + "order_type = signal_data.get(\"OrderType\").values\n", + "entry_price = signal_data.get(\"EntryPrice\").values\n", + "sl = signal_data.get(\"SL\").values\n", + "tp1 = signal_data.get(\"TP1\").values\n", + "tp2 = signal_data.get(\"TP2\").values\n", + "tp3 = signal_data.get(\"TP3\").values\n", + "tp4 = signal_data.get(\"TP4\").values\n", + "\n", + "n_signals = len(timestamp)\n", + "print(n_signals)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "eb1c5978-a2c1-4f8e-956c-419da2d01608", + "metadata": {}, + "outputs": [], + "source": [ + "# Since the signals are of the second granularity while the data is of the minute granularity,\n", + "# we need to round the timestamp of the signal to the nearest minute\n", + "# Timestamps represent the opening time, thus the second \"19:28:59\" belongs to the minute \"19:28:00\"\n", + "\n", + "timestamp = timestamp - timestamp % vbt.dt_nb.m_ns" + ] + }, + { + "cell_type": "code", + "execution_count": null, 
+ "id": "bd91be38-ed79-4d15-bc28-49c70478a6ec", + "metadata": {}, + "outputs": [], + "source": [ + "# Create a named tuple for signal information\n", + "\n", + "signal_info = SignalInfo(\n", + " timestamp=timestamp,\n", + " order_type=order_type,\n", + " entry_price=entry_price,\n", + " sl=np.column_stack((sl,)),\n", + " tp=np.column_stack((tp1, tp2, tp3, tp4))\n", + ")\n", + "\n", + "n_sl_levels = signal_info.sl.shape[1]\n", + "print(n_sl_levels)\n", + "\n", + "n_tp_levels = signal_info.tp.shape[1]\n", + "print(n_tp_levels)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "33b06286-9cfe-4cce-a0ab-2e583210c048", + "metadata": {}, + "outputs": [], + "source": [ + "# Important: re-run this cell every time you're running the simulation!\n", + "# Create a named tuple for temporary information\n", + "# All arrays below hold row indices, thus the default value is -1\n", + "\n", + "def build_temp_info(signal_info):\n", + " return TempInfo(\n", + " ts_bar=np.full(len(signal_info.timestamp), -1),\n", + " entry_price_bar=np.full(len(signal_info.timestamp), -1),\n", + " sl_bar=np.full(signal_info.sl.shape, -1),\n", + " tp_bar=np.full(signal_info.tp.shape, -1)\n", + " )\n", + "\n", + "temp_info = build_temp_info(signal_info)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e4e9e4fa-218f-4890-b479-b141ebc64e44", + "metadata": {}, + "outputs": [], + "source": [ + "# By default, vectorbt initializes an empty order array of the same shape as data\n", + "# But since our data is highly granular, it would take a lot of RAM\n", + "# Let's limit the number of records to one entry order and the maximum number of SL and TP orders\n", + "# It will be applied per column\n", + "\n", + "max_order_records = 1 + n_sl_levels + n_tp_levels\n", + "\n", + "print(max_order_records)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "99c8374f-0c00-490f-917d-f551f5037531", + "metadata": {}, + "outputs": [], + "source": [ + "# Perform 
the actual simulation\n", + "# Since we don't broadcast data against any other array, vectorbt doesn't know anything about\n", + "# our signal arrays and will simulate only the one column in our data\n", + "# Thus, we need to tell it to expand the number of columns by the number of signals using tiling\n", + "# But don't worry: thanks to flexible indexing vectorbt won't actually tile the data - good for RAM!\n", + "# (it would tile the data if it had multiple columns though!)\n", + "\n", + "pf = vbt.Portfolio.from_order_func(\n", + " data,\n", + " order_func_nb=order_func_nb,\n", + " order_args=(signal_info, temp_info),\n", + " broadcast_kwargs=dict(tile=n_signals), # tiling here\n", + " max_order_records=max_order_records,\n", + " freq=\"minute\" # we have an irregular one-minute frequency\n", + ")\n", + "# (may take a minute...)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3a17d594-4b6a-48b1-aa18-761b0d6dbca6", + "metadata": {}, + "outputs": [], + "source": [ + "# Let's print out the order records in a human-readable format\n", + "\n", + "print(pf.orders.records_readable)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "eb299f83-87fe-4367-9f45-3bd089f95491", + "metadata": {}, + "outputs": [], + "source": [ + "# We can notice above that there's no information whether an order is an SL or TP order\n", + "# What we can do is to create our own order records with custom fields, copy the old ones over,\n", + "# and tell the portfolio to use them instead of the default ones\n", + "\n", + "# First, we need to create an enumerated field for stop types\n", + "# SL levels will come first, TP levels second, in an incremental fashion\n", + "StopTypeT = namedtuple(\"StopTypeT\", [\n", + " *[f\"SL{i + 1}\" for i in range(n_sl_levels)],\n", + " *[f\"TP{i + 1}\" for i in range(n_tp_levels)]\n", + "])\n", + "StopType = StopTypeT(*range(len(StopTypeT._fields)))\n", + "\n", + "print(StopType)" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "id": "2750453a-1238-4c02-95ba-ddcf5d4b372e", + "metadata": {}, + "outputs": [], + "source": [ + "# To extend order records, we just need to append new fields and construct a new data type\n", + "\n", + "custom_order_dt = np.dtype(vbt.pf_enums.order_fields + [(\"order_type\", np.int_), (\"stop_type\", np.int_)])\n", + "\n", + "def fix_order_records(order_records, signal_info, temp_info):\n", + " # This is a function that will \"fix\" our default records and return the fixed ones\n", + " \n", + " # Create a new empty record array with the new data type\n", + " # Empty here means that the array isn't initialized yet and contains junk data\n", + " # Thus, make sure to override each single element\n", + " custom_order_records = np.empty(order_records.shape, dtype=custom_order_dt)\n", + " \n", + " # Copy over the information from our default records\n", + " for field, _ in vbt.pf_enums.order_fields:\n", + " custom_order_records[field] = order_records[field]\n", + " \n", + " # Iterate over the new records and fill the stop type\n", + " for i in range(len(custom_order_records)):\n", + " record = custom_order_records[i]\n", + " signal = record[\"col\"] # each column corresponds to a signal\n", + " \n", + " # Fill the order type\n", + " record[\"order_type\"] = signal_info.order_type[signal]\n", + " \n", + " # Concatenate SL and TP row indices of this signal into a new list\n", + " # We must do it the same way as we did in StopTypeT\n", + " bar = [\n", + " *temp_info.sl_bar[signal],\n", + " *temp_info.tp_bar[signal]\n", + " ]\n", + " \n", + " # Check whether the row index of this order is in this list\n", + " # (which means that this order is a stop order)\n", + " if record[\"idx\"] in bar:\n", + " # If so, get the matching position in this list and use it as order type\n", + " # It will correspond to a field in StopType\n", + " record[\"stop_type\"] = bar.index(record[\"idx\"])\n", + " else:\n", + " record[\"stop_type\"] = -1\n", + " return 
custom_order_records\n", + " \n", + "custom_order_records = fix_order_records(pf.order_records, signal_info, temp_info)\n", + "print(custom_order_records[:10])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71749025-c6bf-4ffd-ba72-27d7d593e1c5", + "metadata": {}, + "outputs": [], + "source": [ + "# Having raw order records is not enough as vbt.Orders doesn't know what to do with the new field\n", + "# (remember that vbt.Orders is used to analyze the records)\n", + "# Let's create our custom class that subclasses vbt.Orders\n", + "# and override the field config to also include the information on the new field\n", + "\n", + "from vectorbtpro.records.decorators import attach_fields, override_field_config\n", + "\n", + "@attach_fields(dict(stop_type=dict(attach_filters=True)))\n", + "@override_field_config(dict(\n", + " dtype=custom_order_dt, # specify the new data type\n", + " settings=dict(\n", + " order_type=dict(\n", + " title=\"Order Type\", # specify a human-readable title for the field\n", + " mapping=OrderType, # specify the mapper for the field\n", + " ),\n", + " stop_type=dict(\n", + " title=\"Stop Type\", # specify a human-readable title for the field\n", + " mapping=StopType, # specify the mapper for the field\n", + " ),\n", + " )\n", + "))\n", + "class CustomOrders(vbt.Orders):\n", + " pass" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b9a93714-db59-4ac2-a682-ca785c7158cc", + "metadata": {}, + "outputs": [], + "source": [ + "# Finally, let's replace the order records and the class in the portfolio\n", + "\n", + "pf = pf.replace(order_records=custom_order_records, orders_cls=CustomOrders)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7907f9c9-c4f7-48a0-898d-af5ac1f8ac60", + "metadata": {}, + "outputs": [], + "source": [ + "# We can now effortlessly analyze the stop type\n", + "\n", + "print(pf.orders.records_readable)" + ] + }, + { + "cell_type": "code", + "execution_count": 
null, + "id": "e6a94274-4751-411a-b2eb-2dd8f1a4cf44", + "metadata": {}, + "outputs": [], + "source": [ + "# And here are the signals that correspond to these records for verification\n", + "\n", + "print(signal_data.get())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "95401889-0e74-49aa-835e-49a74e956c35", + "metadata": {}, + "outputs": [], + "source": [ + "# We can see that some signals were skipped, let's remove them from the portfolio\n", + "\n", + "pf = pf.loc[:, pf.orders.count() >= 1]\n", + "\n", + "print(len(pf.wrapper.columns))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1401064f-b2de-4f6e-921b-4be4a5a72f3a", + "metadata": {}, + "outputs": [], + "source": [ + "# There are various ways to analyze the data\n", + "# For example, we can count how many times each stop type was triggered\n", + "# Since we want to combine all trades in each statistic, we need to provide grouping\n", + "\n", + "print(pf.orders.stop_type.stats(group_by=True))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8fe58a9c-178d-4742-a558-471aa21d3af4", + "metadata": {}, + "outputs": [], + "source": [ + "# We can also get the position stats for P&L information\n", + "\n", + "print(pf.positions.stats(group_by=True))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6f3b6a19-c0e0-4d07-8ab0-7961e8229438", + "metadata": {}, + "outputs": [], + "source": [ + "# Let's plot a random trade\n", + "# The only issue: we have too much data for that (thanks to Plotly)\n", + "# Thus, crop it before plotting to remove irrelevant data\n", + "\n", + "signal = np.random.choice(len(pf.wrapper.columns))\n", + "pf.trades.iloc[:, signal].crop().plot().show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "40204477-d5ca-4402-81b5-bb162226f48b", + "metadata": {}, + "outputs": [], + "source": [ + "# Let's verify that the entry price stays within each candle\n", + "\n", + 
"print(pd.concat((\n", + " pf.orders.records_readable[[\"Column\", \"Order Type\", \"Stop Type\", \"Price\"]],\n", + " pf.orders.bar_high.to_readable(title=\"High\", only_values=True),\n", + " pf.orders.bar_low.to_readable(title=\"Low\", only_values=True),\n", + " pf.orders.price_status.to_readable(title=\"Price Status\", only_values=True),\n", + "), axis=1))\n", + "\n", + "print(pf.orders.price_status.stats(group_by=True))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "729339d4-6653-4d0f-8c80-ce7650655a00", + "metadata": {}, + "outputs": [], + "source": [ + "# Now, what if we're interested in portfolio metrics, such as the Sharpe ratio?\n", + "# The problem is that most metrics are producing multiple (intermediate) time series \n", + "# of the full shape, which is disastrous for RAM since our data will have to be tiled \n", + "# by the number of columns. But here's a trick: merge order records of all columns into one, \n", + "# as if we did the simulation on just one column!\n", + "\n", + "def merge_order_records(order_records):\n", + " merged_order_records = order_records.copy()\n", + " \n", + " # New records should have only one column\n", + " merged_order_records[\"col\"][:] = 0\n", + " \n", + " # Sort the records by the timestamp\n", + " merged_order_records = merged_order_records[np.argsort(merged_order_records[\"idx\"])]\n", + " \n", + " # Reset the order ids\n", + " merged_order_records[\"id\"][:] = np.arange(len(merged_order_records))\n", + " return merged_order_records\n", + "\n", + "merged_order_records = merge_order_records(custom_order_records)\n", + "print(merged_order_records[:10])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5499c82d-f517-41a5-928d-b2fe09eb869d", + "metadata": {}, + "outputs": [], + "source": [ + "# We also need to change the wrapper because it holds the information on our columns\n", + "\n", + "merged_wrapper = pf.wrapper.replace(columns=[0], ndim=1)" + ] + }, + { + "cell_type": 
"code", + "execution_count": null, + "id": "e09f8492-3cf9-467f-8797-7109edeff4ee", + "metadata": {}, + "outputs": [], + "source": [ + "# Is there any other array that requires merging?\n", + "# Let's introspect the portfolio instance and search for arrays of the full shape\n", + "\n", + "print(pf)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7789f4ab-207a-4de5-a19b-ccd30f2e447a", + "metadata": {}, + "outputs": [], + "source": [ + "# There are none, thus replace only the records and the wrapper\n", + "# Also, the previous individual portfolios were each using the starting capital of $100\n", + "# Which was used by 100%, but since we merge columns together, we now may require less starting capital\n", + "# Thus, we will determine it automatically\n", + "\n", + "merged_pf = pf.replace(\n", + " order_records=merged_order_records, \n", + " wrapper=merged_wrapper,\n", + " init_cash=\"auto\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "19abb33d-c964-43a6-9d63-7665beba4987", + "metadata": {}, + "outputs": [], + "source": [ + "# We can now get any portfolio statistic\n", + "\n", + "print(merged_pf.stats())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "206e69ae-4a0c-46a5-95f2-523ff660bb74", + "metadata": {}, + "outputs": [], + "source": [ + "# You may wonder why the win rate and other trade metrics are different here\n", + "# There are two reasons: \n", + "# 1) portfolio stats uses exit trades (previously we used positions), \n", + "# that is, each stop order is a trade\n", + "# 2) after merging, there's no more information which order belongs to which trade, \n", + "# thus positions are built in a sequential order\n", + "\n", + "# But to verify that both portfolio match, we can compare to the total profit to the previous trade P&L\n", + "print(merged_pf.total_profit)\n", + "print(pf.trades.pnl.sum(group_by=True))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": 
"5b45b35d-9860-4436-be99-00e3dd3b7b0b", + "metadata": {}, + "outputs": [], + "source": [ + "# We can now plot the entire portfolio\n", + "\n", + "merged_pf.resample(\"daily\").plot().show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "eff09d0c-c36f-4f21-97e7-41240f3f07e2", + "metadata": {}, + "outputs": [], + "source": [ + "# The main issue with using from_order_func is that we need to go over the entire data \n", + "# as many times as there are signals because the order function is run on single each element\n", + "# A far more time-efficient approach would be processing trades in a sequential order\n", + "# This is easily possible because our trades are perfectly sorted - we don't need\n", + "# to process a signal if the previous signal hasn't been processed yet\n", + "# Also, because the scope of this notebook assumes that signals are independent, \n", + "# we can simulate them independently and stop each signal's simulation once its position has been closed out\n", + "# This is only possible by writing an own simulator (which isn't as scary as it sounds!)\n", + "\n", + "# To avoid duplicating our signal logic, we will re-use order_func_nb by passing our own limited context\n", + "# It will consist only of the fields that are required by our order_func_nb\n", + "\n", + "OrderContext = namedtuple(\"OrderContext\", [\n", + " \"i\",\n", + " \"col\",\n", + " \"index\",\n", + " \"open\", \n", + " \"high\",\n", + " \"low\",\n", + " \"close\",\n", + " \"last_position\"\n", + "])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2049c971-000a-46e8-a477-c012a664e3fb", + "metadata": {}, + "outputs": [], + "source": [ + "# Let's build the simulator\n", + "# Technically, it's just a regular Numba function that does whatever we want\n", + "# What's special about it is that it calls the vectorbt's low-level API to place orders and \n", + "# updates the simulation state such as cash balances and positions\n", + "# We'll 
first determine the bars where the signals happen, and then run a smaller simulation\n", + "# on the first signal. Once the signal's position has been closed out, we'll terminate the simulation\n", + "# and continue with the next signal, until all signals are processed.\n", + "\n", + "@njit(boundscheck=True)\n", + "def signal_simulator_nb(\n", + " index, \n", + " open, \n", + " high, \n", + " low, \n", + " close, \n", + " signal_info,\n", + " temp_info\n", + "):\n", + " # Determine the number of signals, levels, and potential orders\n", + " n_signals = len(signal_info.timestamp)\n", + " n_sl_levels = signal_info.sl.shape[1]\n", + " n_tp_levels = signal_info.tp.shape[1]\n", + " max_order_records = 1 + n_sl_levels + n_tp_levels\n", + " \n", + " # Temporary arrays\n", + " \n", + " # This array will hold the bar where each signal happens\n", + " signal_bars = np.full(n_signals, -1, dtype=np.int_)\n", + " \n", + " # This array will hold order records\n", + " # Initially, order records are uninitialized (junk data) but we will fill them gradually\n", + " # Notice how we use our own data type custom_order_dt - we can fill order type and stop type \n", + " # fields right during the simulation\n", + " order_records = np.empty((max_order_records, n_signals), dtype=custom_order_dt)\n", + " \n", + " # To be able to distinguish between uninitialized and initialized (filled) orders,\n", + " # we'll create another array holding the number of filled orders for each signal\n", + " # For example, if order_records has a maximum of 6 rows and only one record is filled,\n", + " # order_counts will be 1 for this signal, so vectorbt can remove 5 unfilled orders later\n", + " order_counts = np.full(n_signals, 0, dtype=np.int_)\n", + " \n", + " # order_func_nb requires last_position, which holds the position of each signal\n", + " last_position = np.full(n_signals, 0.0, dtype=np.float_)\n", + " \n", + " # First, we need to determine the bars where the signals happen\n", + " # Even though 
we know their timestamps, we need to translate them into absolute indices\n", + " signal = 0\n", + " bar = 0\n", + " while signal < n_signals and bar < len(index):\n", + " if index[bar] == signal_info.timestamp[signal]:\n", + " # If there's a match, save the bar and continue with the next signal on the next bar\n", + " signal_bars[signal] = bar\n", + " signal += 1\n", + " bar += 1\n", + " elif index[bar] > signal_info.timestamp[signal]:\n", + " # If we're past the signal, continue with the next signal on the same bar\n", + " signal += 1\n", + " else:\n", + " # If we haven't hit the signal yet, continue on the next bar\n", + " bar += 1\n", + "\n", + " # Once we know the bars, we can iterate over signals in a loop and simulate them independently\n", + " for signal in range(n_signals):\n", + " \n", + " # If there was no match in the previous level, skip the simulation\n", + " from_bar = signal_bars[signal]\n", + " if from_bar == -1:\n", + " continue\n", + " \n", + " # This is our initial execution state, which holds the most important balances\n", + " # We'll start with a starting capital of $100\n", + " exec_state = vbt.pf_enums.ExecState(\n", + " cash=100.0,\n", + " position=0.0,\n", + " debt=0.0,\n", + " locked_cash=0.0,\n", + " free_cash=100.0,\n", + " val_price=np.nan,\n", + " value=np.nan\n", + " )\n", + " \n", + " # Here comes the actual simulation that starts from the signal's bar and\n", + " # ends either once we processed all bars or once the position has been closed out (see below)\n", + " for bar in range(from_bar, len(index)):\n", + " \n", + " # Create a named tuple holding the current context (this is \"c\" in order_func_nb)\n", + " c = OrderContext( \n", + " i=bar,\n", + " col=signal,\n", + " index=index,\n", + " open=open,\n", + " high=high,\n", + " low=low,\n", + " close=close,\n", + " last_position=last_position,\n", + " )\n", + " \n", + " # If the first bar has no data, skip the simulation\n", + " if bar == from_bar and not has_data_nb(c):\n", + " 
break\n", + "\n", + " # Price area holds the OHLC of the current bar\n", + " price_area = vbt.pf_enums.PriceArea(\n", + " vbt.flex_select_nb(open, bar, signal), \n", + " vbt.flex_select_nb(high, bar, signal), \n", + " vbt.flex_select_nb(low, bar, signal), \n", + " vbt.flex_select_nb(close, bar, signal)\n", + " )\n", + " \n", + " # Why do we need to redefine the execution state?\n", + " # Because we need to manually update the valuation price and the value of the column\n", + " # to be able to use complex size types such as target percentages\n", + " # As in order_func_nb, we will use the opening price as the valuation price\n", + " # Why doesn't vectorbt do it on its own? Because it doesn't know anything\n", + " # about other columns. For example, imagine having a grouped simulation with 100 columns sharing\n", + " # the same cash: using the formula below wouldn't consider the positions of other 99 columns.\n", + " exec_state = vbt.pf_enums.ExecState(\n", + " cash=exec_state.cash,\n", + " position=exec_state.position,\n", + " debt=exec_state.debt,\n", + " locked_cash=exec_state.locked_cash,\n", + " free_cash=exec_state.free_cash,\n", + " val_price=price_area.open,\n", + " value=exec_state.cash + price_area.open * exec_state.position\n", + " )\n", + " \n", + " # Let's run the order function, which returns an order\n", + " # Remember when we used order_nothing_nb()? 
It also returns an order, but one filled with nans\n", + " order = order_func_nb(c, signal_info, temp_info)\n", + " \n", + " # Here's the main function in the entire simulation, which 1) executes the order,\n", + " # 2) updates the execution state, and 3) updates the order_records and order_counts\n", + " order_result, exec_state = vbt.pf_nb.process_order_nb(\n", + " signal, \n", + " signal, \n", + " bar,\n", + " exec_state=exec_state,\n", + " order=order,\n", + " price_area=price_area,\n", + " order_records=order_records,\n", + " order_counts=order_counts\n", + " )\n", + " \n", + " # If the order was successful (i.e., it's now in order_records),\n", + " # we need to manually set the order type and stop type\n", + " if order_result.status == vbt.pf_enums.OrderStatus.Filled:\n", + " \n", + " # Use this line to get the last order of any signal\n", + " filled_order = order_records[order_counts[signal] - 1, signal]\n", + " \n", + " # Fill the order type\n", + " filled_order[\"order_type\"] = signal_info.order_type[signal]\n", + " \n", + " # Fill the stop type by going through the SL and TP levels and checking whether \n", + " # the order bar matches the level bar\n", + " order_is_stop = False\n", + " for k in range(n_sl_levels):\n", + " if filled_order[\"idx\"] == temp_info.sl_bar[signal, k]:\n", + " filled_order[\"stop_type\"] = k\n", + " order_is_stop = True\n", + " break\n", + " for k in range(n_tp_levels):\n", + " if filled_order[\"idx\"] == temp_info.tp_bar[signal, k]:\n", + " filled_order[\"stop_type\"] = n_sl_levels + k # TP indices come after SL indices\n", + " order_is_stop = True\n", + " break\n", + " \n", + " # If order bar hasn't been matched, it's not a stop order\n", + " if not order_is_stop:\n", + " filled_order[\"stop_type\"] = -1\n", + " \n", + " # If we're not in position after an entry anymore, terminate the simulation\n", + " if temp_info.entry_price_bar[signal] != -1:\n", + " if exec_state.position == 0:\n", + " break\n", + " \n", + " # Don't 
forget to update the position array\n", + " last_position[signal] = exec_state.position\n", + " \n", + " # Remove uninitialized order records and flatten 2d array into a 1d array\n", + " return vbt.nb.repartition_nb(order_records, order_counts)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a0b0f696-b9b6-4b72-9c8c-69d0ed8acf5c", + "metadata": {}, + "outputs": [], + "source": [ + "# Numba requires arrays in a NumPy format, and to avoid preparing them each time,\n", + "# let's create a function that only takes the data and signal information, and does everything else for us\n", + "\n", + "def signal_simulator(data, signal_info):\n", + " temp_info = build_temp_info(signal_info)\n", + " \n", + " custom_order_records = signal_simulator_nb(\n", + " index=data.index.vbt.to_ns(), # convert to nanoseconds\n", + " open=vbt.to_2d_array(data.open), # flexible indexing requires inputs to be 2d\n", + " high=vbt.to_2d_array(data.high),\n", + " low=vbt.to_2d_array(data.low),\n", + " close=vbt.to_2d_array(data.close),\n", + " signal_info=signal_info,\n", + " temp_info=temp_info\n", + " )\n", + " \n", + " # We have order records, what's left is wrapping them with a Portfolio\n", + " # Required are three things: 1) array wrapper with index and columns, 2) order records, and 3) prices\n", + " # We also need to specify the starting capital that we used during the simulation\n", + " return vbt.Portfolio(\n", + " wrapper=vbt.ArrayWrapper(\n", + " index=data.index, \n", + " columns=range(len(signal_info.timestamp)), # one column per signal\n", + " freq=\"minute\"\n", + " ),\n", + " order_records=custom_order_records,\n", + " open=data.open,\n", + " high=data.high,\n", + " low=data.low,\n", + " close=data.close,\n", + " init_cash=100.0,\n", + " orders_cls=CustomOrders\n", + " )\n", + "\n", + "# That's it!\n", + "pf = signal_simulator(data, signal_info)\n", + "\n", + "print(pf.trades.pnl.sum(group_by=True))" + ] + }, + { + "cell_type": "code", + "execution_count": 
null, + "id": "c4532223-7171-47d1-a99a-41d13d617239", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/to_explore/notebooks/SignalDevelopment.ipynb b/to_explore/notebooks/SignalDevelopment.ipynb new file mode 100644 index 0000000..16a589a --- /dev/null +++ b/to_explore/notebooks/SignalDevelopment.ipynb @@ -0,0 +1,2557 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "906f22ee-4fda-4b00-86b5-666eeba85526", + "metadata": {}, + "source": [ + "# Signal development" + ] + }, + { + "cell_type": "markdown", + "id": "ea0a17e8-dede-43b7-8d69-ae44fa9eeef0", + "metadata": {}, + "source": [ + "## Generation" + ] + }, + { + "cell_type": "markdown", + "id": "c92e74cd-bef9-43e1-a78e-f39d308798df", + "metadata": {}, + "source": [ + "### Comparison" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bc046d2c-54c8-4363-aa50-9535db8c1fa7", + "metadata": {}, + "outputs": [], + "source": [ + "from vectorbtpro import *\n", + "# whats_imported()\n", + "\n", + "vbt.settings.set_theme(\"dark\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3a5fa071-d8c4-4443-8528-88c5cab650cf", + "metadata": {}, + "outputs": [], + "source": [ + "data = vbt.BinanceData.pull(\n", + " [\"BTCUSDT\", \"ETHUSDT\"], \n", + " start=\"2021-01-01\",\n", + " end=\"2022-01-01\"\n", + ")\n", + "print(data.get(\"Low\"))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b43d9cc2-631e-44a4-9a0b-83aa08c3a722", + "metadata": {}, + "outputs": [], + "source": [ + "bb = vbt.talib(\"BBANDS\").run(\n", + " 
data.get(\"Close\"),\n", + " timeperiod=vbt.Default(14),\n", + " nbdevup=vbt.Default(2),\n", + " nbdevdn=vbt.Default(2)\n", + ")\n", + "print(bb.lowerband)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e3ee3576-aa6f-4652-83de-fbaa47eb7137", + "metadata": {}, + "outputs": [], + "source": [ + "mask = data.get(\"Low\") < bb.lowerband\n", + "print(mask)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "73b16f6b-cd9d-4ccb-92b8-d374f59dfe4b", + "metadata": {}, + "outputs": [], + "source": [ + "mask.sum()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f1032773-752b-435f-8a96-3aea5e7fec6e", + "metadata": {}, + "outputs": [], + "source": [ + "bb_mult = vbt.talib(\"BBANDS\").run(\n", + " data.get(\"Close\"),\n", + " timeperiod=vbt.Default(14),\n", + " nbdevup=[2, 3],\n", + " nbdevdn=[2, 3]\n", + ")\n", + "# mask = data.get(\"Low\") < bb_mult.lowerband" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6aafc576-9b8a-4475-ad4c-1aaa7f71d4b1", + "metadata": {}, + "outputs": [], + "source": [ + "mask = data.get(\"Low\").vbt < bb_mult.lowerband\n", + "print(mask)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dbe069e6-47c0-4760-b6d7-ebc2e8bf9e62", + "metadata": {}, + "outputs": [], + "source": [ + "mask.sum()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e6db65b8-b971-433f-b4bc-fe97263baa30", + "metadata": {}, + "outputs": [], + "source": [ + "mask = bb_mult.lowerband_above(data.get(\"Low\"))\n", + "mask.sum()" + ] + }, + { + "cell_type": "markdown", + "id": "7301f0a1-6e79-4da1-9daf-8345c35da38d", + "metadata": {}, + "source": [ + "#### Thresholds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2487f5f5-2277-4608-a6ee-bd26a88b3d48", + "metadata": {}, + "outputs": [], + "source": [ + "bandwidth = (bb.upperband - bb.lowerband) / bb.middleband\n", + "\n", + "mask = bandwidth.vbt > vbt.Param([0.15, 0.3], 
name=\"threshold\")\n", + "mask.sum()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4f634615-3e6f-4ba8-a3e4-843748fb6663", + "metadata": {}, + "outputs": [], + "source": [ + "mask = bandwidth.vbt.combine(\n", + " [0.15, 0.3],\n", + " combine_func=np.greater, \n", + " keys=pd.Index([0.15, 0.3], name=\"threshold\")\n", + ")\n", + "mask.sum()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6bda89d4-6381-4ec1-b1bc-ffe302454181", + "metadata": {}, + "outputs": [], + "source": [ + "mask = pd.concat(\n", + " (bandwidth > 0.15, bandwidth > 0.3), \n", + " keys=pd.Index([0.15, 0.3], name=\"threshold\"), \n", + " axis=1\n", + ")\n", + "mask.sum()" + ] + }, + { + "cell_type": "markdown", + "id": "66ef2746-1d4f-48bd-887e-da3ce86aa2d9", + "metadata": {}, + "source": [ + "#### Crossovers" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "112ffe71-78da-4bba-a9d4-7f5ffeb5cb3e", + "metadata": {}, + "outputs": [], + "source": [ + "low_below_lband = data.get(\"Low\") < bb.lowerband\n", + "mask = low_below_lband.vbt.signals.first()\n", + "mask.sum()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e92bbb10-724a-4f28-9a54-6d41f36f7bf4", + "metadata": {}, + "outputs": [], + "source": [ + "btc_low = data.get(\"Low\", \"BTCUSDT\").rename(\"Low\")\n", + "btc_lowerband = bb.lowerband[\"BTCUSDT\"].rename(\"Lower Band\")\n", + "btc_mask = mask[\"BTCUSDT\"].rename(\"Signals\")\n", + "\n", + "fig = btc_low.vbt.plot()\n", + "btc_lowerband.vbt.plot(fig=fig)\n", + "btc_mask.vbt.signals.plot_as_markers(\n", + " y=btc_low, \n", + " trace_kwargs=dict(\n", + " marker=dict(\n", + " color=\"#DFFF00\"\n", + " )\n", + " ),\n", + " fig=fig\n", + ").show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9caf1ea8-18b8-42d6-b759-e3ef87ea23ab", + "metadata": {}, + "outputs": [], + "source": [ + "mask = low_below_lband.vbt.signals.first(after_false=True)\n", + "mask.sum()" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "id": "d22b9fb1-2757-4fe9-b257-233e8a6428b9", + "metadata": {}, + "outputs": [], + "source": [ + "sample_low = pd.Series([10, 9, 8, 9, 8])\n", + "sample_lband = pd.Series([np.nan, np.nan, 9, 8, 9])\n", + "sample_mask = sample_low < sample_lband\n", + "sample_mask.vbt.signals.first(after_false=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cd63d8f4-f71e-4b77-ba9e-f3aa5fab70d3", + "metadata": {}, + "outputs": [], + "source": [ + "sample_mask[sample_lband.ffill().isnull()] = True\n", + "sample_mask.vbt.signals.first(after_false=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "48ec8914-6279-4e04-9577-7a61123e8f9b", + "metadata": {}, + "outputs": [], + "source": [ + "buffer = sample_lband.ffill().isnull().sum(axis=0).max()\n", + "buffer" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9e7b37d6-435b-4d05-b813-c71e3533d7f6", + "metadata": {}, + "outputs": [], + "source": [ + "sample_buf_mask = sample_low.iloc[buffer:] < sample_lband.iloc[buffer:]\n", + "sample_buf_mask = sample_buf_mask.vbt.signals.first(after_false=True)\n", + "sample_mask = sample_low.vbt.wrapper.fill(False)\n", + "sample_mask.loc[sample_buf_mask.index] = sample_buf_mask\n", + "sample_mask" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "497a1101-5e73-4bd3-9d6d-ceb51bd6ac58", + "metadata": {}, + "outputs": [], + "source": [ + "mask = data.get(\"Low\").vbt.crossed_below(bb.lowerband, wait=1)\n", + "mask.sum()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e385bd31-4be5-40c8-95eb-23ec488dcff2", + "metadata": {}, + "outputs": [], + "source": [ + "mask = bb.lowerband_crossed_above(data.get(\"Low\"), wait=1)\n", + "mask.sum()" + ] + }, + { + "cell_type": "markdown", + "id": "67ac0af5-b8bc-4e4f-8351-e33661778165", + "metadata": {}, + "source": [ + "### Logical operators" + ] + }, + { + "cell_type": "code", + "execution_count": 
null, + "id": "931d10e3-7259-49f7-bde3-3c6b11e356d0", + "metadata": {}, + "outputs": [], + "source": [ + "cond1 = data.get(\"Low\") < bb.lowerband\n", + "cond2 = bandwidth > 0.3\n", + "cond3 = data.get(\"High\") > bb.upperband\n", + "cond4 = bandwidth < 0.15\n", + "\n", + "mask = (cond1 & cond2) | (cond3 & cond4)\n", + "mask.sum()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7a8e5bb5-3a3f-4e95-8805-78dbeb73e119", + "metadata": {}, + "outputs": [], + "source": [ + "cond1 = data.get(\"Low\").vbt < bb.lowerband\n", + "cond2 = bandwidth.vbt > vbt.Param([0.3, 0.3, 0.4, 0.4], name=\"cond2_th\")\n", + "cond3 = data.get(\"High\").vbt > bb.upperband\n", + "cond4 = bandwidth.vbt < vbt.Param([0.1, 0.2, 0.1, 0.2], name=\"cond4_th\")\n", + "\n", + "mask = (cond1.vbt & cond2).vbt | (cond3.vbt & cond4)\n", + "mask.sum()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "96fec8a9-27fe-4ec0-9745-06b612a8beeb", + "metadata": {}, + "outputs": [], + "source": [ + "cond1 = data.get(\"Low\").vbt < bb.lowerband\n", + "cond2 = bandwidth.vbt > vbt.Param([0.3, 0.4], name=\"cond2_th\")\n", + "cond3 = data.get(\"High\").vbt > bb.upperband\n", + "cond4 = bandwidth.vbt < vbt.Param([0.1, 0.2], name=\"cond4_th\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "553a64f0-a358-4e98-a0f6-e05d44ba84be", + "metadata": {}, + "outputs": [], + "source": [ + "i1 = np.split(np.arange(len(cond1.columns)), len(cond1.columns) // 2)\n", + "i2 = np.split(np.arange(len(cond2.columns)), len(cond2.columns) // 2)\n", + "i3 = np.split(np.arange(len(cond3.columns)), len(cond3.columns) // 2)\n", + "i4 = np.split(np.arange(len(cond4.columns)), len(cond4.columns) // 2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "55799254-e8c1-4b67-aba1-a7015ebe5b92", + "metadata": {}, + "outputs": [], + "source": [ + "print(i1)\n", + "print(i2)\n", + "print(i3)\n", + "print(i4)" + ] + }, + { + "cell_type": "code", + "execution_count": 
null, + "id": "6cef9c96-52bf-42be-8c70-750c39bea02a", + "metadata": {}, + "outputs": [], + "source": [ + "i1, i2, i3, i4 = zip(*product(i1, i2, i3, i4))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7348da68-f874-4334-88ac-be80db0ec4e7", + "metadata": {}, + "outputs": [], + "source": [ + "print(i1)\n", + "print(i2)\n", + "print(i3)\n", + "print(i4)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f055cbef-62ef-4958-a6c2-9c253e023e9d", + "metadata": {}, + "outputs": [], + "source": [ + "i1 = np.asarray(i1).flatten()\n", + "i2 = np.asarray(i2).flatten()\n", + "i3 = np.asarray(i3).flatten()\n", + "i4 = np.asarray(i4).flatten()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f7941975-2907-4a19-811f-e98848822d66", + "metadata": {}, + "outputs": [], + "source": [ + "print(i1)\n", + "print(i2)\n", + "print(i3)\n", + "print(i4)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8ed4ad6f-3182-4da6-9bfa-8ecb705af57e", + "metadata": {}, + "outputs": [], + "source": [ + "cond1 = cond1.iloc[:, i1]\n", + "cond2 = cond2.iloc[:, i2]\n", + "cond3 = cond3.iloc[:, i3]\n", + "cond4 = cond4.iloc[:, i4]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "38fb824d-8aa5-494d-b2f8-bea49e2d584e", + "metadata": {}, + "outputs": [], + "source": [ + "mask = (cond1.vbt & cond2).vbt | (cond3.vbt & cond4)\n", + "mask.sum()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0e5545a3-b347-41de-bee5-9954ff2b0698", + "metadata": {}, + "outputs": [], + "source": [ + "MaskGenerator = vbt.IF.from_expr(\"\"\"\n", + "upperband, middleband, lowerband = @res_talib_bbands\n", + "bandwidth = (upperband - lowerband) / middleband\n", + "cond1 = low < lowerband\n", + "cond2 = bandwidth > @p_cond2_th\n", + "cond3 = high > upperband\n", + "cond4 = bandwidth < @p_cond4_th\n", + "@out_mask:(cond1 & cond2) | (cond3 & cond4)\n", + "\"\"\")\n", + "\n", + 
"print(vbt.format_func(MaskGenerator.run, incl_doc=False))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3bbbab02-1e9e-497c-8616-cbfaa19cd39e", + "metadata": {}, + "outputs": [], + "source": [ + "mask_generator = MaskGenerator.run(\n", + " high=data.get(\"High\"),\n", + " low=data.get(\"Low\"),\n", + " close=data.get(\"Close\"),\n", + " cond2_th=[0.3, 0.4],\n", + " cond4_th=[0.1, 0.2],\n", + " bbands_timeperiod=vbt.Default(14),\n", + " param_product=True\n", + ")\n", + "mask_generator.mask.sum()" + ] + }, + { + "cell_type": "markdown", + "id": "8d6deb30-8e48-48e1-b06e-699454a28098", + "metadata": {}, + "source": [ + "### Shifting" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "58f200ec-96b8-450b-afc4-76307bf59549", + "metadata": {}, + "outputs": [], + "source": [ + "cond1 = data.get(\"Low\") < bb.lowerband\n", + "cond2 = bandwidth > bandwidth.shift(1)\n", + "\n", + "mask = cond1 & cond2\n", + "mask.sum()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "efef3852-ba5a-4670-8ccf-cb51e5d41432", + "metadata": {}, + "outputs": [], + "source": [ + "cond2 = bandwidth > bandwidth.rolling(\"7d\").apply(lambda x: x[0])\n", + "\n", + "mask = cond1 & cond2\n", + "mask.sum()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5b308a3e-f8e8-46b0-b016-2aa1933dc44d", + "metadata": {}, + "outputs": [], + "source": [ + "def exactly_ago(sr):\n", + " if sr.index[0] == sr.index[-1] - vbt.timedelta(\"7d\"):\n", + " return sr.iloc[0]\n", + " return np.nan\n", + "\n", + "cond_7d_ago = bandwidth.rolling(\"8d\").apply(exactly_ago, raw=False)\n", + "cond2 = bandwidth > cond_7d_ago\n", + "\n", + "mask = cond1 & cond2\n", + "mask.sum()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a41f9fe9-51d8-420b-8352-2238cf58208f", + "metadata": {}, + "outputs": [], + "source": [ + "@njit\n", + "def exactly_ago_meta_nb(from_i, to_i, col, index, freq, arr):\n", + " if index[from_i] == 
index[to_i - 1] - freq:\n", + " return arr[from_i, col]\n", + " return np.nan\n", + "\n", + "cond_7d_ago = vbt.pd_acc.rolling_apply(\n", + " \"8d\",\n", + " exactly_ago_meta_nb,\n", + " bandwidth.index.values,\n", + " vbt.timedelta(\"7d\").to_timedelta64(),\n", + " vbt.to_2d_array(bandwidth),\n", + " wrapper=bandwidth.vbt.wrapper\n", + ")\n", + "cond2 = bandwidth > cond_7d_ago\n", + "\n", + "mask = cond1 & cond2\n", + "mask.sum()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fa58a149-9754-41da-ab53-e7642d0c3410", + "metadata": {}, + "outputs": [], + "source": [ + "cond2 = bandwidth > bandwidth.vbt.ago(\"7d\")\n", + "\n", + "mask = cond1 & cond2\n", + "mask.sum()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ae73e445-ca38-4b64-acee-daed9f4ba6fe", + "metadata": {}, + "outputs": [], + "source": [ + "bandwidth.iloc[-8]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bba2e968-9d6a-422a-a88f-abad435da438", + "metadata": {}, + "outputs": [], + "source": [ + "bandwidth.vbt.ago(\"7d\").iloc[-1]" + ] + }, + { + "cell_type": "markdown", + "id": "50937a93-0f50-47c2-a28f-578baa33609a", + "metadata": {}, + "source": [ + "### Truth value testing" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f1e5ab80-5cda-46cf-8a30-d20ab4ab9aab", + "metadata": {}, + "outputs": [], + "source": [ + "cond2 = data.get(\"Close\").vbt.crossed_below(bb.middleband)\n", + "cond2 = cond2.rolling(5, min_periods=1).max().astype(bool)\n", + "\n", + "mask = cond1 & cond2\n", + "mask.sum()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4a4231aa-0152-4aba-b1ff-b40c98f2f196", + "metadata": {}, + "outputs": [], + "source": [ + "cond2 = data.get(\"Close\").vbt.crossed_below(bb.middleband)\n", + "cond2 = cond2.vbt.rolling_any(5)\n", + "\n", + "mask = cond1 & cond2\n", + "mask.sum()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": 
"df8a2ad1-3ba1-4b81-8d63-ebca8e656a2b", + "metadata": {}, + "outputs": [], + "source": [ + "cond2 = data.get(\"Close\").vbt.crossed_below(bb.middleband)\n", + "cond2 = cond2.vbt.rolling_apply(\n", + " \"W\", \"any\", \n", + " minp=1, \n", + " wrap_kwargs=dict(fillna=0, dtype=bool)\n", + ")\n", + "\n", + "mask = cond1 & cond2\n", + "mask.sum()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39b1b59c-d0ad-4757-bfe6-789a12e11026", + "metadata": {}, + "outputs": [], + "source": [ + "anchor_points = data.wrapper.get_index_points(\n", + " every=\"M\", \n", + " start=0, \n", + " exact_start=True\n", + ")\n", + "anchor_points" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d6b79fd8-b86e-486b-8e87-663e2ddff2c9", + "metadata": {}, + "outputs": [], + "source": [ + "left_bound = np.full(len(data.wrapper.index), np.nan)\n", + "left_bound[anchor_points] = anchor_points\n", + "left_bound = vbt.dt.to_ns(vbt.nb.ffill_1d_nb(left_bound))\n", + "left_bound = bandwidth.index[left_bound]\n", + "left_bound" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "86481972-0ab1-4f65-9fa3-4c3a7f38d3a1", + "metadata": {}, + "outputs": [], + "source": [ + "right_bound = data.wrapper.index\n", + "right_bound" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5177843a-f65a-42ba-9e1d-c1744aee9e7b", + "metadata": {}, + "outputs": [], + "source": [ + "mask = (bandwidth <= 0.1).vbt.resample_between_bounds(\n", + " left_bound, \n", + " right_bound,\n", + " \"any\",\n", + " closed_lbound=True,\n", + " closed_rbound=True,\n", + " wrap_with_lbound=False,\n", + " wrap_kwargs=dict(fillna=0, dtype=bool)\n", + ")\n", + "mask.index = right_bound\n", + "mask.astype(int).vbt.ts_heatmap().show_svg()" + ] + }, + { + "cell_type": "markdown", + "id": "73d0ca33-a66b-4fe6-8e7d-144df6bd0882", + "metadata": {}, + "source": [ + "### Periodically" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": 
"a27fa7cc-6c65-4ff4-aebf-aacdaf160ca1", + "metadata": {}, + "outputs": [], + "source": [ + "min_data = vbt.BinanceData.pull(\n", + " [\"BTCUSDT\", \"ETHUSDT\"], \n", + " start=\"2021-01-01 UTC\",\n", + " end=\"2021-02-01 UTC\",\n", + " timeframe=\"1h\"\n", + ")\n", + "index = min_data.wrapper.index\n", + "tuesday_index = index[index.weekday == 1]\n", + "tuesday_index" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "61283286-8bc4-4a67-8385-54b88926b078", + "metadata": {}, + "outputs": [], + "source": [ + "tuesday_1800_index = tuesday_index[tuesday_index.hour == 18]\n", + "tuesday_1800_index" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c1f8c57-df34-4cd0-99b3-f9511f987f99", + "metadata": {}, + "outputs": [], + "source": [ + "tuesday_1730_index = index[\n", + " (index.weekday == 1) & \n", + " (index.hour == 17) & \n", + " (index.minute == 30)\n", + "]\n", + "tuesday_1730_index" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "52657759-5799-4aae-ac79-cd588037fcea", + "metadata": {}, + "outputs": [], + "source": [ + "index.get_indexer([vbt.timestamp(\"2021-01-07\", tz=index.tz)])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7e13344f-4c60-4e46-a2ff-9dcecc989475", + "metadata": {}, + "outputs": [], + "source": [ + "index.get_indexer([vbt.timestamp(\"2021-01-07 17:30:00\", tz=index.tz)]) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bbdd67e5-4b86-4e5a-86f3-d2c123f3bf01", + "metadata": {}, + "outputs": [], + "source": [ + "index[index.get_indexer(\n", + " [vbt.timestamp(\"2021-01-07 17:30:00\", tz=index.tz)],\n", + " method=\"ffill\"\n", + ")]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "241ed072-2873-4f0b-9cba-7fa5d8bd5401", + "metadata": {}, + "outputs": [], + "source": [ + "index[index.get_indexer(\n", + " [vbt.timestamp(\"2021-01-07 17:30:00\", tz=index.tz)],\n", + " method=\"bfill\"\n", + ")]" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "id": "e31841d0-3c4a-4967-a0a1-62b5c680cd85", + "metadata": {}, + "outputs": [], + "source": [ + "each_tuesday = vbt.date_range(index[0], index[-1], freq=\"tuesday\")\n", + "each_tuesday_1730 = each_tuesday + pd.Timedelta(hours=17, minutes=30)\n", + "each_tuesday_1730" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "21d5f7eb-f18e-4327-8d20-9cdc72cad6d8", + "metadata": {}, + "outputs": [], + "source": [ + "positions = index.get_indexer(each_tuesday_1730, method=\"bfill\")\n", + "\n", + "min_symbol_wrapper = min_data.get_symbol_wrapper()\n", + "mask = min_symbol_wrapper.fill(False)\n", + "mask.iloc[positions] = True\n", + "mask.sum()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bfadfca9-23df-4372-aab6-ecfb9e5c4046", + "metadata": {}, + "outputs": [], + "source": [ + "mask[mask.any(axis=1)].index.strftime(\"%A %T\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1206dc67-91f7-4207-ae3e-05e579576fe3", + "metadata": {}, + "outputs": [], + "source": [ + "tuesday_after_1700 = (index.weekday == 1) & (index.hour >= 17)\n", + "wednesday_before_1700 = (index.weekday == 2) & (index.hour < 17)\n", + "main_cond = tuesday_after_1700 | wednesday_before_1700\n", + "mask = min_symbol_wrapper.fill(False)\n", + "mask[main_cond] = True\n", + "mask = mask.vbt.signals.first()\n", + "mask[mask.any(axis=1)].index.strftime(\"%A %T\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e011aa6b-94a2-4a76-a3e6-f2998e4bdc5e", + "metadata": {}, + "outputs": [], + "source": [ + "mask = min_symbol_wrapper.fill(False)\n", + "mask.vbt.set(\n", + " True, \n", + " every=\"tuesday\", \n", + " at_time=\"17:30\", \n", + " inplace=True\n", + ")\n", + "mask[mask.any(axis=1)].index.strftime(\"%A %T\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3849cd8e-63ab-4603-b84a-ea2f0e0cb282", + "metadata": {}, + "outputs": [], + "source": [ 
+ "mask = min_symbol_wrapper.fill(False)\n", + "mask.vbt.set(\n", + " True, \n", + " every=\"tuesday\", \n", + " at_time=\"18:00\", \n", + " add_delta=pd.Timedelta(nanoseconds=1),\n", + " inplace=True\n", + ")\n", + "mask[mask.any(axis=1)].index.strftime(\"%A %T\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0bcd77bd-b5f9-4e03-8a1b-a2f1646d2887", + "metadata": {}, + "outputs": [], + "source": [ + "mask = min_symbol_wrapper.fill(False)\n", + "mask.vbt.set_between(\n", + " True, \n", + " every=\"monday\", \n", + " start_time=\"12:00\", \n", + " end_time=\"17:00\", \n", + " add_end_delta=pd.Timedelta(days=1),\n", + " inplace=True\n", + ")\n", + "mask[mask.any(axis=1)].index.strftime(\"%A %T\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0335ab31-4ae0-4e2d-be6e-412949468b6f", + "metadata": {}, + "outputs": [], + "source": [ + "mask = min_symbol_wrapper.fill(False)\n", + "mask.vbt.set(\n", + " True, \n", + " on=\"January 7th 2021 UTC\",\n", + " indexer_method=None,\n", + " inplace=True\n", + ")\n", + "mask[mask.any(axis=1)].index" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8383ce7c-dda3-472c-af61-5b1f33b67b3f", + "metadata": {}, + "outputs": [], + "source": [ + "mask = min_symbol_wrapper.fill(False)\n", + "mask.vbt.set_between(\n", + " True, \n", + " start=[\"2021-01-01 12:00:00\", \"2021-01-07 12:00:00\"],\n", + " end=[\"2021-01-02 12:00:00\", \"2021-01-08 12:00:00\"],\n", + " inplace=True\n", + ")\n", + "mask[mask.any(axis=1)].index" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1be65183-69dc-4576-925f-ba51c2d9b16d", + "metadata": {}, + "outputs": [], + "source": [ + "mask = min_symbol_wrapper.fill(False)\n", + "mask.vbt.set_between(\n", + " True, \n", + " every=\"monday\",\n", + " split_every=False,\n", + " add_end_delta=\"2h\",\n", + " inplace=True\n", + ")\n", + "mask[mask.any(axis=1)].index" + ] + }, + { + "cell_type": "markdown", + "id": 
"0942b710-9089-4203-812c-087c90458e50", + "metadata": {}, + "source": [ + "### Iteratively" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9da35717-67da-421a-acf7-e3e1a81d34f1", + "metadata": {}, + "outputs": [], + "source": [ + "@njit\n", + "def generate_mask_1d_nb(\n", + " high, low,\n", + " uband, mband, lband,\n", + " cond2_th, cond4_th\n", + "):\n", + " out = np.full(high.shape, False)\n", + " \n", + " for i in range(high.shape[0]):\n", + "\n", + " \n", + " bandwidth = (uband[i] - lband[i]) / mband[i]\n", + " cond1 = low[i] < lband[i]\n", + " cond2 = bandwidth > cond2_th\n", + " cond3 = high[i] > uband[i]\n", + " cond4 = bandwidth < cond4_th\n", + " signal = (cond1 and cond2) or (cond3 and cond4)\n", + " \n", + " out[i] = signal\n", + " \n", + " return out\n", + "\n", + "mask = generate_mask_1d_nb(\n", + " data.get(\"High\")[\"BTCUSDT\"].values,\n", + " data.get(\"Low\")[\"BTCUSDT\"].values,\n", + " bb.upperband[\"BTCUSDT\"].values,\n", + " bb.middleband[\"BTCUSDT\"].values,\n", + " bb.lowerband[\"BTCUSDT\"].values,\n", + " 0.30,\n", + " 0.15\n", + ")\n", + "symbol_wrapper = data.get_symbol_wrapper()\n", + "mask = symbol_wrapper[\"BTCUSDT\"].wrap(mask)\n", + "mask.sum()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ff27246a-d96c-428a-a391-56decc8a457c", + "metadata": {}, + "outputs": [], + "source": [ + "@njit\n", + "def generate_mask_nb(\n", + " high, low,\n", + " uband, mband, lband,\n", + " cond2_th, cond4_th\n", + "):\n", + " out = np.empty(high.shape, dtype=np.bool_)\n", + " \n", + " for col in range(high.shape[1]):\n", + " out[:, col] = generate_mask_1d_nb(\n", + " high[:, col], low[:, col],\n", + " uband[:, col], mband[:, col], lband[:, col],\n", + " cond2_th, cond4_th\n", + " )\n", + " \n", + " return out\n", + "\n", + "mask = generate_mask_nb(\n", + " vbt.to_2d_array(data.get(\"High\")),\n", + " vbt.to_2d_array(data.get(\"Low\")),\n", + " vbt.to_2d_array(bb.upperband),\n", + " 
vbt.to_2d_array(bb.middleband),\n", + " vbt.to_2d_array(bb.lowerband),\n", + " 0.30,\n", + " 0.15\n", + ")\n", + "mask = symbol_wrapper.wrap(mask)\n", + "mask.sum()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7e38705d-863f-40c7-bfad-127b7551999d", + "metadata": {}, + "outputs": [], + "source": [ + "MaskGenerator = vbt.IF(\n", + " input_names=[\"high\", \"low\", \"uband\", \"mband\", \"lband\"],\n", + " param_names=[\"cond2_th\", \"cond4_th\"],\n", + " output_names=[\"mask\"]\n", + ").with_apply_func(generate_mask_1d_nb, takes_1d=True)\n", + "mask_generator = MaskGenerator.run(\n", + " data.get(\"High\"),\n", + " data.get(\"Low\"),\n", + " bb.upperband,\n", + " bb.middleband,\n", + " bb.lowerband,\n", + " [0.3, 0.4],\n", + " [0.1, 0.2],\n", + " param_product=True\n", + ")\n", + "mask_generator.mask.sum()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d2989ca1-0925-4d8f-bfb1-82606aed0d80", + "metadata": {}, + "outputs": [], + "source": [ + "@njit\n", + "def value_ago_1d_nb(arr, ago):\n", + " out = np.empty(arr.shape, dtype=np.float_)\n", + " for i in range(out.shape[0]):\n", + " if i - ago >= 0:\n", + " out[i] = arr[i - ago]\n", + " else:\n", + " out[i] = np.nan\n", + " return out\n", + "\n", + "arr = np.array([1, 2, 3])\n", + "value_ago_1d_nb(arr, 1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "24455bb7-98c0-45c1-9333-78c9424838ef", + "metadata": {}, + "outputs": [], + "source": [ + "@njit\n", + "def any_in_window_1d_nb(arr, window):\n", + " out = np.empty(arr.shape, dtype=np.bool_)\n", + " for i in range(out.shape[0]):\n", + " from_i = max(0, i + 1 - window)\n", + " to_i = i + 1\n", + " out[i] = np.any(arr[from_i:to_i])\n", + " return out\n", + "\n", + "arr = np.array([False, True, True, False, False])\n", + "any_in_window_1d_nb(arr, 2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a4cf9d70-b97c-4c47-b65a-8a4c13f4ab9f", + "metadata": {}, + "outputs": [], + 
"source": [ + "@njit\n", + "def any_in_var_window_1d_nb(arr, index, freq):\n", + " out = np.empty(arr.shape, dtype=np.bool_)\n", + " from_i = 0\n", + " for i in range(out.shape[0]):\n", + " if index[from_i] <= index[i] - freq:\n", + " for j in range(from_i + 1, index.shape[0]):\n", + " if index[j] > index[i] - freq:\n", + " from_i = j\n", + " break\n", + " to_i = i + 1\n", + " out[i] = np.any(arr[from_i:to_i])\n", + " return out\n", + "\n", + "arr = np.array([False, True, True, False, False])\n", + "index = vbt.date_range(\"2020\", freq=\"5min\", periods=len(arr)).values\n", + "freq = vbt.timedelta(\"10min\").to_timedelta64()\n", + "any_in_var_window_1d_nb(arr, index, freq)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3fd6c643-a018-4f56-a52d-2352a5ecb430", + "metadata": {}, + "outputs": [], + "source": [ + "any_in_var_window_1d_nb(arr, vbt.dt.to_ns(index), vbt.dt.to_ns(freq))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2a375903-eea0-4bd1-b46f-d71c0dbc719c", + "metadata": {}, + "outputs": [], + "source": [ + "vbt.dt.to_ns(index)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e32610d0-01e8-4d4d-a3e6-b83218a8e877", + "metadata": {}, + "outputs": [], + "source": [ + "vbt.dt.to_ns(index - np.datetime64(0, \"ns\"))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b775b6a0-2e64-4027-b84a-9aed88e0ae66", + "metadata": {}, + "outputs": [], + "source": [ + "vbt.dt.to_ns(freq)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "20a5f083-2f52-436e-9d23-3a0ef67d929d", + "metadata": {}, + "outputs": [], + "source": [ + "vbt.dt.to_ns(freq) / 1000 / 1000 / 1000 / 60" + ] + }, + { + "cell_type": "markdown", + "id": "d1dc72ab-3858-49a0-93c2-a7d37c8073c7", + "metadata": {}, + "source": [ + "### Generators" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b6bb839a-61f5-44be-8643-495078dbd208", + "metadata": {}, + "outputs": [], + 
"source": [ + "@njit\n", + "def place_func_nb(c, index):\n", + " for out_i in range(len(c.out)):\n", + " i = c.from_i + out_i\n", + " weekday = vbt.dt_nb.weekday_nb(index[i])\n", + " hour = vbt.dt_nb.hour_nb(index[i])\n", + " if weekday == 2 and hour == 17:\n", + " c.out[out_i] = True\n", + " return out_i\n", + " return -1\n", + "\n", + "mask = vbt.pd_acc.signals.generate(\n", + " symbol_wrapper.shape,\n", + " place_func_nb,\n", + " vbt.dt.to_ns(symbol_wrapper.index),\n", + " wrapper=symbol_wrapper\n", + ")\n", + "mask.sum()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3124c4b-c02c-4e0e-9362-5c5b8114dd1b", + "metadata": {}, + "outputs": [], + "source": [ + "@njit\n", + "def place_func_nb(c, index):\n", + " last_i = -1\n", + " for out_i in range(len(c.out)):\n", + " i = c.from_i + out_i\n", + " weekday = vbt.dt_nb.weekday_nb(index[i])\n", + " hour = vbt.dt_nb.hour_nb(index[i])\n", + " if weekday == 2 and hour == 17:\n", + " c.out[out_i] = True\n", + " last_i = out_i\n", + " else:\n", + " past_target_midnight = vbt.dt_nb.past_weekday_nb(index[i], 2)\n", + " past_target = past_target_midnight + 17 * vbt.dt_nb.h_ns\n", + " if (i > 0 and index[i - 1] < past_target) and \\\n", + " index[i] > past_target:\n", + " c.out[out_i] = True\n", + " last_i = out_i\n", + " return last_i\n", + "\n", + "mask = vbt.pd_acc.signals.generate(\n", + " symbol_wrapper.shape,\n", + " place_func_nb,\n", + " vbt.dt.to_ns(symbol_wrapper.index),\n", + " wrapper=symbol_wrapper\n", + ")\n", + "mask.sum()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dce0bdc1-7956-4760-9b29-05cb8a7d13b6", + "metadata": {}, + "outputs": [], + "source": [ + "mask.index[mask.any(axis=1)].strftime('%A %m/%d/%Y')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c1a5a0e2-23cd-48d3-ab36-2f94d8a03adc", + "metadata": {}, + "outputs": [], + "source": [ + "@njit\n", + "def place_func_nb(c, weekday, hour, index):\n", + " last_i = -1\n", + " for 
out_i in range(len(c.out)):\n", + " i = c.from_i + out_i\n", + " weekday_now = vbt.dt_nb.weekday_nb(index[i])\n", + " hour_now = vbt.dt_nb.hour_nb(index[i])\n", + " if weekday_now == weekday and hour_now == hour:\n", + " c.out[out_i] = True\n", + " last_i = out_i\n", + " return last_i\n", + "\n", + "EntryGenerator = vbt.SignalFactory(\n", + " mode=\"entries\",\n", + " param_names=[\"weekday\", \"hour\"]\n", + ").with_place_func(\n", + " entry_place_func_nb=place_func_nb,\n", + " entry_settings=dict(\n", + " pass_params=[\"weekday\", \"hour\"],\n", + " ),\n", + " var_args=True\n", + ")\n", + "entry_generator = EntryGenerator.run(\n", + " symbol_wrapper.shape,\n", + " 2, \n", + " [0, 17],\n", + " vbt.dt.to_ns(symbol_wrapper.index),\n", + " input_index=symbol_wrapper.index,\n", + " input_columns=symbol_wrapper.columns\n", + ")\n", + "entry_generator.entries.sum()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f29a5bde-9025-4946-932d-94d23e4b5ac0", + "metadata": {}, + "outputs": [], + "source": [ + "entry_generator.plot(column=(2, 0, \"BTCUSDT\")).show_svg()" + ] + }, + { + "cell_type": "markdown", + "id": "21f7b441-5d5a-4ac9-93bb-e0289eefa5d6", + "metadata": {}, + "source": [ + "#### Exits" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1a58d3a9-e3fa-4839-908d-e6d8a0009e04", + "metadata": {}, + "outputs": [], + "source": [ + "@njit\n", + "def exit_place_func_nb(c):\n", + " c.out[0] = True\n", + " return 0\n", + "\n", + "entries = symbol_wrapper.fill(False)\n", + "entries.vbt.set(True, every=\"Q\", inplace=True)\n", + "entries.index[entries.any(axis=1)]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "03872da1-6712-4f1e-809d-92303661364e", + "metadata": {}, + "outputs": [], + "source": [ + "exits = entries.vbt.signals.generate_exits(exit_place_func_nb)\n", + "exits.index[exits.any(axis=1)]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": 
"c80edcb4-602b-48e8-bf7b-98e805259758", + "metadata": {}, + "outputs": [], + "source": [ + "exits = entries.vbt.signals.generate_exits(\n", + " exit_place_func_nb,\n", + " wait=0\n", + ")\n", + "exits.index[exits.any(axis=1)]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "02c6a9cc-13f0-4998-8992-61110ea2d66f", + "metadata": {}, + "outputs": [], + "source": [ + "@njit\n", + "def exit_place_func_nb(c, index, wait_td):\n", + " last_i = -1\n", + " for out_i in range(len(c.out)):\n", + " i = c.from_i + out_i\n", + " if index[i] >= index[c.from_i] + wait_td:\n", + " c.out[out_i] = True\n", + " last_i = out_i\n", + " break\n", + " return last_i\n", + "\n", + "exits = entries.vbt.signals.generate_exits(\n", + " exit_place_func_nb,\n", + " vbt.dt.to_ns(entries.index),\n", + " vbt.dt.to_ns(vbt.timedelta(\"7d\")),\n", + " wait=0\n", + ")\n", + "exits.index[exits.any(axis=1)]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "da4bdca3-d5e6-47f4-bf53-9ac89228df0a", + "metadata": {}, + "outputs": [], + "source": [ + "entries = symbol_wrapper.fill(False)\n", + "entries.vbt.set(True, every=\"5d\", inplace=True)\n", + "exits = entries.vbt.signals.generate_exits(\n", + " exit_place_func_nb,\n", + " vbt.dt.to_ns(entries.index),\n", + " vbt.dt.to_ns(vbt.timedelta(\"7d\")),\n", + " wait=0\n", + ")\n", + "exits.index[exits.any(axis=1)]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6690a670-24a5-4eb9-8552-16f845ab3c50", + "metadata": {}, + "outputs": [], + "source": [ + "exits = entries.vbt.signals.generate_exits(\n", + " exit_place_func_nb,\n", + " vbt.dt.to_ns(entries.index),\n", + " vbt.dt.to_ns(vbt.timedelta(\"7d\")),\n", + " wait=0,\n", + " until_next=False\n", + ")\n", + "exits.index[exits.any(axis=1)]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3729138-f008-4371-883f-31240e1aadb3", + "metadata": {}, + "outputs": [], + "source": [ + "exits = 
entries.vbt.signals.generate_exits(\n", + " exit_place_func_nb,\n", + " vbt.dt.to_ns(entries.index),\n", + " vbt.dt.to_ns(vbt.timedelta(\"7d\")),\n", + " wait=0,\n", + " until_next=False,\n", + " skip_until_exit=True\n", + ")\n", + "exits.index[exits.any(axis=1)]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "939bb536-1973-4798-9f9e-bb65314a6f31", + "metadata": {}, + "outputs": [], + "source": [ + "@njit\n", + "def exit_place_func_nb(c, wait_td, index):\n", + " last_i = -1\n", + " for out_i in range(len(c.out)):\n", + " i = c.from_i + out_i\n", + " if index[i] >= index[c.from_i] + wait_td:\n", + " c.out[out_i] = True\n", + " last_i = out_i\n", + " break\n", + " return last_i\n", + "\n", + "ExitGenerator = vbt.SignalFactory(\n", + " mode=\"exits\",\n", + " param_names=[\"wait_td\"]\n", + ").with_place_func(\n", + " exit_place_func_nb=exit_place_func_nb,\n", + " exit_settings=dict(\n", + " pass_params=[\"wait_td\"],\n", + " ),\n", + " var_args=True,\n", + " wait=0,\n", + " until_next=False,\n", + " skip_until_exit=True,\n", + " param_settings=dict(\n", + " wait_td=dict(\n", + " post_index_func=lambda x: x.map(lambda y: str(vbt.timedelta(y)))\n", + " )\n", + " ),\n", + ")\n", + "exit_generator = ExitGenerator.run(\n", + " entries,\n", + " [\n", + " vbt.timedelta(\"3d\").to_timedelta64(),\n", + " vbt.timedelta(\"7d\").to_timedelta64()\n", + " ],\n", + " symbol_wrapper.index.values\n", + ")\n", + "exit_generator.exits.sum()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "de964714-e321-4d79-bfe1-b466a087b293", + "metadata": {}, + "outputs": [], + "source": [ + "new_entries = exit_generator.entries.vbt.signals.first(\n", + " reset_by=exit_generator.exits,\n", + " allow_gaps=True, \n", + ")\n", + "new_entries.index[new_entries[(\"7 days 00:00:00\", \"BTCUSDT\")]]" + ] + }, + { + "cell_type": "markdown", + "id": "5b97e203-9f56-4390-a5cd-30ee65beb85f", + "metadata": {}, + "source": [ + "#### Both" + ] + }, + { + "cell_type": 
"code", + "execution_count": null, + "id": "d838e1e5-0060-4a04-8c33-4c1f96e701a4", + "metadata": {}, + "outputs": [], + "source": [ + "@njit\n", + "def entry_place_func_nb(c, low, close, th):\n", + " if c.from_i == 0:\n", + " c.out[0] = True\n", + " return 0\n", + " exit_price = close[c.from_i - c.wait, c.col]\n", + " hit_price = exit_price * (1 - th)\n", + " last_i = -1\n", + " for out_i in range(len(c.out)):\n", + " i = c.from_i + out_i\n", + " if low[i, c.col] <= hit_price:\n", + " c.out[out_i] = True\n", + " last_i = out_i\n", + " break\n", + " return last_i\n", + "\n", + "@njit\n", + "def exit_place_func_nb(c, high, close, th):\n", + " entry_price = close[c.from_i - c.wait, c.col]\n", + " hit_price = entry_price * (1 + th)\n", + " last_i = -1\n", + " for out_i in range(len(c.out)):\n", + " i = c.from_i + out_i\n", + " if high[i, c.col] >= hit_price:\n", + " c.out[out_i] = True\n", + " last_i = out_i\n", + " break\n", + " return last_i\n", + "\n", + "entries, exits = vbt.pd_acc.signals.generate_both(\n", + " symbol_wrapper.shape,\n", + " entry_place_func_nb=entry_place_func_nb,\n", + " entry_place_args=(vbt.Rep(\"low\"), vbt.Rep(\"close\"), 0.1),\n", + " exit_place_func_nb=exit_place_func_nb,\n", + " exit_place_args=(vbt.Rep(\"high\"), vbt.Rep(\"close\"), 0.2),\n", + " wrapper=symbol_wrapper,\n", + " broadcast_named_args=dict(\n", + " high=data.get(\"High\"),\n", + " low=data.get(\"Low\"),\n", + " close=data.get(\"Close\")\n", + " ),\n", + " broadcast_kwargs=dict(\n", + " post_func=vbt.to_2d_array\n", + " )\n", + ")\n", + "\n", + "fig = data.plot(\n", + " symbol=\"BTCUSDT\", \n", + " ohlc_trace_kwargs=dict(opacity=0.5), \n", + " plot_volume=False\n", + ")\n", + "entries[\"BTCUSDT\"].vbt.signals.plot_as_entries(\n", + " y=data.get(\"Close\", \"BTCUSDT\"), fig=fig)\n", + "exits[\"BTCUSDT\"].vbt.signals.plot_as_exits(\n", + " y=data.get(\"Close\", \"BTCUSDT\"), fig=fig)\n", + "fig.show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": 
"1241ad12-8874-43f9-868f-337dba360343", + "metadata": {}, + "outputs": [], + "source": [ + "BothGenerator = vbt.SignalFactory(\n", + " mode=\"both\",\n", + " input_names=[\"high\", \"low\", \"close\"],\n", + " param_names=[\"entry_th\", \"exit_th\"]\n", + ").with_place_func(\n", + " entry_place_func_nb=entry_place_func_nb,\n", + " entry_settings=dict(\n", + " pass_inputs=[\"low\", \"close\"],\n", + " pass_params=[\"entry_th\"],\n", + " ),\n", + " exit_place_func_nb=exit_place_func_nb,\n", + " exit_settings=dict(\n", + " pass_inputs=[\"high\", \"close\"],\n", + " pass_params=[\"exit_th\"],\n", + " )\n", + ")\n", + "both_generator = BothGenerator.run(\n", + " data.get(\"High\"),\n", + " data.get(\"Low\"),\n", + " data.get(\"Close\"),\n", + " [0.1, 0.2],\n", + " [0.2, 0.3],\n", + " param_product=True\n", + ")\n", + "fig = data.plot(\n", + " symbol=\"BTCUSDT\", \n", + " ohlc_trace_kwargs=dict(opacity=0.5), \n", + " plot_volume=False\n", + ")\n", + "both_generator.plot(\n", + " column=(0.1, 0.3, \"BTCUSDT\"), \n", + " entry_y=data.get(\"Close\", \"BTCUSDT\"), \n", + " exit_y=data.get(\"Close\", \"BTCUSDT\"), \n", + " fig=fig\n", + ").show_svg()" + ] + }, + { + "cell_type": "markdown", + "id": "b3e76f00-2927-4af1-90e3-2c288f81225a", + "metadata": {}, + "source": [ + "#### Chained exits" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "92d515cf-394e-4b2a-b5c9-8a7c702e9f45", + "metadata": {}, + "outputs": [], + "source": [ + "@njit\n", + "def exit_place_func_nb(c, low, request_price, fill_price_out):\n", + " _request_price = request_price[c.from_i - c.wait, c.col]\n", + " last_i = -1\n", + " for out_i in range(len(c.out)):\n", + " i = c.from_i + out_i\n", + " if low[i, c.col] <= _request_price:\n", + " fill_price_out[i, c.col] = _request_price\n", + " c.out[out_i] = True\n", + " last_i = out_i\n", + " break\n", + " return last_i\n", + "\n", + "ChainGenerator = vbt.SignalFactory(\n", + " mode=\"chain\",\n", + " input_names=[\"low\", 
\"request_price\"],\n", + " in_output_names=[\"fill_price_out\"]\n", + ").with_place_func(\n", + " exit_place_func_nb=exit_place_func_nb,\n", + " exit_settings=dict(\n", + " pass_inputs=[\"low\", \"request_price\"],\n", + " pass_in_outputs=[\"fill_price_out\"],\n", + " ),\n", + " fill_price_out=np.nan\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "66e50dd1-4984-432a-8a3e-4ca8fa6a6a29", + "metadata": {}, + "outputs": [], + "source": [ + "fast_ma = vbt.talib(\"SMA\").run(\n", + " data.get(\"Close\"), \n", + " vbt.Default(10), \n", + " short_name=\"fast_ma\"\n", + ")\n", + "slow_ma = vbt.talib(\"SMA\").run(\n", + " data.get(\"Close\"), \n", + " vbt.Default(20), \n", + " short_name=\"slow_ma\"\n", + ")\n", + "entries = fast_ma.real_crossed_above(slow_ma)\n", + "entries.sum()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "45ad9fbd-9250-463a-b599-7c69eeb1247d", + "metadata": {}, + "outputs": [], + "source": [ + "chain_generator = ChainGenerator.run(\n", + " entries,\n", + " data.get(\"Low\"),\n", + " data.get(\"Close\") * (1 - 0.1)\n", + ")\n", + "request_mask = chain_generator.new_entries\n", + "request_mask.sum()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f7a80bb3-b778-45df-a375-0df6672cf36b", + "metadata": {}, + "outputs": [], + "source": [ + "request_price = chain_generator.request_price\n", + "print(request_price[request_mask.any(axis=1)])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bd9c6dd2-5da2-437f-b750-050b031160a1", + "metadata": {}, + "outputs": [], + "source": [ + "fill_mask = chain_generator.exits\n", + "fill_mask.sum()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0691fbf4-4b08-43c0-a8ae-6075bdf0af12", + "metadata": {}, + "outputs": [], + "source": [ + "fill_price = chain_generator.fill_price_out\n", + "print(fill_price[fill_mask.any(axis=1)])" + ] + }, + { + "cell_type": "markdown", + "id": 
"0566319a-0cb4-4dd1-81f8-3ed7d2d5f9d6", + "metadata": {}, + "source": [ + "### Preset generators" + ] + }, + { + "cell_type": "markdown", + "id": "07b9bd34-8bc7-4383-9b3b-13d8fbb7ae99", + "metadata": {}, + "source": [ + "#### Random" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0081e390-e918-4edb-ac65-297da562a4a2", + "metadata": {}, + "outputs": [], + "source": [ + "btcusdt_wrapper = symbol_wrapper[\"BTCUSDT\"]\n", + "mask = vbt.pd_acc.signals.generate_random(\n", + " btcusdt_wrapper.shape,\n", + " prob=1 / 10,\n", + " wrapper=btcusdt_wrapper,\n", + " seed=42\n", + ")\n", + "mask_index = mask.index[mask]\n", + "(mask_index[1:] - mask_index[:-1]).mean()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9cca388d-e1d0-4c83-a86b-ec7087d34e05", + "metadata": {}, + "outputs": [], + "source": [ + "monday_mask = btcusdt_wrapper.fill(False)\n", + "monday_mask.vbt.set(True, every=\"monday\", inplace=True)\n", + "mask = monday_mask.vbt.signals.generate_random_exits(wait=0)\n", + "mask_index = mask.index[mask]\n", + "mask_index.strftime(\"%W %A\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5df8383a-7c16-4d54-8b7a-6695d9b24546", + "metadata": {}, + "outputs": [], + "source": [ + "prob = np.linspace(0, 1, len(symbol_wrapper.index))\n", + "rprob = vbt.RPROB.run(\n", + " symbol_wrapper.shape,\n", + " vbt.Default(vbt.to_2d_pr_array(prob)),\n", + " seed=42,\n", + " input_index=symbol_wrapper.index,\n", + " input_columns=symbol_wrapper.columns\n", + ")\n", + "rprob.entries.astype(int).vbt.ts_heatmap().show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2ecf3d7f-ecb2-4cda-bd89-756d0a278c4c", + "metadata": {}, + "outputs": [], + "source": [ + "rprob = vbt.RPROB.run(\n", + " symbol_wrapper.shape,\n", + " [0.5, vbt.to_2d_pr_array(prob)],\n", + " seed=42,\n", + " input_index=symbol_wrapper.index,\n", + " input_columns=symbol_wrapper.columns\n", + ")\n", + "rprob.entries.sum()" 
+ ] + }, + { + "cell_type": "markdown", + "id": "1646593e-9669-4757-8df9-3713a8e46ad6", + "metadata": {}, + "source": [ + "#### Stops" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecf076e4-697a-4da7-8afa-287820c2c1da", + "metadata": {}, + "outputs": [], + "source": [ + "new_entries, exits = entries.vbt.signals.generate_stop_exits(\n", + " data.get(\"Close\"),\n", + " data.get(\"High\"),\n", + " stop=0.1,\n", + " chain=True\n", + ")\n", + "print(new_entries[new_entries.any(axis=1)])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fe64925a-0fc3-42fa-b128-7f92cb650456", + "metadata": {}, + "outputs": [], + "source": [ + "print(exits[exits.any(axis=1)])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e84b01df-1e52-4b51-8d91-5a057bed798a", + "metadata": {}, + "outputs": [], + "source": [ + "out_dict = {}\n", + "new_entries, exits = entries.vbt.signals.generate_stop_exits(\n", + " data.get(\"Close\"),\n", + " data.get(\"High\"),\n", + " stop=0.1,\n", + " chain=True,\n", + " out_dict=out_dict\n", + ")\n", + "print(out_dict[\"stop_ts\"][exits.any(axis=1)])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "92b3b055-6b0d-4968-8042-943ad3612b38", + "metadata": {}, + "outputs": [], + "source": [ + "stcx = vbt.STCX.run(\n", + " entries,\n", + " data.get(\"Open\"),\n", + " ts=data.get(\"Low\"),\n", + " follow_ts=data.get(\"High\"),\n", + " stop=-0.1,\n", + " trailing=[False, True],\n", + " wait=0\n", + ")\n", + "fig = data.plot(\n", + " symbol=\"BTCUSDT\", \n", + " ohlc_trace_kwargs=dict(opacity=0.5), \n", + " plot_volume=False\n", + ")\n", + "stcx.plot(\n", + " column=(-0.1, True, \"BTCUSDT\"), \n", + " entry_y=\"entry_ts\",\n", + " exit_y=\"stop_ts\", \n", + " fig=fig\n", + ").show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ef68d113-7f90-4b68-810b-d31f6f13e1ea", + "metadata": {}, + "outputs": [], + "source": [ + "ohlcstcx = 
vbt.OHLCSTCX.run(\n", + " entries,\n", + " data.get(\"Close\"),\n", + " data.get(\"Open\"),\n", + " data.get(\"High\"),\n", + " data.get(\"Low\"),\n", + " data.get(\"Close\"),\n", + " sl_stop=vbt.Default(0.1),\n", + " tsl_stop=vbt.Default(0.15),\n", + " is_entry_open=False\n", + ")\n", + "ohlcstcx.plot(column=(\"BTCUSDT\")).show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "222b01c5-3d86-4c5a-9c1e-dbfdd568407f", + "metadata": {}, + "outputs": [], + "source": [ + "print(ohlcstcx.stop_type_readable[ohlcstcx.exits.any(axis=1)])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7b7d0fdc-e793-46be-8559-398c418ccffc", + "metadata": {}, + "outputs": [], + "source": [ + "ohlcstcx = vbt.OHLCSTCX.run(\n", + " entries,\n", + " data.get(\"Close\"),\n", + " sl_stop=vbt.Default(0.1),\n", + " tsl_stop=vbt.Default(0.15),\n", + " is_entry_open=False\n", + ")\n", + "ohlcstcx.plot(column=(\"BTCUSDT\")).show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "04992035-5ab7-47c5-b4c1-468bc22221ce", + "metadata": {}, + "outputs": [], + "source": [ + "entry_pos_rank = entries.vbt.signals.pos_rank(allow_gaps=True)\n", + "short_entries = (entry_pos_rank >= 0) & (entry_pos_rank % 2 == 1)\n", + "\n", + "ohlcstcx = vbt.OHLCSTCX.run(\n", + " entries,\n", + " data.get(\"Close\"),\n", + " data.get(\"Open\"),\n", + " data.get(\"High\"),\n", + " data.get(\"Low\"),\n", + " data.get(\"Close\"),\n", + " tsl_th=vbt.Default(0.2),\n", + " tsl_stop=vbt.Default(0.1),\n", + " reverse=vbt.Default(short_entries),\n", + " is_entry_open=False\n", + ")\n", + "ohlcstcx.plot(column=(\"BTCUSDT\")).show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9d5b66ff-5a7f-46da-b515-a8d84c1c2334", + "metadata": {}, + "outputs": [], + "source": [ + "long_entries = ohlcstcx.new_entries.vbt & (~short_entries)\n", + "long_exits = ohlcstcx.exits.vbt.signals.first_after(long_entries)\n", + "short_entries = 
ohlcstcx.new_entries.vbt & short_entries\n", + "short_exits = ohlcstcx.exits.vbt.signals.first_after(short_entries)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ad0ad046-3df7-4872-b943-03049982ce10", + "metadata": {}, + "outputs": [], + "source": [ + "fig = data.plot(\n", + " symbol=\"BTCUSDT\", \n", + " ohlc_trace_kwargs=dict(opacity=0.5), \n", + " plot_volume=False\n", + ")\n", + "long_entries[\"BTCUSDT\"].vbt.signals.plot_as_entries(\n", + " ohlcstcx.entry_price[\"BTCUSDT\"],\n", + " trace_kwargs=dict(marker=dict(color=\"limegreen\"), name=\"Long entries\"), \n", + " fig=fig\n", + ")\n", + "long_exits[\"BTCUSDT\"].vbt.signals.plot_as_exits(\n", + " ohlcstcx.stop_price[\"BTCUSDT\"],\n", + " trace_kwargs=dict(marker=dict(color=\"orange\"), name=\"Long exits\"),\n", + " fig=fig\n", + ")\n", + "short_entries[\"BTCUSDT\"].vbt.signals.plot_as_entries(\n", + " ohlcstcx.entry_price[\"BTCUSDT\"],\n", + " trace_kwargs=dict(marker=dict(color=\"magenta\"), name=\"Short entries\"),\n", + " fig=fig\n", + ")\n", + "short_exits[\"BTCUSDT\"].vbt.signals.plot_as_exits(\n", + " ohlcstcx.stop_price[\"BTCUSDT\"],\n", + " trace_kwargs=dict(marker=dict(color=\"red\"), name=\"Short exits\"),\n", + " fig=fig\n", + ").show_svg()" + ] + }, + { + "cell_type": "markdown", + "id": "bf356e79-4c3b-4819-ad6e-7f5c556be5fd", + "metadata": {}, + "source": [ + "## Pre-analysis" + ] + }, + { + "cell_type": "markdown", + "id": "30097f36-14d4-4a9b-bad9-d0cf8b6668f1", + "metadata": {}, + "source": [ + "### Ranking" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "13ca8377-310f-4240-babf-5c99023e3d9f", + "metadata": {}, + "outputs": [], + "source": [ + "@njit\n", + "def rank_func_nb(c):\n", + " if c.sig_in_part_cnt == 1:\n", + " return 1\n", + " return 0\n", + "\n", + "sample_mask = pd.Series([True, True, False, True, True])\n", + "ranked = sample_mask.vbt.signals.rank(rank_func_nb)\n", + "ranked" + ] + }, + { + "cell_type": "code", + "execution_count": 
null, + "id": "7c44fcd0-d678-4ad8-8ef1-99d0f91aad40", + "metadata": {}, + "outputs": [], + "source": [ + "ranked == 1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "693bd2d7-24bf-4c0c-80e4-8665b1649d2d", + "metadata": {}, + "outputs": [], + "source": [ + "ranked = sample_mask.vbt.signals.rank(rank_func_nb, after_false=True)\n", + "ranked == 1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2ba16d6f-f5fa-4bca-b8a5-fa4891c10587", + "metadata": {}, + "outputs": [], + "source": [ + "sample_entries = pd.Series([True, True, True, True, True])\n", + "sample_exits = pd.Series([False, False, True, False, False])\n", + "ranked = sample_entries.vbt.signals.rank(\n", + " rank_func_nb, \n", + " reset_by=sample_exits\n", + ")\n", + "ranked == 1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "caff048c-a2ae-4e1f-ab1c-15c60a87a7ec", + "metadata": {}, + "outputs": [], + "source": [ + "ranked = sample_entries.vbt.signals.rank(\n", + " rank_func_nb, \n", + " reset_by=sample_exits,\n", + " after_reset=True\n", + ")\n", + "ranked == 1" + ] + }, + { + "cell_type": "markdown", + "id": "a973bf32-e451-4786-867e-5c90c82bc6f5", + "metadata": {}, + "source": [ + "#### Preset rankers" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e2b978-480f-4149-8b31-63004f9ef9b9", + "metadata": {}, + "outputs": [], + "source": [ + "sample_mask = pd.Series([True, True, False, True, True])\n", + "ranked = sample_mask.vbt.signals.pos_rank()\n", + "ranked" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6415bbd4-2e5d-4484-ab21-0478566513c8", + "metadata": {}, + "outputs": [], + "source": [ + "ranked == 1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d4b19ce3-2a0e-47ff-bc1b-c1a3695cbcaa", + "metadata": {}, + "outputs": [], + "source": [ + "ranked = sample_mask.vbt.signals.pos_rank(allow_gaps=True)\n", + "ranked" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "id": "369d7a73-c8cb-4579-8f42-28510b948f63", + "metadata": {}, + "outputs": [], + "source": [ + "(ranked > -1) & (ranked % 2 == 1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "50d51cf9-c42b-4468-a54f-be011327e474", + "metadata": {}, + "outputs": [], + "source": [ + "ranked = sample_mask.vbt.signals.partition_pos_rank(allow_gaps=True)\n", + "ranked" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7d4864bb-e549-4016-bced-cd39ca4b5b70", + "metadata": {}, + "outputs": [], + "source": [ + "ranked == 1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c624d629-f1fc-4c33-ad43-5c85496cb483", + "metadata": {}, + "outputs": [], + "source": [ + "entry_cond1 = data.get(\"Low\") < bb.lowerband\n", + "entry_cond2 = bandwidth > 0.3\n", + "entry_cond3 = data.get(\"High\") > bb.upperband\n", + "entry_cond4 = bandwidth < 0.15\n", + "entries = (entry_cond1 & entry_cond2) | (entry_cond3 & entry_cond4)\n", + "\n", + "entries.vbt.signals.from_nth(0).sum()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a8b250d1-ba86-44ba-b532-cd42d261545a", + "metadata": {}, + "outputs": [], + "source": [ + "entries.vbt.signals.from_nth(1).sum()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "047fbe11-5da9-439f-b5c5-b1109f32fcbf", + "metadata": {}, + "outputs": [], + "source": [ + "entries.vbt.signals.from_nth(2).sum()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "80f7a1d3-16eb-4aee-bb06-91cc09556a8e", + "metadata": {}, + "outputs": [], + "source": [ + "exit_cond1 = data.get(\"High\") > bb.upperband\n", + "exit_cond2 = bandwidth > 0.3\n", + "exit_cond3 = data.get(\"Low\") < bb.lowerband\n", + "exit_cond4 = bandwidth < 0.15\n", + "exits = (exit_cond1 & exit_cond2) | (exit_cond3 & exit_cond4)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "24028cc0-e4f5-4025-a107-38d92c01c4c1", + "metadata": {}, + 
"outputs": [], + "source": [ + "exits.vbt.signals.pos_rank_after(entries, reset_wait=0).max() + 1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3a4b2be2-8e5c-463a-9655-1f420b0017c9", + "metadata": {}, + "outputs": [], + "source": [ + "entries.vbt.signals.pos_rank_after(exits).max() + 1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c0beeb7b-b5c4-4f48-9791-8a0828e956ec", + "metadata": {}, + "outputs": [], + "source": [ + "ranked = exits.vbt.signals.pos_rank_after(entries, reset_wait=0)\n", + "highest_ranked = ranked == ranked.max()\n", + "print(ranked[highest_ranked.any(axis=1)])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "805ef372-1748-4dc4-825a-46e3f55fb3f7", + "metadata": {}, + "outputs": [], + "source": [ + "exits_after = exits.vbt.signals.from_nth_after(0, entries, reset_wait=0)\n", + "(exits ^ exits_after).sum()" + ] + }, + { + "cell_type": "markdown", + "id": "0db70553-be70-4a1c-9ce4-92f906ba291b", + "metadata": {}, + "source": [ + "#### Mapped ranks" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9aeaa467-6a09-4a45-8c4a-abd587c7148d", + "metadata": {}, + "outputs": [], + "source": [ + "mask = bandwidth.vbt > vbt.Param(np.arange(1, 10) / 10, name=\"bw_th\")\n", + "mapped_ranks = mask.vbt.signals.pos_rank(as_mapped=True)\n", + "mapped_ranks.max(group_by=vbt.ExceptLevel(\"symbol\"))" + ] + }, + { + "cell_type": "markdown", + "id": "5ef8deef-a5d9-40b0-9819-78dbac8f5b27", + "metadata": {}, + "source": [ + "### Cleaning" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "402c23ed-0a63-4b9c-b215-ac7dacf083e5", + "metadata": {}, + "outputs": [], + "source": [ + "new_exits = exits.vbt.signals.first_after(entries, reset_wait=0)\n", + "new_entries = entries.vbt.signals.first_after(exits)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b79491ce-3291-46ee-a2a6-5aa3ac9a2de3", + "metadata": {}, + "outputs": [], + "source": 
[ + "symbol = \"ETHUSDT\"\n", + "fig = data.plot(\n", + " symbol=symbol, \n", + " ohlc_trace_kwargs=dict(opacity=0.5), \n", + " plot_volume=False\n", + ")\n", + "entries[symbol].vbt.signals.plot_as_entries(\n", + " y=data.get(\"Close\", symbol), fig=fig)\n", + "exits[symbol].vbt.signals.plot_as_exits(\n", + " y=data.get(\"Close\", symbol), fig=fig)\n", + "new_entries[symbol].vbt.signals.plot_as_entry_marks(\n", + " y=data.get(\"Close\", symbol), fig=fig, \n", + " trace_kwargs=dict(name=\"New entries\"))\n", + "new_exits[symbol].vbt.signals.plot_as_exit_marks(\n", + " y=data.get(\"Close\", symbol), fig=fig, \n", + " trace_kwargs=dict(name=\"New exits\")).show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "883ff6e0-7777-487e-b58b-80bc38f2634c", + "metadata": {}, + "outputs": [], + "source": [ + "new_entries, new_exits = entries.vbt.signals.clean(exits)" + ] + }, + { + "cell_type": "markdown", + "id": "82b4e080-808a-4ab3-a700-81ecf0b75d98", + "metadata": {}, + "source": [ + "### Duration" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1b9ac018-2643-4ef5-aded-a617062e726a", + "metadata": {}, + "outputs": [], + "source": [ + "ranges = entries.vbt.signals.between_ranges()\n", + "print(ranges.records)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "aa971658-1359-403b-9e65-a4a3ce390751", + "metadata": {}, + "outputs": [], + "source": [ + "ranges.start_idx.min(wrap_kwargs=dict(to_index=True))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "35efd36c-c2f2-43c0-af21-9df040ec7cc8", + "metadata": {}, + "outputs": [], + "source": [ + "print(ranges.duration.describe(wrap_kwargs=dict(to_timedelta=True)))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8b944e41-0a8f-498e-82ce-30c46d4b2db6", + "metadata": {}, + "outputs": [], + "source": [ + "ranges = entries.vbt.signals.between_ranges(target=exits)\n", + "ranges.avg_duration" + ] + }, + { + "cell_type": 
"code", + "execution_count": null, + "id": "7b920e56-d16d-400a-9382-a23d63162fda", + "metadata": {}, + "outputs": [], + "source": [ + "new_ranges = new_entries.vbt.signals.between_ranges(target=new_exits)\n", + "new_ranges.avg_duration" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8528ab4c-22b6-4af0-87f5-5c6063bf68c4", + "metadata": {}, + "outputs": [], + "source": [ + "ranges = entries.vbt.signals.between_ranges(target=exits, relation=\"manyone\")\n", + "ranges.avg_duration" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a047b2b8-a27c-4ac0-b124-9f6cfbe513b4", + "metadata": {}, + "outputs": [], + "source": [ + "new_ranges = new_entries.vbt.signals.between_ranges(target=new_exits, relation=\"manyone\")\n", + "new_ranges.avg_duration" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3ab9a53b-2f2e-4601-8e58-c04d86892ee5", + "metadata": {}, + "outputs": [], + "source": [ + "ranges = entries.vbt.signals.partition_ranges()\n", + "print(ranges.duration.describe())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c35fb94f-04e8-4e6f-a2a4-1f1542d02edb", + "metadata": {}, + "outputs": [], + "source": [ + "new_ranges = new_entries.vbt.signals.partition_ranges()\n", + "print(new_ranges.duration.describe())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5cdef2ce-f8ef-44c6-8eac-42172560d93c", + "metadata": {}, + "outputs": [], + "source": [ + "ranges = entries.vbt.signals.between_partition_ranges()\n", + "print(ranges.duration.describe(wrap_kwargs=dict(to_timedelta=True)))" + ] + }, + { + "cell_type": "markdown", + "id": "541e5a28-aa4f-4056-b4de-6ea7ecdb9064", + "metadata": {}, + "source": [ + "### Overview" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fe7bcbc0-4300-4c7b-a4d6-17e903bb18bb", + "metadata": {}, + "outputs": [], + "source": [ + "entries.vbt.signals.stats(column=\"BTCUSDT\")" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "id": "a650256c-a112-4ef2-9480-b867ae5b1892", + "metadata": {}, + "outputs": [], + "source": [ + "entries.vbt.signals.stats(column=\"BTCUSDT\", settings=dict(target=exits))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1cf7ad48-e516-4f88-897b-29cc5a0f7d64", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/to_explore/notebooks/StopSignals.ipynb b/to_explore/notebooks/StopSignals.ipynb new file mode 100644 index 0000000..f59e374 --- /dev/null +++ b/to_explore/notebooks/StopSignals.ipynb @@ -0,0 +1,771 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Notebook for the article [Stop Loss, Trailing Stop, or Take Profit? 2 Million Backtests Shed Light](https://polakowo.medium.com/stop-loss-trailing-stop-or-take-profit-2-million-backtests-shed-light-dde23bda40be)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from vectorbtpro import *\n", + "# whats_imported()\n", + "\n", + "import ipywidgets\n", + "\n", + "vbt.settings.set_theme('dark')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "seed = 42\n", + "symbols = [\n", + " \"BTC-USD\", \"ETH-USD\", \"XRP-USD\", \"BCH-USD\", \"LTC-USD\", \n", + " \"BNB-USD\", \"EOS-USD\", \"XLM-USD\", \"XMR-USD\", \"ADA-USD\"\n", + "]\n", + "start_date = vbt.utc_timestamp(\"2018-01-01\")\n", + "end_date = vbt.utc_timestamp(\"2021-01-01\")\n", + "time_delta = end_date - start_date\n", + "window_len = vbt.timedelta(\"180d\")\n", + "window_cnt = 400\n", + "exit_types = [\"SL\", \"TS\", \"TP\", \"Random\", \"Holding\"]\n", + "step = 0.01\n", + "stops = np.arange(step, 1 + step, step)\n", + "\n", + "vbt.settings.wrapping[\"freq\"] = \"d\"\n", + "vbt.settings.plotting[\"layout\"][\"template\"] = \"vbt_dark\"\n", + "vbt.settings.portfolio[\"init_cash\"] = 100.\n", + "\n", + "print(pd.Series({\n", + " \"Start date\": start_date,\n", + " \"End date\": end_date,\n", + " \"Time period (days)\": time_delta.days,\n", + " \"Assets\": len(symbols),\n", + " \"Window length\": window_len,\n", + " \"Windows\": window_cnt,\n", + " \"Exit types\": len(exit_types),\n", + " \"Stop values\": len(stops),\n", + " \"Tests per asset\": window_cnt * len(stops) * len(exit_types),\n", + " \"Tests per window\": len(symbols) * len(stops) * len(exit_types),\n", + " \"Tests per exit type\": len(symbols) * window_cnt * len(stops),\n", + " \"Tests per stop type and value\": len(symbols) * window_cnt,\n", + " \"Tests total\": len(symbols) * window_cnt * len(stops) * len(exit_types)\n", + "}))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cols = [\"Open\", \"Low\", \"High\", \"Close\", \"Volume\"]\n", + "yfdata = 
vbt.YFData.pull(symbols, start=start_date, end=end_date)\n", + "\n", + "print(yfdata.data.keys())\n", + "print(yfdata.data[\"BTC-USD\"].shape)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "yfdata.plot(symbol=\"BTC-USD\").show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ohlcv = yfdata.concat()\n", + "\n", + "print(ohlcv.keys())\n", + "print(ohlcv[\"Open\"].shape)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "splitter = vbt.Splitter.from_n_rolling(\n", + " ohlcv[\"Open\"].index, \n", + " n=window_cnt,\n", + " length=window_len.days\n", + ")\n", + "\n", + "split_ohlcv = {}\n", + "for k, v in ohlcv.items():\n", + " split_ohlcv[k] = splitter.take(v, into=\"reset_stacked\")\n", + "print(split_ohlcv[\"Open\"].shape)\n", + "\n", + "split_indexes = splitter.take(ohlcv[\"Open\"].index)\n", + "print(split_indexes)\n", + "print(split_indexes[10])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(split_ohlcv[\"Open\"].columns)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "entries = pd.DataFrame.vbt.signals.empty_like(split_ohlcv[\"Open\"])\n", + "entries.iloc[0, :] = True\n", + "\n", + "print(entries.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# We use OHLCSTX instead of built-in stop-loss in Portfolio.from_signals\n", + "# because we want to analyze signals before simulation + it's easier to construct param grids\n", + "# For reality check, run the same setup using Portfolio.from_signals alone\n", + "\n", + "sl_ohlcstx = vbt.OHLCSTX.run(\n", + " entries, \n", + " entry_price=split_ohlcv[\"Close\"], \n", + " open=split_ohlcv[\"Open\"], \n", + " 
def rename_stop_level(df):
    """Unify the stop-parameter column level under the name ``stop_value``.

    The three OHLCSTX runs label their parameter level differently
    (``ohlcstx_sl_stop``, ``ohlcstx_tsl_stop``, ``ohlcstx_tp_stop``).
    Renaming them to one common name lets the frames be concatenated and
    grouped together later. ``strict=False`` is forwarded unchanged to
    ``rename_levels``.
    """
    level_mapping = dict.fromkeys(
        ("ohlcstx_sl_stop", "ohlcstx_tsl_stop", "ohlcstx_tp_stop"),
        "stop_value",
    )
    return df.vbt.rename_levels(level_mapping, strict=False)


def groupby_stop_value(df):
    """Return the mean per-column signal count for each ``stop_value``."""
    signal_totals = df.vbt.signals.total()
    per_stop = signal_totals.groupby("stop_value")
    return per_stop.mean()
= vbt.broadcast_to(split_ohlcv[\"Close\"], sl_price)\n", + "\n", + "print(hold_exits.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "rand_exits = hold_exits.vbt.shuffle(seed=seed)\n", + "rand_price = hold_price\n", + "\n", + "print(rand_exits.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "exits = pd.DataFrame.vbt.concat(\n", + " sl_exits, \n", + " tsl_exits, \n", + " tp_exits, \n", + " rand_exits, \n", + " hold_exits, \n", + " keys=pd.Index(exit_types, name=\"exit_type\")\n", + ")\n", + "del sl_exits\n", + "del tsl_exits\n", + "del tp_exits\n", + "del rand_exits\n", + "del hold_exits\n", + "\n", + "print(exits.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "price = pd.DataFrame.vbt.concat(\n", + " sl_price, \n", + " tsl_price, \n", + " tp_price, \n", + " rand_price, \n", + " hold_price, \n", + " keys=pd.Index(exit_types, name=\"exit_type\")\n", + ")\n", + "del sl_price\n", + "del tsl_price\n", + "del tp_price\n", + "del rand_price\n", + "del hold_price\n", + "\n", + "print(price.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(exits.columns)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(exits.vbt.getsize())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(price.vbt.getsize())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "avg_distance = entries.vbt.signals.between_ranges(target=exits)\\\n", + " .duration.mean()\\\n", + " .groupby([\"exit_type\", \"stop_value\"])\\\n", + " .mean()\\\n", + " .unstack(level=\"exit_type\")\n", + "\n", + 
"print(avg_distance.mean())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "avg_distance[exit_types].vbt.plot(\n", + " xaxis_title=\"Stop value\", \n", + " yaxis_title=\"Avg distance to entry\"\n", + ").show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "pf = vbt.Portfolio.from_signals(\n", + " split_ohlcv[\"Close\"], \n", + " entries, \n", + " exits, \n", + " price=price\n", + ")\n", + "\n", + "print(len(pf.orders))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "total_return = pf.total_return\n", + "del pf\n", + "\n", + "print(total_return.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import gc\n", + "\n", + "total_returns = []\n", + "for i in vbt.ProgressBar(range(len(exit_types))):\n", + " chunk_mask = exits.columns.get_level_values(\"exit_type\") == exit_types[i]\n", + " chunk_pf = vbt.Portfolio.from_signals(\n", + " split_ohlcv[\"Close\"], \n", + " entries, \n", + " exits.loc[:, chunk_mask],\n", + " price=price.loc[:, chunk_mask]\n", + " )\n", + " total_returns.append(chunk_pf.total_return)\n", + " \n", + " del chunk_pf\n", + " gc.collect()\n", + " \n", + "total_return = pd.concat(total_returns)\n", + "\n", + "print(total_return.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "return_by_type = total_return.unstack(level=\"exit_type\")[exit_types]\n", + "\n", + "print(return_by_type[\"Holding\"].describe(percentiles=[]))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "purple_color = vbt.settings[\"plotting\"][\"color_schema\"][\"purple\"]\n", + "return_by_type[\"Holding\"].vbt.histplot(\n", + " xaxis_title=\"Total 
def get_expectancy(return_by_type, level_name, cash=None):
    """Compute the expectancy of returns per group of an index level.

    Expectancy is ``win_rate * avg_win - (1 - win_rate) * |avg_loss|``,
    where the average win and loss are expressed in currency by scaling
    the returns with the initial cash.

    Parameters
    ----------
    return_by_type : pandas.DataFrame or pandas.Series
        Total returns, indexed by (at least) the level ``level_name``.
    level_name : str
        Name of the index level to group by.
    cash : float, optional
        Initial cash used to convert returns into currency. When omitted,
        the module-level ``init_cash`` is used (the original behavior).

    Returns
    -------
    Same type as ``return_by_type``, indexed by the unique values of
    ``level_name``.
    """
    if cash is None:
        # Backward-compatible fallback to the notebook-level constant.
        cash = init_cash

    is_win = return_by_type > 0
    is_loss = return_by_type < 0

    # Share of strictly positive returns per group (NaN counts as non-win).
    win_rate = is_win.groupby(level_name).mean()

    # Masking with ``where`` leaves NaN for non-matching entries, which the
    # group mean skips. A group without wins (or losses) yields NaN, which
    # is replaced by 0 so that it contributes nothing to the expectancy.
    avg_win = (cash * return_by_type.where(is_win).groupby(level_name).mean()).fillna(0)
    avg_loss = (cash * return_by_type.where(is_loss).groupby(level_name).mean()).fillna(0)

    return win_rate * avg_win - (1 - win_rate) * np.abs(avg_loss)
def bin_return(return_by_type):
    """Attach a ``bin_right`` index level derived from the holding return.

    Each row's ``"Holding"`` return is assigned to one of the intervals
    defined by the module-level ``bins`` (right-closed). The right edge of
    that interval is added as a new index level named ``bin_right``. Rows
    whose holding return falls outside all intervals get NaN.

    Parameters
    ----------
    return_by_type : pandas.DataFrame
        Returns with a ``"Holding"`` column.

    Returns
    -------
    The result of ``return_by_type.vbt.add_levels(...)`` with the extra
    ``bin_right`` level on axis 0.
    """
    classes = pd.cut(return_by_type["Holding"], bins=bins, right=True)

    # Look the right edges up through the categorical codes instead of
    # calling ``x.right`` per element: unbinned rows are NaN (code -1), and
    # a plain float NaN has no ``.right`` attribute.
    right_edges = classes.cat.categories.right.to_numpy()
    codes = classes.cat.codes.to_numpy()
    # ``right_edges[-1]`` is a valid (but meaningless) lookup for code -1;
    # ``np.where`` replaces those positions with NaN.
    new_level = np.where(codes >= 0, right_edges[codes], np.nan)

    new_level = pd.Index(new_level, name="bin_right")
    return return_by_type.vbt.add_levels(new_level, axis=0)
def update_scatter(*args, **kwargs):
    """Recompute both dashboard charts from the current widget selections.

    Registered as an observer callback, so the positional and keyword
    arguments passed in are accepted but ignored. All inputs are read from
    notebook-level widgets and frames.
    """
    # Snapshot the current widget state.
    _symbols = asset_multi_select.value
    _from = date_range_slider.value[0]
    _to = date_range_slider.value[1]
    _metric_name = metric_dropdown.value

    # Keep only rows whose window lies fully inside the selected date range
    # and whose symbol is selected.
    range_mask = (range_start_lvl >= _from) & (range_end_lvl <= _to)
    asset_mask = symbol_lvl.isin(_symbols)
    filt = return_by_type[range_mask & asset_mask]

    # Binned view of the same rows (adds the "bin_right" level).
    filt_binned = bin_return(filt)
    # NOTE(review): there is no ``else`` branch; a metric name outside these
    # four options would leave ``filt_metric`` / ``filt_bin_metric`` unbound.
    if _metric_name == "Mean":
        filt_metric = filt.groupby("stop_value").mean()
        filt_bin_metric = filt_binned.groupby("bin_right").mean()
    elif _metric_name == "Median":
        filt_metric = filt.groupby("stop_value").median()
        filt_bin_metric = filt_binned.groupby("bin_right").median()
    elif _metric_name == "Win Rate":
        filt_metric = (filt > 0).groupby("stop_value").mean()
        filt_bin_metric = (filt_binned > 0).groupby("bin_right").mean()
    elif _metric_name == "Expectancy":
        filt_metric = get_expectancy(filt, "stop_value")
        filt_bin_metric = get_expectancy(filt_binned, "bin_right")

    # Push the per-stop metric into its figure and re-render it as a PNG
    # for the image widget.
    stop_scatter.fig.update_layout(yaxis_title=_metric_name)
    stop_scatter.update(filt_metric)
    stop_scatter_img.value = stop_scatter.fig.to_image(format="png")

    # Same for the per-bin metric.
    bin_scatter.fig.update_layout(yaxis_title=_metric_name)
    bin_scatter.update(filt_bin_metric)
    bin_scatter_img.value = bin_scatter.fig.to_image(format="png")

    # Show the selected range boundaries at day resolution.
    range_start_label.value = np.datetime_as_string(_from.to_datetime64(), unit="D")
    range_end_label.value = np.datetime_as_string(_to.to_datetime64(), unit="D")
"state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/to_explore/notebooks/SuperTrend.ipynb b/to_explore/notebooks/SuperTrend.ipynb new file mode 100644 index 0000000..95b8772 --- /dev/null +++ b/to_explore/notebooks/SuperTrend.ipynb @@ -0,0 +1,2472 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "d2ac023c-531a-487a-81f0-fcce3c9925b6", + "metadata": {}, + "source": [ + "# SuperFast SuperTrend" + ] + }, + { + "cell_type": "markdown", + "id": "e052b3c7-c1ec-4245-8025-fbacbc71e1a6", + "metadata": {}, + "source": [ + "## Data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8ea9cb06-d59c-4c2c-807a-23854fa16676", + "metadata": {}, + "outputs": [], + "source": [ + "from vectorbtpro import *\n", + "# whats_imported()\n", + "\n", + "vbt.settings.set_theme('dark')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7f8c4e61-3d13-4659-b559-3cef404adfb4", + "metadata": {}, + "outputs": [], + "source": [ + "# data = vbt.BinanceData.pull(\n", + "# ['BTCUSDT', 'ETHUSDT'], \n", + "# start='2020-01-01 UTC',\n", + "# end='2022-01-01 UTC',\n", + "# timeframe='1h'\n", + "# )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "508e2929-eab9-4db9-b581-d3a78959e173", + "metadata": {}, + "outputs": [], + "source": [ + "# data.to_hdf('my_data.h5')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5f4587b7-a9d2-4765-a52d-1ea9fa32bdbf", + "metadata": {}, + "outputs": [], + "source": [ + "data = vbt.HDFData.pull('my_data.h5')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "87de3e25-9836-45a4-9452-a3a0976a1d5d", + "metadata": {}, + "outputs": [], + "source": [ + "data.data['BTCUSDT'].info()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9389c053-5429-4fe7-a05f-cd2961ebaf94", + "metadata": {}, + "outputs": [], + "source": [ + "data.stats()" + ] + }, + { + "cell_type": "code", + 
def get_med_price(high, low):
    """Return the median price, i.e. the midpoint of ``high`` and ``low``."""
    return (high + low) / 2


def get_atr(high, low, close, period):
    """Return the average true range of a single price series.

    The true range is the largest of ``|high - low|``,
    ``|high - previous close|`` and ``|low - previous close|``. It is then
    smoothed with an exponentially weighted mean using
    ``alpha = 1 / period``. The first ``period - 1`` values are NaN because
    of ``min_periods=period``.
    """
    prev_close = close.shift()
    tr0 = abs(high - low)
    tr1 = abs(high - prev_close)
    tr2 = abs(low - prev_close)
    # ``max`` skips NaN, so the first row falls back to ``high - low``.
    tr = pd.concat((tr0, tr1, tr2), axis=1).max(axis=1)
    atr = tr.ewm(alpha=1 / period, adjust=False, min_periods=period).mean()
    return atr


def get_basic_bands(med_price, atr, multiplier):
    """Return ``(upper, lower)``: ``med_price`` plus/minus ``multiplier * atr``."""
    matr = multiplier * atr
    upper = med_price + matr
    lower = med_price - matr
    return upper, lower


def get_final_bands(close, upper, lower):
    """Derive the SuperTrend line and direction from the basic bands.

    Parameters
    ----------
    close, upper, lower : pandas.Series
        Close price and basic bands sharing the same index. The inputs are
        not modified; clamping is done on internal copies.

    Returns
    -------
    trend : pandas.Series
        Active band at each step (lower band in an uptrend, upper band in
        a downtrend); NaN at the first position.
    dir_ : pandas.Series
        ``1`` for an uptrend, ``-1`` for a downtrend.
    long : pandas.Series
        ``trend`` where the direction is up, NaN elsewhere.
    short : pandas.Series
        ``trend`` where the direction is down, NaN elsewhere.
    """
    # Work on copies: the bands are clamped in place below, and the caller's
    # Series must not change as a side effect.
    upper = upper.copy()
    lower = lower.copy()

    trend = pd.Series(np.full(close.shape, np.nan), index=close.index)
    dir_ = pd.Series(np.full(close.shape, 1), index=close.index)
    long = pd.Series(np.full(close.shape, np.nan), index=close.index)
    short = pd.Series(np.full(close.shape, np.nan), index=close.index)

    # The explicit loop is kept on purpose: each step depends on the
    # (possibly clamped) bands of the previous step.
    for i in range(1, close.shape[0]):
        if close.iloc[i] > upper.iloc[i - 1]:
            dir_.iloc[i] = 1
        elif close.iloc[i] < lower.iloc[i - 1]:
            dir_.iloc[i] = -1
        else:
            # No breakout: keep the direction and stop the active band
            # from moving against the trend.
            dir_.iloc[i] = dir_.iloc[i - 1]
            if dir_.iloc[i] > 0 and lower.iloc[i] < lower.iloc[i - 1]:
                lower.iloc[i] = lower.iloc[i - 1]
            if dir_.iloc[i] < 0 and upper.iloc[i] > upper.iloc[i - 1]:
                upper.iloc[i] = upper.iloc[i - 1]

        if dir_.iloc[i] > 0:
            trend.iloc[i] = long.iloc[i] = lower.iloc[i]
        else:
            trend.iloc[i] = short.iloc[i] = upper.iloc[i]

    return trend, dir_, long, short


def supertrend(high, low, close, period=7, multiplier=3):
    """Compute SuperTrend for one price series with pandas.

    Returns the tuple ``(trend, direction, long, short)`` produced by
    ``get_final_bands`` from bands built as median price plus/minus
    ``multiplier`` times the ATR over ``period``.
    """
    med_price = get_med_price(high, low)
    atr = get_atr(high, low, close, period)
    upper, lower = get_basic_bands(med_price, atr, multiplier)
    return get_final_bands(close, upper, lower)
def get_atr_np(high, low, close, period):
    """Return the average true range for one-dimensional NumPy inputs.

    Array counterpart of the pandas ATR helper: the close is shifted with
    ``vbt.nb.fshift_1d_nb`` and the true range is averaged with
    ``vbt.nb.wwm_mean_1d_nb`` over ``period``.
    """
    # presumably a forward shift by one element, so each position sees the
    # previous close -- TODO confirm against the vectorbtpro docs
    shifted_close = vbt.nb.fshift_1d_nb(close)
    tr0 = np.abs(high - low)
    tr1 = np.abs(high - shifted_close)
    tr2 = np.abs(low - shifted_close)
    # NOTE(review): ndarray.max propagates NaN (unlike pandas' max), so any
    # position where shifted_close is NaN yields a NaN true range.
    tr = np.column_stack((tr0, tr1, tr2)).max(axis=1)
    atr = vbt.nb.wwm_mean_1d_nb(tr, period)
    return atr

@njit
def get_final_bands_nb(close, upper, lower):
    """Derive trend, direction, long and short arrays from the basic bands.

    Returns ``(trend, dir_, long, short)``, each shaped like ``close``.
    Position 0 keeps its initial values (NaN, and 1 for ``dir_``).
    NOTE: ``upper`` and ``lower`` are modified in place.
    """
    trend = np.full(close.shape, np.nan)
    dir_ = np.full(close.shape, 1)
    long = np.full(close.shape, np.nan)
    short = np.full(close.shape, np.nan)

    for i in range(1, close.shape[0]):
        if close[i] > upper[i - 1]:
            # Close broke above the previous upper band: uptrend.
            dir_[i] = 1
        elif close[i] < lower[i - 1]:
            # Close broke below the previous lower band: downtrend.
            dir_[i] = -1
        else:
            # No breakout: keep the direction and stop the active band
            # from moving against the trend.
            dir_[i] = dir_[i - 1]
            if dir_[i] > 0 and lower[i] < lower[i - 1]:
                lower[i] = lower[i - 1]
            if dir_[i] < 0 and upper[i] > upper[i - 1]:
                upper[i] = upper[i - 1]

        # The trend line follows the lower band in an uptrend and the upper
        # band in a downtrend; long/short hold it only for their direction.
        if dir_[i] > 0:
            trend[i] = long[i] = lower[i]
        else:
            trend[i] = short[i] = upper[i]

    return trend, dir_, long, short

def faster_supertrend(high, low, close, period=7, multiplier=3):
    """Compute SuperTrend on NumPy arrays using the compiled band loop.

    Returns the ``(trend, dir_, long, short)`` tuple of
    ``get_final_bands_nb``.
    """
    med_price = get_med_price(high, low)
    atr = get_atr_np(high, low, close, period)
    upper, lower = get_basic_bands(med_price, atr, multiplier)
    return get_final_bands_nb(close, upper, lower)
import talib


def faster_supertrend_talib(high, low, close, period=7, multiplier=3):
    """Compute SuperTrend with TA-Lib supplying the median price and ATR.

    The basic bands are built from ``talib.MEDPRICE`` and ``talib.ATR``,
    then passed to ``get_final_bands_nb``, whose
    ``(trend, dir_, long, short)`` tuple is returned.
    """
    med_price = talib.MEDPRICE(high, low)
    true_range_avg = talib.ATR(high, low, close, period)
    bands = get_basic_bands(med_price, true_range_avg, multiplier)
    upper_band, lower_band = bands
    return get_final_bands_nb(close, upper_band, lower_band)
"92ff0625-ad73-44d7-8984-8103a702a03b", + "metadata": {}, + "outputs": [], + "source": [ + "%%timeit\n", + "faster_supertrend_talib(\n", + " high['BTCUSDT'].values, \n", + " low['BTCUSDT'].values, \n", + " close['BTCUSDT'].values\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "9f2d1d3c-80f6-4bdf-972b-af349ec3e1ea", + "metadata": {}, + "source": [ + "## Indicator factory" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fcd4ca20-aa82-42f9-916b-7b9024890b14", + "metadata": {}, + "outputs": [], + "source": [ + "SuperTrend = vbt.IF(\n", + " class_name='SuperTrend',\n", + " short_name='st',\n", + " input_names=['high', 'low', 'close'],\n", + " param_names=['period', 'multiplier'],\n", + " output_names=['supert', 'superd', 'superl', 'supers']\n", + ").with_apply_func(\n", + " faster_supertrend_talib, \n", + " takes_1d=True,\n", + " period=7, \n", + " multiplier=3\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c9b84719-41b9-4622-87a4-1afc4da1cafd", + "metadata": {}, + "outputs": [], + "source": [ + "help(SuperTrend.run)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "98812dfa-3e5b-487f-9bb0-738ff2e9ffdf", + "metadata": {}, + "outputs": [], + "source": [ + "st = SuperTrend.run(high, low, close)\n", + "print(st.supert)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "200ef4a9-cd6e-4f04-a279-0e4941933a8c", + "metadata": {}, + "outputs": [], + "source": [ + "%%timeit\n", + "SuperTrend.run(high, low, close)" + ] + }, + { + "cell_type": "markdown", + "id": "a038fda3-4789-4756-a298-f57a48cd2e99", + "metadata": {}, + "source": [ + "### Using expressions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ffcfb664-d323-49d2-8544-9696dd74a316", + "metadata": {}, + "outputs": [], + "source": [ + "expr = \"\"\"\n", + "SuperTrend[st]:\n", + "medprice = @talib_medprice(high, low)\n", + "atr = @talib_atr(high, low, close, @p_period)\n", + "upper, 
class SuperTrend(SuperTrend):
    """SuperTrend indicator extended with a plotting method."""

    def plot(self,
             column=None,
             close_kwargs=None,
             superl_kwargs=None,
             supers_kwargs=None,
             fig=None,
             **layout_kwargs):
        """Plot the close price with the short and long SuperTrend lines.

        ``column`` selects one column when the indicator holds several.
        ``close_kwargs``, ``superl_kwargs`` and ``supers_kwargs`` are
        forwarded to the plot call of the respective trace. ``fig`` is an
        optional existing figure to draw on. ``layout_kwargs`` are passed
        along with the close trace. Returns the figure.
        """
        # Any falsy value (None, empty dict) becomes a fresh empty dict.
        close_opts = close_kwargs or {}
        long_opts = superl_kwargs or {}
        short_opts = supers_kwargs or {}

        pick = self.select_col_from_obj
        close_line = pick(self.close, column).rename('Close')
        short_line = pick(self.supers, column).rename('Short')
        long_line = pick(self.superl, column).rename('Long')

        # The close trace creates (or reuses) the figure; the two trend
        # lines are then drawn onto it, short first.
        fig = close_line.vbt.plot(fig=fig, **close_opts, **layout_kwargs)
        short_line.vbt.plot(fig=fig, **short_opts)
        long_line.vbt.plot(fig=fig, **long_opts)

        return fig
+ "st = SuperTrend.run(\n", + " high, low, close, \n", + " period=periods, \n", + " multiplier=multipliers,\n", + " param_product=True,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "00adab7e-1ac1-4aca-a2b5-e00926381009", + "metadata": {}, + "outputs": [], + "source": [ + "st.wrapper.columns" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bcea7660-bf82-4ea2-ae3d-6407914167bc", + "metadata": {}, + "outputs": [], + "source": [ + "st.loc[date_range, (19, 4, 'ETHUSDT')].plot().show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c1ed2c21-e61c-49e1-8b08-1324380ca566", + "metadata": {}, + "outputs": [], + "source": [ + "print(st.getsize())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4cc44d67-cabe-41e5-9d4b-c37527ff6dbb", + "metadata": {}, + "outputs": [], + "source": [ + "input_size = st.wrapper.shape[0] * st.wrapper.shape[1]\n", + "n_outputs = 4\n", + "data_type_size = 8\n", + "input_size * n_outputs * data_type_size / 1024 / 1024" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5b150641-4aa6-4634-98a9-8bd698448f4c", + "metadata": {}, + "outputs": [], + "source": [ + "entries = (~st.superl.isnull()).vbt.signals.fshift()\n", + "exits = (~st.supers.isnull()).vbt.signals.fshift()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3f5a43d0-9b85-4347-9bb9-f0e9b096d143", + "metadata": {}, + "outputs": [], + "source": [ + "pf = vbt.Portfolio.from_signals(close, entries, exits, fees=0.001, freq='1h')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4a9155ae-4971-4009-b1dc-607dc4116fed", + "metadata": {}, + "outputs": [], + "source": [ + "pf.sharpe_ratio.vbt.heatmap(\n", + " x_level='st_period', \n", + " y_level='st_multiplier',\n", + " slider_level='symbol'\n", + ").show_svg()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "855fe67d-6c43-4a9a-9e03-df18d4d2e77c", + 
"metadata": {}, + "outputs": [], + "source": [ + "vbt.Portfolio.from_holding(close, freq='1h').sharpe_ratio" + ] + }, + { + "cell_type": "markdown", + "id": "25a4b932-81a0-42b5-98b5-f58988319952", + "metadata": {}, + "source": [ + "## Streaming" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3a132681-34a2-4a7d-ab67-5cf72645fd7c", + "metadata": {}, + "outputs": [], + "source": [ + "class SuperTrendAIS(tp.NamedTuple):\n", + " i: int\n", + " high: float\n", + " low: float\n", + " close: float\n", + " prev_close: float\n", + " prev_upper: float\n", + " prev_lower: float\n", + " prev_dir_: float\n", + " nobs: int\n", + " weighted_avg: float\n", + " old_wt: float\n", + " period: int\n", + " multiplier: float\n", + " \n", + "class SuperTrendAOS(tp.NamedTuple):\n", + " nobs: int\n", + " weighted_avg: float\n", + " old_wt: float\n", + " upper: float\n", + " lower: float\n", + " trend: float\n", + " dir_: float\n", + " long: float\n", + " short: float" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6dca1372-1c55-48f3-b569-7a193142fff6", + "metadata": {}, + "outputs": [], + "source": [ + "@njit(nogil=True)\n", + "def get_tr_one_nb(high, low, prev_close):\n", + " tr0 = abs(high - low)\n", + " tr1 = abs(high - prev_close)\n", + " tr2 = abs(low - prev_close)\n", + " if np.isnan(tr0) or np.isnan(tr1) or np.isnan(tr2):\n", + " tr = np.nan\n", + " else:\n", + " tr = max(tr0, tr1, tr2)\n", + " return tr\n", + "\n", + "@njit(nogil=True)\n", + "def get_med_price_one_nb(high, low):\n", + " return (high + low) / 2\n", + "\n", + "@njit(nogil=True)\n", + "def get_basic_bands_one_nb(high, low, atr, multiplier):\n", + " med_price = get_med_price_one_nb(high, low)\n", + " matr = multiplier * atr\n", + " upper = med_price + matr\n", + " lower = med_price - matr\n", + " return upper, lower\n", + " \n", + "@njit(nogil=True)\n", + "def get_final_bands_one_nb(close, upper, lower, prev_upper, prev_lower, prev_dir_):\n", + " if close > 
prev_upper:\n", + " dir_ = 1\n", + " elif close < prev_lower:\n", + " dir_ = -1\n", + " else:\n", + " dir_ = prev_dir_\n", + " if dir_ > 0 and lower < prev_lower:\n", + " lower = prev_lower\n", + " if dir_ < 0 and upper > prev_upper:\n", + " upper = prev_upper\n", + "\n", + " if dir_ > 0:\n", + " trend = long = lower\n", + " short = np.nan\n", + " else:\n", + " trend = short = upper\n", + " long = np.nan\n", + " return upper, lower, trend, dir_, long, short" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e787868d-99b9-41ea-ad97-e70480e021e8", + "metadata": {}, + "outputs": [], + "source": [ + "@njit(nogil=True)\n", + "def superfast_supertrend_acc_nb(in_state):\n", + " i = in_state.i\n", + " high = in_state.high\n", + " low = in_state.low\n", + " close = in_state.close\n", + " prev_close = in_state.prev_close\n", + " prev_upper = in_state.prev_upper\n", + " prev_lower = in_state.prev_lower\n", + " prev_dir_ = in_state.prev_dir_\n", + " nobs = in_state.nobs\n", + " weighted_avg = in_state.weighted_avg\n", + " old_wt = in_state.old_wt\n", + " period = in_state.period\n", + " multiplier = in_state.multiplier\n", + " \n", + " tr = get_tr_one_nb(high, low, prev_close)\n", + "\n", + " alpha = vbt.nb.alpha_from_wilder_nb(period)\n", + " ewm_mean_in_state = vbt.nb.EWMMeanAIS(\n", + " i=i,\n", + " value=tr,\n", + " old_wt=old_wt,\n", + " weighted_avg=weighted_avg,\n", + " nobs=nobs,\n", + " alpha=alpha,\n", + " minp=period,\n", + " adjust=False\n", + " )\n", + " ewm_mean_out_state = vbt.nb.ewm_mean_acc_nb(ewm_mean_in_state)\n", + " atr = ewm_mean_out_state.value\n", + " \n", + " upper, lower = get_basic_bands_one_nb(high, low, atr, multiplier)\n", + " \n", + " if i == 0:\n", + " trend, dir_, long, short = np.nan, 1, np.nan, np.nan\n", + " else:\n", + " upper, lower, trend, dir_, long, short = get_final_bands_one_nb(\n", + " close, upper, lower, prev_upper, prev_lower, prev_dir_)\n", + " \n", + " return SuperTrendAOS(\n", + " 
nobs=ewm_mean_out_state.nobs,\n", + " weighted_avg=ewm_mean_out_state.weighted_avg,\n", + " old_wt=ewm_mean_out_state.old_wt,\n", + " upper=upper,\n", + " lower=lower,\n", + " trend=trend,\n", + " dir_=dir_,\n", + " long=long,\n", + " short=short\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1d5a76d7-f981-4e64-bf5f-9cd69046cfb2", + "metadata": {}, + "outputs": [], + "source": [ + "@njit(nogil=True)\n", + "def superfast_supertrend_nb(high, low, close, period=7, multiplier=3):\n", + " trend = np.empty(close.shape, dtype=np.float_)\n", + " dir_ = np.empty(close.shape, dtype=np.int_)\n", + " long = np.empty(close.shape, dtype=np.float_)\n", + " short = np.empty(close.shape, dtype=np.float_)\n", + " \n", + " if close.shape[0] == 0:\n", + " return trend, dir_, long, short\n", + "\n", + " nobs = 0\n", + " old_wt = 1.\n", + " weighted_avg = np.nan\n", + " prev_upper = np.nan\n", + " prev_lower = np.nan\n", + "\n", + " for i in range(close.shape[0]):\n", + " in_state = SuperTrendAIS(\n", + " i=i,\n", + " high=high[i],\n", + " low=low[i],\n", + " close=close[i],\n", + " prev_close=close[i - 1] if i > 0 else np.nan,\n", + " prev_upper=prev_upper,\n", + " prev_lower=prev_lower,\n", + " prev_dir_=dir_[i - 1] if i > 0 else 1,\n", + " nobs=nobs,\n", + " weighted_avg=weighted_avg,\n", + " old_wt=old_wt,\n", + " period=period,\n", + " multiplier=multiplier\n", + " )\n", + " \n", + " out_state = superfast_supertrend_acc_nb(in_state)\n", + " \n", + " nobs = out_state.nobs\n", + " weighted_avg = out_state.weighted_avg\n", + " old_wt = out_state.old_wt\n", + " prev_upper = out_state.upper\n", + " prev_lower = out_state.lower\n", + " trend[i] = out_state.trend\n", + " dir_[i] = out_state.dir_\n", + " long[i] = out_state.long\n", + " short[i] = out_state.short\n", + " \n", + " return trend, dir_, long, short" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6a16e5ed-f62c-4523-a5e1-6b75e6af9a7b", + "metadata": {}, + "outputs": 
[], + "source": [ + "superfast_out = superfast_supertrend_nb(\n", + " high['BTCUSDT'].values,\n", + " low['BTCUSDT'].values,\n", + " close['BTCUSDT'].values\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "94757fa3-9008-4320-b66f-c72c94ab19b6", + "metadata": {}, + "outputs": [], + "source": [ + "faster_out = faster_supertrend(\n", + " high['BTCUSDT'].values,\n", + " low['BTCUSDT'].values,\n", + " close['BTCUSDT'].values\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f0733554-9a70-4ebb-9962-7ac87ddb63b9", + "metadata": {}, + "outputs": [], + "source": [ + "np.testing.assert_array_equal(superfast_out[0], faster_out[0])\n", + "np.testing.assert_array_equal(superfast_out[1], faster_out[1])\n", + "np.testing.assert_array_equal(superfast_out[2], faster_out[2])\n", + "np.testing.assert_array_equal(superfast_out[3], faster_out[3])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1cc3b596-f88e-4222-947f-2fb1ca0e879c", + "metadata": {}, + "outputs": [], + "source": [ + "%%timeit\n", + "superfast_supertrend_nb(\n", + " high['BTCUSDT'].values, \n", + " low['BTCUSDT'].values, \n", + " close['BTCUSDT'].values\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "08682671-abae-4f68-98d6-da623ed67c7d", + "metadata": {}, + "source": [ + "## Multithreading" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "da0c1330-ee15-409c-a57a-a2d95facaa3c", + "metadata": {}, + "outputs": [], + "source": [ + "SuperTrend = vbt.IF(\n", + " class_name='SuperTrend',\n", + " short_name='st',\n", + " input_names=['high', 'low', 'close'],\n", + " param_names=['period', 'multiplier'],\n", + " output_names=['supert', 'superd', 'superl', 'supers']\n", + ").with_apply_func(\n", + " superfast_supertrend_nb, \n", + " takes_1d=True,\n", + " period=7, \n", + " multiplier=3\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "72fbbf4b-107f-4efd-9a13-01b3230b8ec0", + 
"metadata": {}, + "outputs": [], + "source": [ + "%%timeit\n", + "SuperTrend.run(high, low, close)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "01592b3c-cba0-4007-82d5-0026ade663c2", + "metadata": {}, + "outputs": [], + "source": [ + "%%timeit\n", + "SuperTrend.run(\n", + " high, low, close, \n", + " period=periods, \n", + " multiplier=multipliers,\n", + " param_product=True,\n", + " execute_kwargs=dict(show_progress=False)\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ee578ba8-c32c-4c4e-9da7-e39a92bfcb7e", + "metadata": {}, + "outputs": [], + "source": [ + "270 / 336 / 2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "527a2a56-f5e8-4897-8288-40edeb023639", + "metadata": {}, + "outputs": [], + "source": [ + "%%timeit\n", + "SuperTrend.run(\n", + " high, low, close, \n", + " period=periods, \n", + " multiplier=multipliers,\n", + " param_product=True,\n", + " execute_kwargs=dict(\n", + " engine='dask', \n", + " chunk_len='auto', \n", + " show_progress=False\n", + " )\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "ac158ed3-1c7c-498f-843d-09a6d02439c4", + "metadata": {}, + "source": [ + "## Pipelines" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c5bcfaed-2d4c-4929-8eae-3505d682577d", + "metadata": {}, + "outputs": [], + "source": [ + "def pipeline(data, period=7, multiplier=3):\n", + " high = data.get('High')\n", + " low = data.get('Low')\n", + " close = data.get('Close')\n", + " st = SuperTrend.run(\n", + " high, \n", + " low, \n", + " close, \n", + " period=period, \n", + " multiplier=multiplier\n", + " )\n", + " entries = (~st.superl.isnull()).vbt.signals.fshift()\n", + " exits = (~st.supers.isnull()).vbt.signals.fshift()\n", + " pf = vbt.Portfolio.from_signals(\n", + " close, \n", + " entries=entries, \n", + " exits=exits, \n", + " fees=0.001,\n", + " save_returns=True,\n", + " max_order_records=0,\n", + " freq='1h'\n", + " )\n", + " 
return pf.sharpe_ratio" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e57ec5e5-71a3-46fa-a8d1-edc9d77876f3", + "metadata": {}, + "outputs": [], + "source": [ + "pipeline(data)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c87e3133-9238-4cf6-8fb2-518b36e1962d", + "metadata": {}, + "outputs": [], + "source": [ + "%%timeit\n", + "pipeline(data)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3c94c2a-7c02-484f-9b5b-83892cc0d2e0", + "metadata": {}, + "outputs": [], + "source": [ + "336 * 32" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b23cfc27-81f4-43d6-9a3c-3e2136db980c", + "metadata": {}, + "outputs": [], + "source": [ + "op_tree = (product, periods, multipliers)\n", + "period_product, multiplier_product = vbt.generate_param_combs(op_tree)\n", + "period_product = np.asarray(period_product)\n", + "multiplier_product = np.asarray(multiplier_product)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d0c220d9-bd6f-48c5-a794-f02d6a6e3eeb", + "metadata": {}, + "outputs": [], + "source": [ + "%%timeit\n", + "pipeline(data, period_product, multiplier_product)" + ] + }, + { + "cell_type": "markdown", + "id": "52529de0-d7a9-4e4e-9a91-4c2449f3405f", + "metadata": {}, + "source": [ + "### Chunked pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d8413e9c-5fe1-4339-a289-f9d1f7595ce6", + "metadata": {}, + "outputs": [], + "source": [ + "chunked_pipeline = vbt.chunked(\n", + " size=vbt.LenSizer(arg_query='period', single_type=int),\n", + " arg_take_spec=dict(\n", + " data=None,\n", + " period=vbt.ChunkSlicer(),\n", + " multiplier=vbt.ChunkSlicer()\n", + " ),\n", + " merge_func=lambda x: pd.concat(x).sort_index()\n", + ")(pipeline)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb15c6bc-d73e-4449-bb98-d43432dc9dad", + "metadata": {}, + "outputs": [], + "source": [ + "chunked_pipeline(data)" + ] + }, 
+ { + "cell_type": "code", + "execution_count": null, + "id": "728cadd0-a62f-49bb-8c86-c24df3642ad2", + "metadata": {}, + "outputs": [], + "source": [ + "chunked_pipeline(\n", + " data, \n", + " period_product[:4], \n", + " multiplier_product[:4],\n", + " _n_chunks=2,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bc3206ff-4b53-459c-91cd-7183193d76f2", + "metadata": {}, + "outputs": [], + "source": [ + "chunk_meta, tasks = chunked_pipeline(\n", + " data, \n", + " period_product[:4], \n", + " multiplier_product[:4],\n", + " _n_chunks=2,\n", + " _return_raw_chunks=True\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fc340146-43ee-40a2-8bb2-60494469d39e", + "metadata": {}, + "outputs": [], + "source": [ + "chunk_meta" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f734466d-59f9-49c6-ae2b-b3ff038833df", + "metadata": {}, + "outputs": [], + "source": [ + "list(tasks)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d279086d-743e-4044-9fe7-1d46e74d04e4", + "metadata": {}, + "outputs": [], + "source": [ + "%%timeit\n", + "chunked_pipeline(data, period_product, multiplier_product)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5125c3af-b330-47a2-a24d-49e58d37e471", + "metadata": {}, + "outputs": [], + "source": [ + "%%timeit\n", + "chunked_pipeline(data, period_product, multiplier_product, _chunk_len=1)" + ] + }, + { + "cell_type": "markdown", + "id": "0e6c55bd-6e54-4ebc-bb9a-9f2a2805403d", + "metadata": {}, + "source": [ + "### Numba pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d8830575-46de-4884-a278-214c258ecc00", + "metadata": {}, + "outputs": [], + "source": [ + "@njit(nogil=True)\n", + "def pipeline_nb(high, low, close, periods=np.array([7]), multipliers=np.array([3]), ann_factor=365):\n", + " sharpe = np.empty(periods.size * close.shape[1], dtype=np.float_)\n", + " long_entries = 
np.empty(close.shape, dtype=np.bool_)\n", + " long_exits = np.empty(close.shape, dtype=np.bool_)\n", + " group_lens = np.full(close.shape[1], 1)\n", + " init_cash = 100.\n", + " fees = 0.001\n", + " k = 0\n", + " \n", + " for i in range(periods.size):\n", + " for col in range(close.shape[1]):\n", + " _, _, superl, supers = superfast_supertrend_nb(\n", + " high[:, col], \n", + " low[:, col], \n", + " close[:, col], \n", + " periods[i], \n", + " multipliers[i]\n", + " )\n", + " long_entries[:, col] = vbt.nb.fshift_1d_nb(~np.isnan(superl), fill_value=False)\n", + " long_exits[:, col] = vbt.nb.fshift_1d_nb(~np.isnan(supers), fill_value=False)\n", + " \n", + " sim_out = vbt.pf_nb.from_signals_nb(\n", + " target_shape=close.shape,\n", + " group_lens=group_lens,\n", + " init_cash=init_cash,\n", + " high=high,\n", + " low=low,\n", + " close=close,\n", + " long_entries=long_entries,\n", + " long_exits=long_exits,\n", + " fees=fees,\n", + " save_returns=True\n", + " )\n", + " returns = sim_out.in_outputs.returns\n", + " sharpe[k:k + close.shape[1]] = vbt.ret_nb.sharpe_ratio_nb(returns, ann_factor, ddof=1)\n", + " k += close.shape[1]\n", + " \n", + " return sharpe" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2eabd3cf-5f03-4cbb-8b1d-6bba6bcbbd3d", + "metadata": {}, + "outputs": [], + "source": [ + "ann_factor = vbt.pd_acc.returns.get_ann_factor(freq='1h')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5a82762-332a-4b38-9739-0da01d06a02d", + "metadata": {}, + "outputs": [], + "source": [ + "pipeline_nb(\n", + " high.values, \n", + " low.values, \n", + " close.values,\n", + " ann_factor=ann_factor\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cd786362-5c17-4d9a-a8b7-77604c47942f", + "metadata": {}, + "outputs": [], + "source": [ + "%%timeit\n", + "pipeline_nb(\n", + " high.values, \n", + " low.values, \n", + " close.values,\n", + " ann_factor=ann_factor\n", + ")" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "id": "1796f5ea-c0de-4249-98e5-ce14c550108f", + "metadata": {}, + "outputs": [], + "source": [ + "def merge_func(arrs, ann_args, input_columns):\n", + " arr = np.concatenate(arrs)\n", + " param_index = vbt.stack_indexes((\n", + " pd.Index(ann_args['periods']['value'], name='st_period'),\n", + " pd.Index(ann_args['multipliers']['value'], name='st_multiplier')\n", + " ))\n", + " index = vbt.combine_indexes((\n", + " param_index,\n", + " input_columns\n", + " ))\n", + " return pd.Series(arr, index=index)\n", + "\n", + "nb_chunked = vbt.chunked(\n", + " size=vbt.ArraySizer(arg_query='periods', axis=0),\n", + " arg_take_spec=dict(\n", + " high=None,\n", + " low=None,\n", + " close=None,\n", + " periods=vbt.ArraySlicer(axis=0),\n", + " multipliers=vbt.ArraySlicer(axis=0),\n", + " ann_factor=None\n", + " ),\n", + " merge_func=merge_func,\n", + " merge_kwargs=dict(\n", + " ann_args=vbt.Rep(\"ann_args\")\n", + " )\n", + ")\n", + "chunked_pipeline_nb = nb_chunked(pipeline_nb)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8f0a9058-bc57-4331-b9e5-31fd511c8862", + "metadata": {}, + "outputs": [], + "source": [ + "chunked_pipeline_nb(\n", + " high.values, \n", + " low.values,\n", + " close.values,\n", + " periods=period_product[:4], \n", + " multipliers=multiplier_product[:4],\n", + " ann_factor=ann_factor,\n", + " _n_chunks=2,\n", + " _merge_kwargs=dict(input_columns=close.columns)\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "badba6cb-3405-498d-b366-cd9205721541", + "metadata": {}, + "outputs": [], + "source": [ + "%%timeit\n", + "chunked_pipeline_nb(\n", + " high.values, \n", + " low.values, \n", + " close.values,\n", + " periods=period_product, \n", + " multipliers=multiplier_product,\n", + " ann_factor=ann_factor,\n", + " _merge_kwargs=dict(input_columns=close.columns)\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": 
"c5208092-e88f-428a-a2da-f43d325ab20e", + "metadata": {}, + "outputs": [], + "source": [ + "%%timeit\n", + "chunked_pipeline_nb(\n", + " high.values, \n", + " low.values, \n", + " close.values,\n", + " periods=period_product, \n", + " multipliers=multiplier_product,\n", + " ann_factor=ann_factor,\n", + " _execute_kwargs=dict(engine='dask'),\n", + " _merge_kwargs=dict(input_columns=close.columns)\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "b03f9632-ebfd-4073-a80b-a1671a3d3361", + "metadata": {}, + "source": [ + "### Contextualized pipeline" + ] + }, + { + "cell_type": "markdown", + "id": "3eded1d5-1a4a-410c-96ab-3f715be479a1", + "metadata": {}, + "source": [ + "#### Streaming Sharpe" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "03f8b212-4ae3-404e-90fe-4c44a867e556", + "metadata": {}, + "outputs": [], + "source": [ + "class RollSharpeAIS(tp.NamedTuple):\n", + " i: int\n", + " ret: float\n", + " pre_window_ret: float\n", + " cumsum: float\n", + " cumsum_sq: float\n", + " nancnt: int\n", + " window: int\n", + " minp: tp.Optional[int]\n", + " ddof: int\n", + " ann_factor: float\n", + " \n", + "class RollSharpeAOS(tp.NamedTuple):\n", + " cumsum: float\n", + " cumsum_sq: float\n", + " nancnt: int\n", + " value: float\n", + "\n", + "@njit(nogil=True)\n", + "def rolling_sharpe_acc_nb(in_state):\n", + " mean_in_state = vbt.nb.RollMeanAIS(\n", + " i=in_state.i,\n", + " value=in_state.ret,\n", + " pre_window_value=in_state.pre_window_ret,\n", + " cumsum=in_state.cumsum,\n", + " nancnt=in_state.nancnt,\n", + " window=in_state.window,\n", + " minp=in_state.minp\n", + " )\n", + " mean_out_state = vbt.nb.rolling_mean_acc_nb(mean_in_state)\n", + " \n", + " std_in_state = vbt.nb.RollStdAIS(\n", + " i=in_state.i,\n", + " value=in_state.ret,\n", + " pre_window_value=in_state.pre_window_ret,\n", + " cumsum=in_state.cumsum,\n", + " cumsum_sq=in_state.cumsum_sq,\n", + " nancnt=in_state.nancnt,\n", + " window=in_state.window,\n", + " 
minp=in_state.minp,\n", + " ddof=in_state.ddof\n", + " )\n", + " std_out_state = vbt.nb.rolling_std_acc_nb(std_in_state)\n", + " \n", + " mean = mean_out_state.value\n", + " std = std_out_state.value\n", + " if std == 0:\n", + " sharpe = np.nan\n", + " else:\n", + " sharpe = mean / std * np.sqrt(in_state.ann_factor)\n", + " return RollSharpeAOS(\n", + " cumsum=std_out_state.cumsum,\n", + " cumsum_sq=std_out_state.cumsum_sq,\n", + " nancnt=std_out_state.nancnt,\n", + " value=sharpe\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1339d8a8-c186-4fea-abc3-fd926b17149d", + "metadata": {}, + "outputs": [], + "source": [ + "@njit(nogil=True)\n", + "def rolling_sharpe_ratio_nb(returns, window, minp=None, ddof=0, ann_factor=365):\n", + " if window is None:\n", + " window = returns.shape[0]\n", + " if minp is None:\n", + " minp = window\n", + " out = np.empty(returns.shape, dtype=np.float_)\n", + " \n", + " if returns.shape[0] == 0:\n", + " return out\n", + "\n", + " cumsum = 0.\n", + " cumsum_sq = 0.\n", + " nancnt = 0\n", + "\n", + " for i in range(returns.shape[0]):\n", + " in_state = RollSharpeAIS(\n", + " i=i,\n", + " ret=returns[i],\n", + " pre_window_ret=returns[i - window] if i - window >= 0 else np.nan,\n", + " cumsum=cumsum,\n", + " cumsum_sq=cumsum_sq,\n", + " nancnt=nancnt,\n", + " window=window,\n", + " minp=minp,\n", + " ddof=ddof,\n", + " ann_factor=ann_factor\n", + " )\n", + " \n", + " out_state = rolling_sharpe_acc_nb(in_state)\n", + " \n", + " cumsum = out_state.cumsum\n", + " cumsum_sq = out_state.cumsum_sq\n", + " nancnt = out_state.nancnt\n", + " out[i] = out_state.value\n", + " \n", + " return out" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "66ff280b-0057-4095-b473-ceee2ee1e0f1", + "metadata": {}, + "outputs": [], + "source": [ + "returns = close['BTCUSDT'].vbt.to_returns()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "084c24fc-53b0-4d85-a184-0d3a8503cd00", + 
"metadata": {}, + "outputs": [], + "source": [ + "np.testing.assert_allclose(\n", + " rolling_sharpe_ratio_nb(\n", + " returns=returns.values, \n", + " window=10, \n", + " ddof=1, \n", + " ann_factor=ann_factor),\n", + " returns.vbt.returns(freq='1h').rolling_sharpe_ratio(10).values\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "47c7edb2-f5de-4d7f-99a7-a35e5dcdec46", + "metadata": {}, + "source": [ + "#### Callbacks" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "28fba511-1030-4c6b-8592-5a965b8b6b1e", + "metadata": {}, + "outputs": [], + "source": [ + "class Memory(tp.NamedTuple):\n", + " nobs: tp.Array1d\n", + " old_wt: tp.Array1d\n", + " weighted_avg: tp.Array1d\n", + " prev_upper: tp.Array1d\n", + " prev_lower: tp.Array1d\n", + " prev_dir_: tp.Array1d\n", + " cumsum: tp.Array1d\n", + " cumsum_sq: tp.Array1d\n", + " nancnt: tp.Array1d\n", + " was_entry: tp.Array1d\n", + " was_exit: tp.Array1d\n", + "\n", + "@njit(nogil=True)\n", + "def pre_sim_func_nb(c):\n", + " memory = Memory(\n", + " nobs=np.full(c.target_shape[1], 0, dtype=np.int_),\n", + " old_wt=np.full(c.target_shape[1], 1., dtype=np.float_),\n", + " weighted_avg=np.full(c.target_shape[1], np.nan, dtype=np.float_),\n", + " prev_upper=np.full(c.target_shape[1], np.nan, dtype=np.float_),\n", + " prev_lower=np.full(c.target_shape[1], np.nan, dtype=np.float_),\n", + " prev_dir_=np.full(c.target_shape[1], np.nan, dtype=np.float_),\n", + " cumsum=np.full(c.target_shape[1], 0., dtype=np.float_),\n", + " cumsum_sq=np.full(c.target_shape[1], 0., dtype=np.float_),\n", + " nancnt=np.full(c.target_shape[1], 0, dtype=np.int_),\n", + " was_entry=np.full(c.target_shape[1], False, dtype=np.bool_),\n", + " was_exit=np.full(c.target_shape[1], False, dtype=np.bool_)\n", + " )\n", + " return (memory,)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a3f8ffd-82d1-4b86-80c5-0d9d4e9a5680", + "metadata": {}, + "outputs": [], + "source": [ + "@njit(nogil=True)\n", + 
"def order_func_nb(c, memory, period, multiplier):\n", + " is_entry = memory.was_entry[c.col]\n", + " is_exit = memory.was_exit[c.col]\n", + " \n", + " in_state = SuperTrendAIS(\n", + " i=c.i,\n", + " high=c.high[c.i, c.col],\n", + " low=c.low[c.i, c.col],\n", + " close=c.close[c.i, c.col],\n", + " prev_close=c.close[c.i - 1, c.col] if c.i > 0 else np.nan,\n", + " prev_upper=memory.prev_upper[c.col],\n", + " prev_lower=memory.prev_lower[c.col],\n", + " prev_dir_=memory.prev_dir_[c.col],\n", + " nobs=memory.nobs[c.col],\n", + " weighted_avg=memory.weighted_avg[c.col],\n", + " old_wt=memory.old_wt[c.col],\n", + " period=period,\n", + " multiplier=multiplier\n", + " )\n", + "\n", + " out_state = superfast_supertrend_acc_nb(in_state)\n", + "\n", + " memory.nobs[c.col] = out_state.nobs\n", + " memory.weighted_avg[c.col] = out_state.weighted_avg\n", + " memory.old_wt[c.col] = out_state.old_wt\n", + " memory.prev_upper[c.col] = out_state.upper\n", + " memory.prev_lower[c.col] = out_state.lower\n", + " memory.prev_dir_[c.col] = out_state.dir_\n", + " memory.was_entry[c.col] = not np.isnan(out_state.long)\n", + " memory.was_exit[c.col] = not np.isnan(out_state.short)\n", + " \n", + " in_position = c.position_now > 0\n", + " if is_entry and not in_position:\n", + " size = np.inf\n", + " elif is_exit and in_position:\n", + " size = -np.inf\n", + " else:\n", + " size = 0.\n", + " return vbt.pf_nb.order_nb(\n", + " size=size, \n", + " direction=vbt.pf_enums.Direction.LongOnly,\n", + " fees=0.001\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "73e93b02-3a10-4143-abf4-24ec71afb42f", + "metadata": {}, + "outputs": [], + "source": [ + "@njit(nogil=True)\n", + "def post_segment_func_nb(c, memory, ann_factor):\n", + " for col in range(c.from_col, c.to_col):\n", + " in_state = RollSharpeAIS(\n", + " i=c.i,\n", + " ret=c.last_return[col],\n", + " pre_window_ret=np.nan,\n", + " cumsum=memory.cumsum[col],\n", + " cumsum_sq=memory.cumsum_sq[col],\n", + " 
nancnt=memory.nancnt[col],\n", + " window=c.i + 1,\n", + " minp=0,\n", + " ddof=1,\n", + " ann_factor=ann_factor\n", + " )\n", + " out_state = rolling_sharpe_acc_nb(in_state)\n", + " memory.cumsum[col] = out_state.cumsum\n", + " memory.cumsum_sq[col] = out_state.cumsum_sq\n", + " memory.nancnt[col] = out_state.nancnt\n", + " c.in_outputs.sharpe[col] = out_state.value" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fe428781-32c6-4be7-a65a-97827372a31f", + "metadata": {}, + "outputs": [], + "source": [ + "class InOutputs(tp.NamedTuple):\n", + " sharpe: tp.Array1d\n", + "\n", + "@njit(nogil=True)\n", + "def ctx_pipeline_nb(high, low, close, periods=np.array([7]), multipliers=np.array([3]), ann_factor=365):\n", + " in_outputs = InOutputs(sharpe=np.empty(close.shape[1], dtype=np.float_))\n", + " sharpe = np.empty(periods.size * close.shape[1], dtype=np.float_)\n", + " group_lens = np.full(close.shape[1], 1)\n", + " init_cash = 100.\n", + " k = 0\n", + " \n", + " for i in range(periods.size):\n", + " sim_out = vbt.pf_nb.from_order_func_nb(\n", + " target_shape=close.shape,\n", + " group_lens=group_lens,\n", + " cash_sharing=False,\n", + " init_cash=init_cash,\n", + " pre_sim_func_nb=pre_sim_func_nb,\n", + " order_func_nb=order_func_nb,\n", + " order_args=(periods[i], multipliers[i]),\n", + " post_segment_func_nb=post_segment_func_nb,\n", + " post_segment_args=(ann_factor,),\n", + " high=high,\n", + " low=low,\n", + " close=close,\n", + " in_outputs=in_outputs,\n", + " fill_pos_info=False,\n", + " max_order_records=0\n", + " )\n", + " sharpe[k:k + close.shape[1]] = in_outputs.sharpe\n", + " k += close.shape[1]\n", + " \n", + " return sharpe" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f3a0e59c-b063-442b-bf9c-9007cabac426", + "metadata": {}, + "outputs": [], + "source": [ + "ctx_pipeline_nb(\n", + " high.values, \n", + " low.values, \n", + " close.values,\n", + " ann_factor=ann_factor\n", + ")" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "id": "2876b2d3-c306-4340-80ea-1106d9953f95", + "metadata": {}, + "outputs": [], + "source": [ + "%%timeit\n", + "ctx_pipeline_nb(\n", + " high.values, \n", + " low.values, \n", + " close.values,\n", + " ann_factor=ann_factor\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3c85d1df-5094-4664-888e-90f31e0727b9", + "metadata": {}, + "outputs": [], + "source": [ + "chunked_ctx_pipeline_nb = nb_chunked(ctx_pipeline_nb)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3d4ed0b0-975e-4367-b17c-d233142dddb1", + "metadata": {}, + "outputs": [], + "source": [ + "chunked_ctx_pipeline_nb(\n", + " high.values, \n", + " low.values,\n", + " close.values,\n", + " periods=period_product[:4], \n", + " multipliers=multiplier_product[:4],\n", + " ann_factor=ann_factor,\n", + " _n_chunks=2,\n", + " _merge_kwargs=dict(input_columns=close.columns)\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5e837057-bfbf-404f-916a-fb1826cc911a", + "metadata": {}, + "outputs": [], + "source": [ + "%%timeit\n", + "chunked_ctx_pipeline_nb(\n", + " high.values, \n", + " low.values, \n", + " close.values,\n", + " periods=period_product, \n", + " multipliers=multiplier_product,\n", + " ann_factor=ann_factor,\n", + " _merge_kwargs=dict(input_columns=close.columns)\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "03d97ac5-d061-4b8c-a45e-857c21e7b724", + "metadata": {}, + "outputs": [], + "source": [ + "%%timeit\n", + "chunked_ctx_pipeline_nb(\n", + " high.values, \n", + " low.values, \n", + " close.values,\n", + " periods=period_product, \n", + " multipliers=multiplier_product,\n", + " ann_factor=ann_factor,\n", + " _execute_kwargs=dict(engine='dask'),\n", + " _merge_kwargs=dict(input_columns=close.columns)\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "c0d07ec5-9650-4db1-a3cd-def9a0950af2", + "metadata": {}, + "source": [ + 
"### Bonus: Own simulator" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "56dcbce3-1df4-4577-8a49-122c231f309f", + "metadata": {}, + "outputs": [], + "source": [ + "@njit(nogil=True)\n", + "def raw_pipeline_nb(high, low, close, periods=np.array([7]), multipliers=np.array([3]), ann_factor=365):\n", + " out = np.empty(periods.size * close.shape[1], dtype=np.float_)\n", + " \n", + " if close.shape[0] == 0:\n", + " return out\n", + "\n", + " for k in range(len(periods)):\n", + " \n", + " for col in range(close.shape[1]):\n", + " nobs = 0\n", + " old_wt = 1.\n", + " weighted_avg = np.nan\n", + " prev_close_ = np.nan\n", + " prev_upper = np.nan\n", + " prev_lower = np.nan\n", + " prev_dir_ = 1\n", + " cumsum = 0.\n", + " cumsum_sq = 0.\n", + " nancnt = 0\n", + " was_entry = False\n", + " was_exit = False\n", + "\n", + " init_cash = 100.\n", + " cash = init_cash\n", + " position = 0.\n", + " debt = 0.\n", + " locked_cash = 0.\n", + " free_cash = init_cash\n", + " val_price = np.nan\n", + " value = init_cash\n", + " prev_value = init_cash\n", + " return_ = 0.\n", + "\n", + " for i in range(close.shape[0]):\n", + " is_entry = was_entry\n", + " is_exit = was_exit\n", + "\n", + " st_in_state = SuperTrendAIS(\n", + " i=i,\n", + " high=high[i, col],\n", + " low=low[i, col],\n", + " close=close[i, col],\n", + " prev_close=prev_close_,\n", + " prev_upper=prev_upper,\n", + " prev_lower=prev_lower,\n", + " prev_dir_=prev_dir_,\n", + " nobs=nobs,\n", + " weighted_avg=weighted_avg,\n", + " old_wt=old_wt,\n", + " period=periods[k],\n", + " multiplier=multipliers[k]\n", + " )\n", + "\n", + " st_out_state = superfast_supertrend_acc_nb(st_in_state)\n", + "\n", + " nobs = st_out_state.nobs\n", + " weighted_avg = st_out_state.weighted_avg\n", + " old_wt = st_out_state.old_wt\n", + " prev_close_ = close[i, col]\n", + " prev_upper = st_out_state.upper\n", + " prev_lower = st_out_state.lower\n", + " prev_dir_ = st_out_state.dir_\n", + " was_entry = not 
np.isnan(st_out_state.long)\n", + " was_exit = not np.isnan(st_out_state.short)\n", + "\n", + " if is_entry and position == 0:\n", + " size = np.inf\n", + " elif is_exit and position > 0:\n", + " size = -np.inf\n", + " else:\n", + " size = np.nan\n", + "\n", + " val_price = close[i, col]\n", + " value = cash + position * val_price\n", + " if not np.isnan(size):\n", + " exec_state = vbt.pf_enums.ExecState(\n", + " cash=cash,\n", + " position=position,\n", + " debt=debt,\n", + " locked_cash=locked_cash,\n", + " free_cash=free_cash,\n", + " val_price=val_price,\n", + " value=value\n", + " )\n", + " price_area = vbt.pf_enums.PriceArea(\n", + " open=np.nan,\n", + " high=high[i, col],\n", + " low=low[i, col],\n", + " close=close[i, col]\n", + " )\n", + " order = vbt.pf_nb.order_nb(\n", + " size=size, \n", + " direction=vbt.pf_enums.Direction.LongOnly,\n", + " fees=0.001\n", + " )\n", + " _, new_exec_state = vbt.pf_nb.execute_order_nb(exec_state, order, price_area)\n", + " cash, position, debt, locked_cash, free_cash, val_price, value = new_exec_state\n", + "\n", + " value = cash + position * val_price\n", + " return_ = vbt.ret_nb.get_return_nb(prev_value, value)\n", + " prev_value = value\n", + "\n", + " sharpe_in_state = RollSharpeAIS(\n", + " i=i,\n", + " ret=return_,\n", + " pre_window_ret=np.nan,\n", + " cumsum=cumsum,\n", + " cumsum_sq=cumsum_sq,\n", + " nancnt=nancnt,\n", + " window=i + 1,\n", + " minp=0,\n", + " ddof=1,\n", + " ann_factor=ann_factor\n", + " )\n", + " sharpe_out_state = rolling_sharpe_acc_nb(sharpe_in_state)\n", + " cumsum = sharpe_out_state.cumsum\n", + " cumsum_sq = sharpe_out_state.cumsum_sq\n", + " nancnt = sharpe_out_state.nancnt\n", + " sharpe = sharpe_out_state.value\n", + "\n", + " out[k * close.shape[1] + col] = sharpe\n", + " \n", + " return out" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "99f0e6a6-68ca-4d6e-8160-a9dfa5fc6153", + "metadata": {}, + "outputs": [], + "source": [ + "raw_pipeline_nb(\n", + " 
high.values, \n", + " low.values, \n", + " close.values,\n", + " ann_factor=ann_factor\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6e16670e-ecd3-4d94-b926-e7773eb088dd", + "metadata": {}, + "outputs": [], + "source": [ + "%%timeit\n", + "raw_pipeline_nb(\n", + " high.values, \n", + " low.values, \n", + " close.values,\n", + " ann_factor=ann_factor\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "14812b10-2e4f-4171-ba91-eecd5c329394", + "metadata": {}, + "outputs": [], + "source": [ + "chunked_raw_pipeline_nb = nb_chunked(raw_pipeline_nb)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8339de75-6c3c-4e59-9df0-17f969e8d14c", + "metadata": {}, + "outputs": [], + "source": [ + "chunked_raw_pipeline_nb(\n", + " high.values, \n", + " low.values,\n", + " close.values,\n", + " periods=period_product[:4], \n", + " multipliers=multiplier_product[:4],\n", + " ann_factor=ann_factor,\n", + " _n_chunks=2,\n", + " _merge_kwargs=dict(input_columns=close.columns)\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1fa1ac51-3cea-4ae8-86dc-d2b5068994b0", + "metadata": {}, + "outputs": [], + "source": [ + "%%timeit\n", + "chunked_raw_pipeline_nb(\n", + " high.values, \n", + " low.values, \n", + " close.values,\n", + " periods=period_product, \n", + " multipliers=multiplier_product,\n", + " ann_factor=ann_factor,\n", + " _merge_kwargs=dict(input_columns=close.columns)\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d8115735-80ca-463e-ba6d-f0892e413b17", + "metadata": {}, + "outputs": [], + "source": [ + "%%timeit\n", + "chunked_raw_pipeline_nb(\n", + " high.values, \n", + " low.values, \n", + " close.values,\n", + " periods=period_product, \n", + " multipliers=multiplier_product,\n", + " ann_factor=ann_factor,\n", + " _execute_kwargs=dict(engine=\"dask\"),\n", + " _merge_kwargs=dict(input_columns=close.columns)\n", + ")" + ] + 
}, + { + "cell_type": "code", + "execution_count": null, + "id": "382e71b5-5cbc-46fa-b2de-ce031ad902a0", + "metadata": {}, + "outputs": [], + "source": [ + "range_len = int(vbt.timedelta('365d') / vbt.timedelta('1h'))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dabf85c8-62ac-4a93-8903-2255e66b2d09", + "metadata": {}, + "outputs": [], + "source": [ + "splitter = vbt.Splitter.from_n_rolling(high.index, n=100, length=range_len)\n", + "\n", + "roll_high = splitter.take(high, into=\"reset_stacked\")\n", + "roll_low = splitter.take(low, into=\"reset_stacked\")\n", + "roll_close = splitter.take(close, into=\"reset_stacked\")\n", + "\n", + "range_indexes = splitter.take(high.index)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "722b1a84-5307-42fa-97f1-0027301e4367", + "metadata": {}, + "outputs": [], + "source": [ + "roll_close.columns" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5832339d-885a-4daa-982e-22b3c665d45c", + "metadata": {}, + "outputs": [], + "source": [ + "range_indexes[0]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5232615f-9745-48d4-8cea-3e32d19d4c77", + "metadata": {}, + "outputs": [], + "source": [ + "sharpe_ratios = chunked_raw_pipeline_nb(\n", + " roll_high.values, \n", + " roll_low.values,\n", + " roll_close.values,\n", + " periods=period_product, \n", + " multipliers=multiplier_product,\n", + " ann_factor=ann_factor,\n", + " _execute_kwargs=dict(engine=\"dask\"),\n", + " _merge_kwargs=dict(input_columns=roll_close.columns)\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "190dd192-7fc0-48e1-8ad9-c9653c9c01ab", + "metadata": {}, + "outputs": [], + "source": [ + "sharpe_ratios" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "077421ca-b5d6-4e22-b53b-258c4033a8c0", + "metadata": {}, + "outputs": [], + "source": [ + "pf_hold = vbt.Portfolio.from_holding(roll_close, freq='1h')\n", + 
"sharpe_ratios_hold = pf_hold.sharpe_ratio" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "838776aa-3700-4d86-921b-d0bbabbdb47e", + "metadata": {}, + "outputs": [], + "source": [ + "sharpe_ratios_hold" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7e8a6477-3235-4243-bb93-662d17daf28d", + "metadata": {}, + "outputs": [], + "source": [ + "def plot_subperiod_sharpe(index, sharpe_ratios, sharpe_ratios_hold, range_indexes, symbol):\n", + " split = index[0]\n", + " sharpe_ratios = sharpe_ratios.xs(\n", + " symbol, \n", + " level='symbol', \n", + " drop_level=True)\n", + " sharpe_ratios = sharpe_ratios.xs(\n", + " split, \n", + " level='split', \n", + " drop_level=True)\n", + " start_date = range_indexes[split][0]\n", + " end_date = range_indexes[split][-1]\n", + " return sharpe_ratios.vbt.heatmap(\n", + " x_level='st_period', \n", + " y_level='st_multiplier',\n", + " title=\"{} - {}\".format(\n", + " start_date.strftime(\"%d %b, %Y %H:%M:%S\"),\n", + " end_date.strftime(\"%d %b, %Y %H:%M:%S\")\n", + " ),\n", + " trace_kwargs=dict(\n", + " zmin=sharpe_ratios.min(),\n", + " zmid=sharpe_ratios_hold[(split, symbol)],\n", + " zmax=sharpe_ratios.max(),\n", + " colorscale='Spectral'\n", + " )\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5306f36f-765f-4d52-b8a8-b4c3a4ba7b6d", + "metadata": {}, + "outputs": [], + "source": [ + "fname = 'raw_pipeline.gif'\n", + "level_idx = sharpe_ratios.index.names.index('split')\n", + "split_indices = sharpe_ratios.index.levels[level_idx]\n", + "\n", + "vbt.save_animation(\n", + " fname,\n", + " split_indices, \n", + " plot_subperiod_sharpe,\n", + " sharpe_ratios,\n", + " sharpe_ratios_hold,\n", + " range_indexes,\n", + " 'BTCUSDT',\n", + " delta=1,\n", + " fps=7,\n", + " writer_kwargs=dict(loop=0)\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dd826ac3-a4da-4065-a5d0-9c77ea5ad6f0", + "metadata": {}, + "outputs": [], + 
"source": [ + "from IPython.display import Image, display\n", + " \n", + "with open(fname,'rb') as f:\n", + " display(Image(data=f.read(), format='png'))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4743b6d1-c6a1-4fa7-85f0-0c1e7366a44c", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/to_explore/notebooks/TelegramSignals.ipynb b/to_explore/notebooks/TelegramSignals.ipynb new file mode 100644 index 0000000..22f2e8b --- /dev/null +++ b/to_explore/notebooks/TelegramSignals.ipynb @@ -0,0 +1,285 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this example, we will build a Telegram bot that sends a signal once any Bollinger Band has been crossed. We will periodically query for the latest OHLCV data of the selected cryptocurrencies and append this data to our data pool. Additionally to receiving signals, any Telegram user can join the group and ask the bot to provide him with the current information. If the price change is higher than some number of standard deviations from the mean, while crossing the band, the bot sends a funny GIF." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from telegram import __version__ as TG_VER\n", + "\n", + "try:\n", + " from telegram import __version_info__\n", + "except ImportError:\n", + " __version_info__ = (0, 0, 0, 0, 0)\n", + "\n", + "if __version_info__ >= (20, 0, 0, \"alpha\", 1):\n", + " raise RuntimeError(f\"This example is not compatible with your current PTB version {TG_VER}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from vectorbtpro import *\n", + "# whats_imported()\n", + "\n", + "import logging" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "logging.basicConfig(format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', level=logging.INFO)\n", + "logger = logging.getLogger(__name__)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Telegram\n", + "vbt.settings.messaging['telegram']['token'] = \"YOUR_TOKEN\"\n", + "\n", + "# Giphy\n", + "vbt.settings.messaging['giphy']['api_key'] = \"YOUR_API_KEY\"\n", + "\n", + "# Data\n", + "SYMBOLS = ['BTC/USDT', 'ETH/USDT', 'ADA/USDT']\n", + "START = '1 hour ago UTC'\n", + "TIMEFRAME = '1m'\n", + "UPDATE_EVERY = vbt.utils.datetime_.interval_to_ms(TIMEFRAME) // 1000 # in seconds\n", + "DT_FORMAT = '%d %b %Y %H:%M:%S %z'\n", + "IND_PARAMS = dict(\n", + " timeperiod=20, \n", + " nbdevup=2, \n", + " nbdevdn=2\n", + ")\n", + "CHANGE_NBDEV = 2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "data = vbt.CCXTData.pull(SYMBOLS, start=START, timeframe=TIMEFRAME)\n", + "\n", + "print(data.wrapper.index)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def get_bbands(data):\n", + " return 
vbt.IndicatorFactory.from_talib('BBANDS').run(\n", + " data.get('Close'), **IND_PARAMS, hide_params=list(IND_PARAMS.keys()))\n", + "\n", + "\n", + "def get_info(bbands):\n", + " info = dict()\n", + " info['last_price'] = bbands.close.iloc[-1]\n", + " info['last_change'] = (bbands.close.iloc[-1] - bbands.close.iloc[-2]) / bbands.close.iloc[-1]\n", + " info['last_crossed_above_upper'] = bbands.close_crossed_above(bbands.upperband).iloc[-1]\n", + " info['last_crossed_below_upper'] = bbands.close_crossed_below(bbands.upperband).iloc[-1]\n", + " info['last_crossed_below_lower'] = bbands.close_crossed_below(bbands.lowerband).iloc[-1]\n", + " info['last_crossed_above_lower'] = bbands.close_crossed_above(bbands.lowerband).iloc[-1]\n", + " info['bw'] = (bbands.upperband - bbands.lowerband) / bbands.middleband\n", + " info['last_bw_zscore'] = info['bw'].vbt.zscore().iloc[-1]\n", + " info['last_change_zscore'] = bbands.close.vbt.pct_change().vbt.zscore().iloc[-1]\n", + " info['last_change_pos'] = info['last_change_zscore'] >= CHANGE_NBDEV\n", + " info['last_change_neg'] = info['last_change_zscore'] <= -CHANGE_NBDEV\n", + " return info\n", + "\n", + "\n", + "def format_symbol_info(symbol, info):\n", + " last_change = info['last_change'][symbol]\n", + " last_price = info['last_price'][symbol]\n", + " last_bw_zscore = info['last_bw_zscore'][symbol]\n", + " return \"{} ({:.2%}, {}, {:.2f})\".format(symbol, last_change, last_price, last_bw_zscore)\n", + "\n", + "\n", + "def format_signals_info(emoji, signals, info):\n", + " symbols = signals.index[signals]\n", + " symbol_msgs = []\n", + " for symbol in symbols:\n", + " symbol_msgs.append(format_symbol_info(symbol, info))\n", + " return \"{} {}\".format(emoji, ', '.join(symbol_msgs))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from telegram.ext import CommandHandler\n", + "\n", + "class MyTelegramBot(vbt.TelegramBot):\n", + " def __init__(self, data, 
**kwargs):\n", + " super().__init__(data=data, **kwargs)\n", + " \n", + " self.data = data\n", + " self.update_ts = data.wrapper.index[-1]\n", + " \n", + " @property\n", + " def custom_handlers(self):\n", + " return (CommandHandler('info', self.info_callback),)\n", + " \n", + " def info_callback(self, update, context):\n", + " chat_id = update.effective_chat.id\n", + " if len(context.args) != 1:\n", + " await self.send_message(chat_id, \"Please provide one symbol.\")\n", + " return\n", + " symbol = context.args[0]\n", + " if symbol not in SYMBOLS:\n", + " await self.send_message(chat_id, f\"There is no such symbol as \\\"{symbol}\\\".\")\n", + " return\n", + " \n", + " bbands = get_bbands(self.data)\n", + " info = get_info(bbands)\n", + " messages = [format_symbol_info(symbol, info)]\n", + " message = '\\n'.join([\"{}:\".format(self.update_ts.strftime(DT_FORMAT))] + messages)\n", + " await self.send_message(chat_id, message)\n", + " \n", + " @property\n", + " def start_message(self):\n", + " index = self.data.wrapper.index\n", + " return f\"\"\"Hello! 
\n", + "\n", + "Starting with {len(index)} rows from {index[0].strftime(DT_FORMAT)} to {index[-1].strftime(DT_FORMAT)}.\"\"\"\n", + " \n", + " @property\n", + " def help_message(self):\n", + " return \"\"\"Message format:\n", + "[event] [symbol] ([price change], [new price], [bandwidth z-score])\n", + " \n", + "Event legend:\n", + "⬆️ - Price went above upper band\n", + "⤵️ - Price retraced below upper band\n", + "⬇️ - Price went below lower band\n", + "⤴️ - Price retraced above lower band\n", + "\n", + "GIF is sent once a band is crossed and the price change is 2 stds from the mean.\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "telegram_bot = MyTelegramBot(data)\n", + "telegram_bot.start(in_background=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "class MyDataUpdater(vbt.DataUpdater):\n", + " _expected_keys=None\n", + " \n", + " def __init__(self, data, telegram_bot, **kwargs):\n", + " super().__init__(data, telegram_bot=telegram_bot, **kwargs)\n", + " \n", + " self.telegram_bot = telegram_bot\n", + " self.update_ts = data.wrapper.index[-1]\n", + " \n", + " def update(self):\n", + " super().update()\n", + " self.update_ts = vbt.timestamp(tz=self.update_ts.tz)\n", + " self.telegram_bot.data = self.data\n", + " self.telegram_bot.update_ts = self.update_ts\n", + " \n", + " bbands = get_bbands(self.data)\n", + " info = get_info(bbands)\n", + " \n", + " messages = []\n", + " if info['last_crossed_above_upper'].any():\n", + " messages.append(format_signals_info('⬆️', info['last_crossed_above_upper'], info))\n", + " if info['last_crossed_below_upper'].any():\n", + " messages.append(format_signals_info('⤵️', info['last_crossed_below_upper'], info))\n", + " if info['last_crossed_below_lower'].any():\n", + " messages.append(format_signals_info('⬇️', info['last_crossed_below_lower'], info))\n", + " if 
info['last_crossed_above_lower'].any():\n", + " messages.append(format_signals_info('⤴️', info['last_crossed_above_lower'], info))\n", + " \n", + " if len(messages) > 0:\n", + " message = '\\n'.join([\"{}:\".format(self.update_ts.strftime(DT_FORMAT))] + messages)\n", + " self.telegram_bot.send_message_to_all(message)\n", + " if (info['last_crossed_above_upper'] & info['last_change_pos']).any():\n", + " self.telegram_bot.send_giphy_to_all(\"launch\")\n", + " if (info['last_crossed_below_lower'] & info['last_change_neg']).any():\n", + " self.telegram_bot.send_giphy_to_all(\"fall\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "data_updater = MyDataUpdater(data, telegram_bot)\n", + "data_updater.update_every(UPDATE_EVERY)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "telegram_bot.stop()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}