{ "cells": [ { "cell_type": "code", "execution_count": null, "id": "8b6c912e-3c56-4c04-b9ce-2ba70342aade", "metadata": {}, "outputs": [], "source": [ "from vectorbtpro import *\n", "# whats_imported()\n", "\n", "vbt.settings.set_theme(\"dark\")" ] }, { "cell_type": "code", "execution_count": null, "id": "725278d1-9e9e-48ed-ac9d-9ddf1bcc59b7", "metadata": {}, "outputs": [], "source": [ "# Pull data\n", "\n", "def date_parser(timestamps):\n", " # First column are integer timestamps, parse them into DatetimeIndex\n", " return pd.to_datetime(timestamps, utc=True, unit=\"ms\")\n", "\n", "data = vbt.CSVData.pull(\"download/xauusd-m1-bid-2021-09-01-2023-03-14.csv\", date_parser=date_parser)\n", "\n", "print(data.wrapper.shape)" ] }, { "cell_type": "code", "execution_count": null, "id": "e149c879-62a5-4924-83bb-262c00591ed9", "metadata": {}, "outputs": [], "source": [ "# Pull signals\n", "signal_data = vbt.CSVData.pull(\"download/TG_Extracted_Signals.csv\", index_col=1)\n", "\n", "print(signal_data.wrapper.shape)" ] }, { "cell_type": "code", "execution_count": null, "id": "59ba2f9b-de8f-4a9c-9313-77089b696caa", "metadata": {}, "outputs": [], "source": [ "# Numba doesn't understand strings, thus create an enumerated type for stop types\n", "\n", "# Create a type first\n", "OrderTypeT = namedtuple(\"OrderTypeT\", [\"BUY\", \"SELL\", \"BUYSTOP\", \"SELLSTOP\"])\n", "\n", "# Then create a tuple\n", "OrderType = OrderTypeT(*range(len(OrderTypeT._fields)))\n", "\n", "print(OrderType)" ] }, { "cell_type": "code", "execution_count": null, "id": "48e1efd4-cd32-4cb5-9ee7-3e21c52d8956", "metadata": {}, "outputs": [], "source": [ "# Prepare signals\n", "\n", "def transform_signal_data(df):\n", " # Select only one symbol, the one we pulled the data for\n", " df = df[df[\"Symbol\"] == \"XAUUSD\"]\n", " \n", " # Select columns of interest\n", " df = df.iloc[:, -7:]\n", " \n", " # Map order types using OrderType\n", " df[\"OrderType\"] = df[\"OrderType\"].map(lambda x: OrderType._fields.index(x.replace(\" \", \"\")))\n", " \n", " # Some entry prices are zero\n", " df = df[df[\"EntryPrice\"] > 0]\n", " \n", " return df\n", "\n", "signal_data = signal_data.transform(transform_signal_data)\n", "\n", "print(signal_data.wrapper.shape)" ] }, { "cell_type": "code", "execution_count": null, "id": "02543169-e8c1-4813-ac6c-482a79d9e32d", "metadata": {}, "outputs": [], "source": [ "# Create named tuples which will act as containers for various arrays\n", "\n", "# SignalInfo will contain signal information in a vbt-friendly format\n", "# Rows in each array correspond to signals\n", "SignalInfo = namedtuple(\"SignalInfo\", [\n", " \"timestamp\", # 1d array with timestamps in nanosecond format (int64)\n", " \"order_type\", # 1d array with order types in integer format (int64, see order_type_map)\n", " \"entry_price\", # 1d array with entry price (float64)\n", " \"sl\", # 2d array where columns are SL levels (float64)\n", " \"tp\", # 2d array where columns are TP levels (float64)\n", "])\n", "\n", "# TempInfo will contain temporary information that will be written during backtesting\n", "# You can imagine being buffer that we write and then access at a later time\n", "# Rows in each array correspond to signals\n", "TempInfo = namedtuple(\"TempInfo\", [\n", " \"ts_bar\", # 1d array with row indices where signal was hit (int64)\n", " \"entry_price_bar\", # 1d array with row indices where entry price was hit (int64)\n", " \"sl_bar\", # 2d array with row indices where each SL level was hit, same shape as SignalInfo.sl (int64)\n", " \"tp_bar\", # 2d array with row indices where each TP level was hit, same shape as SignalInfo.tp (int64)\n", "])" ] }, { "cell_type": "code", "execution_count": null, "id": "521c1736-57a9-4d9b-86c7-566814e30aba", "metadata": {}, "outputs": [], "source": [ "# Here's what we will do:\n", "# Represent each signal as a separate column with its own starting capital\n", "# Run an order function using Portfolio.from_order_func\n", "# The order function is executed at each bar and column (signal in our case)\n", "# If the current bar contains a signal, execute the signal logic\n", "# Order functions can issue only one order at bar, thus we if multiple stops were hit, we will aggregate them\n", "# We will go all in and then gradually reduce the position based on the number of stops\n", "\n", "@njit\n", "def has_data_nb(c):\n", " # Numba function to check whether OHLC is not NaN\n", " if np.isnan(vbt.pf_nb.select_nb(c, c.open)):\n", " return False\n", " if np.isnan(vbt.pf_nb.select_nb(c, c.high)):\n", " return False\n", " if np.isnan(vbt.pf_nb.select_nb(c, c.low)):\n", " return False\n", " if np.isnan(vbt.pf_nb.select_nb(c, c.close)):\n", " return False\n", " return True\n", "\n", "@njit\n", "def check_price_hit_nb(c, price, hit_below, can_use_ohlc):\n", " # Numba function to check whether a price level was hit during this bar\n", " # Use hit_below=True to check against low and hit_below=False to check against high\n", " # If can_use_ohlc is False, will check only against the close price\n", " \n", " order_price, hit_on_open, hit = vbt.pf_nb.check_price_hit_nb(\n", " open=vbt.pf_nb.select_nb(c, c.open), # OHLC are flexible arrays, always use select_nb!\n", " high=vbt.pf_nb.select_nb(c, c.high),\n", " low=vbt.pf_nb.select_nb(c, c.low),\n", " close=vbt.pf_nb.select_nb(c, c.close),\n", " price=price,\n", " hit_below=hit_below,\n", " can_use_ohlc=can_use_ohlc\n", " )\n", " # Order price here isn't necessarily the price that has been hit\n", " # For example, if the price was hit before open, order price is set to the open price\n", " return order_price, hit\n", "\n", "@njit(boundscheck=True)\n", "def order_func_nb(c, signal_info, temp_info): # first argument is context, other are our containers\n", " if not has_data_nb(c):\n", " # If this bar contains no data, skip it\n", " return vbt.pf_nb.order_nothing_nb()\n", " \n", " # Each column corresponds to a signal\n", " signal = c.col\n", " \n", " # Each row corresponds to a bar\n", " bar = c.i\n", " \n", " # Define various flags for pure convenience\n", " buy_market = signal_info.order_type[signal] == OrderType.BUY\n", " sell_market = signal_info.order_type[signal] == OrderType.SELL\n", " buy_stop = signal_info.order_type[signal] == OrderType.BUYSTOP\n", " sell_stop = signal_info.order_type[signal] == OrderType.SELLSTOP\n", " buy = buy_market or buy_stop\n", " \n", " # First, we need to check whether the current bar contains a signal\n", " can_use_ohlc = True\n", " if temp_info.ts_bar[signal] == -1:\n", " if c.index[bar] == signal_info.timestamp[signal]:\n", " # If so, store the current row index in a temporary array\n", " # such that later we know that we already discovered a signal\n", " temp_info.ts_bar[signal] = bar\n", "\n", " # The signal has the granularity of seconds, thus it belongs somewhere in the bar\n", " # We need to notify the functions below that they cannot use full OHLC information, only close\n", " # This is to avoid using prices that technically happened before the signal\n", " can_use_ohlc = False\n", " \n", " # Here comes the entry order\n", " # Check whether the signal has been discovered\n", " # -1 means hasn't been discovered yet\n", " if temp_info.ts_bar[signal] != -1:\n", " \n", " # Then, check whether the entry order hasn't been executed\n", " if temp_info.entry_price_bar[signal] == -1:\n", " \n", " # If so, execute the entry order\n", " if buy_market:\n", " # Buy market order (using closing price)\n", " \n", " # Store the current row index in a temporary array such that future bars know\n", " # that the order has already been executed\n", " temp_info.entry_price_bar[signal] = bar\n", " order_price = signal_info.entry_price[signal]\n", " return vbt.pf_nb.order_nb(np.inf, np.inf) # size, price\n", " \n", " if sell_market:\n", " # Sell market order (using closing price)\n", " temp_info.entry_price_bar[signal] = bar\n", " order_price = signal_info.entry_price[signal]\n", " return vbt.pf_nb.order_nb(-np.inf, np.inf)\n", " \n", " if buy_stop:\n", " # Buy stop order\n", " # A buy stop order is entered at a stop price above the current market price\n", " \n", " # Since it's a pending order, we first need to check whether the entry price has been hit\n", " order_price, hit = check_price_hit_nb(\n", " c,\n", " price=signal_info.entry_price[signal],\n", " hit_below=False,\n", " can_use_ohlc=can_use_ohlc,\n", " )\n", " if hit:\n", " # If so, execute the order\n", " temp_info.entry_price_bar[signal] = bar\n", " return vbt.pf_nb.order_nb(np.inf, order_price)\n", " \n", " if sell_stop:\n", " # Sell stop order\n", " # A sell stop order is entered at a stop price below the current market price\n", " order_price, hit = check_price_hit_nb(\n", " c,\n", " price=signal_info.entry_price[signal],\n", " hit_below=True,\n", " can_use_ohlc=can_use_ohlc,\n", " )\n", " if hit:\n", " temp_info.entry_price_bar[signal] = bar\n", " return vbt.pf_nb.order_nb(-np.inf, order_price)\n", " \n", " # Here comes the stop order\n", " # Check whether the entry order has been executed\n", " if temp_info.entry_price_bar[signal] != -1:\n", " \n", " # We also need to check whether we're still in a position\n", " # in case stops have already closed out the position\n", " if c.last_position[signal] != 0:\n", " \n", " # If so, start with checking for potential SL orders\n", " # (remember that SL pessimistically comes before TP)\n", " # First, we need to know the number of potential and already executed SL levels\n", " # since we want to gradually reduce the position proportially to the number of levels\n", " # For example, one signal may define [12.35, 12.29] and another [17.53, nan]\n", " n_sl_levels = 0\n", " n_sl_hits = 0\n", " sl_levels = signal_info.sl[signal] # select 1d array from 2d array\n", " sl_bar = temp_info.sl_bar[signal] # same here\n", " for k in range(len(sl_levels)):\n", " if not np.isnan(sl_levels[k]):\n", " n_sl_levels += 1\n", " if sl_bar[k] != -1:\n", " n_sl_hits += 1\n", " \n", " # We can execute only one order at the current bar\n", " # Thus, if the price crossed multiple SL levels, we need to pack them into one order\n", " # Since SL levels are guaranteed to be sorted, we will check the most distant levels first\n", " # because if a distant stop has been hit, the closer stops are automatically hit too\n", " for k in range(n_sl_levels - 1, n_sl_hits - 1, -1):\n", " if not np.isnan(sl_levels[k]) and sl_bar[k] == -1:\n", " # Check against low for buy orders and against high for sell orders\n", " order_price, hit = check_price_hit_nb(\n", " c,\n", " price=sl_levels[k],\n", " hit_below=buy,\n", " can_use_ohlc=can_use_ohlc,\n", " )\n", " if hit:\n", " sl_bar[k] = bar\n", " # The further away the stop is, the more of the position needs to be closed\n", " # We will specify a target percentage\n", " # For example, for two stops it would be 0.5 (SL1) and 0.0 (SL2)\n", " # while for three stops it would be 0.66 (SL1), 0.33 (SL2), and 0.0 (SL3)\n", " # This works only if we went all in before (size=np.inf)!\n", " size = 1 - (k + 1) / n_sl_levels\n", " size_type = vbt.pf_enums.SizeType.TargetPercent\n", " if buy:\n", " return vbt.pf_nb.order_nb(size, order_price, size_type)\n", " else:\n", " # Size must be negative for short positions\n", " return vbt.pf_nb.order_nb(-size, order_price, size_type)\n", " \n", " # Same for potential TP orders\n", " n_tp_levels = 0\n", " n_tp_hits = 0\n", " tp_levels = signal_info.tp[signal]\n", " tp_bar = temp_info.tp_bar[signal]\n", " for k in range(len(tp_levels)):\n", " if not np.isnan(tp_levels[k]):\n", " n_tp_levels += 1\n", " if tp_bar[k] != -1:\n", " n_tp_hits += 1\n", " \n", " for k in range(n_tp_levels - 1, n_tp_hits - 1, -1):\n", " if not np.isnan(tp_levels[k]) and tp_bar[k] == -1:\n", " # Check against high for buy orders and against low for sell orders\n", " order_price, hit = check_price_hit_nb(\n", " c,\n", " price=tp_levels[k],\n", " hit_below=not buy,\n", " can_use_ohlc=can_use_ohlc,\n", " )\n", " if hit:\n", " tp_bar[k] = bar\n", " size = 1 - (k + 1) / n_tp_levels\n", " size_type = vbt.pf_enums.SizeType.TargetPercent\n", " if buy:\n", " return vbt.pf_nb.order_nb(size, order_price, size_type)\n", " else:\n", " return vbt.pf_nb.order_nb(-size, order_price, size_type)\n", " \n", " # If neither of orders has been executed, order nothing\n", " return vbt.pf_nb.order_nothing_nb()" ] }, { "cell_type": "code", "execution_count": null, "id": "c543610e-e215-4bee-afb9-2278a98b354a", "metadata": {}, "outputs": [], "source": [ "# Prepare signal information\n", "\n", "timestamp = vbt.dt.to_ns(signal_data.index) # nanoseconds\n", "order_type = signal_data.get(\"OrderType\").values\n", "entry_price = signal_data.get(\"EntryPrice\").values\n", "sl = signal_data.get(\"SL\").values\n", "tp1 = signal_data.get(\"TP1\").values\n", "tp2 = signal_data.get(\"TP2\").values\n", "tp3 = signal_data.get(\"TP3\").values\n", "tp4 = signal_data.get(\"TP4\").values\n", "\n", "n_signals = len(timestamp)\n", "print(n_signals)" ] }, { "cell_type": "code", "execution_count": null, "id": "eb1c5978-a2c1-4f8e-956c-419da2d01608", "metadata": {}, "outputs": [], "source": [ "# Since the signals are of the second granularity while the data is of the minute granularity,\n", "# we need to round the timestamp of the signal to the nearest minute\n", "# Timestamps represent the opening time, thus the second \"19:28:59\" belongs to the minute \"19:28:00\"\n", "\n", "timestamp = timestamp - timestamp % vbt.dt_nb.m_ns" ] }, { "cell_type": "code", "execution_count": null, "id": "bd91be38-ed79-4d15-bc28-49c70478a6ec", "metadata": {}, "outputs": [], "source": [ "# Create a named tuple for signal information\n", "\n", "signal_info = SignalInfo(\n", " timestamp=timestamp,\n", " order_type=order_type,\n", " entry_price=entry_price,\n", " sl=np.column_stack((sl,)),\n", " tp=np.column_stack((tp1, tp2, tp3, tp4))\n", ")\n", "\n", "n_sl_levels = signal_info.sl.shape[1]\n", "print(n_sl_levels)\n", "\n", "n_tp_levels = signal_info.tp.shape[1]\n", "print(n_tp_levels)" ] }, { "cell_type": "code", "execution_count": null, "id": "33b06286-9cfe-4cce-a0ab-2e583210c048", "metadata": {}, "outputs": [], "source": [ "# Important: re-run this cell every time you're running the simulation!\n", "# Create a named tuple for temporary information\n", "# All arrays below hold row indices, thus the default value is -1\n", "\n", "def build_temp_info(signal_info):\n", " return TempInfo(\n", " ts_bar=np.full(len(signal_info.timestamp), -1),\n", " entry_price_bar=np.full(len(signal_info.timestamp), -1),\n", " sl_bar=np.full(signal_info.sl.shape, -1),\n", " tp_bar=np.full(signal_info.tp.shape, -1)\n", " )\n", "\n", "temp_info = build_temp_info(signal_info)" ] }, { "cell_type": "code", "execution_count": null, "id": "e4e9e4fa-218f-4890-b479-b141ebc64e44", "metadata": {}, "outputs": [], "source": [ "# By default, vectorbt initializes an empty order array of the same shape as data\n", "# But since our data is highly granular, it would take a lot of RAM\n", "# Let's limit the number of records to one entry order and the maximum number of SL and TP orders\n", "# It will be applied per column\n", "\n", "max_order_records = 1 + n_sl_levels + n_tp_levels\n", "\n", "print(max_order_records)" ] }, { "cell_type": "code", "execution_count": null, "id": "99c8374f-0c00-490f-917d-f551f5037531", "metadata": {}, "outputs": [], "source": [ "# Perform the actual simulation\n", "# Since we don't broadcast data against any other array, vectorbt doesn't know anything about\n", "# our signal arrays and will simulate only the one column in our data\n", "# Thus, we need to tell it to expand the number of columns by the number of signals using tiling\n", "# But don't worry: thanks to flexible indexing vectorbt won't actually tile the data - good for RAM!\n", "# (it would tile the data if it had multiple columns though!)\n", "\n", "pf = vbt.Portfolio.from_order_func(\n", " data,\n", " order_func_nb=order_func_nb,\n", " order_args=(signal_info, temp_info),\n", " broadcast_kwargs=dict(tile=n_signals), # tiling here\n", " max_order_records=max_order_records,\n", " freq=\"minute\" # we have an irregular one-minute frequency\n", ")\n", "# (may take a minute...)" ] }, { "cell_type": "code", "execution_count": null, "id": "3a17d594-4b6a-48b1-aa18-761b0d6dbca6", "metadata": {}, "outputs": [], "source": [ "# Let's print out the order records in a human-readable format\n", "\n", "print(pf.orders.records_readable)" ] }, { "cell_type": "code", "execution_count": null, "id": "eb299f83-87fe-4367-9f45-3bd089f95491", "metadata": {}, "outputs": [], "source": [ "# We can notice above that there's no information whether an order is an SL or TP order\n", "# What we can do is to create our own order records with custom fields, copy the old ones over,\n", "# and tell the portfolio to use them instead of the default ones\n", "\n", "# First, we need to create an enumerated field for stop types\n", "# SL levels will come first, TP levels second, in an incremental fashion\n", "StopTypeT = namedtuple(\"StopTypeT\", [\n", " *[f\"SL{i + 1}\" for i in range(n_sl_levels)],\n", " *[f\"TP{i + 1}\" for i in range(n_tp_levels)]\n", "])\n", "StopType = StopTypeT(*range(len(StopTypeT._fields)))\n", "\n", "print(StopType)" ] }, { "cell_type": "code", "execution_count": null, "id": "2750453a-1238-4c02-95ba-ddcf5d4b372e", "metadata": {}, "outputs": [], "source": [ "# To extend order records, we just need to append new fields and construct a new data type\n", "\n", "custom_order_dt = np.dtype(vbt.pf_enums.order_fields + [(\"order_type\", np.int_), (\"stop_type\", np.int_)])\n", "\n", "def fix_order_records(order_records, signal_info, temp_info):\n", " # This is a function that will \"fix\" our default records and return the fixed ones\n", " \n", " # Create a new empty record array with the new data type\n", " # Empty here means that the array isn't initialized yet and contains junk data\n", " # Thus, make sure to override each single element\n", " custom_order_records = np.empty(order_records.shape, dtype=custom_order_dt)\n", " \n", " # Copy over the information from our default records\n", " for field, _ in vbt.pf_enums.order_fields:\n", " custom_order_records[field] = order_records[field]\n", " \n", " # Iterate over the new records and fill the stop type\n", " for i in range(len(custom_order_records)):\n", " record = custom_order_records[i]\n", " signal = record[\"col\"] # each column corresponds to a signal\n", " \n", " # Fill the order type\n", " record[\"order_type\"] = signal_info.order_type[signal]\n", " \n", " # Concatenate SL and TP row indices of this signal into a new list\n", " # We must do it the same way as we did in StopTypeT\n", " bar = [\n", " *temp_info.sl_bar[signal],\n", " *temp_info.tp_bar[signal]\n", " ]\n", " \n", " # Check whether the row index of this order is in this list\n", " # (which means that this order is a stop order)\n", " if record[\"idx\"] in bar:\n", " # If so, get the matching position in this list and use it as order type\n", " # It will correspond to a field in StopType\n", " record[\"stop_type\"] = bar.index(record[\"idx\"])\n", " else:\n", " record[\"stop_type\"] = -1\n", " return custom_order_records\n", " \n", "custom_order_records = fix_order_records(pf.order_records, signal_info, temp_info)\n", "print(custom_order_records[:10])" ] }, { "cell_type": "code", "execution_count": null, "id": "71749025-c6bf-4ffd-ba72-27d7d593e1c5", "metadata": {}, "outputs": [], "source": [ "# Having raw order records is not enough as vbt.Orders doesn't know what to do with the new field\n", "# (remember that vbt.Orders is used to analyze the records)\n", "# Let's create our custom class that subclasses vbt.Orders\n", "# and override the field config to also include the information on the new field\n", "\n", "from vectorbtpro.records.decorators import attach_fields, override_field_config\n", "\n", "@attach_fields(dict(stop_type=dict(attach_filters=True)))\n", "@override_field_config(dict(\n", " dtype=custom_order_dt, # specify the new data type\n", " settings=dict(\n", " order_type=dict(\n", " title=\"Order Type\", # specify a human-readable title for the field\n", " mapping=OrderType, # specify the mapper for the field\n", " ),\n", " stop_type=dict(\n", " title=\"Stop Type\", # specify a human-readable title for the field\n", " mapping=StopType, # specify the mapper for the field\n", " ),\n", " )\n", "))\n", "class CustomOrders(vbt.Orders):\n", " pass" ] }, { "cell_type": "code", "execution_count": null, "id": "b9a93714-db59-4ac2-a682-ca785c7158cc", "metadata": {}, "outputs": [], "source": [ "# Finally, let's replace the order records and the class in the portfolio\n", "\n", "pf = pf.replace(order_records=custom_order_records, orders_cls=CustomOrders)" ] }, { "cell_type": "code", "execution_count": null, "id": "7907f9c9-c4f7-48a0-898d-af5ac1f8ac60", "metadata": {}, "outputs": [], "source": [ "# We can now effortlessly analyze the stop type\n", "\n", "print(pf.orders.records_readable)" ] }, { "cell_type": "code", "execution_count": null, "id": "e6a94274-4751-411a-b2eb-2dd8f1a4cf44", "metadata": {}, "outputs": [], "source": [ "# And here are the signals that correspond to these records for verification\n", "\n", "print(signal_data.get())" ] }, { "cell_type": "code", "execution_count": null, "id": "95401889-0e74-49aa-835e-49a74e956c35", "metadata": {}, "outputs": [], "source": [ "# We can see that some signals were skipped, let's remove them from the portfolio\n", "\n", "pf = pf.loc[:, pf.orders.count() >= 1]\n", "\n", "print(len(pf.wrapper.columns))" ] }, { "cell_type": "code", "execution_count": null, "id": "1401064f-b2de-4f6e-921b-4be4a5a72f3a", "metadata": {}, "outputs": [], "source": [ "# There are various ways to analyze the data\n", "# For example, we can count how many times each stop type was triggered\n", "# Since we want to combine all trades in each statistic, we need to provide grouping\n", "\n", "print(pf.orders.stop_type.stats(group_by=True))" ] }, { "cell_type": "code", "execution_count": null, "id": "8fe58a9c-178d-4742-a558-471aa21d3af4", "metadata": {}, "outputs": [], "source": [ "# We can also get the position stats for P&L information\n", "\n", "print(pf.positions.stats(group_by=True))" ] }, { "cell_type": "code", "execution_count": null, "id": "6f3b6a19-c0e0-4d07-8ab0-7961e8229438", "metadata": {}, "outputs": [], "source": [ "# Let's plot a random trade\n", "# The only issue: we have too much data for that (thanks to Plotly)\n", "# Thus, crop it before plotting to remove irrelevant data\n", "\n", "signal = np.random.choice(len(pf.wrapper.columns))\n", "pf.trades.iloc[:, signal].crop().plot().show_svg()" ] }, { "cell_type": "code", "execution_count": null, "id": "40204477-d5ca-4402-81b5-bb162226f48b", "metadata": {}, "outputs": [], "source": [ "# Let's verify that the entry price stays within each candle\n", "\n", "print(pd.concat((\n", " pf.orders.records_readable[[\"Column\", \"Order Type\", \"Stop Type\", \"Price\"]],\n", " pf.orders.bar_high.to_readable(title=\"High\", only_values=True),\n", " pf.orders.bar_low.to_readable(title=\"Low\", only_values=True),\n", " pf.orders.price_status.to_readable(title=\"Price Status\", only_values=True),\n", "), axis=1))\n", "\n", "print(pf.orders.price_status.stats(group_by=True))" ] }, { "cell_type": "code", "execution_count": null, "id": "729339d4-6653-4d0f-8c80-ce7650655a00", "metadata": {}, "outputs": [], "source": [ "# Now, what if we're interested in portfolio metrics, such as the Sharpe ratio?\n", "# The problem is that most metrics are producing multiple (intermediate) time series \n", "# of the full shape, which is disastrous for RAM since our data will have to be tiled \n", "# by the number of columns. But here's a trick: merge order records of all columns into one, \n", "# as if we did the simulation on just one column!\n", "\n", "def merge_order_records(order_records):\n", " merged_order_records = order_records.copy()\n", " \n", " # New records should have only one column\n", " merged_order_records[\"col\"][:] = 0\n", " \n", " # Sort the records by the timestamp\n", " merged_order_records = merged_order_records[np.argsort(merged_order_records[\"idx\"])]\n", " \n", " # Reset the order ids\n", " merged_order_records[\"id\"][:] = np.arange(len(merged_order_records))\n", " return merged_order_records\n", "\n", "merged_order_records = merge_order_records(custom_order_records)\n", "print(merged_order_records[:10])" ] }, { "cell_type": "code", "execution_count": null, "id": "5499c82d-f517-41a5-928d-b2fe09eb869d", "metadata": {}, "outputs": [], "source": [ "# We also need to change the wrapper because it holds the information on our columns\n", "\n", "merged_wrapper = pf.wrapper.replace(columns=[0], ndim=1)" ] }, { "cell_type": "code", "execution_count": null, "id": "e09f8492-3cf9-467f-8797-7109edeff4ee", "metadata": {}, "outputs": [], "source": [ "# Is there any other array that requires merging?\n", "# Let's introspect the portfolio instance and search for arrays of the full shape\n", "\n", "print(pf)" ] }, { "cell_type": "code", "execution_count": null, "id": "7789f4ab-207a-4de5-a19b-ccd30f2e447a", "metadata": {}, "outputs": [], "source": [ "# There are none, thus replace only the records and the wrapper\n", "# Also, the previous individual portfolios were each using the starting capital of $100\n", "# Which was used by 100%, but since we merge columns together, we now may require less starting capital\n", "# Thus, we will determine it automatically\n", "\n", "merged_pf = pf.replace(\n", " order_records=merged_order_records, \n", " wrapper=merged_wrapper,\n", " init_cash=\"auto\"\n", ")" ] }, { "cell_type": "code", "execution_count": null, "id": "19abb33d-c964-43a6-9d63-7665beba4987", "metadata": {}, "outputs": [], "source": [ "# We can now get any portfolio statistic\n", "\n", "print(merged_pf.stats())" ] }, { "cell_type": "code", "execution_count": null, "id": "206e69ae-4a0c-46a5-95f2-523ff660bb74", "metadata": {}, "outputs": [], "source": [ "# You may wonder why the win rate and other trade metrics are different here\n", "# There are two reasons: \n", "# 1) portfolio stats uses exit trades (previously we used positions), \n", "# that is, each stop order is a trade\n", "# 2) after merging, there's no more information which order belongs to which trade, \n", "# thus positions are built in a sequential order\n", "\n", "# But to verify that both portfolio match, we can compare to the total profit to the previous trade P&L\n", "print(merged_pf.total_profit)\n", "print(pf.trades.pnl.sum(group_by=True))" ] }, { "cell_type": "code", "execution_count": null, "id": "5b45b35d-9860-4436-be99-00e3dd3b7b0b", "metadata": {}, "outputs": [], "source": [ "# We can now plot the entire portfolio\n", "\n", "merged_pf.resample(\"daily\").plot().show_svg()" ] }, { "cell_type": "code", "execution_count": null, "id": "eff09d0c-c36f-4f21-97e7-41240f3f07e2", "metadata": {}, "outputs": [], "source": [ "# The main issue with using from_order_func is that we need to go over the entire data \n", "# as many times as there are signals because the order function is run on single each element\n", "# A far more time-efficient approach would be processing trades in a sequential order\n", "# This is easily possible because our trades are perfectly sorted - we don't need\n", "# to process a signal if the previous signal hasn't been processed yet\n", "# Also, because the scope of this notebook assumes that signals are independent, \n", "# we can simulate them independently and stop each signal's simulation once its position has been closed out\n", "# This is only possible by writing an own simulator (which isn't as scary as it sounds!)\n", "\n", "# To avoid duplicating our signal logic, we will re-use order_func_nb by passing our own limited context\n", "# It will consist only of the fields that are required by our order_func_nb\n", "\n", "OrderContext = namedtuple(\"OrderContext\", [\n", " \"i\",\n", " \"col\",\n", " \"index\",\n", " \"open\", \n", " \"high\",\n", " \"low\",\n", " \"close\",\n", " \"last_position\"\n", "])" ] }, { "cell_type": "code", "execution_count": null, "id": "2049c971-000a-46e8-a477-c012a664e3fb", "metadata": {}, "outputs": [], "source": [ "# Let's build the simulator\n", "# Technically, it's just a regular Numba function that does whatever we want\n", "# What's special about it is that it calls the vectorbt's low-level API to place orders and \n", "# updates the simulation state such as cash balances and positions\n", "# We'll first determine the bars where the signals happen, and then run a smaller simulation\n", "# on the first signal. Once the signal's position has been closed out, we'll terminate the simulation\n", "# and continue with the next signal, until all signals are processed.\n", "\n", "@njit(boundscheck=True)\n", "def signal_simulator_nb(\n", " index, \n", " open, \n", " high, \n", " low, \n", " close, \n", " signal_info,\n", " temp_info\n", "):\n", " # Determine the number of signals, levels, and potential orders\n", " n_signals = len(signal_info.timestamp)\n", " n_sl_levels = signal_info.sl.shape[1]\n", " n_tp_levels = signal_info.tp.shape[1]\n", " max_order_records = 1 + n_sl_levels + n_tp_levels\n", " \n", " # Temporary arrays\n", " \n", " # This array will hold the bar where each signal happens\n", " signal_bars = np.full(n_signals, -1, dtype=np.int_)\n", " \n", " # This array will hold order records\n", " # Initially, order records are uninitialized (junk data) but we will fill them gradually\n", " # Notice how we use our own data type custom_order_dt - we can fill order type and stop type \n", " # fields right during the simulation\n", " order_records = np.empty((max_order_records, n_signals), dtype=custom_order_dt)\n", " \n", " # To be able to distinguish between uninitialized and initialized (filled) orders,\n", " # we'll create another array holding the number of filled orders for each signal\n", " # For example, if order_records has a maximum of 6 rows and only one record is filled,\n", " # order_counts will be 1 for this signal, so vectorbt can remove 5 unfilled orders later\n", " order_counts = np.full(n_signals, 0, dtype=np.int_)\n", " \n", " # order_func_nb requires last_position, which holds the position of each signal\n", " last_position = np.full(n_signals, 0.0, dtype=np.float_)\n", " \n", " # First, we need to determine the bars where the signals happen\n", " # Even though we know their timestamps, we need to translate them into absolute indices\n", " signal = 0\n", " bar = 0\n", " while signal < n_signals and bar < len(index):\n", " if index[bar] == signal_info.timestamp[signal]:\n", " # If there's a match, save the bar and continue with the next signal on the next bar\n", " signal_bars[signal] = bar\n", " signal += 1\n", " bar += 1\n", " elif index[bar] > signal_info.timestamp[signal]:\n", " # If we're past the signal, continue with the next signal on the same bar\n", " signal += 1\n", " else:\n", " # If we haven't hit the signal yet, continue on the next bar\n", " bar += 1\n", "\n", " # Once we know the bars, we can iterate over signals in a loop and simulate them independently\n", " for signal in range(n_signals):\n", " \n", " # If there was no match in the previous level, skip the simulation\n", " from_bar = signal_bars[signal]\n", " if from_bar == -1:\n", " continue\n", " \n", " # This is our initial execution state, which holds the most important balances\n", " # We'll start with a starting capital of $100\n", " exec_state = vbt.pf_enums.ExecState(\n", " cash=100.0,\n", " position=0.0,\n", " debt=0.0,\n", " locked_cash=0.0,\n", " free_cash=100.0,\n", " val_price=np.nan,\n", " value=np.nan\n", " )\n", " \n", " # Here comes the actual simulation that starts from the signal's bar and\n", " # ends either once we processed all bars or once the position has been closed out (see below)\n", " for bar in range(from_bar, len(index)):\n", " \n", " # Create a named tuple holding the current context (this is \"c\" in order_func_nb)\n", " c = OrderContext( \n", " i=bar,\n", " col=signal,\n", " index=index,\n", " open=open,\n", " high=high,\n", " low=low,\n", " close=close,\n", " last_position=last_position,\n", " )\n", " \n", " # If the first bar has no data, skip the simulation\n", " if bar == from_bar and not has_data_nb(c):\n", " break\n", "\n", " # Price area holds the OHLC of the current bar\n", " price_area = vbt.pf_enums.PriceArea(\n", " vbt.flex_select_nb(open, bar, signal), \n", " vbt.flex_select_nb(high, bar, signal), \n", " vbt.flex_select_nb(low, bar, signal), \n", " vbt.flex_select_nb(close, bar, signal)\n", " )\n", " \n", " # Why do we need to redefine the execution state?\n", " # Because we need to manually update the valuation price and the value of the column\n", " # to be able to use complex size types such as target percentages\n", " # As in order_func_nb, we will use the opening price as the valuation price\n", " # Why doesn't vectorbt do it on its own? Because it doesn't know anything\n", " # about other columns. For example, imagine having a grouped simulation with 100 columns sharing\n", " # the same cash: using the formula below wouldn't consider the positions of other 99 columns.\n", " exec_state = vbt.pf_enums.ExecState(\n", " cash=exec_state.cash,\n", " position=exec_state.position,\n", " debt=exec_state.debt,\n", " locked_cash=exec_state.locked_cash,\n", " free_cash=exec_state.free_cash,\n", " val_price=price_area.open,\n", " value=exec_state.cash + price_area.open * exec_state.position\n", " )\n", " \n", " # Let's run the order function, which returns an order\n", " # Remember when we used order_nothing_nb()? It also returns an order but with filled with nans\n", " order = order_func_nb(c, signal_info, temp_info)\n", " \n", " # Here's the main function in the entire simulation, which 1) executes the order,\n", " # 2) updates the execution state, and 3) updates the order_records and order_counts\n", " order_result, exec_state = vbt.pf_nb.process_order_nb(\n", " signal, \n", " signal, \n", " bar,\n", " exec_state=exec_state,\n", " order=order,\n", " price_area=price_area,\n", " order_records=order_records,\n", " order_counts=order_counts\n", " )\n", " \n", " # If the order was successful (i.e., it's now in order_records),\n", " # we need to manually set the order type and stop type\n", " if order_result.status == vbt.pf_enums.OrderStatus.Filled:\n", " \n", " # Use this line to get the last order of any signal\n", " filled_order = order_records[order_counts[signal] - 1, signal]\n", " \n", " # Fill the order type\n", " filled_order[\"order_type\"] = signal_info.order_type[signal]\n", " \n", " # Fill the stop type by going through the SL and TP levels and checking whether \n", " # the order bar matches the level bar\n", " order_is_stop = False\n", " for k in range(n_sl_levels):\n", " if filled_order[\"idx\"] == temp_info.sl_bar[signal, k]:\n", " filled_order[\"stop_type\"] = k\n", " order_is_stop = True\n", " break\n", " for k in range(n_tp_levels):\n", " if filled_order[\"idx\"] == temp_info.tp_bar[signal, k]:\n", " filled_order[\"stop_type\"] = n_sl_levels + k # TP indices come after SL indices\n", " order_is_stop = True\n", " break\n", " \n", " # If order bar hasn't been matched, it's not a stop order\n", " if not order_is_stop:\n", " filled_order[\"stop_type\"] = -1\n", " \n", " # If we're not in position after an entry anymore, terminate the simulation\n", " if temp_info.entry_price_bar[signal] != -1:\n", " if exec_state.position == 0:\n", " break\n", " \n", " # Don't forget to update the position array\n", " last_position[signal] = exec_state.position\n", " \n", " # Remove uninitialized order records and flatten 2d array into a 1d array\n", " return vbt.nb.repartition_nb(order_records, order_counts)" ] }, { "cell_type": "code", "execution_count": null, "id": "a0b0f696-b9b6-4b72-9c8c-69d0ed8acf5c", "metadata": {}, "outputs": [], "source": [ "# Numba requires arrays in a NumPy format, and to avoid preparing them each time,\n", "# let's create a function that only takes the data and signal information, and does everything else for us\n", "\n", "def signal_simulator(data, signal_info):\n", " temp_info = build_temp_info(signal_info)\n", " \n", " custom_order_records = signal_simulator_nb(\n", " index=data.index.vbt.to_ns(), # convert to nanoseconds\n", " open=vbt.to_2d_array(data.open), # flexible indexing requires inputs to be 2d\n", " high=vbt.to_2d_array(data.high),\n", " low=vbt.to_2d_array(data.low),\n", " close=vbt.to_2d_array(data.close),\n", " signal_info=signal_info,\n", " temp_info=temp_info\n", " )\n", " \n", " # We have order records, what's left is wrapping them with a Portfolio\n", " # Required are three things: 1) array wrapper with index and columns, 2) order records, and 3) prices\n", " # We also need to specify the starting capital that we used during the simulation\n", " return vbt.Portfolio(\n", " wrapper=vbt.ArrayWrapper(\n", " index=data.index, \n", " columns=range(len(signal_info.timestamp)), # one column per signal\n", " freq=\"minute\"\n", " ),\n", " order_records=custom_order_records,\n", " open=data.open,\n", " high=data.high,\n", " low=data.low,\n", " close=data.close,\n", " init_cash=100.0,\n", " orders_cls=CustomOrders\n", " )\n", "\n", "# That's it!\n", "pf = signal_simulator(data, signal_info)\n", "\n", "print(pf.trades.pnl.sum(group_by=True))" ] }, { "cell_type": "code", "execution_count": null, "id": "c4532223-7171-47d1-a99a-41d13d617239", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.12" } }, "nbformat": 4, "nbformat_minor": 5 }