vectorized aggregator, minor changes

David Brazda
2024-05-17 14:04:48 +02:00
parent 9e7d974ebd
commit dc46ab2b49
17 changed files with 36066 additions and 41746 deletions

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long


@@ -0,0 +1,316 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Loading trades and vectorized aggregation"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"from numba import jit\n",
"from alpaca.data.historical import StockHistoricalDataClient\n",
"from v2realbot.config import ACCOUNT1_PAPER_API_KEY, ACCOUNT1_PAPER_SECRET_KEY, DATA_DIR\n",
"from alpaca.data.requests import StockTradesRequest\n",
"from v2realbot.enums.enums import BarType\n",
"import time\n",
"\n",
"from datetime import datetime\n",
"from v2realbot.utils.utils import parse_alpaca_timestamp, ltp, zoneNY, send_to_telegram, fetch_calendar_data\n",
"import pyarrow\n",
"from v2realbot.loader.aggregator_vectorized import fetch_daily_stock_trades, fetch_trades_parallel, generate_time_bars_nb, aggregate_trades\n",
"import vectorbtpro as vbt\n",
"\n",
"vbt.settings.set_theme(\"dark\")\n",
"vbt.settings['plotting']['layout']['width'] = 1280\n",
"vbt.settings.plotting.auto_rangebreaks = True\n",
"# Set the option to display with pagination\n",
"pd.set_option('display.notebook_repr_html', True)\n",
"pd.set_option('display.max_rows', 10) # Number of rows per page"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"symbol = \"SPY\"\n",
"#datetime in zoneNY \n",
"day_start = datetime(2024, 5, 15, 9, 30, 0)\n",
"day_stop = datetime(2024, 5, 16, 16, 00, 0)\n",
"day_start = zoneNY.localize(day_start)\n",
"day_stop = zoneNY.localize(day_stop)\n",
"#neslo by zrychlit, kdyz se zobrazuje pomalu Searching cache - nejaky bottle neck?\n",
"df = fetch_trades_parallel(symbol, day_start, day_stop, minsize=50) #exclude_conditions=['C','O','4','B','7','V','P','W','U','Z','F'])\n",
"ohlcv_df = aggregate_trades(symbol=symbol, trades_df=df, resolution=1, type=BarType.TIME)\n",
"#df.info()\n",
"ohlcv_df\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"basic_data = vbt.Data.from_data(vbt.symbol_dict({symbol: ohlcv_df}), tz_convert=zoneNY)\n",
"vbt.settings['plotting']['auto_rangebreaks'] = True\n",
"basic_data.ohlcv.plot()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pickle\n",
"from v2realbot.config import ACCOUNT1_PAPER_API_KEY, ACCOUNT1_PAPER_SECRET_KEY, DATA_DIR\n",
"import gzip\n",
"\n",
"file_path = f\"{DATA_DIR}/tradecache/BAC-1709044200-1709067600.cache.gz\"\n",
"\n",
"with gzip.open(file_path, 'rb') as fp:\n",
" tradesResponse = pickle.load(fp)\n",
"\n",
"tradesResponse"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def convert_dict_to_multiindex_df(tradesResponse):\n",
" # Create a DataFrame for each key and add the key as part of the MultiIndex\n",
" dfs = []\n",
" for key, values in tradesResponse.items():\n",
" df = pd.DataFrame(values)\n",
" # Rename columns\n",
" # Select and order columns explicitly\n",
" #print(df)\n",
" df = df[['t', 'x', 'p', 's', 'i', 'c','z']]\n",
" df.rename(columns={'t': 'timestamp', 'c': 'conditions', 'p': 'price', 's': 'size', 'x': 'exchange', 'z':'tape', 'i':'id'}, inplace=True)\n",
" df['symbol'] = key # Add ticker as a column\n",
" df['timestamp'] = pd.to_datetime(df['timestamp']) # Convert 't' from string to datetime before setting it as an index\n",
" df.set_index(['symbol', 'timestamp'], inplace=True) # Set the multi-level index using both 'ticker' and 't'\n",
" df = df.tz_convert(zoneNY, level='timestamp')\n",
" dfs.append(df)\n",
"\n",
" # Concatenate all DataFrames into a single DataFrame with MultiIndex\n",
" final_df = pd.concat(dfs)\n",
"\n",
" return final_df\n",
"\n",
"# Convert and print the DataFrame\n",
"df = convert_dict_to_multiindex_df(tradesResponse)\n",
"df\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.info()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ohlcv_df.info()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ohlcv_df = aggregate_trades(symbol=symbol, trades_df=df, resolution=1000, type=\"dollar\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ohlcv_df.index.strftime('%Y-%m-%d %H').unique()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#ohlcv_df.groupby(ohlcv_df.index.date).size()\n",
"ohlcv_df.head(100)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#access just BCA\n",
"df_filtered = df.loc[\"BAC\"]\n",
"\n",
"df_filtered.info()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_filtered= df_filtered.reset_index()\n",
"ticks = df_filtered[['timestamp', 'price', 'size']].to_numpy()\n",
"ticks\n",
"timestamps = ticks[:, 0]\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_filtered= df_filtered.reset_index()\n",
"ticks = df_filtered[['timestamp', 'price', 'size']].to_numpy()\n",
"\n",
"#timestamp to integer\n",
"# Extract the timestamps column (assuming it's the first column)\n",
"timestamps = ticks[:, 0]\n",
"\n",
"# Convert the timestamps to Unix timestamps in seconds with microsecond precision\n",
"unix_timestamps_s = np.array([ts.timestamp() for ts in timestamps], dtype='float64')\n",
"\n",
"# Replace the original timestamps in the NumPy array with the converted Unix timestamps\n",
"ticks[:, 0] = unix_timestamps_s\n",
"\n",
"#ticks[:, 0] = pd.to_datetime(ticks[:, 0]).astype('int64') // 1_000_000_000 # Convert to Unix timestamp\n",
"ticks\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ticks = ticks.astype(np.float64)\n",
"ticks"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n",
"resolution = 1 # Example resolution of 60 seconds\n",
"ohlcv_bars = generate_time_bars_nb(ticks, resolution)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ohlcv_bars"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Convert the resulting array back to a DataFrame\n",
"columns = ['time', 'open', 'high', 'low', 'close', 'volume', 'trades']\n",
"ohlcv_df = pd.DataFrame(ohlcv_bars, columns=columns)\n",
"ohlcv_df['time'] = pd.to_datetime(ohlcv_df['time'], unit='s')\n",
"ohlcv_df.set_index('time', inplace=True)\n",
"ohlcv_df.index = ohlcv_df.index.tz_localize('UTC').tz_convert(zoneNY)\n",
"#ohlcv_df = ohlcv_df.loc[\"2024-03-1 15:50:00\":\"2024-03-28 13:40:00\"]\n",
"#ohlcv_df.index.strftime('%Y-%m-%d %H').unique()\n",
"\n",
"ohlcv_df"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.11"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
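The notebook above builds one-second time bars and, further down, dollar bars (resolution=1000) from the same trades frame. For completeness, a minimal sketch of the third variant, volume bars, reusing the notebook's df, symbol and imports; the bucket of 10_000 shares per bar is an illustrative value, not from the repo:

volume_df = aggregate_trades(symbol=symbol, trades_df=df, resolution=10_000, type=BarType.VOLUME)
volume_df.head()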

research/rsi_alpaca.ipynb Normal file (26673 lines)

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

research/test1sbars.ipynb Normal file (421 lines)

@@ -0,0 +1,421 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from v2realbot.tools.loadbatch import load_batch\n",
"from v2realbot.utils.utils import zoneNY\n",
"import pandas as pd\n",
"import numpy as np\n",
"import vectorbtpro as vbt\n",
"from itables import init_notebook_mode, show\n",
"\n",
"init_notebook_mode(all_interactive=True)\n",
"\n",
"vbt.settings.set_theme(\"dark\")\n",
"vbt.settings['plotting']['layout']['width'] = 1280\n",
"vbt.settings.plotting.auto_rangebreaks = True\n",
"# Set the option to display with pagination\n",
"pd.set_option('display.notebook_repr_html', True)\n",
"pd.set_option('display.max_rows', 10) # Number of rows per page\n",
"\n",
"res, df = load_batch(batch_id=\"0fb5043a\", #46 days 1.3 - 6.5.\n",
" space_resolution_evenly=False,\n",
" indicators_columns=[\"Rsi14\"],\n",
" main_session_only=True,\n",
" verbose = False)\n",
"if res < 0:\n",
" print(\"Error\" + str(res) + str(df))\n",
"df = df[\"bars\"]\n",
"\n",
"df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# filter dates"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#filter na dny\n",
"# dates_of_interest = pd.to_datetime(['2024-04-22', '2024-04-23']).tz_localize('US/Eastern')\n",
"# filtered_df = df.loc[df.index.normalize().isin(dates_of_interest)]\n",
"\n",
"# df = filtered_df\n",
"# df.info()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import plotly.io as pio\n",
"pio.renderers.default = 'notebook'\n",
"\n",
"#naloadujeme do vbt symbol as column\n",
"basic_data = vbt.Data.from_data({\"BAC\": df}, tz_convert=zoneNY)\n",
"start_date = pd.Timestamp('2024-03-12 09:30', tz=zoneNY)\n",
"end_date = pd.Timestamp('2024-03-13 16:00', tz=zoneNY)\n",
"\n",
"#basic_data = basic_data.transform(lambda df: df[df.index.date == start_date.date()])\n",
"#basic_data = basic_data.transform(lambda df: df[(df.index >= start_date) & (df.index <= end_date)])\n",
"#basic_data.data[\"BAC\"].info()\n",
"\n",
"# fig = basic_data.plot(plot_volume=False)\n",
"# pivot_info = basic_data.run(\"pivotinfo\", up_th=0.003, down_th=0.002)\n",
"# #pivot_info.plot()\n",
"# pivot_info.plot(fig=fig, conf_value_trace_kwargs=dict(visible=True))\n",
"# fig.show()\n",
"\n",
"\n",
"# rsi14 = basic_data.data[\"BAC\"][\"Rsi14\"].rename(\"Rsi14\")\n",
"\n",
"# rsi14.vbt.plot().show()\n",
"#basic_data.xloc[\"09:30\":\"10:00\"].data[\"BAC\"].vbt.ohlcv.plot().show()\n",
"\n",
"vbt.settings.plotting.auto_rangebreaks = True\n",
"#basic_data.data[\"BAC\"].vbt.ohlcv.plot()\n",
"\n",
"#basic_data.data[\"BAC\"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"m1_data = basic_data[['Open', 'High', 'Low', 'Close', 'Volume']]\n",
"\n",
"m1_data.data[\"BAC\"]\n",
"#m5_data = m1_data.resample(\"5T\")\n",
"\n",
"#m5_data.data[\"BAC\"].head(10)\n",
"\n",
"# m15_data = m1_data.resample(\"15T\")\n",
"\n",
"# m15 = m15_data.data[\"BAC\"]\n",
"\n",
"# m15.vbt.ohlcv.plot()\n",
"\n",
"# m1_data.wrapper.index\n",
"\n",
"# m1_resampler = m1_data.wrapper.get_resampler(\"1T\")\n",
"# m1_resampler.index_difference(reverse=True)\n",
"\n",
"\n",
"# m5_resampler.prettify()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# defining ENTRY WINDOW and forced EXIT window"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#m1_data.data[\"BAC\"].info()\n",
"import datetime\n",
"# Define the market open and close times\n",
"market_open = datetime.time(9, 30)\n",
"market_close = datetime.time(16, 0)\n",
"entry_window_opens = 1\n",
"entry_window_closes = 350\n",
"\n",
"forced_exit_start = 380\n",
"forced_exit_end = 390\n",
"\n",
"forced_exit = m1_data.symbol_wrapper.fill(False)\n",
"entry_window_open= m1_data.symbol_wrapper.fill(False)\n",
"\n",
"# Calculate the time difference in minutes from market open for each timestamp\n",
"elapsed_min_from_open = (forced_exit.index.hour - market_open.hour) * 60 + (forced_exit.index.minute - market_open.minute)\n",
"\n",
"entry_window_open[(elapsed_min_from_open >= entry_window_opens) & (elapsed_min_from_open < entry_window_closes)] = True\n",
"forced_exit[(elapsed_min_from_open >= forced_exit_start) & (elapsed_min_from_open < forced_exit_end)] = True\n",
"\n",
"#entry_window_open.info()\n",
"# forced_exit.tail(100)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"close = m1_data.close\n",
"\n",
"rsi = vbt.RSI.run(close, window=14)\n",
"\n",
"long_entries = (rsi.rsi.vbt.crossed_below(20) & entry_window_open)\n",
"long_exits = (rsi.rsi.vbt.crossed_above(70) | forced_exit)\n",
"#long_entries.info()\n",
"#number of trues and falses in long_entries\n",
"long_entries.value_counts()\n",
"#long_exits.value_counts()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def plot_rsi(rsi, close, entries, exits):\n",
" fig = vbt.make_subplots(rows=1, cols=1, shared_xaxes=True, specs=[[{\"secondary_y\": True}]], vertical_spacing=0.02, subplot_titles=(\"RSI\", \"Price\" ))\n",
" close.vbt.plot(fig=fig, add_trace_kwargs=dict(secondary_y=True))\n",
" rsi.plot(fig=fig, add_trace_kwargs=dict(secondary_y=False))\n",
" entries.vbt.signals.plot_as_entries(rsi.rsi, fig=fig, add_trace_kwargs=dict(secondary_y=False)) \n",
" exits.vbt.signals.plot_as_exits(rsi.rsi, fig=fig, add_trace_kwargs=dict(secondary_y=False)) \n",
" return fig\n",
"\n",
"plot_rsi(rsi, close, long_entries, long_exits)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"vbt.phelp(vbt.Portfolio.from_signals)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"sl_stop = np.arange(0.03/100, 0.2/100, 0.02/100).tolist()\n",
"# Using the round function\n",
"sl_stop = [round(val, 4) for val in sl_stop]\n",
"print(sl_stop)\n",
"sl_stop = vbt.Param(sl_stop) #np.nan mean s no stoploss\n",
"\n",
"pf = vbt.Portfolio.from_signals(close=close, entries=long_entries, sl_stop=sl_stop, tp_stop = sl_stop, exits=long_exits,fees=0.0167/100, freq=\"1s\") #sl_stop=sl_stop, tp_stop = sl_stop, \n",
"\n",
"#pf.stats()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"pf.plot()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"pf[(0.0015,0.0013)].plot()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"pf[0.03].plot_trade_signals()\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# pristup k pf jako multi index"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#pf[0.03].plot()\n",
"#pf.order_records\n",
"pf[(0.03)].stats()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#zgrupovane statistiky\n",
"stats_df = pf.stats([\n",
" 'total_return',\n",
" 'total_trades',\n",
" 'win_rate',\n",
" 'expectancy'\n",
"], agg_func=None)\n",
"stats_df\n",
"\n",
"\n",
"stats_df.nlargest(50, 'Total Return [%]')\n",
"#stats_df.info()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"pf[(0.0011,0.0013)].plot()\n",
"\n",
"#pf[(0.0011,0.0013000000000000002)].plot()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from pandas.tseries.offsets import DateOffset\n",
"\n",
"temp_data = basic_data['2024-4-22']\n",
"temp_data\n",
"res1m = temp_data[[\"Open\", \"High\", \"Low\", \"Close\", \"Volume\"]]\n",
"\n",
"# Define a custom date offset that starts at 9:30 AM and spans 4 hours\n",
"custom_offset = DateOffset(hours=4, minutes=30)\n",
"\n",
"# res1m = res1m.get().resample(\"4H\").agg({ \n",
"# \"Open\": \"first\",\n",
"# \"High\": \"max\",\n",
"# \"Low\": \"min\",\n",
"# \"Close\": \"last\",\n",
"# \"Volume\": \"sum\"\n",
"# })\n",
"\n",
"res4h = res1m.resample(\"1h\", resample_kwargs=dict(origin=\"start\"))\n",
"\n",
"res4h.data\n",
"\n",
"res15m = res1m.resample(\"15T\", resample_kwargs=dict(origin=\"start\"))\n",
"\n",
"res15m.data[\"BAC\"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"@vbt.njit\n",
"def long_entry_place_func_nb(c, low, close, time_in_ns, rsi14, window_open, window_close):\n",
" market_open_minutes = 570 # 9 hours * 60 minutes + 30 minutes\n",
"\n",
" for out_i in range(len(c.out)):\n",
" i = c.from_i + out_i\n",
"\n",
" current_minutes = vbt.dt_nb.hour_nb(time_in_ns[i]) * 60 + vbt.dt_nb.minute_nb(time_in_ns[i])\n",
" #print(\"current_minutes\", current_minutes)\n",
" # Calculate elapsed minutes since market open at 9:30 AM\n",
" elapsed_from_open = current_minutes - market_open_minutes\n",
" elapsed_from_open = elapsed_from_open if elapsed_from_open >= 0 else 0\n",
" #print( \"elapsed_from_open\", elapsed_from_open)\n",
"\n",
" #elapsed_from_open = elapsed_minutes_from_open_nb(time_in_ns) \n",
" in_window = elapsed_from_open > window_open and elapsed_from_open < window_close\n",
" #print(\"in_window\", in_window)\n",
" # if in_window:\n",
" # print(\"in window\")\n",
"\n",
" if in_window and rsi14[i] > 60: # and low[i, c.col] <= hit_price: # and hour == 9: # (4)!\n",
" return out_i\n",
" return -1\n",
"\n",
"@vbt.njit\n",
"def long_exit_place_func_nb(c, high, close, time_index, tp, sl): # (5)!\n",
" entry_i = c.from_i - c.wait\n",
" entry_price = close[entry_i, c.col]\n",
" hit_price = entry_price * (1 + tp)\n",
" stop_price = entry_price * (1 - sl)\n",
" for out_i in range(len(c.out)):\n",
" i = c.from_i + out_i\n",
" last_bar_of_day = vbt.dt_nb.day_changed_nb(time_index[i], time_index[i + 1])\n",
"\n",
" #print(next_day)\n",
" if last_bar_of_day: #pokud je dalsi next day, tak zavirame posledni\n",
" print(\"ted\",out_i)\n",
" return out_i\n",
" if close[i, c.col] >= hit_price or close[i, c.col] <= stop_price :\n",
" return out_i\n",
" return -1\n",
"\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df = pd.DataFrame(np.random.random(size=(5, 10)), columns=list('abcdefghij'))\n",
"\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.sum()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.11"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
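The entry and forced-exit windows in this notebook are built by hand from minutes elapsed since the 9:30 open. A small sketch of the same mask logic as a reusable helper (assuming a tz-aware US/Eastern DatetimeIndex; the helper name is illustrative, not part of the repo):

import pandas as pd

def minutes_window_mask(index: pd.DatetimeIndex, lo: int, hi: int) -> pd.Series:
    # minutes elapsed since the 9:30 market open (negative before the open)
    elapsed = (index.hour - 9) * 60 + (index.minute - 30)
    # True inside [lo, hi) minutes after the open, as in the notebook
    return pd.Series((elapsed >= lo) & (elapsed < hi), index=index)

With it, entry_window_open corresponds to minutes_window_mask(df.index, 1, 350) and the forced exit to minutes_window_mask(df.index, 380, 390).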

File diff suppressed because one or more lines are too long


@@ -1,620 +0,0 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import pyarrow\n",
"import numpy as np\n",
"from numba import jit\n",
"import v2realbot.utils.config_handler as cfh"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Další info k pokračování je zde https://blog.quantinsti.com/tick-tick-ohlc-data-pandas-tutorial/"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"DatetimeIndex: 190261 entries, 2024-04-22 13:30:00.267711+00:00 to 2024-04-22 19:59:59.987614+00:00\n",
"Data columns (total 6 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 exchange 190261 non-null object \n",
" 1 price 190261 non-null float64\n",
" 2 size 190261 non-null float64\n",
" 3 id 190261 non-null int64 \n",
" 4 conditions 190261 non-null object \n",
" 5 tape 190261 non-null object \n",
"dtypes: float64(2), int64(1), object(3)\n",
"memory usage: 10.2+ MB\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>exchange</th>\n",
" <th>price</th>\n",
" <th>size</th>\n",
" <th>id</th>\n",
" <th>conditions</th>\n",
" <th>tape</th>\n",
" </tr>\n",
" <tr>\n",
" <th>timestamp</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2024-04-22 13:30:00.267711+00:00</th>\n",
" <td>K</td>\n",
" <td>36.890</td>\n",
" <td>5.0</td>\n",
" <td>52983525037630</td>\n",
" <td>[ , F, I]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-04-22 13:30:00.300501+00:00</th>\n",
" <td>D</td>\n",
" <td>37.005</td>\n",
" <td>1.0</td>\n",
" <td>71675241117014</td>\n",
" <td>[ , I]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-04-22 13:30:00.305439+00:00</th>\n",
" <td>D</td>\n",
" <td>37.005</td>\n",
" <td>1.0</td>\n",
" <td>71675241117496</td>\n",
" <td>[ , I]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-04-22 13:30:00.314520+00:00</th>\n",
" <td>D</td>\n",
" <td>37.005</td>\n",
" <td>1.0</td>\n",
" <td>71675241118034</td>\n",
" <td>[ , I]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-04-22 13:30:00.335201+00:00</th>\n",
" <td>D</td>\n",
" <td>37.005</td>\n",
" <td>1.0</td>\n",
" <td>71675241121369</td>\n",
" <td>[ , I]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-04-22 19:59:59.902614+00:00</th>\n",
" <td>V</td>\n",
" <td>37.750</td>\n",
" <td>1100.0</td>\n",
" <td>56480705310575</td>\n",
" <td>[ ]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-04-22 19:59:59.977134+00:00</th>\n",
" <td>N</td>\n",
" <td>37.745</td>\n",
" <td>300.0</td>\n",
" <td>52983559963478</td>\n",
" <td>[ ]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-04-22 19:59:59.977137+00:00</th>\n",
" <td>N</td>\n",
" <td>37.740</td>\n",
" <td>7300.0</td>\n",
" <td>52983559963696</td>\n",
" <td>[ ]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-04-22 19:59:59.978626+00:00</th>\n",
" <td>V</td>\n",
" <td>37.750</td>\n",
" <td>16.0</td>\n",
" <td>56480706886228</td>\n",
" <td>[ , I]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-04-22 19:59:59.987614+00:00</th>\n",
" <td>N</td>\n",
" <td>37.745</td>\n",
" <td>30.0</td>\n",
" <td>52983559963958</td>\n",
" <td>[ , I]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>190261 rows × 6 columns</p>\n",
"</div>"
],
"text/plain": [
" exchange price size id \\\n",
"timestamp \n",
"2024-04-22 13:30:00.267711+00:00 K 36.890 5.0 52983525037630 \n",
"2024-04-22 13:30:00.300501+00:00 D 37.005 1.0 71675241117014 \n",
"2024-04-22 13:30:00.305439+00:00 D 37.005 1.0 71675241117496 \n",
"2024-04-22 13:30:00.314520+00:00 D 37.005 1.0 71675241118034 \n",
"2024-04-22 13:30:00.335201+00:00 D 37.005 1.0 71675241121369 \n",
"... ... ... ... ... \n",
"2024-04-22 19:59:59.902614+00:00 V 37.750 1100.0 56480705310575 \n",
"2024-04-22 19:59:59.977134+00:00 N 37.745 300.0 52983559963478 \n",
"2024-04-22 19:59:59.977137+00:00 N 37.740 7300.0 52983559963696 \n",
"2024-04-22 19:59:59.978626+00:00 V 37.750 16.0 56480706886228 \n",
"2024-04-22 19:59:59.987614+00:00 N 37.745 30.0 52983559963958 \n",
"\n",
" conditions tape \n",
"timestamp \n",
"2024-04-22 13:30:00.267711+00:00 [ , F, I] A \n",
"2024-04-22 13:30:00.300501+00:00 [ , I] A \n",
"2024-04-22 13:30:00.305439+00:00 [ , I] A \n",
"2024-04-22 13:30:00.314520+00:00 [ , I] A \n",
"2024-04-22 13:30:00.335201+00:00 [ , I] A \n",
"... ... ... \n",
"2024-04-22 19:59:59.902614+00:00 [ ] A \n",
"2024-04-22 19:59:59.977134+00:00 [ ] A \n",
"2024-04-22 19:59:59.977137+00:00 [ ] A \n",
"2024-04-22 19:59:59.978626+00:00 [ , I] A \n",
"2024-04-22 19:59:59.987614+00:00 [ , I] A \n",
"\n",
"[190261 rows x 6 columns]"
]
},
"execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"tdf=pd.read_parquet('trades_bac.parquet',engine='pyarrow')\n",
"#print(df)\n",
"df = tdf.loc['BAC']\n",
"df.info()\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [],
"source": [
"@jit(nopython=True)\n",
"def ohlcv_bars(ticks, start_time, end_time, resolution):\n",
" \"\"\"\n",
" Generate OHLCV bars from tick data, skipping intervals without trading activity.\n",
" \n",
" Parameters:\n",
" - ticks: numpy array with columns [timestamp, price, size]\n",
" - start_time: the start timestamp for bars (Unix timestamp)\n",
" - end_time: the end timestamp for bars (Unix timestamp)\n",
" - resolution: time resolution in seconds\n",
" \n",
" Returns:\n",
" - OHLCV bars as a numpy array\n",
" \"\"\"\n",
" num_bars = (end_time - start_time) // resolution + 1\n",
" bar_list = []\n",
"\n",
" for i in range(num_bars):\n",
" bar_start_time = start_time + i * resolution\n",
" bar_end_time = bar_start_time + resolution\n",
" bar_ticks = ticks[(ticks[:, 0] >= bar_start_time) & (ticks[:, 0] < bar_end_time)]\n",
" \n",
" if bar_ticks.shape[0] == 0:\n",
" continue # Skip this bar as there are no ticks\n",
"\n",
" # Calculate OHLCV values\n",
" open_price = bar_ticks[0, 1] # open\n",
" high_price = np.max(bar_ticks[:, 1]) # high\n",
" low_price = np.min(bar_ticks[:, 1]) # low\n",
" close_price = bar_ticks[-1, 1] # close\n",
" volume = np.sum(bar_ticks[:, 2]) # volume\n",
" bar_time = bar_start_time # timestamp for the bar\n",
"\n",
" bar_list.append([open_price, high_price, low_price, close_price, volume, bar_time])\n",
"\n",
" # Convert list to numpy array\n",
" if bar_list:\n",
" ohlcv = np.array(bar_list)\n",
" else:\n",
" ohlcv = np.empty((0, 6)) # return an empty array if no bars were created\n",
"\n",
" return ohlcv\n"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"DatetimeIndex: 190261 entries, 2024-04-22 13:30:00.267711+00:00 to 2024-04-22 19:59:59.987614+00:00\n",
"Data columns (total 6 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 exchange 190261 non-null object \n",
" 1 price 190261 non-null float64\n",
" 2 size 190261 non-null float64\n",
" 3 id 190261 non-null int64 \n",
" 4 conditions 190261 non-null object \n",
" 5 tape 190261 non-null object \n",
"dtypes: float64(2), int64(1), object(3)\n",
"memory usage: 10.2+ MB\n"
]
}
],
"source": [
"df.info()"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['C', 'O', '4', 'B', '7', 'V', 'P', 'W', 'U', 'Z', 'F']\n",
"<class 'pandas.core.frame.DataFrame'>\n",
"DatetimeIndex: 143751 entries, 2024-04-22 13:30:00.300501+00:00 to 2024-04-22 19:59:59.987614+00:00\n",
"Data columns (total 6 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 exchange 143751 non-null object \n",
" 1 price 143751 non-null float64\n",
" 2 size 143751 non-null float64\n",
" 3 id 143751 non-null int64 \n",
" 4 conditions 143751 non-null object \n",
" 5 tape 143751 non-null object \n",
"dtypes: float64(2), int64(1), object(3)\n",
"memory usage: 7.7+ MB\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>exchange</th>\n",
" <th>price</th>\n",
" <th>size</th>\n",
" <th>id</th>\n",
" <th>conditions</th>\n",
" <th>tape</th>\n",
" </tr>\n",
" <tr>\n",
" <th>timestamp</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2024-04-22 13:30:00.300501+00:00</th>\n",
" <td>D</td>\n",
" <td>37.005</td>\n",
" <td>1.0</td>\n",
" <td>71675241117014</td>\n",
" <td>[ , I]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-04-22 13:30:00.305439+00:00</th>\n",
" <td>D</td>\n",
" <td>37.005</td>\n",
" <td>1.0</td>\n",
" <td>71675241117496</td>\n",
" <td>[ , I]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-04-22 13:30:00.314520+00:00</th>\n",
" <td>D</td>\n",
" <td>37.005</td>\n",
" <td>1.0</td>\n",
" <td>71675241118034</td>\n",
" <td>[ , I]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-04-22 13:30:00.335201+00:00</th>\n",
" <td>D</td>\n",
" <td>37.005</td>\n",
" <td>1.0</td>\n",
" <td>71675241121369</td>\n",
" <td>[ , I]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-04-22 13:30:00.346219+00:00</th>\n",
" <td>D</td>\n",
" <td>37.005</td>\n",
" <td>1.0</td>\n",
" <td>71675241122389</td>\n",
" <td>[ , I]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-04-22 19:59:59.902614+00:00</th>\n",
" <td>V</td>\n",
" <td>37.750</td>\n",
" <td>1100.0</td>\n",
" <td>56480705310575</td>\n",
" <td>[ ]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-04-22 19:59:59.977134+00:00</th>\n",
" <td>N</td>\n",
" <td>37.745</td>\n",
" <td>300.0</td>\n",
" <td>52983559963478</td>\n",
" <td>[ ]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-04-22 19:59:59.977137+00:00</th>\n",
" <td>N</td>\n",
" <td>37.740</td>\n",
" <td>7300.0</td>\n",
" <td>52983559963696</td>\n",
" <td>[ ]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-04-22 19:59:59.978626+00:00</th>\n",
" <td>V</td>\n",
" <td>37.750</td>\n",
" <td>16.0</td>\n",
" <td>56480706886228</td>\n",
" <td>[ , I]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-04-22 19:59:59.987614+00:00</th>\n",
" <td>N</td>\n",
" <td>37.745</td>\n",
" <td>30.0</td>\n",
" <td>52983559963958</td>\n",
" <td>[ , I]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>143751 rows × 6 columns</p>\n",
"</div>"
],
"text/plain": [
" exchange price size id \\\n",
"timestamp \n",
"2024-04-22 13:30:00.300501+00:00 D 37.005 1.0 71675241117014 \n",
"2024-04-22 13:30:00.305439+00:00 D 37.005 1.0 71675241117496 \n",
"2024-04-22 13:30:00.314520+00:00 D 37.005 1.0 71675241118034 \n",
"2024-04-22 13:30:00.335201+00:00 D 37.005 1.0 71675241121369 \n",
"2024-04-22 13:30:00.346219+00:00 D 37.005 1.0 71675241122389 \n",
"... ... ... ... ... \n",
"2024-04-22 19:59:59.902614+00:00 V 37.750 1100.0 56480705310575 \n",
"2024-04-22 19:59:59.977134+00:00 N 37.745 300.0 52983559963478 \n",
"2024-04-22 19:59:59.977137+00:00 N 37.740 7300.0 52983559963696 \n",
"2024-04-22 19:59:59.978626+00:00 V 37.750 16.0 56480706886228 \n",
"2024-04-22 19:59:59.987614+00:00 N 37.745 30.0 52983559963958 \n",
"\n",
" conditions tape \n",
"timestamp \n",
"2024-04-22 13:30:00.300501+00:00 [ , I] A \n",
"2024-04-22 13:30:00.305439+00:00 [ , I] A \n",
"2024-04-22 13:30:00.314520+00:00 [ , I] A \n",
"2024-04-22 13:30:00.335201+00:00 [ , I] A \n",
"2024-04-22 13:30:00.346219+00:00 [ , I] A \n",
"... ... ... \n",
"2024-04-22 19:59:59.902614+00:00 [ ] A \n",
"2024-04-22 19:59:59.977134+00:00 [ ] A \n",
"2024-04-22 19:59:59.977137+00:00 [ ] A \n",
"2024-04-22 19:59:59.978626+00:00 [ , I] A \n",
"2024-04-22 19:59:59.987614+00:00 [ , I] A \n",
"\n",
"[143751 rows x 6 columns]"
]
},
"execution_count": 41,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"excludes = cfh.config_handler.get_val('AGG_EXCLUDED_TRADES')\n",
"print(excludes)\n",
"#excludes = [\"F\", \"I\"]\n",
"# FILTER EXCLUDED TRADES\n",
"# Filter rows to exclude those where 'conditions' contains 'F' or 'I'\n",
"# This simplifies the logic by directly using ~ (bitwise not operator) with np.isin\n",
"df = df[~df['conditions'].apply(lambda x: np.isin(x, excludes).any())]"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/var/folders/8p/dwqnp65s0s77jdbm4_6z4vp80000gn/T/ipykernel_52602/3341929382.py:2: DeprecationWarning: parsing timezone aware datetimes is deprecated; this will raise an error in the future\n",
" structured_array = np.array(list(zip(df.index, df['price'], df['size'])),\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[('2024-04-22T13:30:00.300501000', 37.005, 1.0e+00)\n",
" ('2024-04-22T13:30:00.305439000', 37.005, 1.0e+00)\n",
" ('2024-04-22T13:30:00.314520000', 37.005, 1.0e+00) ...\n",
" ('2024-04-22T19:59:59.977137000', 37.74 , 7.3e+03)\n",
" ('2024-04-22T19:59:59.978626000', 37.75 , 1.6e+01)\n",
" ('2024-04-22T19:59:59.987614000', 37.745, 3.0e+01)]\n"
]
},
{
"data": {
"text/plain": [
"array([('2024-04-22T13:30:00.300501000', 37.005, 1.0e+00),\n",
" ('2024-04-22T13:30:00.305439000', 37.005, 1.0e+00),\n",
" ('2024-04-22T13:30:00.314520000', 37.005, 1.0e+00), ...,\n",
" ('2024-04-22T19:59:59.977137000', 37.74 , 7.3e+03),\n",
" ('2024-04-22T19:59:59.978626000', 37.75 , 1.6e+01),\n",
" ('2024-04-22T19:59:59.987614000', 37.745, 3.0e+01)],\n",
" dtype=[('timestamp', '<M8[ns]'), ('price', '<f8'), ('size', '<f8')])"
]
},
"execution_count": 46,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Creating a structured array with the timestamp as the first element\n",
"structured_array = np.array(list(zip(df.index, df['price'], df['size'])),\n",
" dtype=[('timestamp', 'datetime64[ns]'), ('price', 'float'), ('size', 'float')])\n",
"\n",
"print(structured_array)\n",
"structured_array\n",
"\n",
"# ticks = df[['index', 'price', 'size']].to_numpy()\n",
"# # ticks[:, 0] = pd.to_datetime(ticks[:, 0]).astype('int64') // 1_000_000_000 # \n",
"# ticks"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"resolution_seconds = 1 # 1 second resolution\n",
"ohlcv_data = ohlcv_bars(structured_array, resolution_seconds)\n",
"\n",
"# Converting the result back to DataFrame for better usability\n",
"ohlcv_df = pd.DataFrame(ohlcv_data, columns=['Open', 'High', 'Low', 'Close', 'Volume', 'Time'])\n",
"ohlcv_df['Time'] = pd.to_datetime(ohlcv_df['Time'], unit='s') # Convert timestamps back to datetime\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.11"
}
},
"nbformat": 4,
"nbformat_minor": 2
}


@@ -1,6 +1,11 @@
from enum import Enum
from alpaca.trading.enums import OrderSide, OrderStatus, OrderType
class BarType(str, Enum):
    TIME = "time"
    VOLUME = "volume"
    DOLLAR = "dollar"

class Env(str, Enum):
    PROD = "prod"
    TEST = "test"

File diff suppressed because it is too large


@@ -2,19 +2,129 @@ import pandas as pd
import numpy as np
from numba import jit
from alpaca.data.historical import StockHistoricalDataClient
from sqlalchemy import column
from v2realbot.config import ACCOUNT1_PAPER_API_KEY, ACCOUNT1_PAPER_SECRET_KEY, DATA_DIR
from alpaca.data.requests import StockTradesRequest
import time as time_module
from v2realbot.utils.utils import parse_alpaca_timestamp, ltp, zoneNY, send_to_telegram, fetch_calendar_data
import pyarrow
from traceback import format_exc
from datetime import timedelta, datetime, time
from concurrent.futures import ThreadPoolExecutor
import os
import gzip
import pickle
import random
from alpaca.data.models import BarSet, QuoteSet, TradeSet
import v2realbot.utils.config_handler as cfh
from v2realbot.enums.enums import BarType
""""
WIP - for later use
Module used for vectorized aggregation of trades.
Includes fetch (remote/cached) methods and numba aggregator function for TIME BASED, VOLUME BASED and DOLLAR BARS
"""""
def fetch_stock_trades(symbol, start, end, max_retries=5, backoff_factor=1):
def aggregate_trades(symbol: str, trades_df: pd.DataFrame, resolution: int, type: BarType = BarType.TIME):
""""
Accepts dataframe with trades keyed by symbol. Preparess dataframe to
numpy and call nNumba optimized aggregator for given bar type. (time/volume/dollar)
"""""
trades_df = trades_df.loc[symbol]
trades_df = trades_df.reset_index()
ticks = trades_df[['timestamp', 'price', 'size']].to_numpy()
# Extract the timestamps column (assuming it's the first column)
timestamps = ticks[:, 0]
# Convert the timestamps to Unix timestamps in seconds with microsecond precision
unix_timestamps_s = np.array([ts.timestamp() for ts in timestamps], dtype='float64')
# Replace the original timestamps in the NumPy array with the converted Unix timestamps
ticks[:, 0] = unix_timestamps_s
ticks = ticks.astype(np.float64)
#based on type, specific aggregator function is called
match type:
case BarType.TIME:
ohlcv_bars = generate_time_bars_nb(ticks, resolution)
case BarType.VOLUME:
ohlcv_bars = generate_volume_bars_nb(ticks, resolution)
case BarType.DOLLAR:
ohlcv_bars = generate_dollar_bars_nb(ticks, resolution)
case _:
raise ValueError("Invalid bar type. Supported types are 'time', 'volume' and 'dollar'.")
# Convert the resulting array back to a DataFrame
columns = ['time', 'open', 'high', 'low', 'close', 'volume', 'trades']
if type == BarType.DOLLAR:
columns.append('amount')
ohlcv_df = pd.DataFrame(ohlcv_bars, columns=columns)
ohlcv_df['time'] = pd.to_datetime(ohlcv_df['time'], unit='s')
ohlcv_df.set_index('time', inplace=True)
ohlcv_df.index = ohlcv_df.index.tz_localize('UTC').tz_convert(zoneNY)
return ohlcv_df
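# Usage sketch (illustrative values, mirroring the notebooks above):
#   trades = fetch_trades_parallel("SPY", day_start, day_stop, minsize=50)
#   bars = aggregate_trades(symbol="SPY", trades_df=trades, resolution=1, type=BarType.TIME)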
def convert_dict_to_multiindex_df(tradesResponse):
""""
Converts dictionary from cache or from remote (raw input) to multiindex dataframe.
"""""
# Create a DataFrame for each key and add the key as part of the MultiIndex
dfs = []
for key, values in tradesResponse.items():
df = pd.DataFrame(values)
# Rename columns
# Select and order columns explicitly
#print(df)
df = df[['t', 'x', 'p', 's', 'i', 'c','z']]
df.rename(columns={'t': 'timestamp', 'c': 'conditions', 'p': 'price', 's': 'size', 'x': 'exchange', 'z':'tape', 'i':'id'}, inplace=True)
df['symbol'] = key # Add ticker as a column
df['timestamp'] = pd.to_datetime(df['timestamp']) # Convert 't' from string to datetime before setting it as an index
df.set_index(['symbol', 'timestamp'], inplace=True) # Set the multi-level index using both 'ticker' and 't'
df = df.tz_convert(zoneNY, level='timestamp')
dfs.append(df)
# Concatenate all DataFrames into a single DataFrame with MultiIndex
final_df = pd.concat(dfs)
return final_df
def dict_to_df(tradesResponse, start, end, exclude_conditions = None, minsize = None):
""""
Transforms dict to Tradeset, then df and to zone aware
Also filters to start and end if necessary (ex. 9:30 to 15:40 is required only)
NOTE: prepodkladame, ze tradesResponse je dict from Raw data (cached/remote)
"""""
df = convert_dict_to_multiindex_df(tradesResponse)
#REQUIRED FILTERING
#if the requested start is later or the end is earlier, trim the data
if (start.time() > time(9, 30) or end.time() < time(16, 0)):
print(f"filtering {start.time()} {end.time()}")
# Define the time range
# start_time = pd.Timestamp(start.time(), tz=zoneNY).time()
# end_time = pd.Timestamp(end.time(), tz=zoneNY).time()
# Create a mask to filter rows within the specified time range
mask = (df.index.get_level_values('timestamp') >= start) & \
(df.index.get_level_values('timestamp') <= end)
# Apply the mask to the DataFrame
df = df[mask]
if exclude_conditions is not None:
print(f"excluding conditions {exclude_conditions}")
# Create a mask to exclude rows with any of the specified conditions
mask = df['conditions'].apply(lambda x: any(cond in exclude_conditions for cond in x))
# Filter out the rows with specified conditions
df = df[~mask]
if minsize is not None:
print(f"minsize {minsize}")
#keep only trades at or above the minimum size
df = df[df['size'] >= minsize]
return df
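# Filtering sketch (illustrative): with exclude_conditions=['C', 'O'], a trade whose
# conditions list is [' ', 'I'] is kept, while one with [' ', 'O'] is dropped, because
# the mask flags any row whose list contains at least one excluded code.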
#fetches daily stock trades - currently only the main session is supported
def fetch_daily_stock_trades_old(symbol, start, end, exclude_conditions = None, minsize = None, force_remote = False, max_retries=5, backoff_factor=1):
"""
Attempts to fetch stock trades with exponential backoff. Raises an exception if all retries fail.
@@ -25,98 +135,401 @@ def fetch_stock_trades(symbol, start, end, max_retries=5, backoff_factor=1):
:param backoff_factor: Factor to determine the next sleep time.
:return: TradesResponse object.
:raises: ConnectionError if all retries fail.
We use the tradecache only for main session requests = 9:30 to 16:00
"""
use_daily_tradecache = False
if (start.time() >= time(9, 30) and end.time() <= time(16, 0)):
use_daily_tradecache = True
filename_start = zoneNY.localize(datetime.combine(start.date(), time(9, 30)))
filename_end= zoneNY.localize(datetime.combine(end.date(), time(16, 0)))
daily_file = "TS" + str(symbol) + '-' + str(int(filename_start.timestamp())) + '-' + str(int(filename_end.timestamp())) + '.cache.gz'
file_path = DATA_DIR + "/tradecache/"+daily_file
if use_daily_tradecache and not force_remote and os.path.exists(file_path):
print("Searching cache: " + daily_file)
with gzip.open (file_path, 'rb') as fp:
tradesResponse = pickle.load(fp)
print("FOUND in CACHE", daily_file)
#the response is always stored raw (dict); we put it back into a TradeSet, which also provides a df
return dict_to_df(tradesResponse, start, end, exclude_conditions, minsize)
#daily file doesn't exist
else:
print("NOT FOUND. Fetching from remote")
client = StockHistoricalDataClient(ACCOUNT1_PAPER_API_KEY, ACCOUNT1_PAPER_SECRET_KEY, raw_data=False)
stockTradeRequest = StockTradesRequest(symbol_or_symbols=symbol, start=start, end=end)
last_exception = None
for attempt in range(max_retries):
try:
tradesResponse = client.get_stock_trades(stockTradeRequest)
is_empty = not tradesResponse[symbol]
print(f"Remote fetched: {is_empty=}", start, end)
#if this is today and the market has not closed yet, we don't save the cache; likewise we don't cache with an IEX data point
if use_daily_tradecache and not is_empty:
if (start < datetime.now().astimezone(zoneNY) < end):
print("not saving trade cache, market still open today")
else:
with gzip.open(file_path, 'wb') as fp:
pickle.dump(tradesResponse, fp)
print("Saving to Trade CACHE", file_path)
return pd.DataFrame() if is_empty else dict_to_df(tradesResponse, start, end, exclude_conditions, minsize)
except Exception as e:
print(f"Attempt {attempt + 1} failed: {e}")
last_exception = e
time_module.sleep(backoff_factor * (2 ** attempt))
print("All attempts to fetch data failed.")
raise ConnectionError(f"Failed to fetch stock trades after {max_retries} retries. Last exception: {str(last_exception)} and {format_exc()}")
def fetch_daily_stock_trades(symbol, start, end, exclude_conditions=None, minsize=None, force_remote=False, max_retries=5, backoff_factor=1):
"""
Attempts to fetch stock trades either from cache or remote. When remote, it uses a retry mechanism with exponential backoff.
It also stores the data to cache if it is not already there.
Using force_remote always fetches remote data, thus refreshing the cache for these dates.
Attributes:
:param symbol: The stock symbol to fetch trades for.
:param start: The start time for the trade data.
:param end: The end time for the trade data.
:param exclude_conditions: List of string conditions to exclude from the data.
:param minsize: Minimum size of a trade to be included in the data.
:param force_remote: Always use remote data and refresh the cache.
:param max_retries: Maximum number of retries.
:param backoff_factor: Factor to determine the next sleep time.
:return: TradesResponse object.
:raises: ConnectionError if all retries fail.
We use the tradecache only for main session requests = 9:30 to 16:00.
TODO: In the future, store the whole day as BAC-20240203.cache.gz and filter either the main session or extended hours from it.
For now only the main session is stored, as BAC-<open timestamp>-<close timestamp>.cache.gz.
"""
# Determine if the requested times fall within the main session
in_main_session = (time(9, 30) <= start.time() < time(16, 0)) and (time(9, 30) <= end.time() <= time(16, 0))
file_path = ''
if in_main_session:
filename_start = zoneNY.localize(datetime.combine(start.date(), time(9, 30)))
filename_end = zoneNY.localize(datetime.combine(end.date(), time(16, 0)))
daily_file = f"{symbol}-{int(filename_start.timestamp())}-{int(filename_end.timestamp())}.cache.gz"
file_path = f"{DATA_DIR}/tradecache/{daily_file}"
if not force_remote and os.path.exists(file_path):
print("Searching cache: " + daily_file)
with gzip.open(file_path, 'rb') as fp:
tradesResponse = pickle.load(fp)
print("FOUND in CACHE", daily_file)
return dict_to_df(tradesResponse, start, end, exclude_conditions, minsize)
print("NOT FOUND. Fetching from remote")
client = StockHistoricalDataClient(ACCOUNT1_PAPER_API_KEY, ACCOUNT1_PAPER_SECRET_KEY, raw_data=True)
stockTradeRequest = StockTradesRequest(symbol_or_symbols=symbol, start=start, end=end)
last_exception = None
for attempt in range(max_retries):
try:
tradesResponse = client.get_stock_trades(stockTradeRequest)
print("Remote Fetch DAY DATA Complete", start, end)
return tradesResponse
is_empty = not tradesResponse[symbol]
print(f"Remote fetched: {is_empty=}", start, end)
if in_main_session and not is_empty:
current_time = datetime.now().astimezone(zoneNY)
if not (start < current_time < end):
with gzip.open(file_path, 'wb') as fp:
pickle.dump(tradesResponse, fp)
print("Saving to Trade CACHE", file_path)
else: # Don't save the cache if the market is still open
print("Not saving trade cache, market still open today")
return pd.DataFrame() if is_empty else dict_to_df(tradesResponse, start, end, exclude_conditions, minsize)
except Exception as e:
print(f"Attempt {attempt + 1} failed: {e}")
last_exception = e
time_module.sleep(backoff_factor * (2 ** attempt) + random.uniform(0, 1)) # Adding random jitter
print("All attempts to fetch data failed.")
raise ConnectionError(f"Failed to fetch stock trades after {max_retries} retries. Last exception: {str(last_exception)} and {format_exc()}")
def fetch_trades_parallel(symbol, start_date, end_date, exclude_conditions = cfh.config_handler.get_val('AGG_EXCLUDED_TRADES'), minsize = 100, force_remote = False):
"""
Fetches trades for each day between start_date and end_date during market hours (9:30-16:00) in parallel and concatenates them into a single DataFrame.
:param symbol: Stock symbol.
:param start_date: Start date as datetime.
:param end_date: End date as datetime.
:return: DataFrame containing all trades from start_date to end_date.
"""
futures = []
results = []
market_open_days = fetch_calendar_data(start_date, end_date)
day_count = len(market_open_days)
print("Contains", day_count, " market days")
max_workers = min(10, max(5, day_count // 2)) # Heuristic: half the days to process, but at least 1 and no more than 10
with ThreadPoolExecutor(max_workers=max_workers) as executor:
#for single_date in (start_date + timedelta(days=i) for i in range((end_date - start_date).days + 1)):
for market_day in market_open_days:
#start = datetime.combine(single_date, time(9, 30)) # Market opens at 9:30 AM
#end = datetime.combine(single_date, time(16, 0)) # Market closes at 4:00 PM
interval_from = zoneNY.localize(market_day.open)
interval_to = zoneNY.localize(market_day.close)
#optionally trim if a later start or an earlier end is requested
start = start_date if interval_from < start_date else interval_from
#start = max(start_date, interval_from)
end = end_date if interval_to > end_date else interval_to
#end = min(end_date, interval_to)
future = executor.submit(fetch_daily_stock_trades, symbol, start, end, exclude_conditions, minsize, force_remote)
futures.append(future)
for future in futures:
try:
result = future.result()
results.append(result)
except Exception as e:
print(f"Error fetching data for a day: {e}")
return pd.concat(results, ignore_index=False)
@jit(nopython=True)
def generate_dollar_bars_nb(ticks, amount_per_bar):
""""
Generates Dollar based bars from ticks.
# Convert list to numpy array
if bar_list:
ohlcv = np.array(bar_list)
else:
ohlcv = np.empty((0, 6)) # return an empty array if no bars were created
There is also simple prevention of aggregation from different days
as described here https://chatgpt.com/c/17804fc1-a7bc-495d-8686-b8392f3640a2
Downside: split days by UTC (which is ok for main session, but when extended hours it should be reworked by preprocessing new column identifying session)
When trade is split into multiple bars it is counted as trade in each of the bars.
Other option: trade count can be proportionally distributed by weight (0.2 to 1st bar, 0.8 to 2nd bar) - but this is not implemented yet
https://chatgpt.com/c/ff4802d9-22a2-4b72-8ab7-97a91e7a515f
"""""
ohlcv_bars = []
remaining_amount = amount_per_bar
return ohlcv
# Initialize bar values based on the first tick to avoid uninitialized values
open_price = ticks[0, 1]
high_price = ticks[0, 1]
low_price = ticks[0, 1]
close_price = ticks[0, 1]
volume = 0
trades_count = 0
current_day = np.floor(ticks[0, 0] / 86400) # Calculate the initial day from the first tick timestamp
bar_time = ticks[0, 0] # Initialize bar time with the time of the first tick
for tick in ticks:
tick_time = tick[0]
price = tick[1]
tick_volume = tick[2]
tick_amount = price * tick_volume
tick_day = np.floor(tick_time / 86400) # Calculate the day of the current tick
# Check if the new tick is from a different day, then close the current bar
if tick_day != current_day:
if trades_count > 0:
ohlcv_bars.append([bar_time, open_price, high_price, low_price, close_price, volume, trades_count, amount_per_bar])
# Reset for the new day using the current tick data
open_price = price
high_price = price
low_price = price
close_price = price
volume = 0
trades_count = 0
remaining_amount = amount_per_bar
current_day = tick_day
bar_time = tick_time
# Start new bar if needed because of the dollar value
while tick_amount > 0:
if tick_amount < remaining_amount:
# Add the entire tick to the current bar
high_price = max(high_price, price)
low_price = min(low_price, price)
close_price = price
volume += tick_volume
remaining_amount -= tick_amount
trades_count += 1
tick_amount = 0
else:
# Calculate the amount of volume that fits within the remaining dollar amount
volume_to_add = remaining_amount / price
volume += volume_to_add # Update the volume here before appending and resetting
# Append the partially filled bar to the list
ohlcv_bars.append([bar_time, open_price, high_price, low_price, close_price, volume, trades_count + 1, amount_per_bar])
# Fill the current bar and continue with a new bar
tick_volume -= volume_to_add
tick_amount -= remaining_amount
# Reset bar values for the new bar using the current tick data
open_price = price
high_price = price
low_price = price
close_price = price
volume = 0 # Reset volume for the new bar
trades_count = 0
remaining_amount = amount_per_bar
# Increment bar time if splitting a trade
if tick_volume > 0: #if part of the trade remains, set the bar time a microsecond later
bar_time = tick_time + 1e-6
else:
bar_time = tick_time #otherwise use the tick time
#bar_time = tick_time
# Add the last bar if it contains any trades
if trades_count > 0:
ohlcv_bars.append([bar_time, open_price, high_price, low_price, close_price, volume, trades_count, amount_per_bar])
return np.array(ohlcv_bars)
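# Worked example of the split above (illustrative numbers): with amount_per_bar = 30_000,
# a single tick of 1_000 shares @ $50 (= $50_000) first fills the open bar with
# 30_000 / 50 = 600 shares and closes it, then carries the remaining 400 shares
# (= $20_000) into the next bar, whose time is nudged 1e-6 s past the tick so that
# split bars keep distinct timestamps.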
@jit(nopython=True)
def generate_volume_bars_nb(ticks, volume_per_bar):
""""
Generates Volume based bars from ticks.
NOTE: UTC day split here (doesnt aggregate trades from different days)
but realized from UTC (ok for main session) - but needs rework for extension by preprocessing ticks_df and introduction sesssion column
When trade is split into multiple bars it is counted as trade in each of the bars.
Other option: trade count can be proportionally distributed by weight (0.2 to 1st bar, 0.8 to 2nd bar) - but this is not implemented yet
https://chatgpt.com/c/ff4802d9-22a2-4b72-8ab7-97a91e7a515f
"""""
ohlcv_bars = []
remaining_volume = volume_per_bar
# Initialize bar values based on the first tick to avoid uninitialized values
open_price = ticks[0, 1]
high_price = ticks[0, 1]
low_price = ticks[0, 1]
close_price = ticks[0, 1]
volume = 0
trades_count = 0
current_day = np.floor(ticks[0, 0] / 86400) # Calculate the initial day from the first tick timestamp
bar_time = ticks[0, 0] # Initialize bar time with the time of the first tick
for tick in ticks:
tick_time = tick[0]
price = tick[1]
tick_volume = tick[2]
tick_day = np.floor(tick_time / 86400) # Calculate the day of the current tick
# Check if the new tick is from a different day, then close the current bar
if tick_day != current_day:
if trades_count > 0:
ohlcv_bars.append([bar_time, open_price, high_price, low_price, close_price, volume, trades_count])
# Reset for the new day using the current tick data
open_price = price
high_price = price
low_price = price
close_price = price
volume = 0
trades_count = 0
remaining_volume = volume_per_bar
current_day = tick_day
bar_time = tick_time # Update bar time to the current tick time
# Start new bar if needed because of the volume
while tick_volume > 0:
if tick_volume < remaining_volume:
# Add the entire tick to the current bar
high_price = max(high_price, price)
low_price = min(low_price, price)
close_price = price
volume += tick_volume
remaining_volume -= tick_volume
trades_count += 1
tick_volume = 0
else:
# Fill the current bar and continue with a new bar
volume_to_add = remaining_volume
volume += volume_to_add
tick_volume -= volume_to_add
trades_count += 1
# Append the completed bar to the list
ohlcv_bars.append([bar_time, open_price, high_price, low_price, close_price, volume, trades_count])
# Reset bar values for the new bar using the current tick data
open_price = price
high_price = price
low_price = price
close_price = price
volume = 0
trades_count = 0
remaining_volume = volume_per_bar
# Increment bar time if splitting a trade
if tick_volume > 0: #if part of the trade remains, set the bar time a microsecond later
bar_time = tick_time + 1e-6
else:
bar_time = tick_time #otherwise use the tick time
# Add the last bar if it contains any trades
if trades_count > 0:
ohlcv_bars.append([bar_time, open_price, high_price, low_price, close_price, volume, trades_count])
return np.array(ohlcv_bars)
@jit(nopython=True)
def generate_time_bars_nb(ticks, resolution):
# Initialize the start and end time
start_time = np.floor(ticks[0, 0] / resolution) * resolution
end_time = np.floor(ticks[-1, 0] / resolution) * resolution
# # Calculate number of bars
# num_bars = int((end_time - start_time) // resolution + 1)
# Using a list to append data only when trades exist
ohlcv_bars = []
# Variables to track the current bar
current_bar_index = -1
open_price = 0
high_price = -np.inf
low_price = np.inf
close_price = 0
volume = 0
trades_count = 0
for tick in ticks:
tick_time = np.floor(tick[0] / resolution) * resolution
price = tick[1]
tick_volume = tick[2]
# Check if the tick belongs to a new bar
if tick_time != start_time + current_bar_index * resolution:
if current_bar_index >= 0 and trades_count > 0: # Save the previous bar if trades happened
ohlcv_bars.append([start_time + current_bar_index * resolution, open_price, high_price, low_price, close_price, volume, trades_count])
# Reset bar values
current_bar_index = int((tick_time - start_time) / resolution)
open_price = price
high_price = price
low_price = price
volume = 0
trades_count = 0
# Update the OHLCV values for the current bar
high_price = max(high_price, price)
low_price = min(low_price, price)
close_price = price
volume += tick_volume
trades_count += 1
# Save the last processed bar
if trades_count > 0:
ohlcv_bars.append([start_time + current_bar_index * resolution, open_price, high_price, low_price, close_price, volume, trades_count])
return np.array(ohlcv_bars)
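# Worked example of the flooring above (illustration only): with resolution=60s,
# a tick at t=123.4 lands in the bar that opens at floor(123.4 / 60) * 60 = 120.0.
# Because a bar is appended only when trades_count > 0, intervals with no trades
# produce no rows; the output is sparse rather than a dense time grid.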
# Example usage
if __name__ == '__main__':
# symbol = ["BAC"]
# #datetime in zoneNY
# day_start = datetime(2024, 4, 22, 9, 30, 0)
# day_stop = datetime(2024, 4, 22, 16, 00, 0)
# day_start = zoneNY.localize(day_start)
# day_stop = zoneNY.localize(day_stop)
# tradesResponse = fetch_stock_trades(symbol, day_start, day_stop)
# df = tradesResponse.df
# df.to_parquet('trades_bac.parquet', engine='pyarrow')
    df = pd.read_parquet('trades_bac.parquet', engine='pyarrow')
    print(df)
    #df = pd.read_csv('tick_data.csv') # DF with tick data
    # Assuming 'df' is your DataFrame with columns 'time', 'price', 'size', 'condition'
    exclude_conditions = ['ConditionA', 'ConditionB']  # Conditions to exclude
    df_filtered = df[~df['condition'].isin(exclude_conditions)]
    # Convert the filtered ticks to a numeric array; timestamps become Unix seconds
    ticks = df_filtered[['time', 'price', 'size']].to_numpy()
    ticks[:, 0] = pd.to_datetime(ticks[:, 0]).astype('int64') // 1_000_000_000
    ticks = ticks.astype(np.float64)  # numba nopython mode needs a numeric dtype
    # Define session bounds (adjust to the data's dates); both are Unix timestamps
    start_time = pd.to_datetime('2023-01-01 09:30:00').timestamp()
    end_time = pd.to_datetime('2023-01-01 16:00:00').timestamp()
    ticks = ticks[(ticks[:, 0] >= start_time) & (ticks[:, 0] <= end_time)]
    resolution_seconds = 1  # 1-second bars
    ohlcv_data = generate_time_bars_nb(ticks, resolution_seconds)
    # Convert the result back to a DataFrame; column order matches the rows produced above
    ohlcv_df = pd.DataFrame(ohlcv_data, columns=['Time', 'Open', 'High', 'Low', 'Close', 'Volume', 'Trades'])
    ohlcv_df['Time'] = pd.to_datetime(ohlcv_df['Time'], unit='s')  # epoch seconds -> datetime
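    # Optional illustration: index the bars by New York time for charting or
    # session slicing. Assumption: the epoch seconds above are UTC, which holds
    # for timestamps derived via pd.to_datetime(...).astype('int64').
    ohlcv_df = ohlcv_df.set_index('Time').tz_localize('UTC').tz_convert('America/New_York')
    print(ohlcv_df)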
#example in agg_vect.ipynb

View File

@ -1150,7 +1150,7 @@
<script src="/static/js/config.js?v=1.04"></script>
<!-- temporary Polish localization starts here -->
<!-- <script type="text/javascript" src="https://unpkg.com/lightweight-charts/dist/lightweight-charts.standalone.production.js"></script> -->
<script type="text/javascript" src="/static/js/libs/lightweightcharts/lightweight-charts.standalone.production410.js"></script>
<script type="text/javascript" src="/static/js/libs/lightweightcharts/lightweight-charts.standalone.production413.js"></script>
<script src="/static/js/dynamicbuttons.js?v=1.05"></script>

File diff suppressed because one or more lines are too long

View File

@ -371,9 +371,10 @@ function initialize_chart() {
}
chart = LightweightCharts.createChart(document.getElementById('chart'), chartOptions);
chart.applyOptions({ timeScale: { visible: true, timeVisible: true, secondsVisible: true }, crosshair: {
chart.applyOptions({ timeScale: { visible: true, timeVisible: true, secondsVisible: true, minBarSpacing: 0.003}, crosshair: {
mode: LightweightCharts.CrosshairMode.Normal, labelVisible: true
}})
console.log("chart intiialized")
}
//maybe set the last-value-visible attributes

View File

@ -9,7 +9,7 @@ from typing import List
from enum import Enum
import numpy as np
import v2realbot.controller.services as cs
from rich import print
from rich import print as richprint
from v2realbot.common.model import AnalyzerInputs
from v2realbot.common.PrescribedTradeModel import TradeDirection, TradeStatus, Trade, TradeStoplossType
from v2realbot.utils.utils import isrising, isfalling,zoneNY, price2dec, safe_get#, print
@ -94,7 +94,11 @@ def convert_to_dataframe(ohlcv):
return df
def load_batch(runner_ids: List = None, batch_id: str = None, space_resolution_evenly = False, main_session_only = True, merge_ind2bars = True, bars_columns = ['Open', 'High', 'Low', 'Close', 'Volume', 'Vwap'], indicators_columns = []) -> Tuple[int, dict]:
def print(v, *args, **kwargs):
if v:
richprint(*args, **kwargs)
def load_batch(runner_ids: List = None, batch_id: str = None, space_resolution_evenly = False, main_session_only = True, merge_ind2bars = True, bars_columns = ['Open', 'High', 'Low', 'Close', 'Volume', 'Vwap'], indicators_columns = [], verbose = False) -> Tuple[int, dict]:
"""Load batches (all runners from single batch) into pandas dataframes
Args:
@ -136,7 +140,7 @@ def load_batch(runner_ids: List = None, batch_id: str = None, space_resolution_e
if resolution is None:
resolution = sada["bars"]["resolution"][0]
print(f"Resolution : {resolution}")
print(verbose, f"Resolution : {resolution}")
#add daily bars limited to the required columns; keep "updated" since it is the mapping column to indicators
bars = convert_to_dataframe(sada["bars"])[bars_columns + ["updated"]]
@ -169,11 +173,11 @@ def load_batch(runner_ids: List = None, batch_id: str = None, space_resolution_e
num_duplicates = concat_df.index.duplicated().sum()
if num_duplicates > 0:
print(f"NOTE: DUPLICATES {num_duplicates}/{len(concat_df)} in {key}. REMOVING.")
print(verbose, f"NOTE: DUPLICATES {num_duplicates}/{len(concat_df)} in {key}. REMOVING.")
concat_df = concat_df[~concat_df.index.duplicated()]
num_duplicates = concat_df.index.duplicated().sum()
print(f"Now there are {num_duplicates}/{len(concat_df)}")
print(verbose, f"Now there are {num_duplicates}/{len(concat_df)}")
if space_resolution_evenly and key != "cbar_indicators":
# Apply rounding to the datetime index according to resolution (in seconds)

View File

@ -5,6 +5,7 @@ from alpaca.data.enums import DataFeed
import v2realbot.utils.config_defaults as config_defaults
from v2realbot.enums.enums import FillCondition
from rich import print
# from v2realbot.utils.utils import print
def aggregate_configurations(module):
return {key: getattr(module, key) for key in dir(module) if key.isupper()}
@ -48,8 +49,8 @@ class ConfigHandler:
self.active_config = self.default_config.copy()
self.active_config.update(override_configuration)
self.active_profile = profile_name
print(f"Profile {profile_name} loaded successfully.")
print("Current values:", self.active_config)
#print(f"Profile {profile_name} loaded successfully.")
#print("Current values:", self.active_config)
else:
print(f"Profile {profile_name} does not exist in config item: {config_directive}")
except Exception as e:
@ -102,8 +103,8 @@ class ConfigHandler:
# Global configuration - it is imported by modules that need it. In the future this can be changed to Dependency Injection (each service will receive the config instance as an input parameter)
config_handler = ConfigHandler()
print(f"{config_handler.active_profile=}")
print("config handler initialized")
#print(f"{config_handler.active_profile=}")
#print("config handler initialized")
#this is how to get value
#config_handler.get_val('BT_FILL_PRICE_MARKET_ORDER_PREMIUM')