52 KiB
52 KiB
In [1]:
# Imports for the whole notebook. Importing this set prints "Activating profile ..."
# (see the cell output), so the original import order is kept as-is.
import pandas as pd
import numpy as np
from numba import jit
from alpaca.data.historical import StockHistoricalDataClient
from v2realbot.config import ACCOUNT1_PAPER_API_KEY, ACCOUNT1_PAPER_SECRET_KEY, DATA_DIR
from alpaca.data.requests import StockTradesRequest
from v2realbot.enums.enums import BarType
import time
from datetime import datetime
from v2realbot.utils.utils import parse_alpaca_timestamp, ltp, zoneNY, send_to_telegram, fetch_calendar_data
import pyarrow
from v2realbot.loader.aggregator_vectorized import fetch_daily_stock_trades, fetch_trades_parallel, generate_time_bars_nb, aggregate_trades
import vectorbtpro as vbt

# Plot defaults: dark theme, 1280 px wide figures.
vbt.settings.set_theme("dark")
vbt.settings['plotting']['layout']['width'] = 1280
# NOTE(review): presumably hides non-trading gaps on the time axis - confirm in the vectorbtpro docs.
vbt.settings.plotting.auto_rangebreaks = True

# Render DataFrames as HTML and keep their reprs short.
pd.set_option('display.notebook_repr_html', True)
pd.set_option('display.max_rows', 10)  # Longer frames are truncated (head/tail with "..."), not paginated
Activating profile profile1
In [2]:
# Symbol and session window. The naive datetimes are localized to New York time (zoneNY).
symbol = "SPY"
day_start = zoneNY.localize(datetime(2024, 5, 15, 9, 30, 0))
day_stop = zoneNY.localize(datetime(2024, 5, 16, 16, 0, 0))

# TODO: could this be sped up? The "Searching cache" step shows up slowly - possible bottleneck.
# exclude_conditions is left at its default; the run log prints the list that was applied
# (['C','O','4','B','7','V','P','W','U','Z','F']).
df = fetch_trades_parallel(symbol, day_start, day_stop, minsize=50)

# Aggregate the trades into time bars with resolution=1 (the saved output shows 1-second bars).
ohlcv_df = aggregate_trades(
    symbol=symbol,
    trades_df=df,
    resolution=1,
    type=BarType.TIME,
)
ohlcv_df
Calendar data fetch successful 2024-05-15 2024-05-16
Contains 2 market days Searching cache: SPY-1715779800-1715803200.cache.gz Searching cache: SPY-1715866200-1715889600.cache.gz FOUND in CACHE SPY-1715866200-1715889600.cache.gz FOUND in CACHE SPY-1715779800-1715803200.cache.gz excluding conditions ['C', 'O', '4', 'B', '7', 'V', 'P', 'W', 'U', 'Z', 'F'] minsize 50 excluding conditions ['C', 'O', '4', 'B', '7', 'V', 'P', 'W', 'U', 'Z', 'F'] minsize 50
Out[2]:
<style scoped="">
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
| open | high | low | close | volume | trades | |
|---|---|---|---|---|---|---|
| time | ||||||
| 2024-05-15 09:30:00-04:00 | 525.8300 | 525.96 | 525.830 | 525.890 | 163507.0 | 145.0 |
| 2024-05-15 09:30:01-04:00 | 525.8900 | 525.91 | 525.790 | 525.810 | 14254.0 | 93.0 |
| 2024-05-15 09:30:02-04:00 | 525.8200 | 525.92 | 525.800 | 525.860 | 2937.0 | 22.0 |
| 2024-05-15 09:30:03-04:00 | 525.8800 | 525.89 | 525.850 | 525.860 | 5520.0 | 34.0 |
| 2024-05-15 09:30:04-04:00 | 525.8450 | 525.87 | 525.720 | 525.740 | 73191.0 | 289.0 |
| ... | ... | ... | ... | ... | ... | ... |
| 2024-05-16 15:59:55-04:00 | 528.6300 | 528.65 | 528.605 | 528.640 | 37439.0 | 139.0 |
| 2024-05-16 15:59:56-04:00 | 528.6400 | 528.73 | 528.640 | 528.696 | 21836.0 | 72.0 |
| 2024-05-16 15:59:57-04:00 | 528.7000 | 528.74 | 528.680 | 528.695 | 11066.0 | 60.0 |
| 2024-05-16 15:59:58-04:00 | 528.7100 | 528.83 | 528.710 | 528.830 | 28015.0 | 65.0 |
| 2024-05-16 15:59:59-04:00 | 528.8298 | 528.83 | 528.560 | 528.660 | 25043.0 | 84.0 |
38150 rows × 6 columns
In [5]:
# Show the raw trades frame currently bound to `df`.
# NOTE(review): the saved output lists BAC trades for 2024-03-01, while the fetch cell requests
# SPY for 2024-05-15/16 - the output looks stale from an earlier, out-of-order run. Re-run from
# a fresh kernel before relying on it.
df
Out[5]:
<style scoped="">
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
| exchange | price | size | id | conditions | tape | ||
|---|---|---|---|---|---|---|---|
| symbol | timestamp | ||||||
| BAC | 2024-03-01 09:30:01.069206528-05:00 | N | 34.520 | 456915 | 52983525028686 | [ , Q] | A |
| 2024-03-01 09:30:01.071717376-05:00 | P | 34.520 | 50 | 52983525359944 | [ , I] | A | |
| 2024-03-01 09:30:01.071723776-05:00 | P | 34.520 | 50 | 52983525359945 | [ , I] | A | |
| 2024-03-01 09:30:01.072288768-05:00 | P | 34.510 | 100 | 52983525359946 | [ ] | A | |
| 2024-03-01 09:30:01.072291840-05:00 | P | 34.510 | 300 | 52983525359947 | [ ] | A | |
| ... | ... | ... | ... | ... | ... | ... | |
| 2024-03-01 15:49:58.064368128-05:00 | T | 34.395 | 100 | 62880154539876 | [ ] | A | |
| 2024-03-01 15:49:58.077368064-05:00 | T | 34.395 | 100 | 62880154540085 | [ ] | A | |
| 2024-03-01 15:49:58.088362240-05:00 | T | 34.395 | 100 | 62880154540101 | [ ] | A | |
| 2024-03-01 15:49:58.590776576-05:00 | D | 34.390 | 120 | 71709618548788 | [ ] | A | |
| 2024-03-01 15:49:58.591035136-05:00 | D | 34.395 | 120 | 79372108382794 | [ ] | A |
56737 rows × 6 columns
In [4]:
# Wrap the bars in a vectorbtpro Data object keyed by symbol, with the index converted to zoneNY.
basic_data = vbt.Data.from_data(
    vbt.symbol_dict({symbol: ohlcv_df}),
    tz_convert=zoneNY,
)
# Same setting the config cell applies; repeated here so the plot does not depend on it.
vbt.settings.plotting.auto_rangebreaks = True
# Candlestick + volume figure.
basic_data.ohlcv.plot()
Out[4]:
FigureWidget({
'data': [{'close': array([525.89 , 525.81 , 525.86 , ..., 528.695, 528.83 , 528.66 ]),
'decreasing': {'fillcolor': '#ee534f', 'line': {'color': '#ee534f'}},
'high': array([525.96, 525.91, 525.92, ..., 528.74, 528.83, 528.83]),
'increasing': {'fillcolor': '#26a69a', 'line': {'color': '#26a69a'}},
'low': array([525.83, 525.79, 525.8 , ..., 528.68, 528.71, 528.56]),
'name': 'OHLC',
'opacity': 0.75,
'open': array([525.83 , 525.89 , 525.82 , ..., 528.7 , 528.71 , 528.8298]),
'type': 'candlestick',
'uid': 'ace5a21b-2317-4646-b45b-de0447bc533c',
'x': array([datetime.datetime(2024, 5, 15, 9, 30, tzinfo=<DstTzInfo 'US/Eastern' EDT-1 day, 20:00:00 DST>),
datetime.datetime(2024, 5, 15, 9, 30, 1, tzinfo=<DstTzInfo 'US/Eastern' EDT-1 day, 20:00:00 DST>),
datetime.datetime(2024, 5, 15, 9, 30, 2, tzinfo=<DstTzInfo 'US/Eastern' EDT-1 day, 20:00:00 DST>),
...,
datetime.datetime(2024, 5, 16, 15, 59, 57, tzinfo=<DstTzInfo 'US/Eastern' EDT-1 day, 20:00:00 DST>),
datetime.datetime(2024, 5, 16, 15, 59, 58, tzinfo=<DstTzInfo 'US/Eastern' EDT-1 day, 20:00:00 DST>),
datetime.datetime(2024, 5, 16, 15, 59, 59, tzinfo=<DstTzInfo 'US/Eastern' EDT-1 day, 20:00:00 DST>)],
dtype=object),
'xaxis': 'x',
'yaxis': 'y'},
{'marker': {'color': array(['#26a69a', '#ee534f', '#26a69a', ..., '#ee534f', '#26a69a', '#ee534f'],
dtype=object),
'line': {'width': 0}},
'name': 'Volume',
'opacity': 0.5,
'type': 'bar',
'uid': '5015d1bc-4c51-4185-aad0-8829974921aa',
'x': array([datetime.datetime(2024, 5, 15, 9, 30, tzinfo=<DstTzInfo 'US/Eastern' EDT-1 day, 20:00:00 DST>),
datetime.datetime(2024, 5, 15, 9, 30, 1, tzinfo=<DstTzInfo 'US/Eastern' EDT-1 day, 20:00:00 DST>),
datetime.datetime(2024, 5, 15, 9, 30, 2, tzinfo=<DstTzInfo 'US/Eastern' EDT-1 day, 20:00:00 DST>),
...,
datetime.datetime(2024, 5, 16, 15, 59, 57, tzinfo=<DstTzInfo 'US/Eastern' EDT-1 day, 20:00:00 DST>),
datetime.datetime(2024, 5, 16, 15, 59, 58, tzinfo=<DstTzInfo 'US/Eastern' EDT-1 day, 20:00:00 DST>),
datetime.datetime(2024, 5, 16, 15, 59, 59, tzinfo=<DstTzInfo 'US/Eastern' EDT-1 day, 20:00:00 DST>)],
dtype=object),
'xaxis': 'x2',
'y': array([163507., 14254., 2937., ..., 11066., 28015., 25043.]),
'yaxis': 'y2'}],
'layout': {'height': 350,
'legend': {'orientation': 'h',
'traceorder': 'normal',
'x': 1,
'xanchor': 'right',
'y': 1.02,
'yanchor': 'bottom'},
'margin': {'b': 30, 'l': 30, 'r': 30, 't': 30},
'showlegend': True,
'template': '...',
'width': 1280,
'xaxis': {'anchor': 'y',
'domain': [0.0, 1.0],
'matches': 'x2',
'rangeslider': {'visible': False},
'showgrid': True,
'showticklabels': False},
'xaxis2': {'anchor': 'y2', 'domain': [0.0, 1.0], 'showgrid': True},
'yaxis': {'anchor': 'x', 'domain': [0.3, 1.0], 'showgrid': True},
'yaxis2': {'anchor': 'x2', 'domain': [0.0, 0.3], 'showgrid': True}}
})
In [ ]:
# NOTE(review): imports are repeated here (v2realbot.config is already imported in the first
# cell); consider moving pickle/gzip to the top import cell.
import pickle
from v2realbot.config import ACCOUNT1_PAPER_API_KEY, ACCOUNT1_PAPER_SECRET_KEY, DATA_DIR
import gzip

# Load one raw trade response from the gzip-compressed pickle in the trade cache.
# NOTE(review): the file name appears to encode symbol and Unix start/end seconds - confirm.
file_path = f"{DATA_DIR}/tradecache/BAC-1709044200-1709067600.cache.gz"
# SECURITY: pickle.load can execute arbitrary code embedded in the file - only open cache
# files this project wrote itself.
with gzip.open(file_path, 'rb') as fp:
    tradesResponse = pickle.load(fp)
tradesResponse
In [14]:
def convert_dict_to_multiindex_df(tradesResponse):
    """Flatten a per-symbol trade response into one (symbol, timestamp)-indexed DataFrame.

    Parameters
    ----------
    tradesResponse : dict
        Maps a symbol to a sequence of trade records. Every record must provide the keys
        't', 'x', 'p', 's', 'i', 'c' and 'z'. Symbols with no records are skipped.

    Returns
    -------
    pandas.DataFrame
        Columns ``exchange, price, size, id, conditions, tape`` indexed by a
        ``(symbol, timestamp)`` MultiIndex whose timestamp level is converted to ``zoneNY``.
        An empty frame with the same columns and index names is returned when the response
        contains no trades at all.

    Raises
    ------
    KeyError
        If a non-empty record set lacks one of the required keys.
    """
    # Raw key -> readable column name; dict order also fixes the output column order.
    column_names = {
        't': 'timestamp',
        'x': 'exchange',
        'p': 'price',
        's': 'size',
        'i': 'id',
        'c': 'conditions',
        'z': 'tape',
    }

    frames = []
    for key, values in tradesResponse.items():
        raw = pd.DataFrame(values)
        if raw.empty:
            # No trades for this symbol: selecting the columns would raise KeyError.
            continue
        # Build a fresh frame at each step instead of mutating a column slice in place,
        # which avoids pandas' chained-assignment (SettingWithCopy) pitfalls.
        frame = (
            raw.loc[:, list(column_names)]
            .rename(columns=column_names)
            .assign(
                symbol=key,
                timestamp=lambda trades: pd.to_datetime(trades['timestamp']),
            )
            .set_index(['symbol', 'timestamp'])
            .tz_convert(zoneNY, level='timestamp')
        )
        frames.append(frame)

    if not frames:
        # pd.concat([]) raises ValueError; hand back an empty frame of the same shape instead.
        empty_index = pd.MultiIndex.from_arrays([[], []], names=['symbol', 'timestamp'])
        return pd.DataFrame(
            columns=['exchange', 'price', 'size', 'id', 'conditions', 'tape'],
            index=empty_index,
        )

    return pd.concat(frames)


# Convert the cached response and display the resulting frame.
df = convert_dict_to_multiindex_df(tradesResponse)
df
Out[14]:
<style scoped="">
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
| exchange | price | size | id | conditions | tape | ||
|---|---|---|---|---|---|---|---|
| symbol | timestamp | ||||||
| BAC | 2024-02-27 09:30:00.002164736-05:00 | P | 33.680 | 638 | 52983525158993 | [ , F, T] | A |
| 2024-02-27 09:30:00.128029184-05:00 | P | 33.690 | 7 | 52983525159224 | [ , I] | A | |
| 2024-02-27 09:30:00.128032256-05:00 | P | 33.690 | 7 | 52983525159225 | [ , Q] | A | |
| 2024-02-27 09:30:00.261718272-05:00 | K | 33.700 | 3 | 52983525302111 | [ , F, I] | A | |
| 2024-02-27 09:30:00.349298176-05:00 | D | 33.695 | 1 | 71675256256563 | [ , I] | A | |
| ... | ... | ... | ... | ... | ... | ... | |
| 2024-02-27 15:59:59.996081408-05:00 | T | 34.270 | 1 | 62880189999698 | [ , I] | A | |
| 2024-02-27 15:59:59.996084480-05:00 | T | 34.270 | 100 | 62880189999699 | [ ] | A | |
| 2024-02-27 15:59:59.997648384-05:00 | N | 34.270 | 400 | 52983576998465 | [ ] | A | |
| 2024-02-27 15:59:59.998087168-05:00 | T | 34.270 | 1 | 62880189999929 | [ , I] | A | |
| 2024-02-27 15:59:59.998089984-05:00 | T | 34.270 | 100 | 62880189999930 | [ ] | A |
169811 rows × 6 columns
In [6]:
# Print dtypes, non-null counts and memory usage of the trades frame.
# NOTE(review): the saved output lists the raw columns c/i/p/s/t/x/z rather than the renamed
# columns the converter cell produces, so it comes from an older run - re-execute.
df.info()
<class 'pandas.core.frame.DataFrame'> MultiIndex: 169811 entries, (0, 'BAC') to (169810, 'BAC') Data columns (total 7 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 c 169811 non-null object 1 i 169811 non-null int64 2 p 169811 non-null float64 3 s 169811 non-null int64 4 t 169811 non-null datetime64[ns, UTC] 5 x 169811 non-null object 6 z 169811 non-null object dtypes: datetime64[ns, UTC](1), float64(1), int64(2), object(3) memory usage: 9.9+ MB
In [4]:
# Print dtypes, non-null counts and memory usage of the aggregated bars.
# NOTE(review): the saved output covers 2024-03-01..2024-03-04, not the 2024-05-15/16 window
# set in the fetch cell - stale output from an earlier run.
ohlcv_df.info()
<class 'pandas.core.frame.DataFrame'> DatetimeIndex: 46344 entries, 2024-03-01 09:30:00-05:00 to 2024-03-04 15:59:59-05:00 Data columns (total 6 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 open 46344 non-null float64 1 high 46344 non-null float64 2 low 46344 non-null float64 3 close 46344 non-null float64 4 volume 46344 non-null float64 5 trades 46344 non-null float64 dtypes: float64(6) memory usage: 2.5 MB
In [6]:
# Same bar summary as the previous cell (identical code and identical saved output).
# NOTE(review): redundant - one of the two cells can be removed.
ohlcv_df.info()
<class 'pandas.core.frame.DataFrame'> DatetimeIndex: 46344 entries, 2024-03-01 09:30:00-05:00 to 2024-03-04 15:59:59-05:00 Data columns (total 6 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 open 46344 non-null float64 1 high 46344 non-null float64 2 low 46344 non-null float64 3 close 46344 non-null float64 4 volume 46344 non-null float64 5 trades 46344 non-null float64 dtypes: float64(6) memory usage: 2.5 MB
In [3]:
# Re-aggregate the same trades with type="dollar" and resolution=1000, overwriting the
# time bars previously stored in `ohlcv_df`.
# NOTE(review): the fetch cell passes the enum (BarType.TIME) while this call passes a plain
# string - confirm aggregate_trades accepts both, or switch to the matching BarType member.
ohlcv_df = aggregate_trades(symbol=symbol, trades_df=df, resolution=1000, type="dollar")
In [5]:
# List the distinct "date hour" labels present in the bar index - a quick check of which
# days and hours the bars cover.
ohlcv_df.index.strftime('%Y-%m-%d %H').unique()
Out[5]:
Index(['2024-03-01 09', '2024-03-01 10', '2024-03-01 11', '2024-03-01 12',
'2024-03-01 13', '2024-03-01 14', '2024-03-01 15', '2024-03-04 09',
'2024-03-04 10', '2024-03-04 11', '2024-03-04 12', '2024-03-04 13',
'2024-03-04 14', '2024-03-04 15'],
dtype='object', name='time')
In [5]:
#ohlcv_df.groupby(ohlcv_df.index.date).size() ohlcv_df.head(100)
Out[5]:
<style scoped="">
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
| open | high | low | close | volume | trades | |
|---|---|---|---|---|---|---|
| time | ||||||
| 2024-03-01 09:34:00.145446062-05:00 | 34.555 | 34.555 | 34.555 | 34.555 | 28.939372 | 1.0 |
| 2024-03-01 09:34:00.145447016-05:00 | 34.555 | 34.555 | 34.555 | 34.555 | 28.939372 | 1.0 |
| 2024-03-01 09:34:00.145447016-05:00 | 34.555 | 34.555 | 34.555 | 34.555 | 28.939372 | 1.0 |
| 2024-03-01 09:34:00.145447016-05:00 | 34.555 | 34.555 | 34.555 | 34.555 | 28.939372 | 1.0 |
| 2024-03-01 09:34:00.145447016-05:00 | 34.555 | 34.555 | 34.555 | 34.555 | 28.939372 | 1.0 |
| ... | ... | ... | ... | ... | ... | ... |
| 2024-03-01 09:34:05.011623859-05:00 | 34.560 | 34.560 | 34.560 | 34.560 | 28.935185 | 1.0 |
| 2024-03-01 09:34:05.011623859-05:00 | 34.560 | 34.560 | 34.560 | 34.560 | 28.935185 | 1.0 |
| 2024-03-01 09:34:05.011623859-05:00 | 34.560 | 34.560 | 34.560 | 34.560 | 28.935185 | 1.0 |
| 2024-03-01 09:34:05.011623859-05:00 | 34.560 | 34.560 | 34.560 | 34.560 | 28.935185 | 1.0 |
| 2024-03-01 09:34:05.011623859-05:00 | 34.560 | 34.560 | 34.560 | 34.560 | 28.935185 | 2.0 |
100 rows × 6 columns
In [6]:
# Show the trades frame again for comparison with the bars.
# NOTE(review): in the saved output `size` is float and timestamps have microsecond precision,
# unlike the earlier trade dumps - the outputs in this notebook come from different runs.
df
Out[6]:
<style scoped="">
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
| exchange | price | size | id | conditions | tape | ||
|---|---|---|---|---|---|---|---|
| symbol | timestamp | ||||||
| BAC | 2024-03-01 09:34:00.145446-05:00 | D | 34.5550 | 500.0 | 71675373899865 | [ ] | A |
| 2024-03-01 09:34:00.864348-05:00 | D | 34.5563 | 157.0 | 71675373958977 | [ ] | A | |
| 2024-03-01 09:34:00.960608-05:00 | D | 34.5500 | 100.0 | 71675373961523 | [ ] | A | |
| 2024-03-01 09:34:01.584619-05:00 | D | 34.5550 | 100.0 | 71675373965623 | [ ] | A | |
| 2024-03-01 09:34:01.793712-05:00 | D | 34.5550 | 108.0 | 71675373966644 | [ ] | A | |
| ... | ... | ... | ... | ... | ... | ... | |
| 2024-03-04 15:54:59.940080-05:00 | Y | 35.1600 | 63.0 | 52983525230401 | [ , I] | A | |
| 2024-03-04 15:54:59.940107-05:00 | Z | 35.1550 | 65.0 | 52983526682176 | [ , I] | A | |
| 2024-03-04 15:54:59.940110-05:00 | Z | 35.1550 | 200.0 | 52983526682177 | [ ] | A | |
| 2024-03-04 15:54:59.940113-05:00 | Z | 35.1600 | 100.0 | 52983526682179 | [ ] | A | |
| 2024-03-04 15:54:59.940615-05:00 | V | 35.1500 | 96.0 | 56471089803589 | [ , I] | A |
168972 rows × 6 columns
In [ ]:
#access just BCA df_filtered = df.loc["BAC"] df_filtered.info()
In [ ]:
# Build the (timestamp, price, size) tick array.
# The index is flattened into a temporary instead of reassigning `df_filtered`, so the cell can
# be re-run (or run before/after the next cell) without stacking extra index columns on it.
ticks_source = df_filtered if 'timestamp' in df_filtered.columns else df_filtered.reset_index()
ticks = ticks_source[['timestamp', 'price', 'size']].to_numpy()

# First column holds the trade timestamps.
timestamps = ticks[:, 0]

# Display the array; as the last expression it is actually rendered.
ticks
In [ ]:
# Build the (timestamp, price, size) tick array without mutating `df_filtered`, so re-running
# this cell never accumulates extra index columns.
ticks_source = df_filtered if 'timestamp' in df_filtered.columns else df_filtered.reset_index()
ticks = ticks_source[['timestamp', 'price', 'size']].to_numpy()

# Extract the timestamps column (first column of the array).
timestamps = ticks[:, 0]

# Convert each timestamp to Unix seconds as a float, keeping sub-second precision.
unix_timestamps_s = np.array([ts.timestamp() for ts in timestamps], dtype='float64')

# Replace the original timestamps with the numeric ones. `ticks` is a fresh array produced by
# to_numpy() above, so this does not touch the DataFrame.
ticks[:, 0] = unix_timestamps_s

# Whole-second alternative (drops the fractional part), kept for reference:
#ticks[:, 0] = pd.to_datetime(ticks[:, 0]).astype('int64') // 1_000_000_000
ticks
In [ ]:
# Cast the whole tick array to float64 now that every column is numeric.
# NOTE(review): presumably required because the bar generator is numba-compiled and needs a
# homogeneous float array - confirm against generate_time_bars_nb.
ticks = ticks.astype(np.float64)
ticks
In [ ]:
resolution = 1 # Example resolution of 60 seconds ohlcv_bars = generate_time_bars_nb(ticks, resolution)
In [ ]:
# Inspect the raw bar array returned by generate_time_bars_nb.
ohlcv_bars
In [ ]:
# Turn the raw bar array back into a time-indexed DataFrame.
columns = ['time', 'open', 'high', 'low', 'close', 'volume', 'trades']
ohlcv_df = (
    pd.DataFrame(ohlcv_bars, columns=columns)
    # 'time' holds Unix seconds; parse it into naive datetimes first ...
    .assign(time=lambda bars: pd.to_datetime(bars['time'], unit='s'))
    .set_index('time')
    # ... then mark the index as UTC and convert it to New York time.
    .tz_localize('UTC')
    .tz_convert(zoneNY)
)
#ohlcv_df = ohlcv_df.loc["2024-03-1 15:50:00":"2024-03-28 13:40:00"]
#ohlcv_df.index.strftime('%Y-%m-%d %H').unique()
ohlcv_df
In [ ]: