317 lines
8.5 KiB
Plaintext
317 lines
8.5 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Loading trades and vectorized aggregation"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import pandas as pd\n",
|
|
"import numpy as np\n",
|
|
"from numba import jit\n",
|
|
"from alpaca.data.historical import StockHistoricalDataClient\n",
|
|
"from v2realbot.config import ACCOUNT1_PAPER_API_KEY, ACCOUNT1_PAPER_SECRET_KEY, DATA_DIR\n",
|
|
"from alpaca.data.requests import StockTradesRequest\n",
|
|
"from v2realbot.enums.enums import BarType\n",
|
|
"import time\n",
|
|
"\n",
|
|
"from datetime import datetime\n",
|
|
"from v2realbot.utils.utils import parse_alpaca_timestamp, ltp, zoneNY, send_to_telegram, fetch_calendar_data\n",
|
|
"import pyarrow\n",
|
|
"from v2realbot.loader.aggregator_vectorized import fetch_daily_stock_trades, fetch_trades_parallel, generate_time_bars_nb, aggregate_trades\n",
|
|
"import vectorbtpro as vbt\n",
|
|
"\n",
|
|
"vbt.settings.set_theme(\"dark\")\n",
|
|
"vbt.settings['plotting']['layout']['width'] = 1280\n",
|
|
"vbt.settings.plotting.auto_rangebreaks = True\n",
|
|
"# Set the option to display with pagination\n",
|
|
"pd.set_option('display.notebook_repr_html', True)\n",
|
|
"pd.set_option('display.max_rows', 10) # Number of rows per page"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"symbol = \"SPY\"\n",
|
|
"#datetime in zoneNY \n",
|
|
"day_start = datetime(2024, 5, 15, 9, 30, 0)\n",
|
|
"day_stop = datetime(2024, 5, 16, 16, 00, 0)\n",
|
|
"day_start = zoneNY.localize(day_start)\n",
|
|
"day_stop = zoneNY.localize(day_stop)\n",
|
|
"# TODO: could this be sped up? 'Searching cache' shows up slowly - is there a bottleneck?\n",
|
|
"df = fetch_trades_parallel(symbol, day_start, day_stop, minsize=50) #exclude_conditions=['C','O','4','B','7','V','P','W','U','Z','F'])\n",
|
|
"ohlcv_df = aggregate_trades(symbol=symbol, trades_df=df, resolution=1, type=BarType.TIME)\n",
|
|
"#df.info()\n",
|
|
"ohlcv_df\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"df"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"basic_data = vbt.Data.from_data(vbt.symbol_dict({symbol: ohlcv_df}), tz_convert=zoneNY)\n",
|
|
"vbt.settings['plotting']['auto_rangebreaks'] = True\n",
|
|
"basic_data.ohlcv.plot()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import pickle\n",
|
|
"from v2realbot.config import ACCOUNT1_PAPER_API_KEY, ACCOUNT1_PAPER_SECRET_KEY, DATA_DIR\n",
|
|
"import gzip\n",
|
|
"\n",
|
|
"file_path = f\"{DATA_DIR}/tradecache/BAC-1709044200-1709067600.cache.gz\"\n",
|
|
"\n",
|
|
"with gzip.open(file_path, 'rb') as fp:\n",
|
|
" tradesResponse = pickle.load(fp)\n",
|
|
"\n",
|
|
"tradesResponse"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def convert_dict_to_multiindex_df(tradesResponse):\n",
|
|
"    \"\"\"Convert a {symbol: raw trade records} mapping into one MultiIndex DataFrame.\n",
|
|
"\n",
|
|
"    Each record must provide the raw keys 't', 'x', 'p', 's', 'i', 'c' and 'z'.\n",
|
|
"    Returns a frame indexed by ('symbol', 'timestamp') with the timestamp level\n",
|
|
"    converted to zoneNY and the columns exchange, price, size, id, conditions, tape.\n",
|
|
"    \"\"\"\n",
|
|
"    # Raw key -> column name; the dict order also fixes the resulting column order\n",
|
|
"    column_names = {'t': 'timestamp', 'x': 'exchange', 'p': 'price', 's': 'size', 'i': 'id', 'c': 'conditions', 'z': 'tape'}\n",
|
|
"    dfs = []\n",
|
|
"    for key, values in tradesResponse.items():\n",
|
|
"        # Non-mutating chain: no inplace edits on a column-subset slice (avoids SettingWithCopyWarning)\n",
|
|
"        trades = (\n",
|
|
"            pd.DataFrame(values)[list(column_names)]\n",
|
|
"            .rename(columns=column_names)\n",
|
|
"            .assign(symbol=key, timestamp=lambda frame: pd.to_datetime(frame['timestamp']))\n",
|
|
"            .set_index(['symbol', 'timestamp'])\n",
|
|
"            .tz_convert(zoneNY, level='timestamp')\n",
|
|
"        )\n",
|
|
"        dfs.append(trades)\n",
|
|
"\n",
|
|
"    # Stack the per-symbol frames into a single DataFrame\n",
|
|
"    return pd.concat(dfs)\n",
|
|
"\n",
|
|
"# Convert and print the DataFrame\n",
|
|
"df = convert_dict_to_multiindex_df(tradesResponse)\n",
|
|
"df\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"df.info()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"ohlcv_df.info()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"ohlcv_df.info()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"ohlcv_df = aggregate_trades(symbol=symbol, trades_df=df, resolution=1000, type=\"dollar\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"ohlcv_df.index.strftime('%Y-%m-%d %H').unique()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"#ohlcv_df.groupby(ohlcv_df.index.date).size()\n",
|
|
"ohlcv_df.head(100)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"df"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"#access just BAC\n",
|
|
"df_filtered = df.loc[\"BAC\"]\n",
|
|
"\n",
|
|
"df_filtered.info()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Flatten into a separate frame so re-running the cell does not keep resetting df_filtered\n",
|
|
"trades_flat = df_filtered.reset_index()\n",
|
|
"ticks = trades_flat[['timestamp', 'price', 'size']].to_numpy()\n",
|
|
"timestamps = ticks[:, 0]\n",
|
|
"ticks"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Flatten into a separate frame so re-running the cell does not keep resetting df_filtered\n",
|
|
"trades_flat = df_filtered.reset_index()\n",
|
|
"ticks = trades_flat[['timestamp', 'price', 'size']].to_numpy()\n",
|
|
"\n",
|
|
"# Column 0 is 'timestamp' (first in the selection above)\n",
|
|
"timestamps = ticks[:, 0]\n",
|
|
"\n",
|
|
"# Convert each timestamp to Unix epoch seconds as float64 (fractional seconds are kept)\n",
|
|
"unix_timestamps_s = np.array([ts.timestamp() for ts in timestamps], dtype='float64')\n",
|
|
"\n",
|
|
"# Overwrite the timestamp column with the numeric epoch values\n",
|
|
"ticks[:, 0] = unix_timestamps_s\n",
|
|
"\n",
|
|
"#ticks[:, 0] = pd.to_datetime(ticks[:, 0]).astype('int64') // 1_000_000_000 # Convert to Unix timestamp\n",
|
|
"ticks\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"ticks = ticks.astype(np.float64)\n",
|
|
"ticks"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"\n",
|
|
"resolution = 1 # Example resolution of 1 second\n",
|
|
"ohlcv_bars = generate_time_bars_nb(ticks, resolution)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"ohlcv_bars"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Convert the resulting array back to a DataFrame\n",
|
|
"columns = ['time', 'open', 'high', 'low', 'close', 'volume', 'trades']\n",
|
|
"ohlcv_df = pd.DataFrame(ohlcv_bars, columns=columns)\n",
|
|
"ohlcv_df['time'] = pd.to_datetime(ohlcv_df['time'], unit='s')\n",
|
|
"ohlcv_df.set_index('time', inplace=True)\n",
|
|
"ohlcv_df.index = ohlcv_df.index.tz_localize('UTC').tz_convert(zoneNY)\n",
|
|
"#ohlcv_df = ohlcv_df.loc[\"2024-03-1 15:50:00\":\"2024-03-28 13:40:00\"]\n",
|
|
"#ohlcv_df.index.strftime('%Y-%m-%d %H').unique()\n",
|
|
"\n",
|
|
"ohlcv_df"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": ".venv",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.10.11"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 2
|
|
}
|