{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Loading trades and vectorized aggregation\n",
    "\n",
    "Fetches trades for a symbol and aggregates them into bars with `aggregate_trades`, then loads a cached trade file and rebuilds time bars by hand with `generate_time_bars_nb`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from datetime import datetime\n",
    "import gzip\n",
    "import pickle\n",
    "import time\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import pyarrow\n",
    "from numba import jit\n",
    "from alpaca.data.historical import StockHistoricalDataClient\n",
    "from alpaca.data.requests import StockTradesRequest\n",
    "import vectorbtpro as vbt\n",
    "\n",
    "from v2realbot.config import ACCOUNT1_PAPER_API_KEY, ACCOUNT1_PAPER_SECRET_KEY, DATA_DIR\n",
    "from v2realbot.enums.enums import BarType\n",
    "from v2realbot.loader.aggregator_vectorized import fetch_daily_stock_trades, fetch_trades_parallel, generate_time_bars_nb, aggregate_trades\n",
    "from v2realbot.utils.utils import parse_alpaca_timestamp, ltp, zoneNY, send_to_telegram, fetch_calendar_data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Plotting and display configuration\n",
    "vbt.settings.set_theme(\"dark\")\n",
    "vbt.settings['plotting']['layout']['width'] = 1280\n",
    "vbt.settings.plotting.auto_rangebreaks = True\n",
    "# Set the option to display with pagination\n",
    "pd.set_option('display.notebook_repr_html', True)\n",
    "pd.set_option('display.max_rows', 10) # Number of rows per page"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Fetch trades and aggregate time bars"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "symbol = \"SPY\"\n",
    "# Fetch window, expressed in zoneNY\n",
    "day_start = zoneNY.localize(datetime(2024, 5, 15, 9, 30, 0))\n",
    "day_stop = zoneNY.localize(datetime(2024, 5, 16, 16, 0, 0))\n",
    "# TODO: could this be sped up? The 'Searching cache' step is slow to show up - is there a bottleneck?\n",
    "trades_df = fetch_trades_parallel(symbol, day_start, day_stop, minsize=50) #exclude_conditions=['C','O','4','B','7','V','P','W','U','Z','F'])\n",
    "ohlcv_df = aggregate_trades(symbol=symbol, trades_df=trades_df, resolution=1, type=BarType.TIME)\n",
    "ohlcv_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "trades_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ohlcv_df.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "basic_data = vbt.Data.from_data(vbt.symbol_dict({symbol: ohlcv_df}), tz_convert=zoneNY)\n",
    "basic_data.ohlcv.plot()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load a cached trade file"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "cache_symbol = \"BAC\"\n",
    "file_path = f\"{DATA_DIR}/tradecache/{cache_symbol}-1709044200-1709067600.cache.gz\"\n",
    "\n",
    "# WARNING: pickle.load can execute arbitrary code - only open cache files written by this project.\n",
    "with gzip.open(file_path, 'rb') as fp:\n",
    "    tradesResponse = pickle.load(fp)\n",
    "\n",
    "tradesResponse"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def convert_dict_to_multiindex_df(tradesResponse):\n",
    "    \"\"\"Flatten a per-symbol trades mapping into one (symbol, timestamp)-indexed DataFrame.\n",
    "\n",
    "    Args:\n",
    "        tradesResponse: mapping of symbol -> trade records; every record must provide\n",
    "            the keys 't', 'x', 'p', 's', 'i', 'c' and 'z'.\n",
    "\n",
    "    Returns:\n",
    "        DataFrame with columns exchange, price, size, id, conditions, tape and a\n",
    "        MultiIndex (symbol, timestamp) whose timestamp level is converted to zoneNY.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: if tradesResponse is empty (pd.concat has nothing to concatenate).\n",
    "    \"\"\"\n",
    "    # Short response keys -> descriptive column names; dict order fixes the column order\n",
    "    column_names = {'t': 'timestamp', 'x': 'exchange', 'p': 'price', 's': 'size', 'i': 'id', 'c': 'conditions', 'z': 'tape'}\n",
    "\n",
    "    dfs = []\n",
    "    for key, values in tradesResponse.items():\n",
    "        # Chained calls return new frames, so nothing is mutated in place on a slice\n",
    "        symbol_df = (\n",
    "            pd.DataFrame(values)[list(column_names)]\n",
    "            .rename(columns=column_names)\n",
    "            .assign(symbol=key)\n",
    "        )\n",
    "        # Parse timestamps before they become an index level\n",
    "        symbol_df['timestamp'] = pd.to_datetime(symbol_df['timestamp'])\n",
    "        symbol_df = symbol_df.set_index(['symbol', 'timestamp']).tz_convert(zoneNY, level='timestamp')\n",
    "        dfs.append(symbol_df)\n",
    "\n",
    "    # Concatenate all per-symbol frames into a single MultiIndex DataFrame\n",
    "    return pd.concat(dfs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "cached_trades_df = convert_dict_to_multiindex_df(tradesResponse)\n",
    "cached_trades_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "cached_trades_df.info()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Dollar bars from the cached trades"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The symbol passed in must match the frame being aggregated (the cached trades here).\n",
    "# NOTE(review): BarType.TIME is used for the time bars while a plain string is passed here - confirm aggregate_trades accepts both forms.\n",
    "dollar_bars_df = aggregate_trades(symbol=cache_symbol, trades_df=cached_trades_df, resolution=1000, type=\"dollar\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dollar_bars_df.index.strftime('%Y-%m-%d %H').unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dollar_bars_df.head(100)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Manual time bars with `generate_time_bars_nb`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Access just the cached symbol; selecting on the first index level leaves a timestamp index\n",
    "single_symbol_df = cached_trades_df.loc[cache_symbol]\n",
    "\n",
    "single_symbol_df.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build a fresh frame instead of reassigning single_symbol_df, so re-running this cell is safe\n",
    "ticks_df = single_symbol_df.reset_index()\n",
    "ticks_obj = ticks_df[['timestamp', 'price', 'size']].to_numpy()\n",
    "\n",
    "# Replace the Timestamp objects in the first column with Unix seconds (float, sub-second part kept)\n",
    "ticks_obj[:, 0] = np.array([ts.timestamp() for ts in ticks_obj[:, 0]], dtype='float64')\n",
    "\n",
    "# Cast to a homogeneous float64 array before handing it to generate_time_bars_nb\n",
    "ticks = ticks_obj.astype(np.float64)\n",
    "ticks"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Bar resolution passed to generate_time_bars_nb\n",
    "# NOTE(review): presumably seconds, since the tick timestamps are Unix seconds - confirm against the aggregator\n",
    "resolution = 1\n",
    "ohlcv_bars = generate_time_bars_nb(ticks, resolution)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ohlcv_bars"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Convert the resulting array back to a DataFrame indexed by bar time in zoneNY\n",
    "bar_columns = ['time', 'open', 'high', 'low', 'close', 'volume', 'trades']\n",
    "manual_ohlcv_df = (\n",
    "    pd.DataFrame(ohlcv_bars, columns=bar_columns)\n",
    "    .assign(time=lambda bars: pd.to_datetime(bars['time'], unit='s', utc=True))\n",
    "    .set_index('time')\n",
    "    .tz_convert(zoneNY)\n",
    ")\n",
    "\n",
    "manual_ohlcv_df"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}