Files
v2realbot/research/prepare_aggregatied_data.ipynb
2024-06-13 11:02:41 +02:00

1603 lines
57 KiB
Plaintext
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Loading trades and vectorized aggregation\n",
"This notebook fetches the trades from remote or local cache and aggregates them to bars of given type (time, volume, dollar) and resolution\n",
"\n",
"`fetch_trades_parallel` enables to fetch trades of given symbol and interval, also can filter conditions and minimum size. return `trades_df`\n",
"`aggregate_trades` acceptss `trades_df` and ressolution and type of bars (VOLUME, TIME, DOLLAR) and return aggregated ohlcv dataframe `ohlcv_df`"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"/Users/davidbrazda/Library/Application Support/v2realbot/notebooks/trades_df-BAC-2023-01-01T09_30_00-2024-05-25T15_30_00-47BCFOPUVWZ-100.parquet\n",
"/Users/davidbrazda/Library/Application Support/v2realbot/notebooks/ohlcv_df-BAC-2023-01-01T09_30_00-2024-05-25T15_30_00-47BCFOPUVWZ-100.parquet\n",
"trades_df-BAC-2024-01-01T09_30_00-2024-05-14T16_00_00-CO4B7VPWUZF-100.parquet\n",
"trades_df-BAC-2024-01-11T09:30:00-2024-01-12T16:00:00.parquet\n",
"trades_df-SPY-2024-01-01T09:30:00-2024-05-14T16:00:00.parquet\n",
"trades_df-BAC-2023-01-01T09_30_00-2024-05-25T16_00_00-47BCFOPUVWZ-100.parquet\n",
"ohlcv_df-BAC-2024-01-11T09:30:00-2024-01-12T16:00:00.parquet\n",
"trades_df-BAC-2024-05-15T09_30_00-2024-05-25T16_00_00-47BCFOPUVWZ-100.parquet\n",
"ohlcv_df-BAC-2024-01-01T09_30_00-2024-05-25T16_00_00-47BCFOPUVWZ-100.parquet\n",
"ohlcv_df-SPY-2024-01-01T09:30:00-2024-05-14T16:00:00.parquet\n",
"ohlcv_df-BAC-2024-01-01T09_30_00-2024-05-14T16_00_00-CO4B7VPWUZF-100.parquet\n",
"ohlcv_df-BAC-2023-01-01T09_30_00-2024-05-25T16_00_00-47BCFOPUVWZ-100.parquet\n",
"ohlcv_df-BAC-2023-01-01T09_30_00-2024-05-25T15_30_00-47BCFOPUVWZ-100.parquet\n"
]
}
],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"from numba import jit\n",
"from alpaca.data.historical import StockHistoricalDataClient\n",
"from v2realbot.config import ACCOUNT1_PAPER_API_KEY, ACCOUNT1_PAPER_SECRET_KEY, DATA_DIR\n",
"from alpaca.data.requests import StockTradesRequest\n",
"from v2realbot.enums.enums import BarType\n",
"import time\n",
"from datetime import datetime\n",
"from v2realbot.utils.utils import parse_alpaca_timestamp, ltp, zoneNY, send_to_telegram, fetch_calendar_data\n",
"import pyarrow\n",
"from v2realbot.loader.aggregator_vectorized import fetch_daily_stock_trades, fetch_trades_parallel, generate_time_bars_nb, aggregate_trades\n",
"import vectorbtpro as vbt\n",
"import v2realbot.utils.config_handler as cfh\n",
"\n",
"vbt.settings.set_theme(\"dark\")\n",
"vbt.settings['plotting']['layout']['width'] = 1280\n",
"vbt.settings.plotting.auto_rangebreaks = True\n",
"# Set the option to display with pagination\n",
"pd.set_option('display.notebook_repr_html', True)\n",
"pd.set_option('display.max_rows', 20) # Number of rows per page\n",
"# pd.set_option('display.float_format', '{:.9f}'.format)\n",
"\n",
"\n",
"#trade filtering\n",
"exclude_conditions = cfh.config_handler.get_val('AGG_EXCLUDED_TRADES') #standard ['C','O','4','B','7','V','P','W','U','Z','F']\n",
"minsize = 100\n",
"\n",
"symbol = \"BAC\"\n",
"#datetime in zoneNY \n",
"day_start = datetime(2023, 1, 1, 9, 30, 0)\n",
"day_stop = datetime(2024, 5, 25, 15, 30, 0)\n",
"day_start = zoneNY.localize(day_start)\n",
"day_stop = zoneNY.localize(day_stop)\n",
"#filename of trades_df parquet, date are in isoformat but without time zone part\n",
"dir = DATA_DIR + \"/notebooks/\"\n",
"#parquet interval cache contains exclude conditions and minsize filtering\n",
"file_trades = dir + f\"trades_df-{symbol}-{day_start.strftime('%Y-%m-%dT%H_%M_%S')}-{day_stop.strftime('%Y-%m-%dT%H_%M_%S')}-{''.join(exclude_conditions)}-{minsize}.parquet\"\n",
"#file_trades = dir + f\"trades_df-{symbol}-{day_start.strftime('%Y-%m-%dT%H:%M:%S')}-{day_stop.strftime('%Y-%m-%dT%H:%M:%S')}.parquet\"\n",
"file_ohlcv = dir + f\"ohlcv_df-{symbol}-{day_start.strftime('%Y-%m-%dT%H_%M_%S')}-{day_stop.strftime('%Y-%m-%dT%H_%M_%S')}-{''.join(exclude_conditions)}-{minsize}.parquet\"\n",
"print(file_trades)\n",
"print(file_ohlcv)\n",
"#PRINT all parquet in directory\n",
"import os\n",
"files = [f for f in os.listdir(dir) if f.endswith(\".parquet\")]\n",
"for f in files:\n",
" print(f)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"#Either load trades or ohlcv from parquet if exists\n",
"#trades_df = fetch_trades_parallel(symbol, day_start, day_stop, exclude_conditions=exclude_conditions, minsize=minsize, max_workers=30) #exclude_conditions=['C','O','4','B','7','V','P','W','U','Z','F'])\n",
"#trades_df.to_parquet(file_trades, engine='pyarrow', compression='gzip')\n",
"#trades_df.to_parquet(file_trades, engine='pyarrow', compression='gzip')\n",
"#filenames = [dir+\"trades_df-BAC-2024-01-01T09_30_00-2024-05-14T16_00_00-CO4B7VPWUZF-100.parquet\",dir+\"trades_df-BAC-2024-05-15T09_30_00-2024-05-25T16_00_00-47BCFOPUVWZ-100.parquet\"]\n",
"trades_df = pd.read_parquet(dir+\"trades_df-BAC-2023-01-01T09_30_00-2024-05-25T16_00_00-47BCFOPUVWZ-100.parquet\",engine='pyarrow')\n",
"#focused = trades_df.loc[\"2024-02-16 11:23:11\":\"2024-02-16 11:24:26\"]\n",
"#focused\n",
"ohlcv_df = aggregate_trades(symbol=symbol, trades_df=trades_df, resolution=1, type=BarType.TIME)\n",
"ohlcv_df.to_parquet(file_ohlcv, engine='pyarrow', compression='gzip')\n",
"\n",
"#ohlcv_df = pd.read_parquet(file_ohlcv,engine='pyarrow')\n",
"# trades_df = pd.read_parquet(file_trades,engine='pyarrow')\n"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"DatetimeIndex: 4549772 entries, 2023-01-03 09:30:01-05:00 to 2024-05-24 15:59:59-04:00\n",
"Data columns (total 10 columns):\n",
" # Column Dtype \n",
"--- ------ ----- \n",
" 0 open float64 \n",
" 1 high float64 \n",
" 2 low float64 \n",
" 3 close float64 \n",
" 4 volume float64 \n",
" 5 trades float64 \n",
" 6 updated datetime64[ns, US/Eastern]\n",
" 7 vwap float64 \n",
" 8 buyvolume float64 \n",
" 9 sellvolume float64 \n",
"dtypes: datetime64[ns, US/Eastern](1), float64(9)\n",
"memory usage: 381.8 MB\n"
]
}
],
"source": [
"ohlcv_df.info()\n"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>exchange</th>\n",
" <th>price</th>\n",
" <th>size</th>\n",
" <th>id</th>\n",
" <th>conditions</th>\n",
" <th>tape</th>\n",
" </tr>\n",
" <tr>\n",
" <th>symbol</th>\n",
" <th>timestamp</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th rowspan=\"11\" valign=\"top\">BAC</th>\n",
" <th>2024-02-16 09:30:00.708657-05:00</th>\n",
" <td>P</td>\n",
" <td>33.92</td>\n",
" <td>200</td>\n",
" <td>52983525230899</td>\n",
" <td>[ ]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-02-16 09:30:00.708657-05:00</th>\n",
" <td>P</td>\n",
" <td>33.92</td>\n",
" <td>200</td>\n",
" <td>52983525230900</td>\n",
" <td>[ , Q]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-02-16 09:30:00.708660-05:00</th>\n",
" <td>P</td>\n",
" <td>33.92</td>\n",
" <td>200</td>\n",
" <td>52983525230901</td>\n",
" <td>[ ]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-02-16 09:30:00.708687-05:00</th>\n",
" <td>P</td>\n",
" <td>33.92</td>\n",
" <td>200</td>\n",
" <td>52983525230903</td>\n",
" <td>[ ]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-02-16 09:30:00.708744-05:00</th>\n",
" <td>P</td>\n",
" <td>33.92</td>\n",
" <td>100</td>\n",
" <td>52983525230904</td>\n",
" <td>[ ]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-02-16 09:32:10.624595-05:00</th>\n",
" <td>P</td>\n",
" <td>33.86</td>\n",
" <td>400</td>\n",
" <td>52983525421904</td>\n",
" <td>[ ]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-02-16 09:32:10.625001-05:00</th>\n",
" <td>K</td>\n",
" <td>33.86</td>\n",
" <td>400</td>\n",
" <td>52983525100399</td>\n",
" <td>[ ]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-02-16 09:32:10.625151-05:00</th>\n",
" <td>T</td>\n",
" <td>33.86</td>\n",
" <td>100</td>\n",
" <td>62879199619393</td>\n",
" <td>[ ]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-02-16 09:32:10.625157-05:00</th>\n",
" <td>T</td>\n",
" <td>33.86</td>\n",
" <td>150</td>\n",
" <td>62879199619395</td>\n",
" <td>[ ]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-02-16 09:32:10.625157-05:00</th>\n",
" <td>T</td>\n",
" <td>33.86</td>\n",
" <td>400</td>\n",
" <td>62879199619396</td>\n",
" <td>[ ]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>839 rows × 6 columns</p>\n",
"</div>"
],
"text/plain": [
" exchange price size id \\\n",
"symbol timestamp \n",
"BAC 2024-02-16 09:30:00.708657-05:00 P 33.92 200 52983525230899 \n",
" 2024-02-16 09:30:00.708657-05:00 P 33.92 200 52983525230900 \n",
" 2024-02-16 09:30:00.708660-05:00 P 33.92 200 52983525230901 \n",
" 2024-02-16 09:30:00.708687-05:00 P 33.92 200 52983525230903 \n",
" 2024-02-16 09:30:00.708744-05:00 P 33.92 100 52983525230904 \n",
"... ... ... ... ... \n",
" 2024-02-16 09:32:10.624595-05:00 P 33.86 400 52983525421904 \n",
" 2024-02-16 09:32:10.625001-05:00 K 33.86 400 52983525100399 \n",
" 2024-02-16 09:32:10.625151-05:00 T 33.86 100 62879199619393 \n",
" 2024-02-16 09:32:10.625157-05:00 T 33.86 150 62879199619395 \n",
" 2024-02-16 09:32:10.625157-05:00 T 33.86 400 62879199619396 \n",
"\n",
" conditions tape \n",
"symbol timestamp \n",
"BAC 2024-02-16 09:30:00.708657-05:00 [ ] A \n",
" 2024-02-16 09:30:00.708657-05:00 [ , Q] A \n",
" 2024-02-16 09:30:00.708660-05:00 [ ] A \n",
" 2024-02-16 09:30:00.708687-05:00 [ ] A \n",
" 2024-02-16 09:30:00.708744-05:00 [ ] A \n",
"... ... ... \n",
" 2024-02-16 09:32:10.624595-05:00 [ ] A \n",
" 2024-02-16 09:32:10.625001-05:00 [ ] A \n",
" 2024-02-16 09:32:10.625151-05:00 [ ] A \n",
" 2024-02-16 09:32:10.625157-05:00 [ ] A \n",
" 2024-02-16 09:32:10.625157-05:00 [ ] A \n",
"\n",
"[839 rows x 6 columns]"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a = trades_df.loc[(\"BAC\", \"2024-02-16 09:30\"):(\"BAC\",\"2024-02-16 09:32:11\")]\n",
"a"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>open</th>\n",
" <th>high</th>\n",
" <th>low</th>\n",
" <th>close</th>\n",
" <th>volume</th>\n",
" <th>trades</th>\n",
" <th>updated</th>\n",
" <th>vwap</th>\n",
" <th>buyvolume</th>\n",
" <th>sellvolume</th>\n",
" </tr>\n",
" <tr>\n",
" <th>time</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2024-02-15 15:58:00-05:00</th>\n",
" <td>34.0450</td>\n",
" <td>34.0499</td>\n",
" <td>34.0400</td>\n",
" <td>34.045</td>\n",
" <td>7032.0</td>\n",
" <td>19.0</td>\n",
" <td>2024-02-15 15:58:01.013622-05:00</td>\n",
" <td>34.045109</td>\n",
" <td>1976.0</td>\n",
" <td>1663.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-02-15 15:58:01-05:00</th>\n",
" <td>34.0450</td>\n",
" <td>34.0500</td>\n",
" <td>34.0401</td>\n",
" <td>34.045</td>\n",
" <td>9216.0</td>\n",
" <td>39.0</td>\n",
" <td>2024-02-15 15:58:02.103157-05:00</td>\n",
" <td>34.048374</td>\n",
" <td>600.0</td>\n",
" <td>733.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-02-15 15:58:02-05:00</th>\n",
" <td>34.0499</td>\n",
" <td>34.0500</td>\n",
" <td>34.0401</td>\n",
" <td>34.050</td>\n",
" <td>2404.0</td>\n",
" <td>20.0</td>\n",
" <td>2024-02-15 15:58:03.139942-05:00</td>\n",
" <td>34.048829</td>\n",
" <td>622.0</td>\n",
" <td>382.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-02-15 15:58:03-05:00</th>\n",
" <td>34.0499</td>\n",
" <td>34.0500</td>\n",
" <td>34.0425</td>\n",
" <td>34.050</td>\n",
" <td>2481.0</td>\n",
" <td>11.0</td>\n",
" <td>2024-02-15 15:58:04.047101-05:00</td>\n",
" <td>34.046025</td>\n",
" <td>501.0</td>\n",
" <td>565.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-02-15 15:58:04-05:00</th>\n",
" <td>34.0500</td>\n",
" <td>34.0599</td>\n",
" <td>34.0500</td>\n",
" <td>34.055</td>\n",
" <td>21532.0</td>\n",
" <td>57.0</td>\n",
" <td>2024-02-15 15:58:05.143236-05:00</td>\n",
" <td>34.051678</td>\n",
" <td>1465.0</td>\n",
" <td>1637.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-02-16 09:34:52-05:00</th>\n",
" <td>33.7050</td>\n",
" <td>33.7050</td>\n",
" <td>33.7050</td>\n",
" <td>33.705</td>\n",
" <td>100.0</td>\n",
" <td>1.0</td>\n",
" <td>2024-02-16 09:34:53.669980-05:00</td>\n",
" <td>33.705000</td>\n",
" <td>0.0</td>\n",
" <td>100.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-02-16 09:34:53-05:00</th>\n",
" <td>33.7050</td>\n",
" <td>33.7050</td>\n",
" <td>33.7050</td>\n",
" <td>33.705</td>\n",
" <td>300.0</td>\n",
" <td>1.0</td>\n",
" <td>2024-02-16 09:34:55.212184-05:00</td>\n",
" <td>33.705000</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-02-16 09:34:55-05:00</th>\n",
" <td>33.7092</td>\n",
" <td>33.7092</td>\n",
" <td>33.7050</td>\n",
" <td>33.705</td>\n",
" <td>304.0</td>\n",
" <td>2.0</td>\n",
" <td>2024-02-16 09:34:56.304454-05:00</td>\n",
" <td>33.706382</td>\n",
" <td>100.0</td>\n",
" <td>204.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-02-16 09:34:56-05:00</th>\n",
" <td>33.7050</td>\n",
" <td>33.7150</td>\n",
" <td>33.7050</td>\n",
" <td>33.715</td>\n",
" <td>900.0</td>\n",
" <td>9.0</td>\n",
" <td>2024-02-16 09:34:58.166409-05:00</td>\n",
" <td>33.711667</td>\n",
" <td>200.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-02-16 09:34:58-05:00</th>\n",
" <td>33.7150</td>\n",
" <td>33.7150</td>\n",
" <td>33.7100</td>\n",
" <td>33.710</td>\n",
" <td>2461.0</td>\n",
" <td>14.0</td>\n",
" <td>2024-02-16 09:34:58.958611-05:00</td>\n",
" <td>33.710707</td>\n",
" <td>100.0</td>\n",
" <td>200.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>324 rows × 10 columns</p>\n",
"</div>"
],
"text/plain": [
" open high low close volume trades \\\n",
"time \n",
"2024-02-15 15:58:00-05:00 34.0450 34.0499 34.0400 34.045 7032.0 19.0 \n",
"2024-02-15 15:58:01-05:00 34.0450 34.0500 34.0401 34.045 9216.0 39.0 \n",
"2024-02-15 15:58:02-05:00 34.0499 34.0500 34.0401 34.050 2404.0 20.0 \n",
"2024-02-15 15:58:03-05:00 34.0499 34.0500 34.0425 34.050 2481.0 11.0 \n",
"2024-02-15 15:58:04-05:00 34.0500 34.0599 34.0500 34.055 21532.0 57.0 \n",
"... ... ... ... ... ... ... \n",
"2024-02-16 09:34:52-05:00 33.7050 33.7050 33.7050 33.705 100.0 1.0 \n",
"2024-02-16 09:34:53-05:00 33.7050 33.7050 33.7050 33.705 300.0 1.0 \n",
"2024-02-16 09:34:55-05:00 33.7092 33.7092 33.7050 33.705 304.0 2.0 \n",
"2024-02-16 09:34:56-05:00 33.7050 33.7150 33.7050 33.715 900.0 9.0 \n",
"2024-02-16 09:34:58-05:00 33.7150 33.7150 33.7100 33.710 2461.0 14.0 \n",
"\n",
" updated vwap \\\n",
"time \n",
"2024-02-15 15:58:00-05:00 2024-02-15 15:58:01.013622-05:00 34.045109 \n",
"2024-02-15 15:58:01-05:00 2024-02-15 15:58:02.103157-05:00 34.048374 \n",
"2024-02-15 15:58:02-05:00 2024-02-15 15:58:03.139942-05:00 34.048829 \n",
"2024-02-15 15:58:03-05:00 2024-02-15 15:58:04.047101-05:00 34.046025 \n",
"2024-02-15 15:58:04-05:00 2024-02-15 15:58:05.143236-05:00 34.051678 \n",
"... ... ... \n",
"2024-02-16 09:34:52-05:00 2024-02-16 09:34:53.669980-05:00 33.705000 \n",
"2024-02-16 09:34:53-05:00 2024-02-16 09:34:55.212184-05:00 33.705000 \n",
"2024-02-16 09:34:55-05:00 2024-02-16 09:34:56.304454-05:00 33.706382 \n",
"2024-02-16 09:34:56-05:00 2024-02-16 09:34:58.166409-05:00 33.711667 \n",
"2024-02-16 09:34:58-05:00 2024-02-16 09:34:58.958611-05:00 33.710707 \n",
"\n",
" buyvolume sellvolume \n",
"time \n",
"2024-02-15 15:58:00-05:00 1976.0 1663.0 \n",
"2024-02-15 15:58:01-05:00 600.0 733.0 \n",
"2024-02-15 15:58:02-05:00 622.0 382.0 \n",
"2024-02-15 15:58:03-05:00 501.0 565.0 \n",
"2024-02-15 15:58:04-05:00 1465.0 1637.0 \n",
"... ... ... \n",
"2024-02-16 09:34:52-05:00 0.0 100.0 \n",
"2024-02-16 09:34:53-05:00 0.0 0.0 \n",
"2024-02-16 09:34:55-05:00 100.0 204.0 \n",
"2024-02-16 09:34:56-05:00 200.0 0.0 \n",
"2024-02-16 09:34:58-05:00 100.0 200.0 \n",
"\n",
"[324 rows x 10 columns]"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ohlcv_df"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>exchange</th>\n",
" <th>price</th>\n",
" <th>size</th>\n",
" <th>id</th>\n",
" <th>conditions</th>\n",
" <th>tape</th>\n",
" </tr>\n",
" <tr>\n",
" <th>symbol</th>\n",
" <th>timestamp</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th rowspan=\"11\" valign=\"top\">BAC</th>\n",
" <th>2024-02-16 09:30:00.708657-05:00</th>\n",
" <td>P</td>\n",
" <td>33.920</td>\n",
" <td>200</td>\n",
" <td>52983525230899</td>\n",
" <td>[ ]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-02-16 09:30:00.708657-05:00</th>\n",
" <td>P</td>\n",
" <td>33.920</td>\n",
" <td>200</td>\n",
" <td>52983525230900</td>\n",
" <td>[ , Q]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-02-16 09:30:00.708660-05:00</th>\n",
" <td>P</td>\n",
" <td>33.920</td>\n",
" <td>200</td>\n",
" <td>52983525230901</td>\n",
" <td>[ ]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-02-16 09:30:00.708687-05:00</th>\n",
" <td>P</td>\n",
" <td>33.920</td>\n",
" <td>200</td>\n",
" <td>52983525230903</td>\n",
" <td>[ ]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-02-16 09:30:00.708744-05:00</th>\n",
" <td>P</td>\n",
" <td>33.920</td>\n",
" <td>100</td>\n",
" <td>52983525230904</td>\n",
" <td>[ ]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-02-16 10:24:25.203728-05:00</th>\n",
" <td>D</td>\n",
" <td>33.930</td>\n",
" <td>434</td>\n",
" <td>71679923009363</td>\n",
" <td>[ ]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-02-16 10:24:25.204664-05:00</th>\n",
" <td>D</td>\n",
" <td>33.930</td>\n",
" <td>118</td>\n",
" <td>71679923009364</td>\n",
" <td>[ ]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-02-16 10:24:25.269020-05:00</th>\n",
" <td>D</td>\n",
" <td>33.930</td>\n",
" <td>580</td>\n",
" <td>71679923009601</td>\n",
" <td>[ ]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-02-16 10:24:25.296243-05:00</th>\n",
" <td>D</td>\n",
" <td>33.930</td>\n",
" <td>325</td>\n",
" <td>71679923009612</td>\n",
" <td>[ ]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-02-16 10:24:26.092502-05:00</th>\n",
" <td>N</td>\n",
" <td>33.925</td>\n",
" <td>275</td>\n",
" <td>52983526543427</td>\n",
" <td>[ ]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>12032 rows × 6 columns</p>\n",
"</div>"
],
"text/plain": [
" exchange price size \\\n",
"symbol timestamp \n",
"BAC 2024-02-16 09:30:00.708657-05:00 P 33.920 200 \n",
" 2024-02-16 09:30:00.708657-05:00 P 33.920 200 \n",
" 2024-02-16 09:30:00.708660-05:00 P 33.920 200 \n",
" 2024-02-16 09:30:00.708687-05:00 P 33.920 200 \n",
" 2024-02-16 09:30:00.708744-05:00 P 33.920 100 \n",
"... ... ... ... \n",
" 2024-02-16 10:24:25.203728-05:00 D 33.930 434 \n",
" 2024-02-16 10:24:25.204664-05:00 D 33.930 118 \n",
" 2024-02-16 10:24:25.269020-05:00 D 33.930 580 \n",
" 2024-02-16 10:24:25.296243-05:00 D 33.930 325 \n",
" 2024-02-16 10:24:26.092502-05:00 N 33.925 275 \n",
"\n",
" id conditions tape \n",
"symbol timestamp \n",
"BAC 2024-02-16 09:30:00.708657-05:00 52983525230899 [ ] A \n",
" 2024-02-16 09:30:00.708657-05:00 52983525230900 [ , Q] A \n",
" 2024-02-16 09:30:00.708660-05:00 52983525230901 [ ] A \n",
" 2024-02-16 09:30:00.708687-05:00 52983525230903 [ ] A \n",
" 2024-02-16 09:30:00.708744-05:00 52983525230904 [ ] A \n",
"... ... ... ... \n",
" 2024-02-16 10:24:25.203728-05:00 71679923009363 [ ] A \n",
" 2024-02-16 10:24:25.204664-05:00 71679923009364 [ ] A \n",
" 2024-02-16 10:24:25.269020-05:00 71679923009601 [ ] A \n",
" 2024-02-16 10:24:25.296243-05:00 71679923009612 [ ] A \n",
" 2024-02-16 10:24:26.092502-05:00 52983526543427 [ ] A \n",
"\n",
"[12032 rows x 6 columns]"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#trades_df.info()\n",
"focused = trades_df.loc[(\"BAC\", \"2024-02-16 09:30:00\"):(\"BAC\", \"2024-02-16 10:24:26\")]\n",
"focused"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>exchange</th>\n",
" <th>price</th>\n",
" <th>size</th>\n",
" <th>id</th>\n",
" <th>conditions</th>\n",
" <th>tape</th>\n",
" </tr>\n",
" <tr>\n",
" <th>symbol</th>\n",
" <th>timestamp</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: [exchange, price, size, id, conditions, tape]\n",
"Index: []"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"trades_df.loc[\"2024-02-16 09:30:00\":\"2024-02-16 10:24:26\"]"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>open</th>\n",
" <th>high</th>\n",
" <th>low</th>\n",
" <th>close</th>\n",
" <th>volume</th>\n",
" <th>trades</th>\n",
" <th>updated</th>\n",
" <th>vwap</th>\n",
" <th>buyvolume</th>\n",
" <th>sellvolume</th>\n",
" </tr>\n",
" <tr>\n",
" <th>time</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2024-02-16 09:30:00-05:00</th>\n",
" <td>33.920</td>\n",
" <td>33.92</td>\n",
" <td>33.900</td>\n",
" <td>33.900</td>\n",
" <td>1700.0</td>\n",
" <td>12.0</td>\n",
" <td>2024-02-16 09:30:01.023464-05:00</td>\n",
" <td>33.915882</td>\n",
" <td>0.0</td>\n",
" <td>400.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-02-16 09:30:01-05:00</th>\n",
" <td>33.870</td>\n",
" <td>33.96</td>\n",
" <td>33.860</td>\n",
" <td>33.960</td>\n",
" <td>931216.0</td>\n",
" <td>110.0</td>\n",
" <td>2024-02-16 09:30:02.118683-05:00</td>\n",
" <td>33.949280</td>\n",
" <td>912802.0</td>\n",
" <td>4471.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-02-16 09:30:02-05:00</th>\n",
" <td>33.960</td>\n",
" <td>33.98</td>\n",
" <td>33.945</td>\n",
" <td>33.945</td>\n",
" <td>1160.0</td>\n",
" <td>5.0</td>\n",
" <td>2024-02-16 09:30:03.440994-05:00</td>\n",
" <td>33.959655</td>\n",
" <td>180.0</td>\n",
" <td>300.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-02-16 09:30:03-05:00</th>\n",
" <td>33.930</td>\n",
" <td>33.95</td>\n",
" <td>33.921</td>\n",
" <td>33.925</td>\n",
" <td>5376.0</td>\n",
" <td>13.0</td>\n",
" <td>2024-02-16 09:30:04.515116-05:00</td>\n",
" <td>33.934600</td>\n",
" <td>1548.0</td>\n",
" <td>991.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-02-16 09:30:04-05:00</th>\n",
" <td>33.900</td>\n",
" <td>33.94</td>\n",
" <td>33.870</td>\n",
" <td>33.890</td>\n",
" <td>4759.0</td>\n",
" <td>13.0</td>\n",
" <td>2024-02-16 09:30:05.163964-05:00</td>\n",
" <td>33.874182</td>\n",
" <td>570.0</td>\n",
" <td>2889.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-02-16 10:24:22-05:00</th>\n",
" <td>33.930</td>\n",
" <td>33.93</td>\n",
" <td>33.930</td>\n",
" <td>33.930</td>\n",
" <td>1626.0</td>\n",
" <td>8.0</td>\n",
" <td>2024-02-16 10:24:23.246513-05:00</td>\n",
" <td>33.930000</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-02-16 10:24:23-05:00</th>\n",
" <td>33.930</td>\n",
" <td>33.93</td>\n",
" <td>33.925</td>\n",
" <td>33.930</td>\n",
" <td>2200.0</td>\n",
" <td>12.0</td>\n",
" <td>2024-02-16 10:24:24.061319-05:00</td>\n",
" <td>33.929545</td>\n",
" <td>400.0</td>\n",
" <td>200.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-02-16 10:24:24-05:00</th>\n",
" <td>33.930</td>\n",
" <td>33.93</td>\n",
" <td>33.930</td>\n",
" <td>33.930</td>\n",
" <td>5054.0</td>\n",
" <td>10.0</td>\n",
" <td>2024-02-16 10:24:25.017519-05:00</td>\n",
" <td>33.930000</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-02-16 10:24:25-05:00</th>\n",
" <td>33.930</td>\n",
" <td>33.93</td>\n",
" <td>33.930</td>\n",
" <td>33.930</td>\n",
" <td>3712.0</td>\n",
" <td>8.0</td>\n",
" <td>2024-02-16 10:24:26.092502-05:00</td>\n",
" <td>33.930000</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-02-16 10:24:26-05:00</th>\n",
" <td>33.925</td>\n",
" <td>33.93</td>\n",
" <td>33.915</td>\n",
" <td>33.920</td>\n",
" <td>7457.0</td>\n",
" <td>33.0</td>\n",
" <td>2024-02-16 10:24:27.112140-05:00</td>\n",
" <td>33.920636</td>\n",
" <td>699.0</td>\n",
" <td>1461.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>2077 rows × 10 columns</p>\n",
"</div>"
],
"text/plain": [
" open high low close volume trades \\\n",
"time \n",
"2024-02-16 09:30:00-05:00 33.920 33.92 33.900 33.900 1700.0 12.0 \n",
"2024-02-16 09:30:01-05:00 33.870 33.96 33.860 33.960 931216.0 110.0 \n",
"2024-02-16 09:30:02-05:00 33.960 33.98 33.945 33.945 1160.0 5.0 \n",
"2024-02-16 09:30:03-05:00 33.930 33.95 33.921 33.925 5376.0 13.0 \n",
"2024-02-16 09:30:04-05:00 33.900 33.94 33.870 33.890 4759.0 13.0 \n",
"... ... ... ... ... ... ... \n",
"2024-02-16 10:24:22-05:00 33.930 33.93 33.930 33.930 1626.0 8.0 \n",
"2024-02-16 10:24:23-05:00 33.930 33.93 33.925 33.930 2200.0 12.0 \n",
"2024-02-16 10:24:24-05:00 33.930 33.93 33.930 33.930 5054.0 10.0 \n",
"2024-02-16 10:24:25-05:00 33.930 33.93 33.930 33.930 3712.0 8.0 \n",
"2024-02-16 10:24:26-05:00 33.925 33.93 33.915 33.920 7457.0 33.0 \n",
"\n",
" updated vwap \\\n",
"time \n",
"2024-02-16 09:30:00-05:00 2024-02-16 09:30:01.023464-05:00 33.915882 \n",
"2024-02-16 09:30:01-05:00 2024-02-16 09:30:02.118683-05:00 33.949280 \n",
"2024-02-16 09:30:02-05:00 2024-02-16 09:30:03.440994-05:00 33.959655 \n",
"2024-02-16 09:30:03-05:00 2024-02-16 09:30:04.515116-05:00 33.934600 \n",
"2024-02-16 09:30:04-05:00 2024-02-16 09:30:05.163964-05:00 33.874182 \n",
"... ... ... \n",
"2024-02-16 10:24:22-05:00 2024-02-16 10:24:23.246513-05:00 33.930000 \n",
"2024-02-16 10:24:23-05:00 2024-02-16 10:24:24.061319-05:00 33.929545 \n",
"2024-02-16 10:24:24-05:00 2024-02-16 10:24:25.017519-05:00 33.930000 \n",
"2024-02-16 10:24:25-05:00 2024-02-16 10:24:26.092502-05:00 33.930000 \n",
"2024-02-16 10:24:26-05:00 2024-02-16 10:24:27.112140-05:00 33.920636 \n",
"\n",
" buyvolume sellvolume \n",
"time \n",
"2024-02-16 09:30:00-05:00 0.0 400.0 \n",
"2024-02-16 09:30:01-05:00 912802.0 4471.0 \n",
"2024-02-16 09:30:02-05:00 180.0 300.0 \n",
"2024-02-16 09:30:03-05:00 1548.0 991.0 \n",
"2024-02-16 09:30:04-05:00 570.0 2889.0 \n",
"... ... ... \n",
"2024-02-16 10:24:22-05:00 0.0 0.0 \n",
"2024-02-16 10:24:23-05:00 400.0 200.0 \n",
"2024-02-16 10:24:24-05:00 0.0 0.0 \n",
"2024-02-16 10:24:25-05:00 0.0 0.0 \n",
"2024-02-16 10:24:26-05:00 699.0 1461.0 \n",
"\n",
"[2077 rows x 10 columns]"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"focohlc = ohlcv_df.loc[\"2024-02-16 09:30:00\":\"2024-02-16 10:24:26\"]\n",
"focohlc\n"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'focohlc' is not defined",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[8], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mfocohlc\u001b[49m\u001b[38;5;241m.\u001b[39minfo()\n",
"\u001b[0;31mNameError\u001b[0m: name 'focohlc' is not defined"
]
}
],
"source": [
"focohlc.info()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"#trades_df.to_parquet(dir + \"trades_df-BAC-2024-01-01T09:30:00-2024-05-14T16:00:00-CO4B7VPWUZF-100.parquet\", engine='pyarrow', compression='gzip')\n",
"#trades_df = pd.read_parquet(dir + \"trades_df-BAC-2024-01-01T09:30:00-2024-05-14T16:00:00-CO4B7VPWUZF-100.parquet\",engine='pyarrow')\n",
"\n",
"#trades_df.to_parquet(file_trades, engine='pyarrow', compression='gzip')"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>exchange</th>\n",
" <th>price</th>\n",
" <th>size</th>\n",
" <th>id</th>\n",
" <th>conditions</th>\n",
" <th>tape</th>\n",
" </tr>\n",
" <tr>\n",
" <th>symbol</th>\n",
" <th>timestamp</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th rowspan=\"11\" valign=\"top\">BAC</th>\n",
" <th>2024-01-02 09:30:00.329729-05:00</th>\n",
" <td>T</td>\n",
" <td>33.390</td>\n",
" <td>9684</td>\n",
" <td>62879149669684</td>\n",
" <td>[ , Q]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-01-02 09:30:00.758040-05:00</th>\n",
" <td>P</td>\n",
" <td>33.430</td>\n",
" <td>700</td>\n",
" <td>52983525495600</td>\n",
" <td>[ ]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-01-02 09:30:00.758997-05:00</th>\n",
" <td>P</td>\n",
" <td>33.440</td>\n",
" <td>400</td>\n",
" <td>52983525495601</td>\n",
" <td>[ ]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-01-02 09:30:01.086662-05:00</th>\n",
" <td>N</td>\n",
" <td>33.460</td>\n",
" <td>429483</td>\n",
" <td>52983525028681</td>\n",
" <td>[ , Q]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-01-02 09:30:01.086708-05:00</th>\n",
" <td>P</td>\n",
" <td>33.450</td>\n",
" <td>100</td>\n",
" <td>52983525495863</td>\n",
" <td>[ ]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-05-14 15:59:59.976990-04:00</th>\n",
" <td>N</td>\n",
" <td>38.490</td>\n",
" <td>42501</td>\n",
" <td>52983576482130</td>\n",
" <td>[ ]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-05-14 15:59:59.977038-04:00</th>\n",
" <td>N</td>\n",
" <td>38.495</td>\n",
" <td>100</td>\n",
" <td>52983576482132</td>\n",
" <td>[ ]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-05-14 15:59:59.977065-04:00</th>\n",
" <td>N</td>\n",
" <td>38.495</td>\n",
" <td>140</td>\n",
" <td>52983576482133</td>\n",
" <td>[ ]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-05-14 15:59:59.977618-04:00</th>\n",
" <td>T</td>\n",
" <td>38.495</td>\n",
" <td>140</td>\n",
" <td>62883460167226</td>\n",
" <td>[ ]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-05-14 15:59:59.978364-04:00</th>\n",
" <td>D</td>\n",
" <td>38.495</td>\n",
" <td>100</td>\n",
" <td>71714533561907</td>\n",
" <td>[ ]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>6467196 rows × 6 columns</p>\n",
"</div>"
],
"text/plain": [
" exchange price size \\\n",
"symbol timestamp \n",
"BAC 2024-01-02 09:30:00.329729-05:00 T 33.390 9684 \n",
" 2024-01-02 09:30:00.758040-05:00 P 33.430 700 \n",
" 2024-01-02 09:30:00.758997-05:00 P 33.440 400 \n",
" 2024-01-02 09:30:01.086662-05:00 N 33.460 429483 \n",
" 2024-01-02 09:30:01.086708-05:00 P 33.450 100 \n",
"... ... ... ... \n",
" 2024-05-14 15:59:59.976990-04:00 N 38.490 42501 \n",
" 2024-05-14 15:59:59.977038-04:00 N 38.495 100 \n",
" 2024-05-14 15:59:59.977065-04:00 N 38.495 140 \n",
" 2024-05-14 15:59:59.977618-04:00 T 38.495 140 \n",
" 2024-05-14 15:59:59.978364-04:00 D 38.495 100 \n",
"\n",
" id conditions tape \n",
"symbol timestamp \n",
"BAC 2024-01-02 09:30:00.329729-05:00 62879149669684 [ , Q] A \n",
" 2024-01-02 09:30:00.758040-05:00 52983525495600 [ ] A \n",
" 2024-01-02 09:30:00.758997-05:00 52983525495601 [ ] A \n",
" 2024-01-02 09:30:01.086662-05:00 52983525028681 [ , Q] A \n",
" 2024-01-02 09:30:01.086708-05:00 52983525495863 [ ] A \n",
"... ... ... ... \n",
" 2024-05-14 15:59:59.976990-04:00 52983576482130 [ ] A \n",
" 2024-05-14 15:59:59.977038-04:00 52983576482132 [ ] A \n",
" 2024-05-14 15:59:59.977065-04:00 52983576482133 [ ] A \n",
" 2024-05-14 15:59:59.977618-04:00 62883460167226 [ ] A \n",
" 2024-05-14 15:59:59.978364-04:00 71714533561907 [ ] A \n",
"\n",
"[6467196 rows x 6 columns]"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"trades_df"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'/Users/davidbrazda/Library/Application Support/v2realbot/notebooks/trades_df-BAC-2024-01-01T09_30_00-2024-05-14T16_00_00-CO4B7VPWUZF-100.parquet'"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"file_trades"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#list all files is dir directory with parquet extension\n",
"dir = DATA_DIR + \"/notebooks/\"\n",
"import os\n",
"files = [f for f in os.listdir(dir) if f.endswith(\".parquet\")]\n",
"file_name = \"\"\n",
"ohlcv_df = pd.read_parquet(file_ohlcv,engine='pyarrow')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ohlcv_df"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"# Calculate daily returns\n",
"ohlcv_df['returns'] = ohlcv_df['close'].pct_change().dropna()\n",
"#same as above but pct_change is from 3 datapoints back, but only if it is the same date, else na\n",
"\n",
"\n",
"# Plot the probability distribution curve\n",
"plt.figure(figsize=(10, 6))\n",
"sns.histplot(df['returns'].dropna(), kde=True, stat='probability', bins=30)\n",
"plt.title('Probability Distribution of Daily Returns')\n",
"plt.xlabel('Daily Returns')\n",
"plt.ylabel('Probability')\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.preprocessing import StandardScaler\n",
"from sklearn.linear_model import LogisticRegression\n",
"from sklearn.metrics import accuracy_score\n",
"\n",
"# Define the intervals from 5 to 20 s, returns for each interval\n",
"#maybe use rolling window?\n",
"intervals = range(5, 21, 5)\n",
"\n",
"# Create columns for percentage returns\n",
"rolling_window = 50\n",
"\n",
"# Normalize the returns using rolling mean and std\n",
"for N in intervals:\n",
" column_name = f'returns_{N}'\n",
" rolling_mean = ohlcv_df[column_name].rolling(window=rolling_window).mean()\n",
" rolling_std = ohlcv_df[column_name].rolling(window=rolling_window).std()\n",
" ohlcv_df[f'norm_{column_name}'] = (ohlcv_df[column_name] - rolling_mean) / rolling_std\n",
"\n",
"# Display the dataframe with normalized return columns\n",
"ohlcv_df\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Calculate the sum of the normalized return columns for each row\n",
"ohlcv_df['sum_norm_returns'] = ohlcv_df[[f'norm_returns_{N}' for N in intervals]].sum(axis=1)\n",
"\n",
"# Sort the DataFrame based on the sum of normalized returns in descending order\n",
"df_sorted = ohlcv_df.sort_values(by='sum_norm_returns', ascending=False)\n",
"\n",
"# Display the top rows with the highest sum of normalized returns\n",
"df_sorted\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Drop initial rows with NaN values due to pct_change\n",
"ohlcv_df.dropna(inplace=True)\n",
"\n",
"# Plotting the probability distribution curves\n",
"plt.figure(figsize=(14, 8))\n",
"for N in intervals:\n",
" sns.kdeplot(ohlcv_df[f'returns_{N}'].dropna(), label=f'Returns {N}', fill=True)\n",
"\n",
"plt.title('Probability Distribution of Percentage Returns')\n",
"plt.xlabel('Percentage Return')\n",
"plt.ylabel('Density')\n",
"plt.legend()\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"# Plot the probability distribution curve\n",
"plt.figure(figsize=(10, 6))\n",
"sns.histplot(ohlcv_df['returns'].dropna(), kde=True, stat='probability', bins=30)\n",
"plt.title('Probability Distribution of Daily Returns')\n",
"plt.xlabel('Daily Returns')\n",
"plt.ylabel('Probability')\n",
"plt.show()\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#show only rows from ohlcv_df where returns > 0.005\n",
"ohlcv_df[ohlcv_df['returns'] > 0.0005]\n",
"\n",
"#ohlcv_df[ohlcv_df['returns'] < -0.005]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#ohlcv where index = date 2024-03-13 and between hour 12\n",
"\n",
"a = ohlcv_df.loc['2024-03-13 12:00:00':'2024-03-13 13:00:00']\n",
"a"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ohlcv_df"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"trades_df"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ohlcv_df.info()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"trades_df.to_parquet(\"trades_df-spy-0111-0111.parquett\", engine='pyarrow', compression='gzip')\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"trades_df.to_parquet(\"trades_df-spy-111-0516.parquett\", engine='pyarrow', compression='gzip', allow_truncated_timestamps=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ohlcv_df.to_parquet(\"ohlcv_df-spy-111-0516.parquett\", engine='pyarrow', compression='gzip')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"basic_data = vbt.Data.from_data(vbt.symbol_dict({symbol: ohlcv_df}), tz_convert=zoneNY)\n",
"vbt.settings['plotting']['auto_rangebreaks'] = True\n",
"basic_data.ohlcv.plot()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#access just BCA\n",
"#df_filtered = df.loc[\"BAC\"]"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.10"
}
},
"nbformat": 4,
"nbformat_minor": 2
}