854 lines
30 KiB
Plaintext
854 lines
30 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"# Create aggregated data from trades\n",
|
||
"\n",
|
||
"This is how new aggregated data are created and stored to cache, where can they be loaded. It is created for given symbol, interval and aggregation type/resolution. For example OHLCV_1m, or OHLCV_VOLUME_2000 (volume bars with resolution 2000).\n",
|
||
"\n",
|
||
"Possible aggregation types\n",
|
||
"- time based OHLCV, time resolution\n",
|
||
"- volume based OHLCV, volume resolution\n",
|
||
"- dollar based OHLCV, dollar amount resolution\n",
|
||
"- renko bars, bricks size as resolution\n",
|
||
"\n",
|
||
"\n",
|
||
"Steps include\n",
|
||
"- fetch trades (remote/cached)\n",
|
||
"- use new vectorized aggregation to aggregate bars of given type (time, volume, dollar) and resolution\n",
|
||
"- store to agg cache\n",
|
||
"\n",
|
||
"Methods:\n",
|
||
"- `fetch_trades_parallel` enables to fetch trades of given symbol and interval, also can filter conditions and minimum size. Returns `trades_df`\n",
|
||
"- `aggregate_trades` accepts `trades_df` and resolution and type of bars (VOLUME, TIME, DOLLAR) and return aggregated ohlcv dataframe `ohlcv_df`\n",
|
||
"\n",
|
||
"TBD will be soon introduced in separate package responsible for fetching the data (cache mngmt, remote fetching and vectorized aggregation) - see (issue)[https://github.com/drew2323/v2trading/issues/250]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 1,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"None\n",
|
||
"Loaded env variables from file None\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Activating profile profile1\n",
|
||
"</pre>\n"
|
||
],
|
||
"text/plain": [
|
||
"Activating profile profile1\n"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
},
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"trades_df-BAC-2024-01-01T09_30_00-2024-05-14T16_00_00-CO4B7VPWUZF-100.parquet\n",
|
||
"trades_df-BAC-2024-01-11T09:30:00-2024-01-12T16:00:00.parquet\n",
|
||
"trades_df-SPY-2024-01-01T09:30:00-2024-05-14T16:00:00.parquet\n",
|
||
"trades_df-BAC-2023-01-01T09_30_00-2024-05-25T16_00_00-47BCFOPUVWZ-100.parquet\n",
|
||
"ohlcv_df-BAC-2024-01-11T09:30:00-2024-01-12T16:00:00.parquet\n",
|
||
"trades_df-BAC-2023-01-01T09:30:00-2024-10-02T16:00:00-['4', '7', 'B', 'C', 'F', 'O', 'P', 'U', 'V', 'W', 'Z']-100.parquet\n",
|
||
"trades_df-BAC-2024-05-15T09_30_00-2024-05-25T16_00_00-47BCFOPUVWZ-100.parquet\n",
|
||
"ohlcv_df-BAC-2023-01-01T09:30:00-2024-10-02T16:00:00-['4', '7', 'B', 'C', 'F', 'O', 'P', 'U', 'V', 'W', 'Z']-100.parquet\n",
|
||
"ohlcv_df-BAC-2024-01-01T09_30_00-2024-05-25T16_00_00-47BCFOPUVWZ-100.parquet\n",
|
||
"ohlcv_df-SPY-2024-01-01T09:30:00-2024-05-14T16:00:00.parquet\n",
|
||
"ohlcv_df-BAC-2024-01-01T09_30_00-2024-05-14T16_00_00-CO4B7VPWUZF-100.parquet\n",
|
||
"ohlcv_df-BAC-2023-01-01T09_30_00-2024-05-25T16_00_00-47BCFOPUVWZ-100.parquet\n",
|
||
"ohlcv_df-BAC-2023-01-01T09_30_00-2024-05-25T15_30_00-47BCFOPUVWZ-100.parquet\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"['4', '7', 'B', 'C', 'F', 'O', 'P', 'U', 'V', 'W', 'Z']"
|
||
]
|
||
},
|
||
"execution_count": 1,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"import pandas as pd\n",
|
||
"import numpy as np\n",
|
||
"from numba import jit\n",
|
||
"from alpaca.data.historical import StockHistoricalDataClient\n",
|
||
"from v2realbot.config import ACCOUNT1_PAPER_API_KEY, ACCOUNT1_PAPER_SECRET_KEY, DATA_DIR\n",
|
||
"from alpaca.data.requests import StockTradesRequest\n",
|
||
"from v2realbot.enums.enums import BarType\n",
|
||
"import time\n",
|
||
"from datetime import datetime\n",
|
||
"from v2realbot.utils.utils import parse_alpaca_timestamp, ltp, zoneNY, send_to_telegram, fetch_calendar_data\n",
|
||
"import pyarrow\n",
|
||
"from v2realbot.loader.aggregator_vectorized import fetch_daily_stock_trades, fetch_trades_parallel, generate_time_bars_nb, aggregate_trades\n",
|
||
"import vectorbtpro as vbt\n",
|
||
"import v2realbot.utils.config_handler as cfh\n",
|
||
"\n",
|
||
"vbt.settings.set_theme(\"dark\")\n",
|
||
"vbt.settings['plotting']['layout']['width'] = 1280\n",
|
||
"vbt.settings.plotting.auto_rangebreaks = True\n",
|
||
"# Set the option to display with pagination\n",
|
||
"pd.set_option('display.notebook_repr_html', True)\n",
|
||
"pd.set_option('display.max_rows', 20) # Number of rows per page\n",
|
||
"# pd.set_option('display.float_format', '{:.9f}'.format)\n",
|
||
"\n",
|
||
"\n",
|
||
"#trade filtering\n",
|
||
"exclude_conditions = cfh.config_handler.get_val('AGG_EXCLUDED_TRADES') #standard ['C','O','4','B','7','V','P','W','U','Z','F']\n",
|
||
"minsize = 100\n",
|
||
"\n",
|
||
"symbol = \"BAC\"\n",
|
||
"#datetime in zoneNY \n",
|
||
"day_start = datetime(2024, 10, 3, 9, 30, 0)\n",
|
||
"day_stop = datetime(2024, 10, 16, 16, 00, 0)\n",
|
||
"day_start = zoneNY.localize(day_start)\n",
|
||
"day_stop = zoneNY.localize(day_stop)\n",
|
||
"#filename of trades_df parquet, date are in isoformat but without time zone part\n",
|
||
"dir = DATA_DIR + \"/notebooks/\"\n",
|
||
"#parquet interval cache contains exclude conditions and minsize filtering\n",
|
||
"file_trades = dir + f\"trades_df-{symbol}-{day_start.strftime('%Y-%m-%dT%H:%M:%S')}-{day_stop.strftime('%Y-%m-%dT%H:%M:%S')}-{exclude_conditions}-{minsize}.parquet\"\n",
|
||
"#file_trades = dir + f\"trades_df-{symbol}-{day_start.strftime('%Y-%m-%dT%H:%M:%S')}-{day_stop.strftime('%Y-%m-%dT%H:%M:%S')}.parquet\"\n",
|
||
"file_ohlcv = dir + f\"ohlcv_df-{symbol}-{day_start.strftime('%Y-%m-%dT%H:%M:%S')}-{day_stop.strftime('%Y-%m-%dT%H:%M:%S')}-{str(exclude_conditions)}-{minsize}.parquet\"\n",
|
||
"\n",
|
||
"#PRINT all parquet in directory\n",
|
||
"import os\n",
|
||
"files = [f for f in os.listdir(dir) if f.endswith(\".parquet\")]\n",
|
||
"for f in files:\n",
|
||
" print(f)\n",
|
||
"\n",
|
||
"exclude_conditions"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 3,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Contains 10 market days\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Processing market days: 100%|██████████| 10/10 [00:00<00:00, 267.74it/s]\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"NOT FOUND. Fetching from remote\n",
|
||
"NOT FOUND. Fetching from remote\n",
|
||
"NOT FOUND. Fetching from remote\n",
|
||
"NOT FOUND. Fetching from remote\n",
|
||
"NOT FOUND. Fetching from remote\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Fetching data: 0%| | 0/10 [00:00<?, ?it/s]"
|
||
]
|
||
},
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Remote fetched: is_empty=False 2024-10-03 09:30:00-04:00 2024-10-03 16:00:00-04:00\n",
|
||
"Saving to Trade CACHE /Users/davidbrazda/Library/Application Support/v2realbot/tradecache/BAC-1727962200-1727985600.cache.gz\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Fetching data: 10%|█ | 1/10 [00:21<03:12, 21.41s/it]"
|
||
]
|
||
},
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"excluding conditions ['4', '7', 'B', 'C', 'F', 'O', 'P', 'U', 'V', 'W', 'Z']\n",
|
||
"minsize 100\n",
|
||
"NOT FOUND. Fetching from remote\n",
|
||
"Remote fetched: is_empty=False 2024-10-08 09:30:00-04:00 2024-10-08 16:00:00-04:00\n",
|
||
"Remote fetched: is_empty=False 2024-10-09 09:30:00-04:00 2024-10-09 16:00:00-04:00\n",
|
||
"Saving to Trade CACHE /Users/davidbrazda/Library/Application Support/v2realbot/tradecache/BAC-1728394200-1728417600.cache.gz\n",
|
||
"Remote fetched: is_empty=False 2024-10-07 09:30:00-04:00 2024-10-07 16:00:00-04:00\n",
|
||
"excluding conditions ['4', '7', 'B', 'C', 'F', 'O', 'P', 'U', 'V', 'W', 'Z']\n",
|
||
"minsize 100\n",
|
||
"NOT FOUND. Fetching from remote\n",
|
||
"Saving to Trade CACHE /Users/davidbrazda/Library/Application Support/v2realbot/tradecache/BAC-1728480600-1728504000.cache.gz\n",
|
||
"excluding conditions ['4', '7', 'B', 'C', 'F', 'O', 'P', 'U', 'V', 'W', 'Z']\n",
|
||
"minsize 100\n",
|
||
"NOT FOUND. Fetching from remote\n",
|
||
"Saving to Trade CACHE /Users/davidbrazda/Library/Application Support/v2realbot/tradecache/BAC-1728307800-1728331200.cache.gz\n",
|
||
"excluding conditions ['4', '7', 'B', 'C', 'F', 'O', 'P', 'U', 'V', 'W', 'Z']\n",
|
||
"minsize 100\n",
|
||
"NOT FOUND. Fetching from remote\n",
|
||
"Remote fetched: is_empty=False 2024-10-04 09:30:00-04:00 2024-10-04 16:00:00-04:00\n",
|
||
"Saving to Trade CACHE /Users/davidbrazda/Library/Application Support/v2realbot/tradecache/BAC-1728048600-1728072000.cache.gz\n",
|
||
"excluding conditions ['4', '7', 'B', 'C', 'F', 'O', 'P', 'U', 'V', 'W', 'Z']\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Fetching data: 20%|██ | 2/10 [00:32<02:01, 15.24s/it]"
|
||
]
|
||
},
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"minsize 100\n",
|
||
"NOT FOUND. Fetching from remote\n",
|
||
"Remote fetched: is_empty=False 2024-10-10 09:30:00-04:00 2024-10-10 16:00:00-04:00\n",
|
||
"Saving to Trade CACHE /Users/davidbrazda/Library/Application Support/v2realbot/tradecache/BAC-1728567000-1728590400.cache.gz\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Fetching data: 60%|██████ | 6/10 [00:47<00:25, 6.40s/it]"
|
||
]
|
||
},
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"excluding conditions ['4', '7', 'B', 'C', 'F', 'O', 'P', 'U', 'V', 'W', 'Z']\n",
|
||
"minsize 100\n",
|
||
"Remote fetched: is_empty=False 2024-10-14 09:30:00-04:00 2024-10-14 16:00:00-04:00\n",
|
||
"Saving to Trade CACHE /Users/davidbrazda/Library/Application Support/v2realbot/tradecache/BAC-1728912600-1728936000.cache.gz\n",
|
||
"excluding conditions ['4', '7', 'B', 'C', 'F', 'O', 'P', 'U', 'V', 'W', 'Z']\n",
|
||
"minsize 100\n",
|
||
"Remote fetched: is_empty=False 2024-10-16 09:30:00-04:00 2024-10-16 16:00:00-04:00\n",
|
||
"Saving to Trade CACHE /Users/davidbrazda/Library/Application Support/v2realbot/tradecache/BAC-1729085400-1729108800.cache.gz\n",
|
||
"Remote fetched: is_empty=False 2024-10-11 09:30:00-04:00 2024-10-11 16:00:00-04:00\n",
|
||
"excluding conditions ['4', '7', 'B', 'C', 'F', 'O', 'P', 'U', 'V', 'W', 'Z']\n",
|
||
"minsize 100\n",
|
||
"Saving to Trade CACHE /Users/davidbrazda/Library/Application Support/v2realbot/tradecache/BAC-1728653400-1728676800.cache.gz\n",
|
||
"excluding conditions ['4', '7', 'B', 'C', 'F', 'O', 'P', 'U', 'V', 'W', 'Z']\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Fetching data: 70%|███████ | 7/10 [01:13<00:31, 10.55s/it]"
|
||
]
|
||
},
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"minsize 100\n",
|
||
"Remote fetched: is_empty=False 2024-10-15 09:30:00-04:00 2024-10-15 16:00:00-04:00\n",
|
||
"Saving to Trade CACHE /Users/davidbrazda/Library/Application Support/v2realbot/tradecache/BAC-1728999000-1729022400.cache.gz\n",
|
||
"excluding conditions ['4', '7', 'B', 'C', 'F', 'O', 'P', 'U', 'V', 'W', 'Z']\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Fetching data: 100%|██████████| 10/10 [01:25<00:00, 8.53s/it]"
|
||
]
|
||
},
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"minsize 100\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"from v2realbot.loader.aggregator_vectorized import fetch_daily_stock_trades, fetch_trades_parallel, generate_time_bars_nb, aggregate_trades, fetch_trades_parallel_optimized\n",
|
||
"#fetch trades in one go\n",
|
||
"#trades_df = fetch_daily_stock_trades(symbol, day_start, day_stop, exclude_conditions=exclude_conditions, minsize=minsize, force_remote=False, max_retries=5, backoff_factor=1)\n",
|
||
"#fetch trades in parallel - for longer intervals\n",
|
||
"trades_df = fetch_trades_parallel(symbol, day_start, day_stop, exclude_conditions=exclude_conditions, minsize=minsize, force_remote=True, max_workers=None)\n",
|
||
" \n",
|
||
"##trades_df.info()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 4,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"trades_df.to_parquet(file_trades, engine='pyarrow', compression='gzip')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 5,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"#Either load trades or ohlcv from parquet if exists\n",
|
||
"\n",
|
||
"#trades_df = fetch_trades_parallel(symbol, day_start, day_stop, exclude_conditions=exclude_conditions, minsize=50, max_workers=20) #exclude_conditions=['C','O','4','B','7','V','P','W','U','Z','F'])\n",
|
||
"# trades_df.to_parquet(file_trades, engine='pyarrow', compression='gzip')\n",
|
||
"\n",
|
||
"trades_df = pd.read_parquet(file_trades,engine='pyarrow')\n",
|
||
"ohlcv_df = aggregate_trades(symbol=symbol, trades_df=trades_df, resolution=1, type=BarType.TIME)\n",
|
||
"ohlcv_df.to_parquet(file_ohlcv, engine='pyarrow', compression='gzip')\n",
|
||
"\n",
|
||
"# ohlcv_df = pd.read_parquet(file_ohlcv,engine='pyarrow')\n",
|
||
"# trades_df = pd.read_parquet(file_trades,engine='pyarrow')\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 6,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"#list all files is dir directory with parquet extension\n",
|
||
"dir = DATA_DIR + \"/notebooks/\"\n",
|
||
"import os\n",
|
||
"files = [f for f in os.listdir(dir) if f.endswith(\".parquet\")]\n",
|
||
"file_name = \"\"\n",
|
||
"ohlcv_df = pd.read_parquet(file_ohlcv,engine='pyarrow')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 8,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"\"/Users/davidbrazda/Library/Application Support/v2realbot/notebooks/ohlcv_df-BAC-2024-10-03T09:30:00-2024-10-16T16:00:00-['4', '7', 'B', 'C', 'F', 'O', 'P', 'U', 'V', 'W', 'Z']-100.parquet\""
|
||
]
|
||
},
|
||
"execution_count": 8,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"file_ohlcv"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 7,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>open</th>\n",
|
||
" <th>high</th>\n",
|
||
" <th>low</th>\n",
|
||
" <th>close</th>\n",
|
||
" <th>volume</th>\n",
|
||
" <th>trades</th>\n",
|
||
" <th>updated</th>\n",
|
||
" <th>vwap</th>\n",
|
||
" <th>buyvolume</th>\n",
|
||
" <th>sellvolume</th>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>time</th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>2024-10-03 09:30:00-04:00</th>\n",
|
||
" <td>38.9800</td>\n",
|
||
" <td>39.0000</td>\n",
|
||
" <td>38.940</td>\n",
|
||
" <td>38.970</td>\n",
|
||
" <td>249774.0</td>\n",
|
||
" <td>6.0</td>\n",
|
||
" <td>2024-10-03 09:30:01.061997-04:00</td>\n",
|
||
" <td>38.960055</td>\n",
|
||
" <td>500.0</td>\n",
|
||
" <td>249088.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2024-10-03 09:30:01-04:00</th>\n",
|
||
" <td>38.9500</td>\n",
|
||
" <td>39.0001</td>\n",
|
||
" <td>38.950</td>\n",
|
||
" <td>39.000</td>\n",
|
||
" <td>13553.0</td>\n",
|
||
" <td>44.0</td>\n",
|
||
" <td>2024-10-03 09:30:02.171691-04:00</td>\n",
|
||
" <td>38.985179</td>\n",
|
||
" <td>2133.0</td>\n",
|
||
" <td>1894.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2024-10-03 09:30:02-04:00</th>\n",
|
||
" <td>38.9992</td>\n",
|
||
" <td>39.0100</td>\n",
|
||
" <td>38.990</td>\n",
|
||
" <td>39.010</td>\n",
|
||
" <td>4600.0</td>\n",
|
||
" <td>20.0</td>\n",
|
||
" <td>2024-10-03 09:30:03.091339-04:00</td>\n",
|
||
" <td>39.000123</td>\n",
|
||
" <td>1031.0</td>\n",
|
||
" <td>797.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2024-10-03 09:30:03-04:00</th>\n",
|
||
" <td>38.9900</td>\n",
|
||
" <td>39.0400</td>\n",
|
||
" <td>38.990</td>\n",
|
||
" <td>39.030</td>\n",
|
||
" <td>7533.0</td>\n",
|
||
" <td>36.0</td>\n",
|
||
" <td>2024-10-03 09:30:04.193646-04:00</td>\n",
|
||
" <td>39.030827</td>\n",
|
||
" <td>1733.0</td>\n",
|
||
" <td>713.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2024-10-03 09:30:04-04:00</th>\n",
|
||
" <td>39.0320</td>\n",
|
||
" <td>39.0350</td>\n",
|
||
" <td>39.032</td>\n",
|
||
" <td>39.035</td>\n",
|
||
" <td>9142.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>2024-10-03 09:30:07.260896-04:00</td>\n",
|
||
" <td>39.032033</td>\n",
|
||
" <td>9142.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2024-10-16 15:59:55-04:00</th>\n",
|
||
" <td>42.8100</td>\n",
|
||
" <td>42.8100</td>\n",
|
||
" <td>42.810</td>\n",
|
||
" <td>42.810</td>\n",
|
||
" <td>8681.0</td>\n",
|
||
" <td>22.0</td>\n",
|
||
" <td>2024-10-16 15:59:56.000104-04:00</td>\n",
|
||
" <td>42.810000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2024-10-16 15:59:56-04:00</th>\n",
|
||
" <td>42.8150</td>\n",
|
||
" <td>42.8150</td>\n",
|
||
" <td>42.810</td>\n",
|
||
" <td>42.810</td>\n",
|
||
" <td>4128.0</td>\n",
|
||
" <td>9.0</td>\n",
|
||
" <td>2024-10-16 15:59:57.010896-04:00</td>\n",
|
||
" <td>42.811550</td>\n",
|
||
" <td>1100.0</td>\n",
|
||
" <td>603.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2024-10-16 15:59:57-04:00</th>\n",
|
||
" <td>42.8150</td>\n",
|
||
" <td>42.8150</td>\n",
|
||
" <td>42.810</td>\n",
|
||
" <td>42.810</td>\n",
|
||
" <td>5301.0</td>\n",
|
||
" <td>20.0</td>\n",
|
||
" <td>2024-10-16 15:59:58.006387-04:00</td>\n",
|
||
" <td>42.812493</td>\n",
|
||
" <td>789.0</td>\n",
|
||
" <td>1708.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2024-10-16 15:59:58-04:00</th>\n",
|
||
" <td>42.8160</td>\n",
|
||
" <td>42.8200</td>\n",
|
||
" <td>42.800</td>\n",
|
||
" <td>42.800</td>\n",
|
||
" <td>21469.0</td>\n",
|
||
" <td>33.0</td>\n",
|
||
" <td>2024-10-16 15:59:59.088188-04:00</td>\n",
|
||
" <td>42.809572</td>\n",
|
||
" <td>542.0</td>\n",
|
||
" <td>632.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2024-10-16 15:59:59-04:00</th>\n",
|
||
" <td>42.8087</td>\n",
|
||
" <td>42.8100</td>\n",
|
||
" <td>42.800</td>\n",
|
||
" <td>42.810</td>\n",
|
||
" <td>26899.0</td>\n",
|
||
" <td>16.0</td>\n",
|
||
" <td>2024-10-16 15:59:59.997799-04:00</td>\n",
|
||
" <td>42.801563</td>\n",
|
||
" <td>4757.0</td>\n",
|
||
" <td>16482.0</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>114097 rows × 10 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" open high low close volume trades \\\n",
|
||
"time \n",
|
||
"2024-10-03 09:30:00-04:00 38.9800 39.0000 38.940 38.970 249774.0 6.0 \n",
|
||
"2024-10-03 09:30:01-04:00 38.9500 39.0001 38.950 39.000 13553.0 44.0 \n",
|
||
"2024-10-03 09:30:02-04:00 38.9992 39.0100 38.990 39.010 4600.0 20.0 \n",
|
||
"2024-10-03 09:30:03-04:00 38.9900 39.0400 38.990 39.030 7533.0 36.0 \n",
|
||
"2024-10-03 09:30:04-04:00 39.0320 39.0350 39.032 39.035 9142.0 2.0 \n",
|
||
"... ... ... ... ... ... ... \n",
|
||
"2024-10-16 15:59:55-04:00 42.8100 42.8100 42.810 42.810 8681.0 22.0 \n",
|
||
"2024-10-16 15:59:56-04:00 42.8150 42.8150 42.810 42.810 4128.0 9.0 \n",
|
||
"2024-10-16 15:59:57-04:00 42.8150 42.8150 42.810 42.810 5301.0 20.0 \n",
|
||
"2024-10-16 15:59:58-04:00 42.8160 42.8200 42.800 42.800 21469.0 33.0 \n",
|
||
"2024-10-16 15:59:59-04:00 42.8087 42.8100 42.800 42.810 26899.0 16.0 \n",
|
||
"\n",
|
||
" updated vwap \\\n",
|
||
"time \n",
|
||
"2024-10-03 09:30:00-04:00 2024-10-03 09:30:01.061997-04:00 38.960055 \n",
|
||
"2024-10-03 09:30:01-04:00 2024-10-03 09:30:02.171691-04:00 38.985179 \n",
|
||
"2024-10-03 09:30:02-04:00 2024-10-03 09:30:03.091339-04:00 39.000123 \n",
|
||
"2024-10-03 09:30:03-04:00 2024-10-03 09:30:04.193646-04:00 39.030827 \n",
|
||
"2024-10-03 09:30:04-04:00 2024-10-03 09:30:07.260896-04:00 39.032033 \n",
|
||
"... ... ... \n",
|
||
"2024-10-16 15:59:55-04:00 2024-10-16 15:59:56.000104-04:00 42.810000 \n",
|
||
"2024-10-16 15:59:56-04:00 2024-10-16 15:59:57.010896-04:00 42.811550 \n",
|
||
"2024-10-16 15:59:57-04:00 2024-10-16 15:59:58.006387-04:00 42.812493 \n",
|
||
"2024-10-16 15:59:58-04:00 2024-10-16 15:59:59.088188-04:00 42.809572 \n",
|
||
"2024-10-16 15:59:59-04:00 2024-10-16 15:59:59.997799-04:00 42.801563 \n",
|
||
"\n",
|
||
" buyvolume sellvolume \n",
|
||
"time \n",
|
||
"2024-10-03 09:30:00-04:00 500.0 249088.0 \n",
|
||
"2024-10-03 09:30:01-04:00 2133.0 1894.0 \n",
|
||
"2024-10-03 09:30:02-04:00 1031.0 797.0 \n",
|
||
"2024-10-03 09:30:03-04:00 1733.0 713.0 \n",
|
||
"2024-10-03 09:30:04-04:00 9142.0 0.0 \n",
|
||
"... ... ... \n",
|
||
"2024-10-16 15:59:55-04:00 0.0 0.0 \n",
|
||
"2024-10-16 15:59:56-04:00 1100.0 603.0 \n",
|
||
"2024-10-16 15:59:57-04:00 789.0 1708.0 \n",
|
||
"2024-10-16 15:59:58-04:00 542.0 632.0 \n",
|
||
"2024-10-16 15:59:59-04:00 4757.0 16482.0 \n",
|
||
"\n",
|
||
"[114097 rows x 10 columns]"
|
||
]
|
||
},
|
||
"execution_count": 7,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"ohlcv_df"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"import matplotlib.pyplot as plt\n",
|
||
"import seaborn as sns\n",
|
||
"# Calculate daily returns\n",
|
||
"ohlcv_df['returns'] = ohlcv_df['close'].pct_change().dropna()\n",
|
||
"#same as above but pct_change is from 3 datapoints back, but only if it is the same date, else na\n",
|
||
"\n",
|
||
"\n",
|
||
"# Plot the probability distribution curve\n",
|
||
"plt.figure(figsize=(10, 6))\n",
|
||
"sns.histplot(df['returns'].dropna(), kde=True, stat='probability', bins=30)\n",
|
||
"plt.title('Probability Distribution of Daily Returns')\n",
|
||
"plt.xlabel('Daily Returns')\n",
|
||
"plt.ylabel('Probability')\n",
|
||
"plt.show()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"import pandas as pd\n",
|
||
"import numpy as np\n",
|
||
"from sklearn.model_selection import train_test_split\n",
|
||
"from sklearn.preprocessing import StandardScaler\n",
|
||
"from sklearn.linear_model import LogisticRegression\n",
|
||
"from sklearn.metrics import accuracy_score\n",
|
||
"\n",
|
||
"# Define the intervals from 5 to 20 s, returns for each interval\n",
|
||
"#maybe use rolling window?\n",
|
||
"intervals = range(5, 21, 5)\n",
|
||
"\n",
|
||
"# Create columns for percentage returns\n",
|
||
"rolling_window = 50\n",
|
||
"\n",
|
||
"# Normalize the returns using rolling mean and std\n",
|
||
"for N in intervals:\n",
|
||
" column_name = f'returns_{N}'\n",
|
||
" rolling_mean = ohlcv_df[column_name].rolling(window=rolling_window).mean()\n",
|
||
" rolling_std = ohlcv_df[column_name].rolling(window=rolling_window).std()\n",
|
||
" ohlcv_df[f'norm_{column_name}'] = (ohlcv_df[column_name] - rolling_mean) / rolling_std\n",
|
||
"\n",
|
||
"# Display the dataframe with normalized return columns\n",
|
||
"ohlcv_df\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Calculate the sum of the normalized return columns for each row\n",
|
||
"ohlcv_df['sum_norm_returns'] = ohlcv_df[[f'norm_returns_{N}' for N in intervals]].sum(axis=1)\n",
|
||
"\n",
|
||
"# Sort the DataFrame based on the sum of normalized returns in descending order\n",
|
||
"df_sorted = ohlcv_df.sort_values(by='sum_norm_returns', ascending=False)\n",
|
||
"\n",
|
||
"# Display the top rows with the highest sum of normalized returns\n",
|
||
"df_sorted\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Drop initial rows with NaN values due to pct_change\n",
|
||
"ohlcv_df.dropna(inplace=True)\n",
|
||
"\n",
|
||
"# Plotting the probability distribution curves\n",
|
||
"plt.figure(figsize=(14, 8))\n",
|
||
"for N in intervals:\n",
|
||
" sns.kdeplot(ohlcv_df[f'returns_{N}'].dropna(), label=f'Returns {N}', fill=True)\n",
|
||
"\n",
|
||
"plt.title('Probability Distribution of Percentage Returns')\n",
|
||
"plt.xlabel('Percentage Return')\n",
|
||
"plt.ylabel('Density')\n",
|
||
"plt.legend()\n",
|
||
"plt.show()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"import matplotlib.pyplot as plt\n",
|
||
"import seaborn as sns\n",
|
||
"# Plot the probability distribution curve\n",
|
||
"plt.figure(figsize=(10, 6))\n",
|
||
"sns.histplot(ohlcv_df['returns'].dropna(), kde=True, stat='probability', bins=30)\n",
|
||
"plt.title('Probability Distribution of Daily Returns')\n",
|
||
"plt.xlabel('Daily Returns')\n",
|
||
"plt.ylabel('Probability')\n",
|
||
"plt.show()\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"#show only rows from ohlcv_df where returns > 0.005\n",
|
||
"ohlcv_df[ohlcv_df['returns'] > 0.0005]\n",
|
||
"\n",
|
||
"#ohlcv_df[ohlcv_df['returns'] < -0.005]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"#ohlcv where index = date 2024-03-13 and between hour 12\n",
|
||
"\n",
|
||
"a = ohlcv_df.loc['2024-03-13 12:00:00':'2024-03-13 13:00:00']\n",
|
||
"a"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"ohlcv_df"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"trades_df"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"ohlcv_df.info()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"trades_df.to_parquet(\"trades_df-spy-0111-0111.parquett\", engine='pyarrow', compression='gzip')\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"trades_df.to_parquet(\"trades_df-spy-111-0516.parquett\", engine='pyarrow', compression='gzip', allow_truncated_timestamps=True)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"ohlcv_df.to_parquet(\"ohlcv_df-spy-111-0516.parquett\", engine='pyarrow', compression='gzip')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"basic_data = vbt.Data.from_data(vbt.symbol_dict({symbol: ohlcv_df}), tz_convert=zoneNY)\n",
|
||
"vbt.settings['plotting']['auto_rangebreaks'] = True\n",
|
||
"basic_data.ohlcv.plot()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"#access just BCA\n",
|
||
"#df_filtered = df.loc[\"BAC\"]"
|
||
]
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": ".venv",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.10.11"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 2
|
||
}
|