Initial commit after copying files from flawed repository
This commit is contained in:
348
research/get_trades_at_once.ipynb
Normal file
348
research/get_trades_at_once.ipynb
Normal file
@ -0,0 +1,348 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Loading trades and vectorized aggregation\n",
|
||||
"Describes how to fetch trades (remote/cached) and use new vectorized aggregation to aggregate bars of given type (time, volume, dollar) and resolution\n",
|
||||
"\n",
|
||||
"`fetch_trades_parallel` enables to fetch trades of given symbol and interval, also can filter conditions and minimum size. return `trades_df`\n",
|
||||
"`aggregate_trades` acceptss `trades_df` and ressolution and type of bars (VOLUME, TIME, DOLLAR) and return aggregated ohlcv dataframe `ohlcv_df`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"import numpy as np\n",
|
||||
"from numba import jit\n",
|
||||
"from alpaca.data.historical import StockHistoricalDataClient\n",
|
||||
"from v2realbot.config import ACCOUNT1_PAPER_API_KEY, ACCOUNT1_PAPER_SECRET_KEY, DATA_DIR\n",
|
||||
"from alpaca.data.requests import StockTradesRequest\n",
|
||||
"from v2realbot.enums.enums import BarType\n",
|
||||
"import time\n",
|
||||
"from datetime import datetime\n",
|
||||
"from v2realbot.utils.utils import parse_alpaca_timestamp, ltp, zoneNY, send_to_telegram, fetch_calendar_data\n",
|
||||
"import pyarrow\n",
|
||||
"from v2realbot.loader.aggregator_vectorized import fetch_daily_stock_trades, fetch_trades_parallel, generate_time_bars_nb, aggregate_trades\n",
|
||||
"import vectorbtpro as vbt\n",
|
||||
"import v2realbot.utils.config_handler as cfh\n",
|
||||
"\n",
|
||||
"vbt.settings.set_theme(\"dark\")\n",
|
||||
"vbt.settings['plotting']['layout']['width'] = 1280\n",
|
||||
"vbt.settings.plotting.auto_rangebreaks = True\n",
|
||||
"# Set the option to display with pagination\n",
|
||||
"pd.set_option('display.notebook_repr_html', True)\n",
|
||||
"pd.set_option('display.max_rows', 20) # Number of rows per page\n",
|
||||
"# pd.set_option('display.float_format', '{:.9f}'.format)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"#trade filtering\n",
|
||||
"exclude_conditions = cfh.config_handler.get_val('AGG_EXCLUDED_TRADES') #standard ['C','O','4','B','7','V','P','W','U','Z','F']\n",
|
||||
"minsize = 100\n",
|
||||
"\n",
|
||||
"symbol = \"SPY\"\n",
|
||||
"#datetime in zoneNY \n",
|
||||
"day_start = datetime(2024, 1, 1, 9, 30, 0)\n",
|
||||
"day_stop = datetime(2024, 1, 14, 16, 00, 0)\n",
|
||||
"day_start = zoneNY.localize(day_start)\n",
|
||||
"day_stop = zoneNY.localize(day_stop)\n",
|
||||
"#filename of trades_df parquet, date are in isoformat but without time zone part\n",
|
||||
"dir = DATA_DIR + \"/notebooks/\"\n",
|
||||
"#parquet interval cache contains exclude conditions and minsize filtering\n",
|
||||
"file_trades = dir + f\"trades_df-{symbol}-{day_start.strftime('%Y-%m-%dT%H:%M:%S')}-{day_stop.strftime('%Y-%m-%dT%H:%M:%S')}-{exclude_conditions}-{minsize}.parquet\"\n",
|
||||
"#file_trades = dir + f\"trades_df-{symbol}-{day_start.strftime('%Y-%m-%dT%H:%M:%S')}-{day_stop.strftime('%Y-%m-%dT%H:%M:%S')}.parquet\"\n",
|
||||
"file_ohlcv = dir + f\"ohlcv_df-{symbol}-{day_start.strftime('%Y-%m-%dT%H:%M:%S')}-{day_stop.strftime('%Y-%m-%dT%H:%M:%S')}-{exclude_conditions}-{minsize}.parquet\"\n",
|
||||
"\n",
|
||||
"#PRINT all parquet in directory\n",
|
||||
"import os\n",
|
||||
"files = [f for f in os.listdir(dir) if f.endswith(\".parquet\")]\n",
|
||||
"for f in files:\n",
|
||||
" print(f)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"trades_df = fetch_daily_stock_trades(symbol, day_start, day_stop, exclude_conditions=exclude_conditions, minsize=minsize, force_remote=False, max_retries=5, backoff_factor=1)\n",
|
||||
"trades_df"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#Either load trades or ohlcv from parquet if exists\n",
|
||||
"\n",
|
||||
"#trades_df = fetch_trades_parallel(symbol, day_start, day_stop, exclude_conditions=exclude_conditions, minsize=50, max_workers=20) #exclude_conditions=['C','O','4','B','7','V','P','W','U','Z','F'])\n",
|
||||
"# trades_df.to_parquet(file_trades, engine='pyarrow', compression='gzip')\n",
|
||||
"\n",
|
||||
"trades_df = pd.read_parquet(file_trades,engine='pyarrow')\n",
|
||||
"ohlcv_df = aggregate_trades(symbol=symbol, trades_df=trades_df, resolution=1, type=BarType.TIME)\n",
|
||||
"ohlcv_df.to_parquet(file_ohlcv, engine='pyarrow', compression='gzip')\n",
|
||||
"\n",
|
||||
"# ohlcv_df = pd.read_parquet(file_ohlcv,engine='pyarrow')\n",
|
||||
"# trades_df = pd.read_parquet(file_trades,engine='pyarrow')\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#list all files is dir directory with parquet extension\n",
|
||||
"dir = DATA_DIR + \"/notebooks/\"\n",
|
||||
"import os\n",
|
||||
"files = [f for f in os.listdir(dir) if f.endswith(\".parquet\")]\n",
|
||||
"file_name = \"\"\n",
|
||||
"ohlcv_df = pd.read_parquet(file_ohlcv,engine='pyarrow')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"ohlcv_df"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"import seaborn as sns\n",
|
||||
"# Calculate daily returns\n",
|
||||
"ohlcv_df['returns'] = ohlcv_df['close'].pct_change().dropna()\n",
|
||||
"#same as above but pct_change is from 3 datapoints back, but only if it is the same date, else na\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# Plot the probability distribution curve\n",
|
||||
"plt.figure(figsize=(10, 6))\n",
|
||||
"sns.histplot(df['returns'].dropna(), kde=True, stat='probability', bins=30)\n",
|
||||
"plt.title('Probability Distribution of Daily Returns')\n",
|
||||
"plt.xlabel('Daily Returns')\n",
|
||||
"plt.ylabel('Probability')\n",
|
||||
"plt.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"import numpy as np\n",
|
||||
"from sklearn.model_selection import train_test_split\n",
|
||||
"from sklearn.preprocessing import StandardScaler\n",
|
||||
"from sklearn.linear_model import LogisticRegression\n",
|
||||
"from sklearn.metrics import accuracy_score\n",
|
||||
"\n",
|
||||
"# Define the intervals from 5 to 20 s, returns for each interval\n",
|
||||
"#maybe use rolling window?\n",
|
||||
"intervals = range(5, 21, 5)\n",
|
||||
"\n",
|
||||
"# Create columns for percentage returns\n",
|
||||
"rolling_window = 50\n",
|
||||
"\n",
|
||||
"# Normalize the returns using rolling mean and std\n",
|
||||
"for N in intervals:\n",
|
||||
" column_name = f'returns_{N}'\n",
|
||||
" rolling_mean = ohlcv_df[column_name].rolling(window=rolling_window).mean()\n",
|
||||
" rolling_std = ohlcv_df[column_name].rolling(window=rolling_window).std()\n",
|
||||
" ohlcv_df[f'norm_{column_name}'] = (ohlcv_df[column_name] - rolling_mean) / rolling_std\n",
|
||||
"\n",
|
||||
"# Display the dataframe with normalized return columns\n",
|
||||
"ohlcv_df\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Calculate the sum of the normalized return columns for each row\n",
|
||||
"ohlcv_df['sum_norm_returns'] = ohlcv_df[[f'norm_returns_{N}' for N in intervals]].sum(axis=1)\n",
|
||||
"\n",
|
||||
"# Sort the DataFrame based on the sum of normalized returns in descending order\n",
|
||||
"df_sorted = ohlcv_df.sort_values(by='sum_norm_returns', ascending=False)\n",
|
||||
"\n",
|
||||
"# Display the top rows with the highest sum of normalized returns\n",
|
||||
"df_sorted\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Drop initial rows with NaN values due to pct_change\n",
|
||||
"ohlcv_df.dropna(inplace=True)\n",
|
||||
"\n",
|
||||
"# Plotting the probability distribution curves\n",
|
||||
"plt.figure(figsize=(14, 8))\n",
|
||||
"for N in intervals:\n",
|
||||
" sns.kdeplot(ohlcv_df[f'returns_{N}'].dropna(), label=f'Returns {N}', fill=True)\n",
|
||||
"\n",
|
||||
"plt.title('Probability Distribution of Percentage Returns')\n",
|
||||
"plt.xlabel('Percentage Return')\n",
|
||||
"plt.ylabel('Density')\n",
|
||||
"plt.legend()\n",
|
||||
"plt.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"import seaborn as sns\n",
|
||||
"# Plot the probability distribution curve\n",
|
||||
"plt.figure(figsize=(10, 6))\n",
|
||||
"sns.histplot(ohlcv_df['returns'].dropna(), kde=True, stat='probability', bins=30)\n",
|
||||
"plt.title('Probability Distribution of Daily Returns')\n",
|
||||
"plt.xlabel('Daily Returns')\n",
|
||||
"plt.ylabel('Probability')\n",
|
||||
"plt.show()\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#show only rows from ohlcv_df where returns > 0.005\n",
|
||||
"ohlcv_df[ohlcv_df['returns'] > 0.0005]\n",
|
||||
"\n",
|
||||
"#ohlcv_df[ohlcv_df['returns'] < -0.005]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#ohlcv where index = date 2024-03-13 and between hour 12\n",
|
||||
"\n",
|
||||
"a = ohlcv_df.loc['2024-03-13 12:00:00':'2024-03-13 13:00:00']\n",
|
||||
"a"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"ohlcv_df"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"trades_df"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"ohlcv_df.info()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"trades_df.to_parquet(\"trades_df-spy-0111-0111.parquett\", engine='pyarrow', compression='gzip')\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"trades_df.to_parquet(\"trades_df-spy-111-0516.parquett\", engine='pyarrow', compression='gzip', allow_truncated_timestamps=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"ohlcv_df.to_parquet(\"ohlcv_df-spy-111-0516.parquett\", engine='pyarrow', compression='gzip')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"basic_data = vbt.Data.from_data(vbt.symbol_dict({symbol: ohlcv_df}), tz_convert=zoneNY)\n",
|
||||
"vbt.settings['plotting']['auto_rangebreaks'] = True\n",
|
||||
"basic_data.ohlcv.plot()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#access just BCA\n",
|
||||
"#df_filtered = df.loc[\"BAC\"]"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": ".venv",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.11"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
Reference in New Issue
Block a user