remote range in utc

David Brazda
2024-11-01 15:41:23 +01:00
parent 2116679dba
commit cf6bcede48
5 changed files with 476 additions and 107 deletions

View File

@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
setup(
name='ttools',
version='0.7.0',
version='0.7.1',
packages=find_packages(),
install_requires=[
# list your dependencies here

View File

@@ -368,9 +368,21 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"metadata": {},
"outputs": [],
"outputs": [
{
"ename": "NameError",
"evalue": "name 'zoneNY' is not defined",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[1], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m#loading manually range subset from existing files\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m start \u001b[38;5;241m=\u001b[39m \u001b[43mzoneNY\u001b[49m\u001b[38;5;241m.\u001b[39mlocalize(datetime(\u001b[38;5;241m2024\u001b[39m, \u001b[38;5;241m1\u001b[39m, \u001b[38;5;241m15\u001b[39m, \u001b[38;5;241m9\u001b[39m, \u001b[38;5;241m30\u001b[39m))\n\u001b[1;32m 3\u001b[0m end \u001b[38;5;241m=\u001b[39m zoneNY\u001b[38;5;241m.\u001b[39mlocalize(datetime(\u001b[38;5;241m2024\u001b[39m, \u001b[38;5;241m10\u001b[39m, \u001b[38;5;241m20\u001b[39m, \u001b[38;5;241m16\u001b[39m, \u001b[38;5;241m00\u001b[39m))\n\u001b[1;32m 5\u001b[0m ohlcv_df \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mread_parquet(\n\u001b[1;32m 6\u001b[0m AGG_CACHE \u001b[38;5;241m/\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mSPY-AggType.OHLCV-1-2024-01-15T09-30-00-2024-10-20T16-00-00-4679BCFMOPUVWZ-100-True.parquet\u001b[39m\u001b[38;5;124m\"\u001b[39m, \n\u001b[1;32m 7\u001b[0m engine\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mpyarrow\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[1;32m 8\u001b[0m filters\u001b[38;5;241m=\u001b[39m[(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mtime\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m>=\u001b[39m\u001b[38;5;124m'\u001b[39m, start), \n\u001b[1;32m 9\u001b[0m (\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mtime\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m<=\u001b[39m\u001b[38;5;124m'\u001b[39m, end)]\n\u001b[1;32m 10\u001b[0m )\n",
"\u001b[0;31mNameError\u001b[0m: name 'zoneNY' is not defined"
]
}
],
"source": [
"#loading manually range subset from existing files\n",
"start = zoneNY.localize(datetime(2024, 1, 15, 9, 30))\n",
@@ -422,27 +434,71 @@
"name": "stdout",
"output_type": "stream",
"text": [
"SPY Contains 46 market days\n",
"SPY All 46 split files loaded in 10.521624088287354 seconds\n",
"Trimming 2024-01-16 09:30:00-05:00 2024-03-20 16:00:00-04:00\n",
"excluding ['C', 'O', '4', 'B', '7', 'V', 'P', 'W', 'U', 'Z', 'F', '9', 'M', '6']\n",
"exclude done\n",
"minsize 100\n",
"minsize done\n",
"SPY filtered\n",
"BAC Contains 1 market days\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"BAC Remote fetching: 100%|██████████| 1/1 [00:00<00:00, 434.55it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Fetching from remote.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"BAC Receiving trades: 0%| | 0/1 [00:00<?, ?it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Remote fetched completed whole day 2024-01-16\n",
"Exact UTC range fetched: 2024-01-16 05:00:00+00:00 - 2024-01-17 04:59:59.999999+00:00\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"BAC Receiving trades: 100%|██████████| 1/1 [00:42<00:00, 42.76s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Saved to CACHE /Users/davidbrazda/Library/Application Support/v2realbot/tradecache/BAC-2024-01-16.parquet\n",
"Trimming 2024-01-16 00:00:00-05:00 2024-01-16 23:59:00-05:00\n",
"<class 'pandas.core.frame.DataFrame'>\n",
"DatetimeIndex: 6513606 entries, 2024-01-16 09:30:00.001443-05:00 to 2024-03-20 15:59:59.992808-04:00\n",
"DatetimeIndex: 222754 entries, 2024-01-16 04:00:00.009225-05:00 to 2024-01-16 19:59:48.834830-05:00\n",
"Data columns (total 6 columns):\n",
" # Column Dtype \n",
"--- ------ ----- \n",
" 0 x object \n",
" 1 p float64\n",
" 2 s int64 \n",
" 3 i int64 \n",
" 4 c object \n",
" 5 z object \n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 x 222754 non-null object \n",
" 1 p 222754 non-null float64\n",
" 2 s 222754 non-null int64 \n",
" 3 i 222754 non-null int64 \n",
" 4 c 222754 non-null object \n",
" 5 z 222754 non-null object \n",
"dtypes: float64(1), int64(2), object(3)\n",
"memory usage: 347.9+ MB\n"
"memory usage: 11.9+ MB\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n"
]
}
],
@@ -456,20 +512,190 @@
"# df.info()\n",
"\n",
"#fetching multiple days with parallel\n",
"df = fetch_trades_parallel(symbol=\"SPY\",\n",
" start_date=zoneNY.localize(datetime(2024, 1, 16, 9, 30)),\n",
" end_date=zoneNY.localize(datetime(2024, 3, 20, 16, 00)))\n",
"df = fetch_trades_parallel(symbol=\"BAC\",\n",
" start_date=zoneNY.localize(datetime(2024, 1, 16, 0, 0)),\n",
" end_date=zoneNY.localize(datetime(2024, 1, 16, 23, 59)),\n",
" main_session_only=False,\n",
" exclude_conditions=None,\n",
" minsize=None,\n",
" force_remote=True)\n",
"\n",
"df.info()"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 22,
"metadata": {},
"outputs": [],
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>x</th>\n",
" <th>p</th>\n",
" <th>s</th>\n",
" <th>i</th>\n",
" <th>c</th>\n",
" <th>z</th>\n",
" </tr>\n",
" <tr>\n",
" <th>t</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2024-01-16 04:00:00.009225-05:00</th>\n",
" <td>K</td>\n",
" <td>32.800</td>\n",
" <td>1</td>\n",
" <td>52983525027912</td>\n",
" <td>[ , T, I]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-01-16 04:00:00.012088-05:00</th>\n",
" <td>P</td>\n",
" <td>32.580</td>\n",
" <td>8</td>\n",
" <td>52983525027890</td>\n",
" <td>[ , T, I]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-01-16 04:00:02.299262-05:00</th>\n",
" <td>P</td>\n",
" <td>32.750</td>\n",
" <td>1</td>\n",
" <td>52983525027916</td>\n",
" <td>[ , T, I]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-01-16 04:00:03.895322-05:00</th>\n",
" <td>P</td>\n",
" <td>32.640</td>\n",
" <td>1</td>\n",
" <td>52983525027920</td>\n",
" <td>[ , T, I]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-01-16 04:00:04.145553-05:00</th>\n",
" <td>P</td>\n",
" <td>32.740</td>\n",
" <td>1</td>\n",
" <td>52983525027921</td>\n",
" <td>[ , T, I]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-01-16 18:58:10.081270-05:00</th>\n",
" <td>D</td>\n",
" <td>32.104</td>\n",
" <td>10</td>\n",
" <td>79371957716549</td>\n",
" <td>[ , T, I]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-01-16 18:58:11.293971-05:00</th>\n",
" <td>T</td>\n",
" <td>32.090</td>\n",
" <td>3</td>\n",
" <td>62883460503386</td>\n",
" <td>[ , T, I]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-01-16 18:58:24.511348-05:00</th>\n",
" <td>D</td>\n",
" <td>32.110</td>\n",
" <td>1</td>\n",
" <td>79371957716560</td>\n",
" <td>[ , T, I]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-01-16 18:58:46.648899-05:00</th>\n",
" <td>D</td>\n",
" <td>32.110</td>\n",
" <td>1</td>\n",
" <td>79371957716786</td>\n",
" <td>[ , T, I]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-01-16 18:59:54.013894-05:00</th>\n",
" <td>D</td>\n",
" <td>32.100</td>\n",
" <td>1</td>\n",
" <td>71710070428229</td>\n",
" <td>[ , T, I]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>159301 rows × 6 columns</p>\n",
"</div>"
],
"text/plain": [
" x p s i c z\n",
"t \n",
"2024-01-16 04:00:00.009225-05:00 K 32.800 1 52983525027912 [ , T, I] A\n",
"2024-01-16 04:00:00.012088-05:00 P 32.580 8 52983525027890 [ , T, I] A\n",
"2024-01-16 04:00:02.299262-05:00 P 32.750 1 52983525027916 [ , T, I] A\n",
"2024-01-16 04:00:03.895322-05:00 P 32.640 1 52983525027920 [ , T, I] A\n",
"2024-01-16 04:00:04.145553-05:00 P 32.740 1 52983525027921 [ , T, I] A\n",
"... .. ... .. ... ... ..\n",
"2024-01-16 18:58:10.081270-05:00 D 32.104 10 79371957716549 [ , T, I] A\n",
"2024-01-16 18:58:11.293971-05:00 T 32.090 3 62883460503386 [ , T, I] A\n",
"2024-01-16 18:58:24.511348-05:00 D 32.110 1 79371957716560 [ , T, I] A\n",
"2024-01-16 18:58:46.648899-05:00 D 32.110 1 79371957716786 [ , T, I] A\n",
"2024-01-16 18:59:54.013894-05:00 D 32.100 1 71710070428229 [ , T, I] A\n",
"\n",
"[159301 rows x 6 columns]"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.info()"
"df"
]
},
{
@@ -500,6 +726,207 @@
"\n",
"#compare_dataframes(df1, df2)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"from ttools.config import TRADE_CACHE\n",
"import pandas as pd\n",
"file1 = TRADE_CACHE / \"BAC-2024-01-16.parquet\"\n",
"df1 = pd.read_parquet(file1)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>x</th>\n",
" <th>p</th>\n",
" <th>s</th>\n",
" <th>i</th>\n",
" <th>c</th>\n",
" <th>z</th>\n",
" </tr>\n",
" <tr>\n",
" <th>t</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2024-01-16 04:00:00.009225-05:00</th>\n",
" <td>K</td>\n",
" <td>32.80</td>\n",
" <td>1</td>\n",
" <td>52983525027912</td>\n",
" <td>[ , T, I]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-01-16 04:00:00.012088-05:00</th>\n",
" <td>P</td>\n",
" <td>32.58</td>\n",
" <td>8</td>\n",
" <td>52983525027890</td>\n",
" <td>[ , T, I]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-01-16 04:00:00.856156-05:00</th>\n",
" <td>K</td>\n",
" <td>32.61</td>\n",
" <td>14</td>\n",
" <td>52983525028705</td>\n",
" <td>[ , F, T, I]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-01-16 04:00:02.299262-05:00</th>\n",
" <td>P</td>\n",
" <td>32.75</td>\n",
" <td>1</td>\n",
" <td>52983525027916</td>\n",
" <td>[ , T, I]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-01-16 04:00:03.895322-05:00</th>\n",
" <td>P</td>\n",
" <td>32.64</td>\n",
" <td>1</td>\n",
" <td>52983525027920</td>\n",
" <td>[ , T, I]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-01-16 19:59:24.796862-05:00</th>\n",
" <td>P</td>\n",
" <td>32.12</td>\n",
" <td>500</td>\n",
" <td>52983576997941</td>\n",
" <td>[ , T]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-01-16 19:59:24.796868-05:00</th>\n",
" <td>P</td>\n",
" <td>32.12</td>\n",
" <td>500</td>\n",
" <td>52983576997942</td>\n",
" <td>[ , T]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-01-16 19:59:24.796868-05:00</th>\n",
" <td>P</td>\n",
" <td>32.12</td>\n",
" <td>500</td>\n",
" <td>52983576997943</td>\n",
" <td>[ , T]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-01-16 19:59:24.796871-05:00</th>\n",
" <td>P</td>\n",
" <td>32.12</td>\n",
" <td>500</td>\n",
" <td>52983576997944</td>\n",
" <td>[ , T]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-01-16 19:59:48.834830-05:00</th>\n",
" <td>K</td>\n",
" <td>32.10</td>\n",
" <td>25</td>\n",
" <td>52983526941511</td>\n",
" <td>[ , T, I]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>222754 rows × 6 columns</p>\n",
"</div>"
],
"text/plain": [
" x p s i c \\\n",
"t \n",
"2024-01-16 04:00:00.009225-05:00 K 32.80 1 52983525027912 [ , T, I] \n",
"2024-01-16 04:00:00.012088-05:00 P 32.58 8 52983525027890 [ , T, I] \n",
"2024-01-16 04:00:00.856156-05:00 K 32.61 14 52983525028705 [ , F, T, I] \n",
"2024-01-16 04:00:02.299262-05:00 P 32.75 1 52983525027916 [ , T, I] \n",
"2024-01-16 04:00:03.895322-05:00 P 32.64 1 52983525027920 [ , T, I] \n",
"... .. ... ... ... ... \n",
"2024-01-16 19:59:24.796862-05:00 P 32.12 500 52983576997941 [ , T] \n",
"2024-01-16 19:59:24.796868-05:00 P 32.12 500 52983576997942 [ , T] \n",
"2024-01-16 19:59:24.796868-05:00 P 32.12 500 52983576997943 [ , T] \n",
"2024-01-16 19:59:24.796871-05:00 P 32.12 500 52983576997944 [ , T] \n",
"2024-01-16 19:59:48.834830-05:00 K 32.10 25 52983526941511 [ , T, I] \n",
"\n",
" z \n",
"t \n",
"2024-01-16 04:00:00.009225-05:00 A \n",
"2024-01-16 04:00:00.012088-05:00 A \n",
"2024-01-16 04:00:00.856156-05:00 A \n",
"2024-01-16 04:00:02.299262-05:00 A \n",
"2024-01-16 04:00:03.895322-05:00 A \n",
"... .. \n",
"2024-01-16 19:59:24.796862-05:00 A \n",
"2024-01-16 19:59:24.796868-05:00 A \n",
"2024-01-16 19:59:24.796868-05:00 A \n",
"2024-01-16 19:59:24.796871-05:00 A \n",
"2024-01-16 19:59:48.834830-05:00 A \n",
"\n",
"[222754 rows x 6 columns]"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df1"
]
}
],
"metadata": {

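The NameError in the first changed cell above comes from zoneNY never being defined in the notebook. A minimal standalone sketch of what that cell assumes (not the committed code): zoneNY is taken to be the America/New_York timezone and AGG_CACHE the aggregate-cache directory, plausibly exported from ttools.config the way TRADE_CACHE is later in the notebook, though that is an assumption.

# Sketch only: the zoneNY and AGG_CACHE definitions here are assumptions.
from datetime import datetime
from pathlib import Path

import pandas as pd
import pytz

zoneNY = pytz.timezone("America/New_York")
AGG_CACHE = Path.home() / "agg_cache"  # placeholder; point this at the real aggregate cache

start = zoneNY.localize(datetime(2024, 1, 15, 9, 30))
end = zoneNY.localize(datetime(2024, 10, 20, 16, 0))

# pyarrow applies the time filters while reading, so only rows inside the range are loaded
ohlcv_df = pd.read_parquet(
    AGG_CACHE / "SPY-AggType.OHLCV-1-2024-01-15T09-30-00-2024-10-20T16-00-00-4679BCFMOPUVWZ-100-True.parquet",
    engine="pyarrow",
    filters=[("time", ">=", start), ("time", "<=", end)],
)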
View File

@@ -1,69 +0,0 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The autoreload extension is already loaded. To reload it, use:\n",
" %reload_ext autoreload\n"
]
},
{
"data": {
"text/plain": [
"['CUVWAP', 'DIVRELN']"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%load_ext autoreload\n",
"%autoreload 2\n",
"import vectorbtpro as vbt\n",
"from ttools.vbtindicators import register_custom_inds\n",
"from ttools.indicators import CUVWAP\n",
"\n",
"\n",
"register_custom_inds(None, \"override\")\n",
"#chopiness = vbt.indicator(\"technical:CHOPINESS\").run(s12_data.open, s12_data.high, s12_data.low, s12_data.close, s12_data.volume, window = 100)\n",
"#vwap_cum_roll = vbt.indicator(\"technical:ROLLING_VWAP\").run(s12_data.open, s12_data.high, s12_data.low, s12_data.close, s12_data.volume, window = 100, min_periods = 5)\n",
"#vwap_cum_d = vbt.indicator(\"ttools:CUVWAP\").run(s12_data.high, s12_data.low, s12_data.close, s12_data.volume, anchor=\"D\", drag=50)\n",
"#vwap_lin_angle = vbt.indicator(\"talib:LINEARREG_ANGLE\").run(vwap_cum_d.vwap, timeperiod=2)\n",
"\n",
"vbt.IF.list_indicators(\"ttools\")\n",
"\n",
"\n",
"\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.11"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@@ -29,10 +29,10 @@ def aggregate_trades_optimized(symbol: str, trades_df: pd.DataFrame, resolution:
del trades_df
# 3. Convert timestamps maintaining exact precision
# Convert directly to int64 nanoseconds, then to float seconds
unix_timestamps_s = timestamps.view('int64').astype(np.float64) / 1e6
# Convert directly to int64 nanoseconds, then to float seconds - there was a problem
#unix_timestamps_s = timestamps.view('int64').astype(np.float64) / 1e6
#original not optimized, in case of issues (5x slower)
#unix_timestamps_s = timestamps.astype('datetime64[ns]').astype(np.float64) / 1e9
unix_timestamps_s = timestamps.astype('datetime64[ns]').astype(np.float64) / 1e9
# 4. Create ticks array efficiently
# 3. Pre-allocate array for better memory efficiency

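The comment in this hunk only says the int64-view shortcut "was a problem"; the visible difference is the divisor, since an int64 view of datetime64[ns] values is nanoseconds since the epoch, so dividing by 1e6 yields millisecond-scaled numbers while the restored path divides by 1e9 to get seconds. A standalone sketch of the unit arithmetic, assuming timestamps is a ns-precision numpy datetime64 array (the actual dtype inside aggregate_trades_optimized is not shown in this diff):

import numpy as np
import pandas as pd

# assumed stand-in for the trade timestamps: tz-naive UTC, ns precision
idx = pd.date_range("2024-01-16 09:30", periods=3, freq="s", tz="America/New_York")
timestamps = idx.tz_convert("UTC").tz_localize(None).values  # datetime64[ns] ndarray

# int64 view = nanoseconds since epoch; /1e9 converts to float seconds
# (the removed /1e6 variant would have produced values 1000x too large)
fast = timestamps.view("int64").astype(np.float64) / 1e9

# restored original conversion from the diff (slower per the comment, same numbers here)
slow = timestamps.astype("datetime64[ns]").astype(np.float64) / 1e9

assert np.allclose(fast, slow)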
View File

@@ -1,5 +1,6 @@
from ctypes import Union
from ttools import zoneUTC
from ttools.config import *
from datetime import datetime
from alpaca.data.historical import StockHistoricalDataClient
@@ -202,8 +203,8 @@ def fetch_daily_stock_trades(symbol, start, end, exclude_conditions=None, minsiz
by using force_remote - forces remote data to be used always, thus refreshing the cache for these dates
Attributes:
:param symbol: The stock symbol to fetch trades for.
:param start: The start time for the trade data.
:param end: The end time for the trade data.
:param start: The start time for the trade data, in market timezone.
:param end: The end time for the trade data, in market timezone.
:exclude_conditions: list of string conditions to exclude from the data
:minsize minimum size of trade to be included in the data
:no_return: If True, do not return the DataFrame. Used to prepare cached files.
@@ -231,24 +232,34 @@
#exists in cache?
daily_file = f"{symbol}-{str(start.date())}.parquet"
file_path = TRADE_CACHE / daily_file
if file_path.exists() and (not force_remote or not no_return):
if file_path.exists() and (not force_remote and not no_return):
with trade_cache_lock:
df = pd.read_parquet(file_path)
print("Loaded from CACHE", file_path)
df = filter_trade_df(df, start, end, exclude_conditions, minsize, symbol_included=False, main_session_only=main_session_only)
return df
day_next = start.date() + timedelta(days=1)
#lets create borders of day in UTC as Alpaca has only UTC date
start_date = start.date()
# Create min/max times in NY timezone
ny_day_min = zoneNY.localize(datetime.combine(start_date, time.min))
ny_day_max = zoneNY.localize(datetime.combine(start_date, time.max))
# Convert both to UTC
utc_day_min = ny_day_min.astimezone(zoneUTC)
utc_day_max = ny_day_max.astimezone(zoneUTC)
print("Fetching from remote.")
client = StockHistoricalDataClient(ACCOUNT1_LIVE_API_KEY, ACCOUNT1_LIVE_SECRET_KEY, raw_data=True)
stockTradeRequest = StockTradesRequest(symbol_or_symbols=symbol, start=start.date(), end=day_next, feed=data_feed)
stockTradeRequest = StockTradesRequest(symbol_or_symbols=symbol, start=utc_day_min, end=utc_day_max, feed=data_feed)
last_exception = None
for attempt in range(max_retries):
try:
tradesResponse = client.get_stock_trades(stockTradeRequest)
print(f"Remote fetched completed.", start.date(), day_next)
print(f"Remote fetched completed whole day", start.date())
print(f"Exact UTC range fetched: {utc_day_min} - {utc_day_max}")
if not tradesResponse[symbol]:
print(f"EMPTY")
return pd.DataFrame()
@@ -256,7 +267,7 @@ def fetch_daily_stock_trades(symbol, start, end, exclude_conditions=None, minsiz
df = convert_dict_to_multiindex_df(tradesResponse, rename_labels=rename_labels, keep_symbols=keep_symbols)
#if today is market still open, dont cache - also dont cache for IEX feeed
if datetime.now().astimezone(zoneNY).date() < day_next or data_feed == DataFeed.IEX:
if datetime.now().astimezone(zoneNY).date() < start_date + timedelta(days=1) or data_feed == DataFeed.IEX:
print("not saving trade cache, market still open today or IEX datapoint")
#ic(datetime.now().astimezone(zoneNY))
#ic(day.open, day.close)
@@ -277,7 +288,7 @@ def fetch_daily_stock_trades(symbol, start, end, exclude_conditions=None, minsiz
print("All attempts to fetch data failed.")
raise ConnectionError(f"Failed to fetch stock trades after {max_retries} retries. Last exception: {str(last_exception)} and {format_exc()}")
def fetch_trades_parallel(symbol, start_date, end_date, exclude_conditions = EXCLUDE_CONDITIONS, minsize = 100, main_session_only = True, force_remote = False, max_workers=None, no_return = False, verbose = None):
def fetch_trades_parallel(symbol, start_date, end_date, exclude_conditions = EXCLUDE_CONDITIONS, minsize = None, main_session_only = True, force_remote = False, max_workers=None, no_return = False, verbose = None):
"""
Fetch trades between ranges.
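
The core of this commit is that StockTradesRequest now receives exact UTC instants spanning the whole New York calendar day instead of bare dates. A standalone sketch of that boundary construction (pytz stands in here for the zoneNY/zoneUTC objects that ttools imports):

from datetime import datetime, time

import pytz

zoneNY = pytz.timezone("America/New_York")   # assumed equivalents of the ttools zones
zoneUTC = pytz.utc

start = zoneNY.localize(datetime(2024, 1, 16, 0, 0))
start_date = start.date()

# full NY calendar day ...
ny_day_min = zoneNY.localize(datetime.combine(start_date, time.min))
ny_day_max = zoneNY.localize(datetime.combine(start_date, time.max))

# ... expressed as exact UTC instants for the Alpaca request
utc_day_min = ny_day_min.astimezone(zoneUTC)
utc_day_max = ny_day_max.astimezone(zoneUTC)

print(utc_day_min)  # 2024-01-16 05:00:00+00:00
print(utc_day_max)  # 2024-01-17 04:59:59.999999+00:00

The printed values match the "Exact UTC range fetched" line in the notebook output above; presumably the old call, which passed start.date() and day_next directly, let the API place the day boundary at midnight UTC, five hours off the New York trading day, which is what the commit title "remote range in utc" addresses.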