remote range in utc

This commit is contained in:
David Brazda
2024-11-01 15:41:23 +01:00
parent 2116679dba
commit cf6bcede48
5 changed files with 476 additions and 107 deletions

View File

@ -368,9 +368,21 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"metadata": {},
"outputs": [],
"outputs": [
{
"ename": "NameError",
"evalue": "name 'zoneNY' is not defined",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[1], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m#loading manually range subset from existing files\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m start \u001b[38;5;241m=\u001b[39m \u001b[43mzoneNY\u001b[49m\u001b[38;5;241m.\u001b[39mlocalize(datetime(\u001b[38;5;241m2024\u001b[39m, \u001b[38;5;241m1\u001b[39m, \u001b[38;5;241m15\u001b[39m, \u001b[38;5;241m9\u001b[39m, \u001b[38;5;241m30\u001b[39m))\n\u001b[1;32m 3\u001b[0m end \u001b[38;5;241m=\u001b[39m zoneNY\u001b[38;5;241m.\u001b[39mlocalize(datetime(\u001b[38;5;241m2024\u001b[39m, \u001b[38;5;241m10\u001b[39m, \u001b[38;5;241m20\u001b[39m, \u001b[38;5;241m16\u001b[39m, \u001b[38;5;241m00\u001b[39m))\n\u001b[1;32m 5\u001b[0m ohlcv_df \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mread_parquet(\n\u001b[1;32m 6\u001b[0m AGG_CACHE \u001b[38;5;241m/\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mSPY-AggType.OHLCV-1-2024-01-15T09-30-00-2024-10-20T16-00-00-4679BCFMOPUVWZ-100-True.parquet\u001b[39m\u001b[38;5;124m\"\u001b[39m, \n\u001b[1;32m 7\u001b[0m engine\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mpyarrow\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[1;32m 8\u001b[0m filters\u001b[38;5;241m=\u001b[39m[(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mtime\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m>=\u001b[39m\u001b[38;5;124m'\u001b[39m, start), \n\u001b[1;32m 9\u001b[0m (\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mtime\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m<=\u001b[39m\u001b[38;5;124m'\u001b[39m, end)]\n\u001b[1;32m 10\u001b[0m )\n",
"\u001b[0;31mNameError\u001b[0m: name 'zoneNY' is not defined"
]
}
],
"source": [
"#loading manually range subset from existing files\n",
"start = zoneNY.localize(datetime(2024, 1, 15, 9, 30))\n",
@ -422,27 +434,71 @@
"name": "stdout",
"output_type": "stream",
"text": [
"SPY Contains 46 market days\n",
"SPY All 46 split files loaded in 10.521624088287354 seconds\n",
"Trimming 2024-01-16 09:30:00-05:00 2024-03-20 16:00:00-04:00\n",
"excluding ['C', 'O', '4', 'B', '7', 'V', 'P', 'W', 'U', 'Z', 'F', '9', 'M', '6']\n",
"exclude done\n",
"minsize 100\n",
"minsize done\n",
"SPY filtered\n",
"BAC Contains 1 market days\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"BAC Remote fetching: 100%|██████████| 1/1 [00:00<00:00, 434.55it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Fetching from remote.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"BAC Receiving trades: 0%| | 0/1 [00:00<?, ?it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Remote fetched completed whole day 2024-01-16\n",
"Exact UTC range fetched: 2024-01-16 05:00:00+00:00 - 2024-01-17 04:59:59.999999+00:00\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"BAC Receiving trades: 100%|██████████| 1/1 [00:42<00:00, 42.76s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Saved to CACHE /Users/davidbrazda/Library/Application Support/v2realbot/tradecache/BAC-2024-01-16.parquet\n",
"Trimming 2024-01-16 00:00:00-05:00 2024-01-16 23:59:00-05:00\n",
"<class 'pandas.core.frame.DataFrame'>\n",
"DatetimeIndex: 6513606 entries, 2024-01-16 09:30:00.001443-05:00 to 2024-03-20 15:59:59.992808-04:00\n",
"DatetimeIndex: 222754 entries, 2024-01-16 04:00:00.009225-05:00 to 2024-01-16 19:59:48.834830-05:00\n",
"Data columns (total 6 columns):\n",
" # Column Dtype \n",
"--- ------ ----- \n",
" 0 x object \n",
" 1 p float64\n",
" 2 s int64 \n",
" 3 i int64 \n",
" 4 c object \n",
" 5 z object \n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 x 222754 non-null object \n",
" 1 p 222754 non-null float64\n",
" 2 s 222754 non-null int64 \n",
" 3 i 222754 non-null int64 \n",
" 4 c 222754 non-null object \n",
" 5 z 222754 non-null object \n",
"dtypes: float64(1), int64(2), object(3)\n",
"memory usage: 347.9+ MB\n"
"memory usage: 11.9+ MB\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n"
]
}
],
@ -456,20 +512,190 @@
"# df.info()\n",
"\n",
"#fetching multiple days with parallel\n",
"df = fetch_trades_parallel(symbol=\"SPY\",\n",
" start_date=zoneNY.localize(datetime(2024, 1, 16, 9, 30)),\n",
" end_date=zoneNY.localize(datetime(2024, 3, 20, 16, 00)))\n",
"df = fetch_trades_parallel(symbol=\"BAC\",\n",
" start_date=zoneNY.localize(datetime(2024, 1, 16, 0, 0)),\n",
" end_date=zoneNY.localize(datetime(2024, 1, 16, 23, 59)),\n",
" main_session_only=False,\n",
" exclude_conditions=None,\n",
" minsize=None,\n",
" force_remote=True)\n",
"\n",
"df.info()"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 22,
"metadata": {},
"outputs": [],
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>x</th>\n",
" <th>p</th>\n",
" <th>s</th>\n",
" <th>i</th>\n",
" <th>c</th>\n",
" <th>z</th>\n",
" </tr>\n",
" <tr>\n",
" <th>t</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2024-01-16 04:00:00.009225-05:00</th>\n",
" <td>K</td>\n",
" <td>32.800</td>\n",
" <td>1</td>\n",
" <td>52983525027912</td>\n",
" <td>[ , T, I]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-01-16 04:00:00.012088-05:00</th>\n",
" <td>P</td>\n",
" <td>32.580</td>\n",
" <td>8</td>\n",
" <td>52983525027890</td>\n",
" <td>[ , T, I]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-01-16 04:00:02.299262-05:00</th>\n",
" <td>P</td>\n",
" <td>32.750</td>\n",
" <td>1</td>\n",
" <td>52983525027916</td>\n",
" <td>[ , T, I]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-01-16 04:00:03.895322-05:00</th>\n",
" <td>P</td>\n",
" <td>32.640</td>\n",
" <td>1</td>\n",
" <td>52983525027920</td>\n",
" <td>[ , T, I]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-01-16 04:00:04.145553-05:00</th>\n",
" <td>P</td>\n",
" <td>32.740</td>\n",
" <td>1</td>\n",
" <td>52983525027921</td>\n",
" <td>[ , T, I]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-01-16 18:58:10.081270-05:00</th>\n",
" <td>D</td>\n",
" <td>32.104</td>\n",
" <td>10</td>\n",
" <td>79371957716549</td>\n",
" <td>[ , T, I]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-01-16 18:58:11.293971-05:00</th>\n",
" <td>T</td>\n",
" <td>32.090</td>\n",
" <td>3</td>\n",
" <td>62883460503386</td>\n",
" <td>[ , T, I]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-01-16 18:58:24.511348-05:00</th>\n",
" <td>D</td>\n",
" <td>32.110</td>\n",
" <td>1</td>\n",
" <td>79371957716560</td>\n",
" <td>[ , T, I]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-01-16 18:58:46.648899-05:00</th>\n",
" <td>D</td>\n",
" <td>32.110</td>\n",
" <td>1</td>\n",
" <td>79371957716786</td>\n",
" <td>[ , T, I]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-01-16 18:59:54.013894-05:00</th>\n",
" <td>D</td>\n",
" <td>32.100</td>\n",
" <td>1</td>\n",
" <td>71710070428229</td>\n",
" <td>[ , T, I]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>159301 rows × 6 columns</p>\n",
"</div>"
],
"text/plain": [
" x p s i c z\n",
"t \n",
"2024-01-16 04:00:00.009225-05:00 K 32.800 1 52983525027912 [ , T, I] A\n",
"2024-01-16 04:00:00.012088-05:00 P 32.580 8 52983525027890 [ , T, I] A\n",
"2024-01-16 04:00:02.299262-05:00 P 32.750 1 52983525027916 [ , T, I] A\n",
"2024-01-16 04:00:03.895322-05:00 P 32.640 1 52983525027920 [ , T, I] A\n",
"2024-01-16 04:00:04.145553-05:00 P 32.740 1 52983525027921 [ , T, I] A\n",
"... .. ... .. ... ... ..\n",
"2024-01-16 18:58:10.081270-05:00 D 32.104 10 79371957716549 [ , T, I] A\n",
"2024-01-16 18:58:11.293971-05:00 T 32.090 3 62883460503386 [ , T, I] A\n",
"2024-01-16 18:58:24.511348-05:00 D 32.110 1 79371957716560 [ , T, I] A\n",
"2024-01-16 18:58:46.648899-05:00 D 32.110 1 79371957716786 [ , T, I] A\n",
"2024-01-16 18:59:54.013894-05:00 D 32.100 1 71710070428229 [ , T, I] A\n",
"\n",
"[159301 rows x 6 columns]"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.info()"
"df"
]
},
{
@ -500,6 +726,207 @@
"\n",
"#compare_dataframes(df1, df2)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"from ttools.config import TRADE_CACHE\n",
"import pandas as pd\n",
"file1 = TRADE_CACHE / \"BAC-2024-01-16.parquet\"\n",
"df1 = pd.read_parquet(file1)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>x</th>\n",
" <th>p</th>\n",
" <th>s</th>\n",
" <th>i</th>\n",
" <th>c</th>\n",
" <th>z</th>\n",
" </tr>\n",
" <tr>\n",
" <th>t</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2024-01-16 04:00:00.009225-05:00</th>\n",
" <td>K</td>\n",
" <td>32.80</td>\n",
" <td>1</td>\n",
" <td>52983525027912</td>\n",
" <td>[ , T, I]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-01-16 04:00:00.012088-05:00</th>\n",
" <td>P</td>\n",
" <td>32.58</td>\n",
" <td>8</td>\n",
" <td>52983525027890</td>\n",
" <td>[ , T, I]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-01-16 04:00:00.856156-05:00</th>\n",
" <td>K</td>\n",
" <td>32.61</td>\n",
" <td>14</td>\n",
" <td>52983525028705</td>\n",
" <td>[ , F, T, I]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-01-16 04:00:02.299262-05:00</th>\n",
" <td>P</td>\n",
" <td>32.75</td>\n",
" <td>1</td>\n",
" <td>52983525027916</td>\n",
" <td>[ , T, I]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-01-16 04:00:03.895322-05:00</th>\n",
" <td>P</td>\n",
" <td>32.64</td>\n",
" <td>1</td>\n",
" <td>52983525027920</td>\n",
" <td>[ , T, I]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-01-16 19:59:24.796862-05:00</th>\n",
" <td>P</td>\n",
" <td>32.12</td>\n",
" <td>500</td>\n",
" <td>52983576997941</td>\n",
" <td>[ , T]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-01-16 19:59:24.796868-05:00</th>\n",
" <td>P</td>\n",
" <td>32.12</td>\n",
" <td>500</td>\n",
" <td>52983576997942</td>\n",
" <td>[ , T]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-01-16 19:59:24.796868-05:00</th>\n",
" <td>P</td>\n",
" <td>32.12</td>\n",
" <td>500</td>\n",
" <td>52983576997943</td>\n",
" <td>[ , T]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-01-16 19:59:24.796871-05:00</th>\n",
" <td>P</td>\n",
" <td>32.12</td>\n",
" <td>500</td>\n",
" <td>52983576997944</td>\n",
" <td>[ , T]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-01-16 19:59:48.834830-05:00</th>\n",
" <td>K</td>\n",
" <td>32.10</td>\n",
" <td>25</td>\n",
" <td>52983526941511</td>\n",
" <td>[ , T, I]</td>\n",
" <td>A</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>222754 rows × 6 columns</p>\n",
"</div>"
],
"text/plain": [
" x p s i c \\\n",
"t \n",
"2024-01-16 04:00:00.009225-05:00 K 32.80 1 52983525027912 [ , T, I] \n",
"2024-01-16 04:00:00.012088-05:00 P 32.58 8 52983525027890 [ , T, I] \n",
"2024-01-16 04:00:00.856156-05:00 K 32.61 14 52983525028705 [ , F, T, I] \n",
"2024-01-16 04:00:02.299262-05:00 P 32.75 1 52983525027916 [ , T, I] \n",
"2024-01-16 04:00:03.895322-05:00 P 32.64 1 52983525027920 [ , T, I] \n",
"... .. ... ... ... ... \n",
"2024-01-16 19:59:24.796862-05:00 P 32.12 500 52983576997941 [ , T] \n",
"2024-01-16 19:59:24.796868-05:00 P 32.12 500 52983576997942 [ , T] \n",
"2024-01-16 19:59:24.796868-05:00 P 32.12 500 52983576997943 [ , T] \n",
"2024-01-16 19:59:24.796871-05:00 P 32.12 500 52983576997944 [ , T] \n",
"2024-01-16 19:59:48.834830-05:00 K 32.10 25 52983526941511 [ , T, I] \n",
"\n",
" z \n",
"t \n",
"2024-01-16 04:00:00.009225-05:00 A \n",
"2024-01-16 04:00:00.012088-05:00 A \n",
"2024-01-16 04:00:00.856156-05:00 A \n",
"2024-01-16 04:00:02.299262-05:00 A \n",
"2024-01-16 04:00:03.895322-05:00 A \n",
"... .. \n",
"2024-01-16 19:59:24.796862-05:00 A \n",
"2024-01-16 19:59:24.796868-05:00 A \n",
"2024-01-16 19:59:24.796868-05:00 A \n",
"2024-01-16 19:59:24.796871-05:00 A \n",
"2024-01-16 19:59:48.834830-05:00 A \n",
"\n",
"[222754 rows x 6 columns]"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df1"
]
}
],
"metadata": {