optimizations

This commit is contained in:
David Brazda
2024-11-01 11:18:10 +01:00
parent c3faa53eff
commit 2116679dba
5 changed files with 491 additions and 521 deletions

View File

@ -40,7 +40,7 @@
"from ttools.utils import AggType\n",
"from datetime import datetime\n",
"from ttools.aggregator_vectorized import generate_time_bars_nb, aggregate_trades\n",
"from ttools.loaders import load_data, prepare_trade_cache\n",
"from ttools.loaders import load_data, prepare_trade_cache, fetch_daily_stock_trades\n",
"from ttools.utils import zoneNY\n",
"import vectorbtpro as vbt\n",
"from lightweight_charts import PlotDFAccessor, PlotSRAccessor\n",
@ -69,7 +69,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 7,
"metadata": {},
"outputs": [
{
@ -110,44 +110,44 @@
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2024-02-15 09:30:00-05:00</th>\n",
" <td>499.29</td>\n",
" <td>499.41</td>\n",
" <td>499.2900</td>\n",
" <td>499.3200</td>\n",
" <td>161900.0</td>\n",
" <th>2024-09-16 04:01:24-04:00</th>\n",
" <td>562.22</td>\n",
" <td>562.22</td>\n",
" <td>562.22</td>\n",
" <td>562.22</td>\n",
" <td>200.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-02-15 09:30:01-05:00</th>\n",
" <td>499.32</td>\n",
" <td>499.41</td>\n",
" <td>499.3000</td>\n",
" <td>499.4000</td>\n",
" <td>10900.0</td>\n",
" <th>2024-09-16 04:02:24-04:00</th>\n",
" <td>562.17</td>\n",
" <td>562.17</td>\n",
" <td>562.17</td>\n",
" <td>562.17</td>\n",
" <td>293.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-02-15 09:30:02-05:00</th>\n",
" <td>499.36</td>\n",
" <td>499.40</td>\n",
" <td>499.3550</td>\n",
" <td>499.3800</td>\n",
" <td>7040.0</td>\n",
" <th>2024-09-16 04:04:36-04:00</th>\n",
" <td>562.54</td>\n",
" <td>562.54</td>\n",
" <td>562.54</td>\n",
" <td>562.54</td>\n",
" <td>100.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-02-15 09:30:03-05:00</th>\n",
" <td>499.39</td>\n",
" <td>499.42</td>\n",
" <td>499.3800</td>\n",
" <td>499.4000</td>\n",
" <td>8717.0</td>\n",
" <th>2024-09-16 04:10:00-04:00</th>\n",
" <td>562.39</td>\n",
" <td>562.39</td>\n",
" <td>562.39</td>\n",
" <td>562.39</td>\n",
" <td>102.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-02-15 09:30:04-05:00</th>\n",
" <td>499.40</td>\n",
" <td>499.40</td>\n",
" <td>499.3500</td>\n",
" <td>499.3500</td>\n",
" <td>3265.0</td>\n",
" <th>2024-09-16 04:10:24-04:00</th>\n",
" <td>562.44</td>\n",
" <td>562.44</td>\n",
" <td>562.44</td>\n",
" <td>562.44</td>\n",
" <td>371.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
@ -158,69 +158,69 @@
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-03-18 15:59:55-04:00</th>\n",
" <td>512.94</td>\n",
" <td>512.94</td>\n",
" <td>512.8600</td>\n",
" <td>512.8900</td>\n",
" <td>7345.0</td>\n",
" <th>2024-10-18 19:57:24-04:00</th>\n",
" <td>584.80</td>\n",
" <td>584.80</td>\n",
" <td>584.80</td>\n",
" <td>584.80</td>\n",
" <td>100.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-03-18 15:59:56-04:00</th>\n",
" <td>512.90</td>\n",
" <td>512.90</td>\n",
" <td>512.8700</td>\n",
" <td>512.8800</td>\n",
" <td>2551.0</td>\n",
" <th>2024-10-18 19:57:48-04:00</th>\n",
" <td>584.84</td>\n",
" <td>584.84</td>\n",
" <td>584.84</td>\n",
" <td>584.84</td>\n",
" <td>622.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-03-18 15:59:57-04:00</th>\n",
" <td>512.89</td>\n",
" <td>512.91</td>\n",
" <td>512.8500</td>\n",
" <td>512.8701</td>\n",
" <td>18063.0</td>\n",
" <th>2024-10-18 19:58:48-04:00</th>\n",
" <td>584.77</td>\n",
" <td>584.79</td>\n",
" <td>584.77</td>\n",
" <td>584.79</td>\n",
" <td>4158.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-03-18 15:59:58-04:00</th>\n",
" <td>512.87</td>\n",
" <td>512.90</td>\n",
" <td>512.8496</td>\n",
" <td>512.9000</td>\n",
" <td>7734.0</td>\n",
" <th>2024-10-18 19:59:36-04:00</th>\n",
" <td>584.80</td>\n",
" <td>584.82</td>\n",
" <td>584.80</td>\n",
" <td>584.82</td>\n",
" <td>298.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-03-18 15:59:59-04:00</th>\n",
" <td>512.92</td>\n",
" <td>512.92</td>\n",
" <td>512.8200</td>\n",
" <td>512.8700</td>\n",
" <td>37159.0</td>\n",
" <th>2024-10-18 19:59:48-04:00</th>\n",
" <td>584.76</td>\n",
" <td>584.76</td>\n",
" <td>584.72</td>\n",
" <td>584.72</td>\n",
" <td>258.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>417345 rows × 5 columns</p>\n",
"<p>64218 rows × 5 columns</p>\n",
"</div>"
],
"text/plain": [
" open high low close volume\n",
"time \n",
"2024-02-15 09:30:00-05:00 499.29 499.41 499.2900 499.3200 161900.0\n",
"2024-02-15 09:30:01-05:00 499.32 499.41 499.3000 499.4000 10900.0\n",
"2024-02-15 09:30:02-05:00 499.36 499.40 499.3550 499.3800 7040.0\n",
"2024-02-15 09:30:03-05:00 499.39 499.42 499.3800 499.4000 8717.0\n",
"2024-02-15 09:30:04-05:00 499.40 499.40 499.3500 499.3500 3265.0\n",
"... ... ... ... ... ...\n",
"2024-03-18 15:59:55-04:00 512.94 512.94 512.8600 512.8900 7345.0\n",
"2024-03-18 15:59:56-04:00 512.90 512.90 512.8700 512.8800 2551.0\n",
"2024-03-18 15:59:57-04:00 512.89 512.91 512.8500 512.8701 18063.0\n",
"2024-03-18 15:59:58-04:00 512.87 512.90 512.8496 512.9000 7734.0\n",
"2024-03-18 15:59:59-04:00 512.92 512.92 512.8200 512.8700 37159.0\n",
" open high low close volume\n",
"time \n",
"2024-09-16 04:01:24-04:00 562.22 562.22 562.22 562.22 200.0\n",
"2024-09-16 04:02:24-04:00 562.17 562.17 562.17 562.17 293.0\n",
"2024-09-16 04:04:36-04:00 562.54 562.54 562.54 562.54 100.0\n",
"2024-09-16 04:10:00-04:00 562.39 562.39 562.39 562.39 102.0\n",
"2024-09-16 04:10:24-04:00 562.44 562.44 562.44 562.44 371.0\n",
"... ... ... ... ... ...\n",
"2024-10-18 19:57:24-04:00 584.80 584.80 584.80 584.80 100.0\n",
"2024-10-18 19:57:48-04:00 584.84 584.84 584.84 584.84 622.0\n",
"2024-10-18 19:58:48-04:00 584.77 584.79 584.77 584.79 4158.0\n",
"2024-10-18 19:59:36-04:00 584.80 584.82 584.80 584.82 298.0\n",
"2024-10-18 19:59:48-04:00 584.76 584.76 584.72 584.72 258.0\n",
"\n",
"[417345 rows x 5 columns]"
"[64218 rows x 5 columns]"
]
},
"execution_count": 5,
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
@ -229,17 +229,17 @@
"#This is how to call LOAD function\n",
"symbol = [\"SPY\"]\n",
"#datetime in zoneNY \n",
"day_start = datetime(2024, 2, 15, 9, 30, 0)\n",
"day_stop = datetime(2024, 3, 18, 16, 0, 0)\n",
"day_start = datetime(2024, 9, 15, 9, 30, 0)\n",
"day_stop = datetime(2024, 10, 20, 16, 0, 0)\n",
"day_start = zoneNY.localize(day_start)\n",
"day_stop = zoneNY.localize(day_stop)\n",
"\n",
"#requested AGG\n",
"resolution = 1 #12s bars\n",
"resolution = 12 #12s bars\n",
"agg_type = AggType.OHLCV #other types AggType.OHLCV_VOL, AggType.OHLCV_DOL, AggType.OHLCV_RENKO\n",
"exclude_conditions = ['C','O','4','B','7','V','P','W','U','Z','F','9','M','6'] #None to defaults\n",
"minsize = 100 #min trade size to include\n",
"main_session_only = True\n",
"main_session_only = False\n",
"force_remote = False\n",
"\n",
"data = load_data(symbol = symbol,\n",
@ -260,162 +260,9 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>open</th>\n",
" <th>high</th>\n",
" <th>low</th>\n",
" <th>close</th>\n",
" <th>volume</th>\n",
" </tr>\n",
" <tr>\n",
" <th>time</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2024-10-14 09:45:00-04:00</th>\n",
" <td>41.9650</td>\n",
" <td>41.970</td>\n",
" <td>41.950</td>\n",
" <td>41.9500</td>\n",
" <td>17895.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-10-14 09:45:12-04:00</th>\n",
" <td>41.9589</td>\n",
" <td>41.965</td>\n",
" <td>41.950</td>\n",
" <td>41.9650</td>\n",
" <td>6281.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-10-14 09:45:24-04:00</th>\n",
" <td>41.9650</td>\n",
" <td>42.005</td>\n",
" <td>41.965</td>\n",
" <td>41.9975</td>\n",
" <td>3522.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-10-14 09:45:36-04:00</th>\n",
" <td>41.9900</td>\n",
" <td>42.005</td>\n",
" <td>41.990</td>\n",
" <td>42.0000</td>\n",
" <td>5960.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-10-14 09:45:48-04:00</th>\n",
" <td>42.0050</td>\n",
" <td>42.040</td>\n",
" <td>42.005</td>\n",
" <td>42.0300</td>\n",
" <td>9113.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-10-16 15:00:00-04:00</th>\n",
" <td>42.9150</td>\n",
" <td>42.915</td>\n",
" <td>42.910</td>\n",
" <td>42.9100</td>\n",
" <td>12872.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-10-16 15:00:12-04:00</th>\n",
" <td>42.9150</td>\n",
" <td>42.920</td>\n",
" <td>42.910</td>\n",
" <td>42.9200</td>\n",
" <td>7574.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-10-16 15:00:24-04:00</th>\n",
" <td>42.9200</td>\n",
" <td>42.920</td>\n",
" <td>42.910</td>\n",
" <td>42.9200</td>\n",
" <td>1769.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-10-16 15:00:36-04:00</th>\n",
" <td>42.9200</td>\n",
" <td>42.920</td>\n",
" <td>42.905</td>\n",
" <td>42.9050</td>\n",
" <td>26599.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-10-16 15:00:48-04:00</th>\n",
" <td>42.9050</td>\n",
" <td>42.905</td>\n",
" <td>42.880</td>\n",
" <td>42.8800</td>\n",
" <td>9216.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5480 rows × 5 columns</p>\n",
"</div>"
],
"text/plain": [
" open high low close volume\n",
"time \n",
"2024-10-14 09:45:00-04:00 41.9650 41.970 41.950 41.9500 17895.0\n",
"2024-10-14 09:45:12-04:00 41.9589 41.965 41.950 41.9650 6281.0\n",
"2024-10-14 09:45:24-04:00 41.9650 42.005 41.965 41.9975 3522.0\n",
"2024-10-14 09:45:36-04:00 41.9900 42.005 41.990 42.0000 5960.0\n",
"2024-10-14 09:45:48-04:00 42.0050 42.040 42.005 42.0300 9113.0\n",
"... ... ... ... ... ...\n",
"2024-10-16 15:00:00-04:00 42.9150 42.915 42.910 42.9100 12872.0\n",
"2024-10-16 15:00:12-04:00 42.9150 42.920 42.910 42.9200 7574.0\n",
"2024-10-16 15:00:24-04:00 42.9200 42.920 42.910 42.9200 1769.0\n",
"2024-10-16 15:00:36-04:00 42.9200 42.920 42.905 42.9050 26599.0\n",
"2024-10-16 15:00:48-04:00 42.9050 42.905 42.880 42.8800 9216.0\n",
"\n",
"[5480 rows x 5 columns]"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"data.ohlcv.data[symbol[0]]"
]
@ -478,26 +325,9 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"File: SPY-AggType.OHLCV-12-2024-01-15T09-30-00-2024-10-20T16-00-00-4679BCFMOPUVWZ-100-True.parquet\n",
"Coverage: 2024-01-15 09:30:00 to 2024-10-20 16:00:00\n",
"Symbol: SPY\n",
"Agg Type: AggType.OHLCV\n",
"Resolution: 12\n",
"Excludes: 4679BCFMOPUVWZ\n",
"Minsize: 100\n",
"Main Session Only: True\n",
"--------------------------------------------------------------------------------\n"
]
}
],
"outputs": [],
"source": [
"from ttools.utils import list_matching_files, print_matching_files_info, zoneNY\n",
"from datetime import datetime\n",
@ -533,261 +363,16 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"And date subset loaded from parquet. Usually this is all done yb `load_data` in loader."
"From this file, the requested subset of dates is loaded. Usually this is all done automatically by `load_data` in the loader."
]
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>open</th>\n",
" <th>high</th>\n",
" <th>low</th>\n",
" <th>close</th>\n",
" <th>volume</th>\n",
" <th>trades</th>\n",
" <th>updated</th>\n",
" <th>vwap</th>\n",
" <th>buyvolume</th>\n",
" <th>sellvolume</th>\n",
" </tr>\n",
" <tr>\n",
" <th>time</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2024-01-16 09:30:00-05:00</th>\n",
" <td>475.250</td>\n",
" <td>475.3600</td>\n",
" <td>475.20</td>\n",
" <td>475.285</td>\n",
" <td>255386.0</td>\n",
" <td>93.0</td>\n",
" <td>2024-01-16 09:30:01.002183-05:00</td>\n",
" <td>475.251725</td>\n",
" <td>3692.0</td>\n",
" <td>242756.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-01-16 09:30:01-05:00</th>\n",
" <td>475.335</td>\n",
" <td>475.3350</td>\n",
" <td>475.23</td>\n",
" <td>475.260</td>\n",
" <td>15161.0</td>\n",
" <td>100.0</td>\n",
" <td>2024-01-16 09:30:02.007313-05:00</td>\n",
" <td>475.283390</td>\n",
" <td>4386.0</td>\n",
" <td>4944.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-01-16 09:30:02-05:00</th>\n",
" <td>475.250</td>\n",
" <td>475.3000</td>\n",
" <td>475.24</td>\n",
" <td>475.300</td>\n",
" <td>6993.0</td>\n",
" <td>39.0</td>\n",
" <td>2024-01-16 09:30:03.008912-05:00</td>\n",
" <td>475.262507</td>\n",
" <td>1900.0</td>\n",
" <td>2256.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-01-16 09:30:03-05:00</th>\n",
" <td>475.290</td>\n",
" <td>475.3200</td>\n",
" <td>475.24</td>\n",
" <td>475.270</td>\n",
" <td>8497.0</td>\n",
" <td>47.0</td>\n",
" <td>2024-01-16 09:30:04.201093-05:00</td>\n",
" <td>475.275280</td>\n",
" <td>1300.0</td>\n",
" <td>3200.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-01-16 09:30:04-05:00</th>\n",
" <td>475.250</td>\n",
" <td>475.2700</td>\n",
" <td>475.22</td>\n",
" <td>475.270</td>\n",
" <td>5367.0</td>\n",
" <td>37.0</td>\n",
" <td>2024-01-16 09:30:05.004980-05:00</td>\n",
" <td>475.234353</td>\n",
" <td>1613.0</td>\n",
" <td>1247.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-10-18 15:59:55-04:00</th>\n",
" <td>584.520</td>\n",
" <td>584.5800</td>\n",
" <td>584.51</td>\n",
" <td>584.580</td>\n",
" <td>10357.0</td>\n",
" <td>47.0</td>\n",
" <td>2024-10-18 15:59:56.008928-04:00</td>\n",
" <td>584.543870</td>\n",
" <td>1600.0</td>\n",
" <td>1100.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-10-18 15:59:56-04:00</th>\n",
" <td>584.570</td>\n",
" <td>584.6091</td>\n",
" <td>584.55</td>\n",
" <td>584.550</td>\n",
" <td>6527.0</td>\n",
" <td>32.0</td>\n",
" <td>2024-10-18 15:59:57.007658-04:00</td>\n",
" <td>584.566643</td>\n",
" <td>1525.0</td>\n",
" <td>1002.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-10-18 15:59:57-04:00</th>\n",
" <td>584.560</td>\n",
" <td>584.6100</td>\n",
" <td>584.56</td>\n",
" <td>584.600</td>\n",
" <td>5068.0</td>\n",
" <td>23.0</td>\n",
" <td>2024-10-18 15:59:58.000435-04:00</td>\n",
" <td>584.596249</td>\n",
" <td>1960.0</td>\n",
" <td>900.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-10-18 15:59:58-04:00</th>\n",
" <td>584.590</td>\n",
" <td>584.6200</td>\n",
" <td>584.56</td>\n",
" <td>584.560</td>\n",
" <td>8786.0</td>\n",
" <td>23.0</td>\n",
" <td>2024-10-18 15:59:59.041984-04:00</td>\n",
" <td>584.592217</td>\n",
" <td>2859.0</td>\n",
" <td>3921.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-10-18 15:59:59-04:00</th>\n",
" <td>584.560</td>\n",
" <td>584.6100</td>\n",
" <td>584.56</td>\n",
" <td>584.570</td>\n",
" <td>12583.0</td>\n",
" <td>69.0</td>\n",
" <td>2024-10-18 15:59:59.982132-04:00</td>\n",
" <td>584.583131</td>\n",
" <td>5303.0</td>\n",
" <td>1980.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>3384529 rows × 10 columns</p>\n",
"</div>"
],
"text/plain": [
" open high low close volume \\\n",
"time \n",
"2024-01-16 09:30:00-05:00 475.250 475.3600 475.20 475.285 255386.0 \n",
"2024-01-16 09:30:01-05:00 475.335 475.3350 475.23 475.260 15161.0 \n",
"2024-01-16 09:30:02-05:00 475.250 475.3000 475.24 475.300 6993.0 \n",
"2024-01-16 09:30:03-05:00 475.290 475.3200 475.24 475.270 8497.0 \n",
"2024-01-16 09:30:04-05:00 475.250 475.2700 475.22 475.270 5367.0 \n",
"... ... ... ... ... ... \n",
"2024-10-18 15:59:55-04:00 584.520 584.5800 584.51 584.580 10357.0 \n",
"2024-10-18 15:59:56-04:00 584.570 584.6091 584.55 584.550 6527.0 \n",
"2024-10-18 15:59:57-04:00 584.560 584.6100 584.56 584.600 5068.0 \n",
"2024-10-18 15:59:58-04:00 584.590 584.6200 584.56 584.560 8786.0 \n",
"2024-10-18 15:59:59-04:00 584.560 584.6100 584.56 584.570 12583.0 \n",
"\n",
" trades updated \\\n",
"time \n",
"2024-01-16 09:30:00-05:00 93.0 2024-01-16 09:30:01.002183-05:00 \n",
"2024-01-16 09:30:01-05:00 100.0 2024-01-16 09:30:02.007313-05:00 \n",
"2024-01-16 09:30:02-05:00 39.0 2024-01-16 09:30:03.008912-05:00 \n",
"2024-01-16 09:30:03-05:00 47.0 2024-01-16 09:30:04.201093-05:00 \n",
"2024-01-16 09:30:04-05:00 37.0 2024-01-16 09:30:05.004980-05:00 \n",
"... ... ... \n",
"2024-10-18 15:59:55-04:00 47.0 2024-10-18 15:59:56.008928-04:00 \n",
"2024-10-18 15:59:56-04:00 32.0 2024-10-18 15:59:57.007658-04:00 \n",
"2024-10-18 15:59:57-04:00 23.0 2024-10-18 15:59:58.000435-04:00 \n",
"2024-10-18 15:59:58-04:00 23.0 2024-10-18 15:59:59.041984-04:00 \n",
"2024-10-18 15:59:59-04:00 69.0 2024-10-18 15:59:59.982132-04:00 \n",
"\n",
" vwap buyvolume sellvolume \n",
"time \n",
"2024-01-16 09:30:00-05:00 475.251725 3692.0 242756.0 \n",
"2024-01-16 09:30:01-05:00 475.283390 4386.0 4944.0 \n",
"2024-01-16 09:30:02-05:00 475.262507 1900.0 2256.0 \n",
"2024-01-16 09:30:03-05:00 475.275280 1300.0 3200.0 \n",
"2024-01-16 09:30:04-05:00 475.234353 1613.0 1247.0 \n",
"... ... ... ... \n",
"2024-10-18 15:59:55-04:00 584.543870 1600.0 1100.0 \n",
"2024-10-18 15:59:56-04:00 584.566643 1525.0 1002.0 \n",
"2024-10-18 15:59:57-04:00 584.596249 1960.0 900.0 \n",
"2024-10-18 15:59:58-04:00 584.592217 2859.0 3921.0 \n",
"2024-10-18 15:59:59-04:00 584.583131 5303.0 1980.0 \n",
"\n",
"[3384529 rows x 10 columns]"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"#manually loading a date-range subset from existing files\n",
"start = zoneNY.localize(datetime(2024, 1, 15, 9, 30))\n",
"end = zoneNY.localize(datetime(2024, 10, 20, 16, 00))\n",
"\n",
@ -800,6 +385,121 @@
"\n",
"ohlcv_df"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"TTOOLS: Loaded env variables from file /Users/davidbrazda/Documents/Development/python/.env\n"
]
}
],
"source": [
"\n",
"from ttools.loaders import fetch_daily_stock_trades, fetch_trades_parallel\n",
"from ttools.utils import zoneNY\n",
"from datetime import datetime"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Fetching trades for the whole range"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"SPY Contains 46 market days\n",
"SPY All 46 split files loaded in 10.521624088287354 seconds\n",
"Trimming 2024-01-16 09:30:00-05:00 2024-03-20 16:00:00-04:00\n",
"excluding ['C', 'O', '4', 'B', '7', 'V', 'P', 'W', 'U', 'Z', 'F', '9', 'M', '6']\n",
"exclude done\n",
"minsize 100\n",
"minsize done\n",
"SPY filtered\n",
"<class 'pandas.core.frame.DataFrame'>\n",
"DatetimeIndex: 6513606 entries, 2024-01-16 09:30:00.001443-05:00 to 2024-03-20 15:59:59.992808-04:00\n",
"Data columns (total 6 columns):\n",
" # Column Dtype \n",
"--- ------ ----- \n",
" 0 x object \n",
" 1 p float64\n",
" 2 s int64 \n",
" 3 i int64 \n",
" 4 c object \n",
" 5 z object \n",
"dtypes: float64(1), int64(2), object(3)\n",
"memory usage: 347.9+ MB\n"
]
}
],
"source": [
"\n",
"\n",
"#fetching one day\n",
"# df = fetch_daily_stock_trades(symbol=\"SPY\",\n",
"# start=zoneNY.localize(datetime(2024, 1, 16, 9, 30)),\n",
"# end=zoneNY.localize(datetime(2024, 1, 16, 16, 00)))\n",
"# df.info()\n",
"\n",
"#fetching multiple days in parallel\n",
"df = fetch_trades_parallel(symbol=\"SPY\",\n",
" start_date=zoneNY.localize(datetime(2024, 1, 16, 9, 30)),\n",
" end_date=zoneNY.localize(datetime(2024, 3, 20, 16, 00)))\n",
"\n",
"df.info()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.info()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#comparing dataframes\n",
"from ttools.utils import AGG_CACHE, compare_dataframes\n",
"import pandas as pd\n",
"file1 = AGG_CACHE / \"SPY-AggType.OHLCV-1-2024-02-15T09-30-00-2024-10-20T16-00-00-4679BCFMOPUVWZ-100-False.parquet\"\n",
"file2 = AGG_CACHE / \"SPY-AggType.OHLCV-1-2024-02-15T09-30-00-2024-10-20T16-00-00-4679BCFMOPUVWZ-100-False_older2.parquet\"\n",
"df1 = pd.read_parquet(file1)\n",
"df2 = pd.read_parquet(file2)\n",
"df1.equals(df2)\n",
"\n",
"#compare_dataframes(df1, df2)"
]
}
],
"metadata": {