optimizations

setup.py (2 lines changed)

@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
 
 setup(
     name='ttools',
-    version='0.6.4',
+    version='0.7.0',
     packages=find_packages(),
     install_requires=[
         # list your dependencies here
@@ -40,7 +40,7 @@
    "from ttools.utils import AggType\n",
    "from datetime import datetime\n",
    "from ttools.aggregator_vectorized import generate_time_bars_nb, aggregate_trades\n",
-   "from ttools.loaders import load_data, prepare_trade_cache\n",
+   "from ttools.loaders import load_data, prepare_trade_cache, fetch_daily_stock_trades\n",
    "from ttools.utils import zoneNY\n",
    "import vectorbtpro as vbt\n",
    "from lightweight_charts import PlotDFAccessor, PlotSRAccessor\n",
@@ -69,7 +69,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 7,
    "metadata": {},
    "outputs": [
     {
@@ -110,44 +110,44 @@
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
-       "      <th>2024-02-15 09:30:00-05:00</th>\n",
-       "      <td>499.29</td>\n",
-       "      <td>499.41</td>\n",
-       "      <td>499.2900</td>\n",
-       "      <td>499.3200</td>\n",
-       "      <td>161900.0</td>\n",
+       "      <th>2024-09-16 04:01:24-04:00</th>\n",
+       "      <td>562.22</td>\n",
+       "      <td>562.22</td>\n",
+       "      <td>562.22</td>\n",
+       "      <td>562.22</td>\n",
+       "      <td>200.0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>2024-02-15 09:30:01-05:00</th>\n",
-       "      <td>499.32</td>\n",
-       "      <td>499.41</td>\n",
-       "      <td>499.3000</td>\n",
-       "      <td>499.4000</td>\n",
-       "      <td>10900.0</td>\n",
+       "      <th>2024-09-16 04:02:24-04:00</th>\n",
+       "      <td>562.17</td>\n",
+       "      <td>562.17</td>\n",
+       "      <td>562.17</td>\n",
+       "      <td>562.17</td>\n",
+       "      <td>293.0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>2024-02-15 09:30:02-05:00</th>\n",
-       "      <td>499.36</td>\n",
-       "      <td>499.40</td>\n",
-       "      <td>499.3550</td>\n",
-       "      <td>499.3800</td>\n",
-       "      <td>7040.0</td>\n",
+       "      <th>2024-09-16 04:04:36-04:00</th>\n",
+       "      <td>562.54</td>\n",
+       "      <td>562.54</td>\n",
+       "      <td>562.54</td>\n",
+       "      <td>562.54</td>\n",
+       "      <td>100.0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>2024-02-15 09:30:03-05:00</th>\n",
-       "      <td>499.39</td>\n",
-       "      <td>499.42</td>\n",
-       "      <td>499.3800</td>\n",
-       "      <td>499.4000</td>\n",
-       "      <td>8717.0</td>\n",
+       "      <th>2024-09-16 04:10:00-04:00</th>\n",
+       "      <td>562.39</td>\n",
+       "      <td>562.39</td>\n",
+       "      <td>562.39</td>\n",
+       "      <td>562.39</td>\n",
+       "      <td>102.0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>2024-02-15 09:30:04-05:00</th>\n",
-       "      <td>499.40</td>\n",
-       "      <td>499.40</td>\n",
-       "      <td>499.3500</td>\n",
-       "      <td>499.3500</td>\n",
-       "      <td>3265.0</td>\n",
+       "      <th>2024-09-16 04:10:24-04:00</th>\n",
+       "      <td>562.44</td>\n",
+       "      <td>562.44</td>\n",
+       "      <td>562.44</td>\n",
+       "      <td>562.44</td>\n",
+       "      <td>371.0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>...</th>\n",
@@ -158,69 +158,69 @@
        "      <td>...</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>2024-03-18 15:59:55-04:00</th>\n",
-       "      <td>512.94</td>\n",
-       "      <td>512.94</td>\n",
-       "      <td>512.8600</td>\n",
-       "      <td>512.8900</td>\n",
-       "      <td>7345.0</td>\n",
+       "      <th>2024-10-18 19:57:24-04:00</th>\n",
+       "      <td>584.80</td>\n",
+       "      <td>584.80</td>\n",
+       "      <td>584.80</td>\n",
+       "      <td>584.80</td>\n",
+       "      <td>100.0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>2024-03-18 15:59:56-04:00</th>\n",
-       "      <td>512.90</td>\n",
-       "      <td>512.90</td>\n",
-       "      <td>512.8700</td>\n",
-       "      <td>512.8800</td>\n",
-       "      <td>2551.0</td>\n",
+       "      <th>2024-10-18 19:57:48-04:00</th>\n",
+       "      <td>584.84</td>\n",
+       "      <td>584.84</td>\n",
+       "      <td>584.84</td>\n",
+       "      <td>584.84</td>\n",
+       "      <td>622.0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>2024-03-18 15:59:57-04:00</th>\n",
-       "      <td>512.89</td>\n",
-       "      <td>512.91</td>\n",
-       "      <td>512.8500</td>\n",
-       "      <td>512.8701</td>\n",
-       "      <td>18063.0</td>\n",
+       "      <th>2024-10-18 19:58:48-04:00</th>\n",
+       "      <td>584.77</td>\n",
+       "      <td>584.79</td>\n",
+       "      <td>584.77</td>\n",
+       "      <td>584.79</td>\n",
+       "      <td>4158.0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>2024-03-18 15:59:58-04:00</th>\n",
-       "      <td>512.87</td>\n",
-       "      <td>512.90</td>\n",
-       "      <td>512.8496</td>\n",
-       "      <td>512.9000</td>\n",
-       "      <td>7734.0</td>\n",
+       "      <th>2024-10-18 19:59:36-04:00</th>\n",
+       "      <td>584.80</td>\n",
+       "      <td>584.82</td>\n",
+       "      <td>584.80</td>\n",
+       "      <td>584.82</td>\n",
+       "      <td>298.0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>2024-03-18 15:59:59-04:00</th>\n",
-       "      <td>512.92</td>\n",
-       "      <td>512.92</td>\n",
-       "      <td>512.8200</td>\n",
-       "      <td>512.8700</td>\n",
-       "      <td>37159.0</td>\n",
+       "      <th>2024-10-18 19:59:48-04:00</th>\n",
+       "      <td>584.76</td>\n",
+       "      <td>584.76</td>\n",
+       "      <td>584.72</td>\n",
+       "      <td>584.72</td>\n",
+       "      <td>258.0</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
-       "<p>417345 rows × 5 columns</p>\n",
+       "<p>64218 rows × 5 columns</p>\n",
        "</div>"
       ],
       "text/plain": [
        "                             open    high       low     close    volume\n",
        "time                                                                    \n",
-       "2024-02-15 09:30:00-05:00  499.29  499.41  499.2900  499.3200  161900.0\n",
-       "2024-02-15 09:30:01-05:00  499.32  499.41  499.3000  499.4000   10900.0\n",
-       "2024-02-15 09:30:02-05:00  499.36  499.40  499.3550  499.3800    7040.0\n",
-       "2024-02-15 09:30:03-05:00  499.39  499.42  499.3800  499.4000    8717.0\n",
-       "2024-02-15 09:30:04-05:00  499.40  499.40  499.3500  499.3500    3265.0\n",
+       "2024-09-16 04:01:24-04:00  562.22  562.22    562.22    562.22     200.0\n",
+       "2024-09-16 04:02:24-04:00  562.17  562.17    562.17    562.17     293.0\n",
+       "2024-09-16 04:04:36-04:00  562.54  562.54    562.54    562.54     100.0\n",
+       "2024-09-16 04:10:00-04:00  562.39  562.39    562.39    562.39     102.0\n",
+       "2024-09-16 04:10:24-04:00  562.44  562.44    562.44    562.44     371.0\n",
        "...                           ...     ...       ...       ...       ...\n",
-       "2024-03-18 15:59:55-04:00  512.94  512.94  512.8600  512.8900    7345.0\n",
-       "2024-03-18 15:59:56-04:00  512.90  512.90  512.8700  512.8800    2551.0\n",
-       "2024-03-18 15:59:57-04:00  512.89  512.91  512.8500  512.8701   18063.0\n",
-       "2024-03-18 15:59:58-04:00  512.87  512.90  512.8496  512.9000    7734.0\n",
-       "2024-03-18 15:59:59-04:00  512.92  512.92  512.8200  512.8700   37159.0\n",
+       "2024-10-18 19:57:24-04:00  584.80  584.80    584.80    584.80     100.0\n",
+       "2024-10-18 19:57:48-04:00  584.84  584.84    584.84    584.84     622.0\n",
+       "2024-10-18 19:58:48-04:00  584.77  584.79    584.77    584.79    4158.0\n",
+       "2024-10-18 19:59:36-04:00  584.80  584.82    584.80    584.82     298.0\n",
+       "2024-10-18 19:59:48-04:00  584.76  584.76    584.72    584.72     258.0\n",
        "\n",
-       "[417345 rows x 5 columns]"
+       "[64218 rows x 5 columns]"
       ]
      },
-     "execution_count": 5,
+     "execution_count": 7,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -229,17 +229,17 @@
    "#This is how to call LOAD function\n",
    "symbol = [\"SPY\"]\n",
    "#datetime in zoneNY \n",
-   "day_start = datetime(2024, 2, 15, 9, 30, 0)\n",
-   "day_stop = datetime(2024, 3, 18, 16, 0, 0)\n",
+   "day_start = datetime(2024, 9, 15, 9, 30, 0)\n",
+   "day_stop = datetime(2024, 10, 20, 16, 0, 0)\n",
    "day_start = zoneNY.localize(day_start)\n",
    "day_stop = zoneNY.localize(day_stop)\n",
    "\n",
    "#requested AGG\n",
-   "resolution = 1 #12s bars\n",
+   "resolution = 12 #12s bars\n",
    "agg_type = AggType.OHLCV #other types AggType.OHLCV_VOL, AggType.OHLCV_DOL, AggType.OHLCV_RENKO\n",
    "exclude_conditions = ['C','O','4','B','7','V','P','W','U','Z','F','9','M','6'] #None to defaults\n",
    "minsize = 100 #min trade size to include\n",
-   "main_session_only = True\n",
+   "main_session_only = False\n",
    "force_remote = False\n",
    "\n",
    "data = load_data(symbol = symbol,\n",
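The hunk cuts off at the opening of the `load_data(` call. A minimal sketch of how the call plausibly continues, using the variables defined in this cell and keyword names seen around `load_data` later in `ttools/loaders.py`; the exact argument list is not shown in this diff, so treat the keywords as assumptions:

    # hypothetical completion of the truncated call; keyword names are inferred
    data = load_data(symbol = symbol,
                     agg_type = agg_type,                    # AggType.OHLCV
                     resolution = resolution,                # 12 -> 12s time bars
                     start_date = day_start,                 # tz-aware, zoneNY
                     end_date = day_stop,
                     exclude_conditions = exclude_conditions,
                     minsize = minsize,
                     main_session_only = main_session_only,  # False -> keep pre/post market
                     force_remote = force_remote)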
@@ -260,162 +260,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>open</th>\n",
-       "      <th>high</th>\n",
-       "      <th>low</th>\n",
-       "      <th>close</th>\n",
-       "      <th>volume</th>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>time</th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>2024-10-14 09:45:00-04:00</th>\n",
-       "      <td>41.9650</td>\n",
-       "      <td>41.970</td>\n",
-       "      <td>41.950</td>\n",
-       "      <td>41.9500</td>\n",
-       "      <td>17895.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2024-10-14 09:45:12-04:00</th>\n",
-       "      <td>41.9589</td>\n",
-       "      <td>41.965</td>\n",
-       "      <td>41.950</td>\n",
-       "      <td>41.9650</td>\n",
-       "      <td>6281.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2024-10-14 09:45:24-04:00</th>\n",
-       "      <td>41.9650</td>\n",
-       "      <td>42.005</td>\n",
-       "      <td>41.965</td>\n",
-       "      <td>41.9975</td>\n",
-       "      <td>3522.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2024-10-14 09:45:36-04:00</th>\n",
-       "      <td>41.9900</td>\n",
-       "      <td>42.005</td>\n",
-       "      <td>41.990</td>\n",
-       "      <td>42.0000</td>\n",
-       "      <td>5960.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2024-10-14 09:45:48-04:00</th>\n",
-       "      <td>42.0050</td>\n",
-       "      <td>42.040</td>\n",
-       "      <td>42.005</td>\n",
-       "      <td>42.0300</td>\n",
-       "      <td>9113.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>...</th>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2024-10-16 15:00:00-04:00</th>\n",
-       "      <td>42.9150</td>\n",
-       "      <td>42.915</td>\n",
-       "      <td>42.910</td>\n",
-       "      <td>42.9100</td>\n",
-       "      <td>12872.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2024-10-16 15:00:12-04:00</th>\n",
-       "      <td>42.9150</td>\n",
-       "      <td>42.920</td>\n",
-       "      <td>42.910</td>\n",
-       "      <td>42.9200</td>\n",
-       "      <td>7574.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2024-10-16 15:00:24-04:00</th>\n",
-       "      <td>42.9200</td>\n",
-       "      <td>42.920</td>\n",
-       "      <td>42.910</td>\n",
-       "      <td>42.9200</td>\n",
-       "      <td>1769.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2024-10-16 15:00:36-04:00</th>\n",
-       "      <td>42.9200</td>\n",
-       "      <td>42.920</td>\n",
-       "      <td>42.905</td>\n",
-       "      <td>42.9050</td>\n",
-       "      <td>26599.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2024-10-16 15:00:48-04:00</th>\n",
-       "      <td>42.9050</td>\n",
-       "      <td>42.905</td>\n",
-       "      <td>42.880</td>\n",
-       "      <td>42.8800</td>\n",
-       "      <td>9216.0</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "<p>5480 rows × 5 columns</p>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "                              open    high     low    close   volume\n",
-       "time                                                                \n",
-       "2024-10-14 09:45:00-04:00  41.9650  41.970  41.950  41.9500  17895.0\n",
-       "2024-10-14 09:45:12-04:00  41.9589  41.965  41.950  41.9650   6281.0\n",
-       "2024-10-14 09:45:24-04:00  41.9650  42.005  41.965  41.9975   3522.0\n",
-       "2024-10-14 09:45:36-04:00  41.9900  42.005  41.990  42.0000   5960.0\n",
-       "2024-10-14 09:45:48-04:00  42.0050  42.040  42.005  42.0300   9113.0\n",
-       "...                            ...     ...     ...      ...      ...\n",
-       "2024-10-16 15:00:00-04:00  42.9150  42.915  42.910  42.9100  12872.0\n",
-       "2024-10-16 15:00:12-04:00  42.9150  42.920  42.910  42.9200   7574.0\n",
-       "2024-10-16 15:00:24-04:00  42.9200  42.920  42.910  42.9200   1769.0\n",
-       "2024-10-16 15:00:36-04:00  42.9200  42.920  42.905  42.9050  26599.0\n",
-       "2024-10-16 15:00:48-04:00  42.9050  42.905  42.880  42.8800   9216.0\n",
-       "\n",
-       "[5480 rows x 5 columns]"
-      ]
-     },
-     "execution_count": 5,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
    "data.ohlcv.data[symbol[0]]"
    ]
@@ -478,26 +325,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "File: SPY-AggType.OHLCV-12-2024-01-15T09-30-00-2024-10-20T16-00-00-4679BCFMOPUVWZ-100-True.parquet\n",
-      "Coverage: 2024-01-15 09:30:00 to 2024-10-20 16:00:00\n",
-      "Symbol: SPY\n",
-      "Agg Type: AggType.OHLCV\n",
-      "Resolution: 12\n",
-      "Excludes: 4679BCFMOPUVWZ\n",
-      "Minsize: 100\n",
-      "Main Session Only: True\n",
-      "--------------------------------------------------------------------------------\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
    "from ttools.utils import list_matching_files, print_matching_files_info, zoneNY\n",
    "from datetime import datetime\n",
@@ -533,261 +363,16 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "And date subset loaded from parquet. Usually this is all done yb `load_data` in loader."
+    "From this file the subset of dates is loaded. Usually this is all done automatically by `load_data` in the loader."
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>open</th>\n",
-       "      <th>high</th>\n",
-       "      <th>low</th>\n",
-       "      <th>close</th>\n",
-       "      <th>volume</th>\n",
-       "      <th>trades</th>\n",
-       "      <th>updated</th>\n",
-       "      <th>vwap</th>\n",
-       "      <th>buyvolume</th>\n",
-       "      <th>sellvolume</th>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>time</th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>2024-01-16 09:30:00-05:00</th>\n",
-       "      <td>475.250</td>\n",
-       "      <td>475.3600</td>\n",
-       "      <td>475.20</td>\n",
-       "      <td>475.285</td>\n",
-       "      <td>255386.0</td>\n",
-       "      <td>93.0</td>\n",
-       "      <td>2024-01-16 09:30:01.002183-05:00</td>\n",
-       "      <td>475.251725</td>\n",
-       "      <td>3692.0</td>\n",
-       "      <td>242756.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2024-01-16 09:30:01-05:00</th>\n",
-       "      <td>475.335</td>\n",
-       "      <td>475.3350</td>\n",
-       "      <td>475.23</td>\n",
-       "      <td>475.260</td>\n",
-       "      <td>15161.0</td>\n",
-       "      <td>100.0</td>\n",
-       "      <td>2024-01-16 09:30:02.007313-05:00</td>\n",
-       "      <td>475.283390</td>\n",
-       "      <td>4386.0</td>\n",
-       "      <td>4944.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2024-01-16 09:30:02-05:00</th>\n",
-       "      <td>475.250</td>\n",
-       "      <td>475.3000</td>\n",
-       "      <td>475.24</td>\n",
-       "      <td>475.300</td>\n",
-       "      <td>6993.0</td>\n",
-       "      <td>39.0</td>\n",
-       "      <td>2024-01-16 09:30:03.008912-05:00</td>\n",
-       "      <td>475.262507</td>\n",
-       "      <td>1900.0</td>\n",
-       "      <td>2256.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2024-01-16 09:30:03-05:00</th>\n",
-       "      <td>475.290</td>\n",
-       "      <td>475.3200</td>\n",
-       "      <td>475.24</td>\n",
-       "      <td>475.270</td>\n",
-       "      <td>8497.0</td>\n",
-       "      <td>47.0</td>\n",
-       "      <td>2024-01-16 09:30:04.201093-05:00</td>\n",
-       "      <td>475.275280</td>\n",
-       "      <td>1300.0</td>\n",
-       "      <td>3200.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2024-01-16 09:30:04-05:00</th>\n",
-       "      <td>475.250</td>\n",
-       "      <td>475.2700</td>\n",
-       "      <td>475.22</td>\n",
-       "      <td>475.270</td>\n",
-       "      <td>5367.0</td>\n",
-       "      <td>37.0</td>\n",
-       "      <td>2024-01-16 09:30:05.004980-05:00</td>\n",
-       "      <td>475.234353</td>\n",
-       "      <td>1613.0</td>\n",
-       "      <td>1247.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>...</th>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2024-10-18 15:59:55-04:00</th>\n",
-       "      <td>584.520</td>\n",
-       "      <td>584.5800</td>\n",
-       "      <td>584.51</td>\n",
-       "      <td>584.580</td>\n",
-       "      <td>10357.0</td>\n",
-       "      <td>47.0</td>\n",
-       "      <td>2024-10-18 15:59:56.008928-04:00</td>\n",
-       "      <td>584.543870</td>\n",
-       "      <td>1600.0</td>\n",
-       "      <td>1100.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2024-10-18 15:59:56-04:00</th>\n",
-       "      <td>584.570</td>\n",
-       "      <td>584.6091</td>\n",
-       "      <td>584.55</td>\n",
-       "      <td>584.550</td>\n",
-       "      <td>6527.0</td>\n",
-       "      <td>32.0</td>\n",
-       "      <td>2024-10-18 15:59:57.007658-04:00</td>\n",
-       "      <td>584.566643</td>\n",
-       "      <td>1525.0</td>\n",
-       "      <td>1002.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2024-10-18 15:59:57-04:00</th>\n",
-       "      <td>584.560</td>\n",
-       "      <td>584.6100</td>\n",
-       "      <td>584.56</td>\n",
-       "      <td>584.600</td>\n",
-       "      <td>5068.0</td>\n",
-       "      <td>23.0</td>\n",
-       "      <td>2024-10-18 15:59:58.000435-04:00</td>\n",
-       "      <td>584.596249</td>\n",
-       "      <td>1960.0</td>\n",
-       "      <td>900.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2024-10-18 15:59:58-04:00</th>\n",
-       "      <td>584.590</td>\n",
-       "      <td>584.6200</td>\n",
-       "      <td>584.56</td>\n",
-       "      <td>584.560</td>\n",
-       "      <td>8786.0</td>\n",
-       "      <td>23.0</td>\n",
-       "      <td>2024-10-18 15:59:59.041984-04:00</td>\n",
-       "      <td>584.592217</td>\n",
-       "      <td>2859.0</td>\n",
-       "      <td>3921.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2024-10-18 15:59:59-04:00</th>\n",
-       "      <td>584.560</td>\n",
-       "      <td>584.6100</td>\n",
-       "      <td>584.56</td>\n",
-       "      <td>584.570</td>\n",
-       "      <td>12583.0</td>\n",
-       "      <td>69.0</td>\n",
-       "      <td>2024-10-18 15:59:59.982132-04:00</td>\n",
-       "      <td>584.583131</td>\n",
-       "      <td>5303.0</td>\n",
-       "      <td>1980.0</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "<p>3384529 rows × 10 columns</p>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "                              open      high     low    close    volume  \\\n",
-       "time                                                                       \n",
-       "2024-01-16 09:30:00-05:00  475.250  475.3600  475.20  475.285  255386.0   \n",
-       "2024-01-16 09:30:01-05:00  475.335  475.3350  475.23  475.260   15161.0   \n",
-       "2024-01-16 09:30:02-05:00  475.250  475.3000  475.24  475.300    6993.0   \n",
-       "2024-01-16 09:30:03-05:00  475.290  475.3200  475.24  475.270    8497.0   \n",
-       "2024-01-16 09:30:04-05:00  475.250  475.2700  475.22  475.270    5367.0   \n",
-       "...                            ...       ...     ...      ...       ...   \n",
-       "2024-10-18 15:59:55-04:00  584.520  584.5800  584.51  584.580   10357.0   \n",
-       "2024-10-18 15:59:56-04:00  584.570  584.6091  584.55  584.550    6527.0   \n",
-       "2024-10-18 15:59:57-04:00  584.560  584.6100  584.56  584.600    5068.0   \n",
-       "2024-10-18 15:59:58-04:00  584.590  584.6200  584.56  584.560    8786.0   \n",
-       "2024-10-18 15:59:59-04:00  584.560  584.6100  584.56  584.570   12583.0   \n",
-       "\n",
-       "                           trades                          updated  \\\n",
-       "time                                                                 \n",
-       "2024-01-16 09:30:00-05:00    93.0 2024-01-16 09:30:01.002183-05:00   \n",
-       "2024-01-16 09:30:01-05:00   100.0 2024-01-16 09:30:02.007313-05:00   \n",
-       "2024-01-16 09:30:02-05:00    39.0 2024-01-16 09:30:03.008912-05:00   \n",
-       "2024-01-16 09:30:03-05:00    47.0 2024-01-16 09:30:04.201093-05:00   \n",
-       "2024-01-16 09:30:04-05:00    37.0 2024-01-16 09:30:05.004980-05:00   \n",
-       "...                           ...                              ...   \n",
-       "2024-10-18 15:59:55-04:00    47.0 2024-10-18 15:59:56.008928-04:00   \n",
-       "2024-10-18 15:59:56-04:00    32.0 2024-10-18 15:59:57.007658-04:00   \n",
-       "2024-10-18 15:59:57-04:00    23.0 2024-10-18 15:59:58.000435-04:00   \n",
-       "2024-10-18 15:59:58-04:00    23.0 2024-10-18 15:59:59.041984-04:00   \n",
-       "2024-10-18 15:59:59-04:00    69.0 2024-10-18 15:59:59.982132-04:00   \n",
-       "\n",
-       "                                 vwap  buyvolume  sellvolume  \n",
-       "time                                                          \n",
-       "2024-01-16 09:30:00-05:00  475.251725     3692.0    242756.0  \n",
-       "2024-01-16 09:30:01-05:00  475.283390     4386.0      4944.0  \n",
-       "2024-01-16 09:30:02-05:00  475.262507     1900.0      2256.0  \n",
-       "2024-01-16 09:30:03-05:00  475.275280     1300.0      3200.0  \n",
-       "2024-01-16 09:30:04-05:00  475.234353     1613.0      1247.0  \n",
-       "...                               ...        ...         ...  \n",
-       "2024-10-18 15:59:55-04:00  584.543870     1600.0      1100.0  \n",
-       "2024-10-18 15:59:56-04:00  584.566643     1525.0      1002.0  \n",
-       "2024-10-18 15:59:57-04:00  584.596249     1960.0       900.0  \n",
-       "2024-10-18 15:59:58-04:00  584.592217     2859.0      3921.0  \n",
-       "2024-10-18 15:59:59-04:00  584.583131     5303.0      1980.0  \n",
-       "\n",
-       "[3384529 rows x 10 columns]"
-      ]
-     },
-     "execution_count": 7,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
+    "#loading manually range subset from existing files\n",
    "start = zoneNY.localize(datetime(2024, 1, 15, 9, 30))\n",
    "end = zoneNY.localize(datetime(2024, 10, 20, 16, 00))\n",
    "\n",
@@ -800,6 +385,121 @@
    "\n",
    "ohlcv_df"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "TTOOLS: Loaded env variables from file /Users/davidbrazda/Documents/Development/python/.env\n"
+     ]
+    }
+   ],
+   "source": [
+    "\n",
+    "from ttools.loaders import fetch_daily_stock_trades, fetch_trades_parallel\n",
+    "from ttools.utils import zoneNY\n",
+    "from datetime import datetime"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Fetching trades for whole range"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "SPY Contains 46 market days\n",
+      "SPY All 46 split files loaded in 10.521624088287354 seconds\n",
+      "Trimming 2024-01-16 09:30:00-05:00 2024-03-20 16:00:00-04:00\n",
+      "excluding ['C', 'O', '4', 'B', '7', 'V', 'P', 'W', 'U', 'Z', 'F', '9', 'M', '6']\n",
+      "exclude done\n",
+      "minsize 100\n",
+      "minsize done\n",
+      "SPY filtered\n",
+      "<class 'pandas.core.frame.DataFrame'>\n",
+      "DatetimeIndex: 6513606 entries, 2024-01-16 09:30:00.001443-05:00 to 2024-03-20 15:59:59.992808-04:00\n",
+      "Data columns (total 6 columns):\n",
+      " #   Column  Dtype  \n",
+      "---  ------  -----  \n",
+      " 0   x       object \n",
+      " 1   p       float64\n",
+      " 2   s       int64  \n",
+      " 3   i       int64  \n",
+      " 4   c       object \n",
+      " 5   z       object \n",
+      "dtypes: float64(1), int64(2), object(3)\n",
+      "memory usage: 347.9+ MB\n"
+     ]
+    }
+   ],
+   "source": [
+    "\n",
+    "\n",
+    "#fetching one day\n",
+    "# df = fetch_daily_stock_trades(symbol=\"SPY\",\n",
+    "#                               start=zoneNY.localize(datetime(2024, 1, 16, 9, 30)),\n",
+    "#                               end=zoneNY.localize(datetime(2024, 1, 16, 16, 00)))\n",
+    "# df.info()\n",
+    "\n",
+    "#fetching multiple days with parallel\n",
+    "df = fetch_trades_parallel(symbol=\"SPY\",\n",
+    "                           start_date=zoneNY.localize(datetime(2024, 1, 16, 9, 30)),\n",
+    "                           end_date=zoneNY.localize(datetime(2024, 3, 20, 16, 00)))\n",
+    "\n",
+    "df.info()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df.info()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "True"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "#comparing dataframes\n",
+    "from ttools.utils import AGG_CACHE, compare_dataframes\n",
+    "import pandas as pd\n",
+    "file1 = AGG_CACHE / \"SPY-AggType.OHLCV-1-2024-02-15T09-30-00-2024-10-20T16-00-00-4679BCFMOPUVWZ-100-False.parquet\"\n",
+    "file2 = AGG_CACHE / \"SPY-AggType.OHLCV-1-2024-02-15T09-30-00-2024-10-20T16-00-00-4679BCFMOPUVWZ-100-False_older2.parquet\"\n",
+    "df1 = pd.read_parquet(file1)\n",
+    "df2 = pd.read_parquet(file2)\n",
+    "df1.equals(df2)\n",
+    "\n",
+    "#compare_dataframes(df1, df2)"
+   ]
   }
  ],
 "metadata": {
ttools/aggregator_vectorized.py

@@ -10,8 +10,80 @@ Includes fetch (remote/cached) methods and numba aggregator function for TIME BARS
 
 """
+
+def aggregate_trades_optimized(symbol: str, trades_df: pd.DataFrame, resolution: int, type: AggType = AggType.OHLCV, clear_input: bool = False):
+    """
+    Optimized version of trade aggregation function with reduced memory footprint.
+    """
+    # 1. Get timestamps from index if 't' is not in columns
+    if 't' not in trades_df.columns:
+        timestamps = trades_df.index.values
+    else:
+        timestamps = trades_df['t'].values
+
+    # 2. Select only needed columns for prices and sizes
+    prices = trades_df['p'].values
+    sizes = trades_df['s'].values
+
+    # Clear the input to free up memory
+    if clear_input:
+        del trades_df
+
+    # 3. Convert timestamps maintaining exact precision: view the datetime64
+    # values as raw int64 epoch ticks, then scale to float seconds (the cached
+    # trade index is microsecond resolution here, hence / 1e6)
+    unix_timestamps_s = timestamps.view('int64').astype(np.float64) / 1e6
+    # original, not optimized, kept in case of issues (about 5x slower)
+    #unix_timestamps_s = timestamps.astype('datetime64[ns]').astype(np.float64) / 1e9
+
+    # 4. Create ticks array efficiently: pre-allocate for better memory efficiency
+    ticks = np.empty((len(timestamps), 3), dtype=np.float64)
+    ticks[:, 0] = unix_timestamps_s
+    ticks[:, 1] = prices
+    ticks[:, 2] = sizes
+
+    # 5. Clear memory of intermediate objects
+    del timestamps, prices, sizes, unix_timestamps_s
+
+    # 6. Process based on type using existing pattern
+    try:
+        match type:
+            case AggType.OHLCV:
+                ohlcv_bars = generate_time_bars_nb(ticks, resolution)
+                columns = ['time', 'open', 'high', 'low', 'close', 'volume', 'trades',
+                           'updated', 'vwap', 'buyvolume', 'sellvolume']
+            case AggType.OHLCV_VOL:
+                ohlcv_bars = generate_volume_bars_nb(ticks, resolution)
+                columns = ['time', 'open', 'high', 'low', 'close', 'volume', 'trades',
+                           'updated', 'buyvolume', 'sellvolume']
+            case AggType.OHLCV_DOL:
+                ohlcv_bars = generate_dollar_bars_nb(ticks, resolution)
+                columns = ['time', 'open', 'high', 'low', 'close', 'volume', 'trades',
+                           'amount', 'updated']
+            case _:
+                raise ValueError("Invalid AggType type. Supported types are 'time', 'volume' and 'dollar'.")
+    finally:
+        # 7. Clear large numpy array as soon as possible
+        del ticks
+
+    # 8. Create DataFrame and handle timestamps - keeping original working approach
+    ohlcv_df = pd.DataFrame(ohlcv_bars, columns=columns)
+    del ohlcv_bars
+
+    # 9. Use the original timestamp handling that we know works
+    ohlcv_df['time'] = pd.to_datetime(ohlcv_df['time'], unit='s').dt.tz_localize('UTC').dt.tz_convert(zoneNY)
+    ohlcv_df['updated'] = pd.to_datetime(ohlcv_df['updated'], unit="s").dt.tz_localize('UTC').dt.tz_convert(zoneNY)
+
+    # 10. Round microseconds as in original
+    ohlcv_df['updated'] = ohlcv_df['updated'].dt.round('us')
+
+    # 11. Set index last, as in original
+    ohlcv_df.set_index('time', inplace=True)
+
+    return ohlcv_df
+
 def aggregate_trades(symbol: str, trades_df: pd.DataFrame, resolution: int, type: AggType = AggType.OHLCV):
     """
+    Original, replaced by the optimized version above.
     Accepts dataframe with trades keyed by symbol. Prepares dataframe to
     numpy and calls Numba optimized aggregator for given bar type. (time/volume/dollar)
     """
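The "about 5x slower" note on the fallback conversion can be sanity-checked with a quick micro-benchmark: the `view('int64')` path reinterprets the buffer in place, while the `astype('datetime64[ns]')` path allocates a converted copy first. A minimal sketch, assuming a microsecond-resolution index like the parquet trade cache (pandas >= 2.0 for `as_unit`; timings vary by machine):

    import numpy as np
    import pandas as pd
    import timeit

    # microsecond-resolution timestamps, similar to the cached trade index
    ts = pd.date_range("2024-01-16 09:30", periods=1_000_000, freq="50ms").as_unit("us").values

    fast = lambda: ts.view("int64").astype(np.float64) / 1e6             # reinterpret, no extra copy of the ticks
    slow = lambda: ts.astype("datetime64[ns]").astype(np.float64) / 1e9  # ns cast allocates a new array first

    assert np.allclose(fast(), slow())
    print("view:", timeit.timeit(fast, number=20))
    print("cast:", timeit.timeit(slow, number=20))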
ttools/loaders.py

@@ -17,8 +17,14 @@ from ttools.utils import AggType, fetch_calendar_data, print, print_matching_files_info
 from tqdm import tqdm
 import threading
 from typing import List, Union
-from ttools.aggregator_vectorized import aggregate_trades
+from ttools.aggregator_vectorized import aggregate_trades, aggregate_trades_optimized
+import numpy as np
+import pandas as pd
+import pyarrow.dataset as ds
+import pandas as pd
+from concurrent.futures import ThreadPoolExecutor
+import math
+import os
 """
 Module for fetching stock data. Supports
 1) cache management
@@ -87,6 +93,8 @@ def convert_dict_to_multiindex_df(tradesResponse, rename_labels = True, keep_symbols
     final_df.reset_index(inplace=True) # Reset index to remove MultiIndex levels, making them columns
     final_df.drop(columns=['symbol'], inplace=True) #remove symbol column
     final_df.set_index(timestamp_col, inplace=True) #reindex by timestamp
+    #print index datetime resolution
+    #print(final_df.index.dtype)
 
     return final_df
 
@@ -106,6 +114,28 @@ def filter_trade_df(df: pd.DataFrame, start: datetime = None, end: datetime = None
     Returns:
         df: pd.DataFrame
     """
+    def fast_filter(df, exclude_conditions):
+        # Convert arrays to strings once
+        str_series = df['c'].apply(lambda x: ','.join(x))
+
+        # Create mask using vectorized string operations
+        mask = np.zeros(len(df), dtype=bool)
+        for cond in exclude_conditions:
+            mask |= str_series.str.contains(cond, regex=False)
+
+        # Apply filter
+        return df[~mask]
+
+    def vectorized_string_sets(df, exclude_conditions):
+        # Convert exclude_conditions to set for O(1) lookup
+        exclude_set = set(exclude_conditions)
+
+        # Vectorized operation using sets intersection
+        arrays = df['c'].values
+        mask = np.array([bool(set(arr) & exclude_set) for arr in arrays])
+
+        return df[~mask]
+
     # 9:30 to 16:00
     if main_session_only:
 
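Of the two helpers added above, only `vectorized_string_sets` is wired in below: it tests each trade's condition-code list against the exclude set directly, instead of joining the codes into a string per row. A toy check of the masking logic (the condition codes here are illustrative, not a statement about real Alpaca codes):

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({'p': [499.10, 499.20, 499.30],
                       's': [100, 50, 200],
                       'c': [['@'], ['@', 'O'], ['F', 'I']]})
    exclude_set = {'O', 'F'}

    # a row is dropped if any of its condition codes is in the exclude set
    mask = np.array([bool(set(arr) & exclude_set) for arr in df['c'].values])
    print(df[~mask])   # only the first trade survives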
@@ -120,30 +150,50 @@ def filter_trade_df(df: pd.DataFrame, start: datetime = None, end: datetime = None
     #REQUIRED FILTERING
     # Create a mask to filter rows within the specified time range
     if start is not None and end is not None:
-        print(f"filtering {start.time()} {end.time()}")
+        print(f"Trimming {start} {end}")
         if symbol_included:
             mask = (df.index.get_level_values('t') >= start) & \
                    (df.index.get_level_values('t') <= end)
+            df = df[mask]
         else:
-            mask = (df.index >= start) & (df.index <= end)
-
-        # Apply the mask to the DataFrame
-        df = df[mask]
+            df = df.loc[start:end]
 
     if exclude_conditions is not None:
         print(f"excluding {exclude_conditions}")
-        # Create a mask to exclude rows with any of the specified conditions
-        mask = df['c'].apply(lambda x: any(cond in exclude_conditions for cond in x))
-
-        # Filter out the rows with specified conditions
-        df = df[~mask]
+        df = vectorized_string_sets(df, exclude_conditions)
+        print("exclude done")
 
     if minsize is not None:
         print(f"minsize {minsize}")
         #exclude trades under min size
         df = df[df['s'] >= minsize]
+        print("minsize done")
     return df
+
+
+def calculate_optimal_workers(file_count, min_workers=4, max_workers=32):
+    """
+    Calculate optimal number of workers based on file count and system resources
+
+    Rules of thumb:
+    - Minimum of 4 workers to ensure parallelization
+    - Maximum of 32 workers to avoid thread overhead
+    - For 100 files, aim for around 16-24 workers
+    - Scale with CPU count but don't exceed max_workers
+    """
+    cpu_count = os.cpu_count() or 4
+
+    # Base calculation: 2-4x CPU count for I/O bound tasks
+    suggested_workers = cpu_count * 3
+
+    # Scale based on file count (1 worker per 4-6 files is a good ratio)
+    files_based_workers = math.ceil(file_count / 5)
+
+    # Take the smaller of the two suggestions
+    optimal_workers = min(suggested_workers, files_based_workers)
+
+    # Clamp between min and max workers
+    return max(min_workers, min(optimal_workers, max_workers))
+
+
 def fetch_daily_stock_trades(symbol, start, end, exclude_conditions=None, minsize=None, main_session_only=True, no_return=False, force_remote=False, rename_labels = False, keep_symbols=False, max_retries=5, backoff_factor=1, data_feed: DataFeed = DataFeed.SIP, verbose = None):
     #doc for this function
     """
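For context, this is how `calculate_optimal_workers` would plug into the parallel fetch. A sketch only: `fetch_one_day` is a hypothetical stand-in for the per-day download that `fetch_trades_parallel` dispatches, not an actual ttools function:

    from concurrent.futures import ThreadPoolExecutor

    def fetch_all_days(days):
        # one worker per ~5 files, clamped to [4, 32] and ~3x CPU count
        workers = calculate_optimal_workers(file_count=len(days))
        with ThreadPoolExecutor(max_workers=workers) as pool:
            return list(pool.map(fetch_one_day, days))  # fetch_one_day: hypothetical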
@@ -281,7 +331,12 @@ def fetch_trades_parallel(symbol, start_date, end_date, exclude_conditions = EXC
     #speed it up, locals first and then fetches
     s_time = timetime()
     with trade_cache_lock:
-        local_df = pd.concat([pd.read_parquet(f) for _,f in days_from_cache])
+        file_paths = [f for _, f in days_from_cache]
+        dataset = ds.dataset(file_paths, format='parquet')
+        local_df = dataset.to_table().to_pandas()
+        del dataset
+        #original version
+        #local_df = pd.concat([pd.read_parquet(f) for _,f in days_from_cache])
     final_time = timetime() - s_time
     print(f"{symbol} All {len(days_from_cache)} split files loaded in", final_time, "seconds")
     #the filter is required
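The switch from `pd.concat` over per-file `read_parquet` calls to a single `pyarrow.dataset` read is the main cache-loading win: Arrow scans all fragments in one pass with internal parallelism and materializes one pandas DataFrame, instead of building N intermediate DataFrames plus a final concat copy. The pattern in isolation, assuming a list of per-day parquet paths:

    import pyarrow.dataset as ds

    def load_parquet_files(paths):
        # treat the files as one logical dataset; reads fragments in parallel
        dataset = ds.dataset(paths, format="parquet")
        return dataset.to_table().to_pandas()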
@@ -413,7 +468,7 @@ def load_data(symbol: Union[str, List[str]],
     else:
         #could this be sped up? "Searching cache" displays slowly - some bottleneck?
         df = fetch_trades_parallel(symbol, start_date, end_date, minsize=minsize, exclude_conditions=exclude_conditions, main_session_only=main_session_only, force_remote=force_remote) #exclude_conditions=['C','O','4','B','7','V','P','W','U','Z','F'])
-        ohlcv_df = aggregate_trades(symbol=symbol, trades_df=df, resolution=resolution, type=agg_type)
+        ohlcv_df = aggregate_trades_optimized(symbol=symbol, trades_df=df, resolution=resolution, type=agg_type, clear_input = True)
 
         ohlcv_df.to_parquet(file_ohlcv, engine='pyarrow')
         print(f"{symbol} Saved to agg_cache", file_ohlcv)
ttools/utils.py (145 lines changed)
@@ -273,4 +273,147 @@ class StartBarAlign(str, Enum):
     RANDOM = first bar starts when first trade occurs
     """
     ROUND = "round"
     RANDOM = "random"
+
+
+def compare_dataframes(df1, df2, name1="DataFrame 1", name2="DataFrame 2", check_dtype=True):
+    """
+    Compare two DataFrames and provide detailed analysis of their differences.
+
+    Parameters:
+    -----------
+    df1, df2 : pandas.DataFrame
+        The DataFrames to compare
+    name1, name2 : str
+        Names to identify the DataFrames in the output
+    check_dtype : bool
+        Whether to check if dtypes match for columns
+
+    Returns:
+    --------
+    bool
+        True if DataFrames are identical (based on check_dtype parameter)
+    dict
+        Detailed comparison results
+    """
+    results = {
+        'are_equal': False,
+        'shape_match': False,
+        'column_match': False,
+        'index_match': False,
+        'dtype_match': False,
+        'content_match': False,
+        'differences': {}
+    }
+
+    # Shape comparison
+    if df1.shape != df2.shape:
+        results['differences']['shape'] = {
+            name1: df1.shape,
+            name2: df2.shape
+        }
+    else:
+        results['shape_match'] = True
+
+    # Column comparison
+    cols1 = set(df1.columns)
+    cols2 = set(df2.columns)
+    if cols1 != cols2:
+        results['differences']['columns'] = {
+            f'unique_to_{name1}': list(cols1 - cols2),
+            f'unique_to_{name2}': list(cols2 - cols1),
+            'common': list(cols1 & cols2)
+        }
+    else:
+        results['column_match'] = True
+
+    # Index comparison
+    idx1 = set(df1.index)
+    idx2 = set(df2.index)
+    if idx1 != idx2:
+        results['differences']['index'] = {
+            f'unique_to_{name1}': list(idx1 - idx2),
+            f'unique_to_{name2}': list(idx2 - idx1),
+            'common': list(idx1 & idx2)
+        }
+    else:
+        results['index_match'] = True
+
+    # dtype comparison
+    if check_dtype and results['column_match']:
+        dtype_diff = {}
+        for col in cols1:
+            if df1[col].dtype != df2[col].dtype:
+                dtype_diff[col] = {
+                    name1: str(df1[col].dtype),
+                    name2: str(df2[col].dtype)
+                }
+        if dtype_diff:
+            results['differences']['dtypes'] = dtype_diff
+        else:
+            results['dtype_match'] = True
+
+    # Content comparison (only for matching columns and indices)
+    if results['column_match'] and results['index_match']:
+        common_cols = list(cols1)
+        common_idx = list(idx1)
+
+        value_diff = {}
+        for col in common_cols:
+            # Compare values
+            if not df1[col].equals(df2[col]):
+                # Find specific differences
+                mask = df1[col] != df2[col]
+                if any(mask):
+                    diff_indices = df1.index[mask]
+                    value_diff[col] = {
+                        'different_at_indices': list(diff_indices),
+                        'sample_differences': {
+                            str(idx): {
+                                name1: df1.loc[idx, col],
+                                name2: df2.loc[idx, col]
+                            } for idx in list(diff_indices)[:5]  # Show first 5 differences
+                        }
+                    }
+
+        if value_diff:
+            results['differences']['values'] = value_diff
+        else:
+            results['content_match'] = True
+
+    # Overall equality
+    results['are_equal'] = all([
+        results['shape_match'],
+        results['column_match'],
+        results['index_match'],
+        results['content_match'],
+        (results['dtype_match'] if check_dtype else True)
+    ])
+
+    # Print summary
+    print(f"\nComparison Summary of {name1} vs {name2}:")
+    print(f"Shape Match: {results['shape_match']} ({df1.shape} vs {df2.shape})")
+    print(f"Column Match: {results['column_match']}")
+    print(f"Index Match: {results['index_match']}")
+    print(f"Dtype Match: {results['dtype_match']}" if check_dtype else "Dtype Check: Skipped")
+    print(f"Content Match: {results['content_match']}")
+    print(f"\nOverall Equal: {results['are_equal']}")
+
+    # Print detailed differences if any
+    if not results['are_equal']:
+        print("\nDetailed Differences:")
+        for diff_type, diff_content in results['differences'].items():
+            print(f"\n{diff_type.upper()}:")
+            if diff_type == 'values':
+                print(f"Number of columns with differences: {len(diff_content)}")
+                for col, details in diff_content.items():
+                    print(f"\nColumn '{col}':")
+                    print(f"Number of different values: {len(details['different_at_indices'])}")
+                    print("First few differences:")
+                    for idx, vals in details['sample_differences'].items():
+                        print(f"  At index {idx}:")
+                        print(f"    {name1}: {vals[name1]}")
+                        print(f"    {name2}: {vals[name2]}")
+            else:
+                print(diff_content)
+
+    return results['are_equal'], results
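Example use of `compare_dataframes`, mirroring the notebook cell added earlier in this commit; the cache file names here are illustrative only:

    import pandas as pd
    from ttools.utils import AGG_CACHE, compare_dataframes

    df1 = pd.read_parquet(AGG_CACHE / "bars_new.parquet")   # hypothetical cache files
    df2 = pd.read_parquet(AGG_CACHE / "bars_old.parquet")
    # prints a summary plus the first few differing values per column
    are_equal, details = compare_dataframes(df1, df2, name1="new", name2="old")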