diff --git a/setup.py b/setup.py index 26b8fe9..7a82e84 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from setuptools import setup, find_packages setup( name='ttools', - version='0.7.0', + version='0.7.1', packages=find_packages(), install_requires=[ # list your dependencies here diff --git a/tests/data_loader_tryme.ipynb b/tests/data_loader_tryme.ipynb index 8ad48be..5ff47da 100644 --- a/tests/data_loader_tryme.ipynb +++ b/tests/data_loader_tryme.ipynb @@ -368,9 +368,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'zoneNY' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[1], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m#loading manually range subset from existing files\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m start \u001b[38;5;241m=\u001b[39m \u001b[43mzoneNY\u001b[49m\u001b[38;5;241m.\u001b[39mlocalize(datetime(\u001b[38;5;241m2024\u001b[39m, \u001b[38;5;241m1\u001b[39m, \u001b[38;5;241m15\u001b[39m, \u001b[38;5;241m9\u001b[39m, \u001b[38;5;241m30\u001b[39m))\n\u001b[1;32m 3\u001b[0m end \u001b[38;5;241m=\u001b[39m zoneNY\u001b[38;5;241m.\u001b[39mlocalize(datetime(\u001b[38;5;241m2024\u001b[39m, \u001b[38;5;241m10\u001b[39m, \u001b[38;5;241m20\u001b[39m, \u001b[38;5;241m16\u001b[39m, \u001b[38;5;241m00\u001b[39m))\n\u001b[1;32m 5\u001b[0m ohlcv_df \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mread_parquet(\n\u001b[1;32m 6\u001b[0m AGG_CACHE \u001b[38;5;241m/\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mSPY-AggType.OHLCV-1-2024-01-15T09-30-00-2024-10-20T16-00-00-4679BCFMOPUVWZ-100-True.parquet\u001b[39m\u001b[38;5;124m\"\u001b[39m, \n\u001b[1;32m 7\u001b[0m engine\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mpyarrow\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[1;32m 8\u001b[0m filters\u001b[38;5;241m=\u001b[39m[(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mtime\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m>=\u001b[39m\u001b[38;5;124m'\u001b[39m, start), \n\u001b[1;32m 9\u001b[0m (\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mtime\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m<=\u001b[39m\u001b[38;5;124m'\u001b[39m, end)]\n\u001b[1;32m 10\u001b[0m )\n", + "\u001b[0;31mNameError\u001b[0m: name 'zoneNY' is not defined" + ] + } + ], "source": [ "#loading manually range subset from existing files\n", "start = zoneNY.localize(datetime(2024, 1, 15, 9, 30))\n", @@ -422,27 +434,71 @@ "name": "stdout", "output_type": "stream", "text": [ - "SPY Contains 46 market days\n", - "SPY All 46 split files loaded in 10.521624088287354 seconds\n", - "Trimming 2024-01-16 09:30:00-05:00 2024-03-20 16:00:00-04:00\n", - "excluding ['C', 'O', '4', 'B', '7', 'V', 'P', 'W', 'U', 'Z', 'F', '9', 'M', '6']\n", - "exclude done\n", - "minsize 100\n", - "minsize done\n", - "SPY filtered\n", + "BAC Contains 1 market days\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "BAC Remote fetching: 100%|██████████| 1/1 [00:00<00:00, 434.55it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Fetching from remote.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "BAC Receiving trades: 
0%| | 0/1 [00:00\n", - "DatetimeIndex: 6513606 entries, 2024-01-16 09:30:00.001443-05:00 to 2024-03-20 15:59:59.992808-04:00\n", + "DatetimeIndex: 222754 entries, 2024-01-16 04:00:00.009225-05:00 to 2024-01-16 19:59:48.834830-05:00\n", "Data columns (total 6 columns):\n", - " # Column Dtype \n", - "--- ------ ----- \n", - " 0 x object \n", - " 1 p float64\n", - " 2 s int64 \n", - " 3 i int64 \n", - " 4 c object \n", - " 5 z object \n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 x 222754 non-null object \n", + " 1 p 222754 non-null float64\n", + " 2 s 222754 non-null int64 \n", + " 3 i 222754 non-null int64 \n", + " 4 c 222754 non-null object \n", + " 5 z 222754 non-null object \n", "dtypes: float64(1), int64(2), object(3)\n", - "memory usage: 347.9+ MB\n" + "memory usage: 11.9+ MB\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n" ] } ], @@ -456,20 +512,190 @@ "# df.info()\n", "\n", "#fetching multiple days with parallel\n", - "df = fetch_trades_parallel(symbol=\"SPY\",\n", - " start_date=zoneNY.localize(datetime(2024, 1, 16, 9, 30)),\n", - " end_date=zoneNY.localize(datetime(2024, 3, 20, 16, 00)))\n", + "df = fetch_trades_parallel(symbol=\"BAC\",\n", + " start_date=zoneNY.localize(datetime(2024, 1, 16, 0, 0)),\n", + " end_date=zoneNY.localize(datetime(2024, 1, 16, 23, 59)),\n", + " main_session_only=False,\n", + " exclude_conditions=None,\n", + " minsize=None,\n", + " force_remote=True)\n", "\n", "df.info()" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 22, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
xpsicz
t
2024-01-16 04:00:00.009225-05:00K32.800152983525027912[ , T, I]A
2024-01-16 04:00:00.012088-05:00P32.580852983525027890[ , T, I]A
2024-01-16 04:00:02.299262-05:00P32.750152983525027916[ , T, I]A
2024-01-16 04:00:03.895322-05:00P32.640152983525027920[ , T, I]A
2024-01-16 04:00:04.145553-05:00P32.740152983525027921[ , T, I]A
.....................
2024-01-16 18:58:10.081270-05:00D32.1041079371957716549[ , T, I]A
2024-01-16 18:58:11.293971-05:00T32.090362883460503386[ , T, I]A
2024-01-16 18:58:24.511348-05:00D32.110179371957716560[ , T, I]A
2024-01-16 18:58:46.648899-05:00D32.110179371957716786[ , T, I]A
2024-01-16 18:59:54.013894-05:00D32.100171710070428229[ , T, I]A
\n", + "

159301 rows × 6 columns

\n", + "
" + ], + "text/plain": [ + " x p s i c z\n", + "t \n", + "2024-01-16 04:00:00.009225-05:00 K 32.800 1 52983525027912 [ , T, I] A\n", + "2024-01-16 04:00:00.012088-05:00 P 32.580 8 52983525027890 [ , T, I] A\n", + "2024-01-16 04:00:02.299262-05:00 P 32.750 1 52983525027916 [ , T, I] A\n", + "2024-01-16 04:00:03.895322-05:00 P 32.640 1 52983525027920 [ , T, I] A\n", + "2024-01-16 04:00:04.145553-05:00 P 32.740 1 52983525027921 [ , T, I] A\n", + "... .. ... .. ... ... ..\n", + "2024-01-16 18:58:10.081270-05:00 D 32.104 10 79371957716549 [ , T, I] A\n", + "2024-01-16 18:58:11.293971-05:00 T 32.090 3 62883460503386 [ , T, I] A\n", + "2024-01-16 18:58:24.511348-05:00 D 32.110 1 79371957716560 [ , T, I] A\n", + "2024-01-16 18:58:46.648899-05:00 D 32.110 1 79371957716786 [ , T, I] A\n", + "2024-01-16 18:59:54.013894-05:00 D 32.100 1 71710070428229 [ , T, I] A\n", + "\n", + "[159301 rows x 6 columns]" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "df.info()" + "df" ] }, { @@ -500,6 +726,207 @@ "\n", "#compare_dataframes(df1, df2)" ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "from ttools.config import TRADE_CACHE\n", + "import pandas as pd\n", + "file1 = TRADE_CACHE / \"BAC-2024-01-16.parquet\"\n", + "df1 = pd.read_parquet(file1)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
xpsicz
t
2024-01-16 04:00:00.009225-05:00K32.80152983525027912[ , T, I]A
2024-01-16 04:00:00.012088-05:00P32.58852983525027890[ , T, I]A
2024-01-16 04:00:00.856156-05:00K32.611452983525028705[ , F, T, I]A
2024-01-16 04:00:02.299262-05:00P32.75152983525027916[ , T, I]A
2024-01-16 04:00:03.895322-05:00P32.64152983525027920[ , T, I]A
.....................
2024-01-16 19:59:24.796862-05:00P32.1250052983576997941[ , T]A
2024-01-16 19:59:24.796868-05:00P32.1250052983576997942[ , T]A
2024-01-16 19:59:24.796868-05:00P32.1250052983576997943[ , T]A
2024-01-16 19:59:24.796871-05:00P32.1250052983576997944[ , T]A
2024-01-16 19:59:48.834830-05:00K32.102552983526941511[ , T, I]A
\n", + "

222754 rows × 6 columns

\n", + "
" + ], + "text/plain": [ + " x p s i c \\\n", + "t \n", + "2024-01-16 04:00:00.009225-05:00 K 32.80 1 52983525027912 [ , T, I] \n", + "2024-01-16 04:00:00.012088-05:00 P 32.58 8 52983525027890 [ , T, I] \n", + "2024-01-16 04:00:00.856156-05:00 K 32.61 14 52983525028705 [ , F, T, I] \n", + "2024-01-16 04:00:02.299262-05:00 P 32.75 1 52983525027916 [ , T, I] \n", + "2024-01-16 04:00:03.895322-05:00 P 32.64 1 52983525027920 [ , T, I] \n", + "... .. ... ... ... ... \n", + "2024-01-16 19:59:24.796862-05:00 P 32.12 500 52983576997941 [ , T] \n", + "2024-01-16 19:59:24.796868-05:00 P 32.12 500 52983576997942 [ , T] \n", + "2024-01-16 19:59:24.796868-05:00 P 32.12 500 52983576997943 [ , T] \n", + "2024-01-16 19:59:24.796871-05:00 P 32.12 500 52983576997944 [ , T] \n", + "2024-01-16 19:59:48.834830-05:00 K 32.10 25 52983526941511 [ , T, I] \n", + "\n", + " z \n", + "t \n", + "2024-01-16 04:00:00.009225-05:00 A \n", + "2024-01-16 04:00:00.012088-05:00 A \n", + "2024-01-16 04:00:00.856156-05:00 A \n", + "2024-01-16 04:00:02.299262-05:00 A \n", + "2024-01-16 04:00:03.895322-05:00 A \n", + "... .. \n", + "2024-01-16 19:59:24.796862-05:00 A \n", + "2024-01-16 19:59:24.796868-05:00 A \n", + "2024-01-16 19:59:24.796868-05:00 A \n", + "2024-01-16 19:59:24.796871-05:00 A \n", + "2024-01-16 19:59:48.834830-05:00 A \n", + "\n", + "[222754 rows x 6 columns]" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df1" + ] } ], "metadata": { diff --git a/tests/test_ntb.ipynb b/tests/test_ntb.ipynb deleted file mode 100644 index 93e81ec..0000000 --- a/tests/test_ntb.ipynb +++ /dev/null @@ -1,69 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The autoreload extension is already loaded. 
To reload it, use:\n",
-      "  %reload_ext autoreload\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "['CUVWAP', 'DIVRELN']"
-      ]
-     },
-     "execution_count": 2,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "%load_ext autoreload\n",
-    "%autoreload 2\n",
-    "import vectorbtpro as vbt\n",
-    "from ttools.vbtindicators import register_custom_inds\n",
-    "from ttools.indicators import CUVWAP\n",
-    "\n",
-    "\n",
-    "register_custom_inds(None, \"override\")\n",
-    "#chopiness = vbt.indicator(\"technical:CHOPINESS\").run(s12_data.open, s12_data.high, s12_data.low, s12_data.close, s12_data.volume, window = 100)\n",
-    "#vwap_cum_roll = vbt.indicator(\"technical:ROLLING_VWAP\").run(s12_data.open, s12_data.high, s12_data.low, s12_data.close, s12_data.volume, window = 100, min_periods = 5)\n",
-    "#vwap_cum_d = vbt.indicator(\"ttools:CUVWAP\").run(s12_data.high, s12_data.low, s12_data.close, s12_data.volume, anchor=\"D\", drag=50)\n",
-    "#vwap_lin_angle = vbt.indicator(\"talib:LINEARREG_ANGLE\").run(vwap_cum_d.vwap, timeperiod=2)\n",
-    "\n",
-    "vbt.IF.list_indicators(\"ttools\")\n",
-    "\n",
-    "\n",
-    "\n"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": ".venv",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.10.11"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/ttools/aggregator_vectorized.py b/ttools/aggregator_vectorized.py
index 2920e4f..31373f7 100644
--- a/ttools/aggregator_vectorized.py
+++ b/ttools/aggregator_vectorized.py
@@ -29,10 +29,10 @@ def aggregate_trades_optimized(symbol: str, trades_df: pd.DataFrame, resolution:
     del trades_df
 
     # 3. Convert timestamps maintaining exact precision
-    # Convert directly to int64 nanoseconds, then to float seconds
-    unix_timestamps_s = timestamps.view('int64').astype(np.float64) / 1e6
+    # Disabled: the int64 fast path divided nanoseconds by 1e6 (milliseconds), not 1e9 (seconds)
+    #unix_timestamps_s = timestamps.view('int64').astype(np.float64) / 1e6
     #original non-optimized version, kept in case of issues (5x slower)
-    #unix_timestamps_s = timestamps.astype('datetime64[ns]').astype(np.float64) / 1e9
+    unix_timestamps_s = timestamps.astype('datetime64[ns]').astype(np.float64) / 1e9
 
     # 4. Create ticks array efficiently
     # 3. Pre-allocate array for better memory efficiency
diff --git a/ttools/loaders.py b/ttools/loaders.py
index 0863f83..97f1f07 100644
--- a/ttools/loaders.py
+++ b/ttools/loaders.py
@@ -1,5 +1,6 @@
 from ctypes import Union
 
+from ttools import zoneUTC
 from ttools.config import *
 from datetime import datetime
 from alpaca.data.historical import StockHistoricalDataClient
@@ -202,8 +203,8 @@ def fetch_daily_stock_trades(symbol, start, end, exclude_conditions=None, minsiz
     by using force_remote - forces using remote data always and thus refreshing the cache for these dates
     Attributes:
     :param symbol: The stock symbol to fetch trades for.
-    :param start: The start time for the trade data.
-    :param end: The end time for the trade data.
+    :param start: The start time for the trade data, in market timezone.
+    :param end: The end time for the trade data, in market timezone.
    :param exclude_conditions: list of string conditions to exclude from the data
    :param minsize: minimum size of trade to be included in the data
    :param no_return: If True, do not return the DataFrame. Used to prepare cached files.
@@ -231,24 +232,34 @@ def fetch_daily_stock_trades(symbol, start, end, exclude_conditions=None, minsiz
     #exists in cache?
     daily_file = f"{symbol}-{str(start.date())}.parquet"
     file_path = TRADE_CACHE / daily_file
-    if file_path.exists() and (not force_remote or not no_return):
+    if file_path.exists() and (not force_remote and not no_return):
         with trade_cache_lock:
             df = pd.read_parquet(file_path)
         print("Loaded from CACHE", file_path)
         df = filter_trade_df(df, start, end, exclude_conditions, minsize, symbol_included=False, main_session_only=main_session_only)
         return df
 
-    day_next = start.date() + timedelta(days=1)
+    # Build the day's borders in UTC, since Alpaca timestamps are UTC
+    start_date = start.date()
+
+    # Create min/max times in NY timezone
+    ny_day_min = zoneNY.localize(datetime.combine(start_date, time.min))
+    ny_day_max = zoneNY.localize(datetime.combine(start_date, time.max))
+
+    # Convert both to UTC
+    utc_day_min = ny_day_min.astimezone(zoneUTC)
+    utc_day_max = ny_day_max.astimezone(zoneUTC)
 
     print("Fetching from remote.")
    client = StockHistoricalDataClient(ACCOUNT1_LIVE_API_KEY, ACCOUNT1_LIVE_SECRET_KEY, raw_data=True)
-    stockTradeRequest = StockTradesRequest(symbol_or_symbols=symbol, start=start.date(), end=day_next, feed=data_feed)
+    stockTradeRequest = StockTradesRequest(symbol_or_symbols=symbol, start=utc_day_min, end=utc_day_max, feed=data_feed)
     last_exception = None
     for attempt in range(max_retries):
         try:
             tradesResponse = client.get_stock_trades(stockTradeRequest)
-            print(f"Remote fetched completed.", start.date(), day_next)
+            print("Remote fetch completed for whole day", start.date())
+            print(f"Exact UTC range fetched: {utc_day_min} - {utc_day_max}")
             if not tradesResponse[symbol]:
                 print(f"EMPTY")
                 return pd.DataFrame()
@@ -256,7 +267,7 @@
             df = convert_dict_to_multiindex_df(tradesResponse, rename_labels=rename_labels, keep_symbols=keep_symbols)
 
             #if the market is still open today, don't cache - also don't cache for the IEX feed
-            if datetime.now().astimezone(zoneNY).date() < day_next or data_feed == DataFeed.IEX:
+            if datetime.now().astimezone(zoneNY).date() < start_date + timedelta(days=1) or data_feed == DataFeed.IEX:
                 print("not saving trade cache, market still open today or IEX datapoint")
                 #ic(datetime.now().astimezone(zoneNY))
                 #ic(day.open, day.close)
@@ -277,7 +288,7 @@
             print("All attempts to fetch data failed.")
             raise ConnectionError(f"Failed to fetch stock trades after {max_retries} retries. Last exception: {str(last_exception)} and {format_exc()}")
 
-def fetch_trades_parallel(symbol, start_date, end_date, exclude_conditions = EXCLUDE_CONDITIONS, minsize = 100, main_session_only = True, force_remote = False, max_workers=None, no_return = False, verbose = None):
+def fetch_trades_parallel(symbol, start_date, end_date, exclude_conditions = EXCLUDE_CONDITIONS, minsize = None, main_session_only = True, force_remote = False, max_workers=None, no_return = False, verbose = None):
     """
     Fetch trades between ranges.
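
Notes on the hunks above. The snippets that follow are illustrative review sketches, not part of the patch.

1) The first changed data_loader_tryme.ipynb cell now records a NameError: it calls zoneNY.localize(...) before zoneNY exists in that kernel session. A minimal sketch of the missing setup, assuming zoneNY is a pytz zone as the loaders.py code suggests:

    from datetime import datetime
    import pytz

    zoneNY = pytz.timezone("America/New_York")  # assumption: ttools defines zoneNY like this
    start = zoneNY.localize(datetime(2024, 1, 15, 9, 30))
    end = zoneNY.localize(datetime(2024, 10, 20, 16, 0))

With those lines executed first, the pd.read_parquet(..., filters=[...]) call in that cell receives proper timezone-aware bounds.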
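2) The aggregator_vectorized.py hunk disables the int64 fast path. The visible bug is the scale factor: an int64 view of datetime64[ns] counts nanoseconds, so dividing by 1e6 yields milliseconds, while the restored slow path divides by 1e9 to get seconds. A standalone check:

    import numpy as np

    t = np.array(["1970-01-01T00:00:01"], dtype="datetime64[ns]")  # exactly 1 s after the epoch
    ns = t.view("int64")  # int64 nanoseconds: array([1000000000])
    print(ns / 1e6)       # [1000.] -> milliseconds, the scale the fast path produced
    print(ns / 1e9)       # [1.]    -> seconds, the unit the restored path produces

A fast path that divides the int64 view by 1e9 would match the slow path's output, but the patch simply reverts to the slower, known-good conversion.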
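3) The cache guard change in fetch_daily_stock_trades ('or' -> 'and') follows De Morgan: not (force_remote or no_return) is (not force_remote and not no_return), so the cache should be read only when neither flag is set. A truth-table sketch of the two guards, assuming the file exists:

    from itertools import product

    for force_remote, no_return in product([False, True], repeat=2):
        old = (not force_remote) or (not no_return)   # removed guard
        new = (not force_remote) and (not no_return)  # added guard
        print(f"force_remote={force_remote!s:<5} no_return={no_return!s:<5} "
              f"old_reads_cache={old!s:<5} new_reads_cache={new}")

The old guard still read the cache for force_remote=True with no_return=False, defeating the force_remote refresh.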
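4) The new day-border logic expands one NY calendar day to an exact UTC range before querying Alpaca. A runnable sketch of the same steps, again assuming zoneNY and zoneUTC are pytz zones as the imports suggest:

    from datetime import datetime, date, time
    import pytz

    zoneNY = pytz.timezone("America/New_York")
    zoneUTC = pytz.utc

    start_date = date(2024, 1, 16)
    ny_day_min = zoneNY.localize(datetime.combine(start_date, time.min))  # 00:00:00 NY
    ny_day_max = zoneNY.localize(datetime.combine(start_date, time.max))  # 23:59:59.999999 NY

    print(ny_day_min.astimezone(zoneUTC))  # 2024-01-16 05:00:00+00:00 (EST = UTC-5)
    print(ny_day_max.astimezone(zoneUTC))  # 2024-01-17 04:59:59.999999+00:00

Requesting this exact UTC window, instead of passing bare dates that Alpaca reads as UTC midnights, keeps the whole NY extended session in range; the notebook output above reflects this, with trades captured through 19:59:48 NY on the requested day.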