{ "cells": [ { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import pyarrow\n", "import numpy as np\n", "from numba import jit\n", "import v2realbot.utils.config_handler as cfh" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Další info k pokračování je zde https://blog.quantinsti.com/tick-tick-ohlc-data-pandas-tutorial/" ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "DatetimeIndex: 190261 entries, 2024-04-22 13:30:00.267711+00:00 to 2024-04-22 19:59:59.987614+00:00\n", "Data columns (total 6 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 exchange 190261 non-null object \n", " 1 price 190261 non-null float64\n", " 2 size 190261 non-null float64\n", " 3 id 190261 non-null int64 \n", " 4 conditions 190261 non-null object \n", " 5 tape 190261 non-null object \n", "dtypes: float64(2), int64(1), object(3)\n", "memory usage: 10.2+ MB\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
exchangepricesizeidconditionstape
timestamp
2024-04-22 13:30:00.267711+00:00K36.8905.052983525037630[ , F, I]A
2024-04-22 13:30:00.300501+00:00D37.0051.071675241117014[ , I]A
2024-04-22 13:30:00.305439+00:00D37.0051.071675241117496[ , I]A
2024-04-22 13:30:00.314520+00:00D37.0051.071675241118034[ , I]A
2024-04-22 13:30:00.335201+00:00D37.0051.071675241121369[ , I]A
.....................
2024-04-22 19:59:59.902614+00:00V37.7501100.056480705310575[ ]A
2024-04-22 19:59:59.977134+00:00N37.745300.052983559963478[ ]A
2024-04-22 19:59:59.977137+00:00N37.7407300.052983559963696[ ]A
2024-04-22 19:59:59.978626+00:00V37.75016.056480706886228[ , I]A
2024-04-22 19:59:59.987614+00:00N37.74530.052983559963958[ , I]A
\n", "

190261 rows × 6 columns

\n", "
" ], "text/plain": [ " exchange price size id \\\n", "timestamp \n", "2024-04-22 13:30:00.267711+00:00 K 36.890 5.0 52983525037630 \n", "2024-04-22 13:30:00.300501+00:00 D 37.005 1.0 71675241117014 \n", "2024-04-22 13:30:00.305439+00:00 D 37.005 1.0 71675241117496 \n", "2024-04-22 13:30:00.314520+00:00 D 37.005 1.0 71675241118034 \n", "2024-04-22 13:30:00.335201+00:00 D 37.005 1.0 71675241121369 \n", "... ... ... ... ... \n", "2024-04-22 19:59:59.902614+00:00 V 37.750 1100.0 56480705310575 \n", "2024-04-22 19:59:59.977134+00:00 N 37.745 300.0 52983559963478 \n", "2024-04-22 19:59:59.977137+00:00 N 37.740 7300.0 52983559963696 \n", "2024-04-22 19:59:59.978626+00:00 V 37.750 16.0 56480706886228 \n", "2024-04-22 19:59:59.987614+00:00 N 37.745 30.0 52983559963958 \n", "\n", " conditions tape \n", "timestamp \n", "2024-04-22 13:30:00.267711+00:00 [ , F, I] A \n", "2024-04-22 13:30:00.300501+00:00 [ , I] A \n", "2024-04-22 13:30:00.305439+00:00 [ , I] A \n", "2024-04-22 13:30:00.314520+00:00 [ , I] A \n", "2024-04-22 13:30:00.335201+00:00 [ , I] A \n", "... ... ... 
# %% Load raw trades
# Read the trades parquet and select the 'BAC' entry of the outer index.
# NOTE(review): presumably the frame is indexed by (symbol, timestamp) - confirm.
tdf = pd.read_parquet('trades_bac.parquet', engine='pyarrow')
df = tdf.loc['BAC']
df.info()
df

# %% OHLCV aggregation
@jit(nopython=True)
def ohlcv_bars(ticks, start_time, end_time, resolution):
    """
    Generate OHLCV bars from tick data, skipping intervals without trading activity.

    Every tick is assigned to the half-open bucket
    [start_time + i * resolution, start_time + (i + 1) * resolution) in a single
    pass over ``ticks``, so the cost is O(ticks + bars) instead of re-scanning
    the whole tick array once per bar.

    Parameters:
    - ticks: 2-D numpy array with columns [timestamp, price, size]
    - start_time: the start timestamp for bars (Unix timestamp)
    - end_time: the end timestamp for bars (Unix timestamp)
    - resolution: time resolution in seconds

    Returns:
    - OHLCV bars as a float numpy array of shape (n_bars, 6) with columns
      [open, high, low, close, volume, bar_time], ordered by bar_time, where
      bar_time is the start of the bucket. Buckets that received no tick are
      omitted; an empty (0, 6) array is returned when no bar was created.

    Notes:
    - Open/close are the first/last tick of a bucket in array order, so
      ``ticks`` is expected to be sorted by timestamp.
    - The number of buckets is (end_time - start_time) // resolution + 1, so
      the last bucket may extend past ``end_time``.
    """
    # int() lets float time bounds work; range()/array sizes need an integer.
    num_bars = int((end_time - start_time) // resolution + 1)
    if num_bars <= 0:
        return np.empty((0, 6))

    # Per-bucket accumulators; tick_counts marks which buckets saw any trade.
    opens = np.empty(num_bars)
    highs = np.empty(num_bars)
    lows = np.empty(num_bars)
    closes = np.empty(num_bars)
    volumes = np.zeros(num_bars)
    tick_counts = np.zeros(num_bars, dtype=np.int64)

    # Exclusive upper edge of the last bucket.
    grid_end = start_time + (num_bars - 1) * resolution + resolution

    for k in range(ticks.shape[0]):
        ts = ticks[k, 0]
        # Written as a positive test so NaN timestamps are dropped as well.
        if not (ts >= start_time and ts < grid_end):
            continue

        idx = int((ts - start_time) // resolution)
        # Floor division on floats can land one bucket off right at an edge;
        # nudge so that bar_start <= ts < bar_start + resolution holds exactly.
        bar_start = start_time + idx * resolution
        if ts < bar_start:
            idx -= 1
        elif ts >= bar_start + resolution:
            idx += 1

        price = ticks[k, 1]
        if tick_counts[idx] == 0:
            opens[idx] = price
            highs[idx] = price
            lows[idx] = price
        else:
            # The isnan checks make a NaN price propagate like np.max/np.min.
            if price > highs[idx] or np.isnan(price):
                highs[idx] = price
            if price < lows[idx] or np.isnan(price):
                lows[idx] = price
        closes[idx] = price
        volumes[idx] += ticks[k, 2]
        tick_counts[idx] += 1

    # Emit only the buckets that actually traded, in chronological order.
    occupied = np.nonzero(tick_counts)[0]
    ohlcv = np.empty((occupied.shape[0], 6))
    for row in range(occupied.shape[0]):
        b = occupied[row]
        ohlcv[row, 0] = opens[b]
        ohlcv[row, 1] = highs[b]
        ohlcv[row, 2] = lows[b]
        ohlcv[row, 3] = closes[b]
        ohlcv[row, 4] = volumes[b]
        ohlcv[row, 5] = start_time + b * resolution  # timestamp for the bar

    return ohlcv

# %% Inspect the frame
df.info()
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
exchangepricesizeidconditionstape
timestamp
2024-04-22 13:30:00.300501+00:00D37.0051.071675241117014[ , I]A
2024-04-22 13:30:00.305439+00:00D37.0051.071675241117496[ , I]A
2024-04-22 13:30:00.314520+00:00D37.0051.071675241118034[ , I]A
2024-04-22 13:30:00.335201+00:00D37.0051.071675241121369[ , I]A
2024-04-22 13:30:00.346219+00:00D37.0051.071675241122389[ , I]A
.....................
2024-04-22 19:59:59.902614+00:00V37.7501100.056480705310575[ ]A
2024-04-22 19:59:59.977134+00:00N37.745300.052983559963478[ ]A
2024-04-22 19:59:59.977137+00:00N37.7407300.052983559963696[ ]A
2024-04-22 19:59:59.978626+00:00V37.75016.056480706886228[ , I]A
2024-04-22 19:59:59.987614+00:00N37.74530.052983559963958[ , I]A
\n", "

143751 rows × 6 columns

\n", "
" ], "text/plain": [ " exchange price size id \\\n", "timestamp \n", "2024-04-22 13:30:00.300501+00:00 D 37.005 1.0 71675241117014 \n", "2024-04-22 13:30:00.305439+00:00 D 37.005 1.0 71675241117496 \n", "2024-04-22 13:30:00.314520+00:00 D 37.005 1.0 71675241118034 \n", "2024-04-22 13:30:00.335201+00:00 D 37.005 1.0 71675241121369 \n", "2024-04-22 13:30:00.346219+00:00 D 37.005 1.0 71675241122389 \n", "... ... ... ... ... \n", "2024-04-22 19:59:59.902614+00:00 V 37.750 1100.0 56480705310575 \n", "2024-04-22 19:59:59.977134+00:00 N 37.745 300.0 52983559963478 \n", "2024-04-22 19:59:59.977137+00:00 N 37.740 7300.0 52983559963696 \n", "2024-04-22 19:59:59.978626+00:00 V 37.750 16.0 56480706886228 \n", "2024-04-22 19:59:59.987614+00:00 N 37.745 30.0 52983559963958 \n", "\n", " conditions tape \n", "timestamp \n", "2024-04-22 13:30:00.300501+00:00 [ , I] A \n", "2024-04-22 13:30:00.305439+00:00 [ , I] A \n", "2024-04-22 13:30:00.314520+00:00 [ , I] A \n", "2024-04-22 13:30:00.335201+00:00 [ , I] A \n", "2024-04-22 13:30:00.346219+00:00 [ , I] A \n", "... ... ... 
# %% Filter excluded trades
# Condition codes that disqualify a trade from aggregation, read from the
# project configuration under 'AGG_EXCLUDED_TRADES'.
excludes = cfh.config_handler.get_val('AGG_EXCLUDED_TRADES')
print(excludes)

# FILTER EXCLUDED TRADES
# Flag each row whose 'conditions' entry holds at least one excluded code,
# then keep only the rows that were not flagged.
has_excluded_code = df['conditions'].apply(
    lambda codes: np.isin(codes, excludes).any()
)
df = df[~has_excluded_code]