621 lines
22 KiB
Plaintext
621 lines
22 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 37,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"import pandas as pd\n",
|
||
"import pyarrow\n",
|
||
"import numpy as np\n",
|
||
"from numba import jit\n",
|
||
"import v2realbot.utils.config_handler as cfh"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"Další info k pokračování je zde https://blog.quantinsti.com/tick-tick-ohlc-data-pandas-tutorial/"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 38,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"<class 'pandas.core.frame.DataFrame'>\n",
|
||
"DatetimeIndex: 190261 entries, 2024-04-22 13:30:00.267711+00:00 to 2024-04-22 19:59:59.987614+00:00\n",
|
||
"Data columns (total 6 columns):\n",
|
||
" # Column Non-Null Count Dtype \n",
|
||
"--- ------ -------------- ----- \n",
|
||
" 0 exchange 190261 non-null object \n",
|
||
" 1 price 190261 non-null float64\n",
|
||
" 2 size 190261 non-null float64\n",
|
||
" 3 id 190261 non-null int64 \n",
|
||
" 4 conditions 190261 non-null object \n",
|
||
" 5 tape 190261 non-null object \n",
|
||
"dtypes: float64(2), int64(1), object(3)\n",
|
||
"memory usage: 10.2+ MB\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>exchange</th>\n",
|
||
" <th>price</th>\n",
|
||
" <th>size</th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>conditions</th>\n",
|
||
" <th>tape</th>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>timestamp</th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>2024-04-22 13:30:00.267711+00:00</th>\n",
|
||
" <td>K</td>\n",
|
||
" <td>36.890</td>\n",
|
||
" <td>5.0</td>\n",
|
||
" <td>52983525037630</td>\n",
|
||
" <td>[ , F, I]</td>\n",
|
||
" <td>A</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2024-04-22 13:30:00.300501+00:00</th>\n",
|
||
" <td>D</td>\n",
|
||
" <td>37.005</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>71675241117014</td>\n",
|
||
" <td>[ , I]</td>\n",
|
||
" <td>A</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2024-04-22 13:30:00.305439+00:00</th>\n",
|
||
" <td>D</td>\n",
|
||
" <td>37.005</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>71675241117496</td>\n",
|
||
" <td>[ , I]</td>\n",
|
||
" <td>A</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2024-04-22 13:30:00.314520+00:00</th>\n",
|
||
" <td>D</td>\n",
|
||
" <td>37.005</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>71675241118034</td>\n",
|
||
" <td>[ , I]</td>\n",
|
||
" <td>A</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2024-04-22 13:30:00.335201+00:00</th>\n",
|
||
" <td>D</td>\n",
|
||
" <td>37.005</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>71675241121369</td>\n",
|
||
" <td>[ , I]</td>\n",
|
||
" <td>A</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2024-04-22 19:59:59.902614+00:00</th>\n",
|
||
" <td>V</td>\n",
|
||
" <td>37.750</td>\n",
|
||
" <td>1100.0</td>\n",
|
||
" <td>56480705310575</td>\n",
|
||
" <td>[ ]</td>\n",
|
||
" <td>A</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2024-04-22 19:59:59.977134+00:00</th>\n",
|
||
" <td>N</td>\n",
|
||
" <td>37.745</td>\n",
|
||
" <td>300.0</td>\n",
|
||
" <td>52983559963478</td>\n",
|
||
" <td>[ ]</td>\n",
|
||
" <td>A</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2024-04-22 19:59:59.977137+00:00</th>\n",
|
||
" <td>N</td>\n",
|
||
" <td>37.740</td>\n",
|
||
" <td>7300.0</td>\n",
|
||
" <td>52983559963696</td>\n",
|
||
" <td>[ ]</td>\n",
|
||
" <td>A</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2024-04-22 19:59:59.978626+00:00</th>\n",
|
||
" <td>V</td>\n",
|
||
" <td>37.750</td>\n",
|
||
" <td>16.0</td>\n",
|
||
" <td>56480706886228</td>\n",
|
||
" <td>[ , I]</td>\n",
|
||
" <td>A</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2024-04-22 19:59:59.987614+00:00</th>\n",
|
||
" <td>N</td>\n",
|
||
" <td>37.745</td>\n",
|
||
" <td>30.0</td>\n",
|
||
" <td>52983559963958</td>\n",
|
||
" <td>[ , I]</td>\n",
|
||
" <td>A</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>190261 rows × 6 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" exchange price size id \\\n",
|
||
"timestamp \n",
|
||
"2024-04-22 13:30:00.267711+00:00 K 36.890 5.0 52983525037630 \n",
|
||
"2024-04-22 13:30:00.300501+00:00 D 37.005 1.0 71675241117014 \n",
|
||
"2024-04-22 13:30:00.305439+00:00 D 37.005 1.0 71675241117496 \n",
|
||
"2024-04-22 13:30:00.314520+00:00 D 37.005 1.0 71675241118034 \n",
|
||
"2024-04-22 13:30:00.335201+00:00 D 37.005 1.0 71675241121369 \n",
|
||
"... ... ... ... ... \n",
|
||
"2024-04-22 19:59:59.902614+00:00 V 37.750 1100.0 56480705310575 \n",
|
||
"2024-04-22 19:59:59.977134+00:00 N 37.745 300.0 52983559963478 \n",
|
||
"2024-04-22 19:59:59.977137+00:00 N 37.740 7300.0 52983559963696 \n",
|
||
"2024-04-22 19:59:59.978626+00:00 V 37.750 16.0 56480706886228 \n",
|
||
"2024-04-22 19:59:59.987614+00:00 N 37.745 30.0 52983559963958 \n",
|
||
"\n",
|
||
" conditions tape \n",
|
||
"timestamp \n",
|
||
"2024-04-22 13:30:00.267711+00:00 [ , F, I] A \n",
|
||
"2024-04-22 13:30:00.300501+00:00 [ , I] A \n",
|
||
"2024-04-22 13:30:00.305439+00:00 [ , I] A \n",
|
||
"2024-04-22 13:30:00.314520+00:00 [ , I] A \n",
|
||
"2024-04-22 13:30:00.335201+00:00 [ , I] A \n",
|
||
"... ... ... \n",
|
||
"2024-04-22 19:59:59.902614+00:00 [ ] A \n",
|
||
"2024-04-22 19:59:59.977134+00:00 [ ] A \n",
|
||
"2024-04-22 19:59:59.977137+00:00 [ ] A \n",
|
||
"2024-04-22 19:59:59.978626+00:00 [ , I] A \n",
|
||
"2024-04-22 19:59:59.987614+00:00 [ , I] A \n",
|
||
"\n",
|
||
"[190261 rows x 6 columns]"
|
||
]
|
||
},
|
||
"execution_count": 38,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
"# Read the raw trade ticks from the local parquet file.\n",
"tdf = pd.read_parquet('trades_bac.parquet', engine='pyarrow')\n",
"\n",
"# Keep only the rows stored under the 'BAC' label\n",
"# (presumably the symbol level of a (symbol, timestamp) index - TODO confirm).\n",
"df = tdf.loc['BAC']\n",
"\n",
"# Print the schema summary, then show the frame as the cell result.\n",
"df.info()\n",
"df"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 39,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
"@jit(nopython=True)\n",
"def ohlcv_bars(ticks, start_time, end_time, resolution):\n",
"    \"\"\"\n",
"    Generate OHLCV bars from tick data, skipping intervals without trading activity.\n",
"\n",
"    Every tick is assigned to its bar in a single pass over `ticks`, so the cost is\n",
"    O(len(ticks) + number of bars) rather than one full scan of `ticks` per bar.\n",
"\n",
"    Parameters:\n",
"    - ticks: 2-D float numpy array with columns [timestamp, price, size]; timestamps\n",
"      use the same unit as start_time / end_time / resolution. Rows do not have to\n",
"      be sorted: within a bar, open is the first row in array order and close is\n",
"      the last one.\n",
"    - start_time: the start timestamp for bars (Unix timestamp)\n",
"    - end_time: the end timestamp for bars (Unix timestamp)\n",
"    - resolution: time resolution in seconds\n",
"\n",
"    Returns:\n",
"    - float numpy array of shape (n, 6) with columns\n",
"      [open, high, low, close, volume, bar_start_time], one row per non-empty bar\n",
"      in ascending bar order; shape (0, 6) when no tick falls into any bar.\n",
"    \"\"\"\n",
"    # Bar i covers [start_time + i * resolution, start_time + (i + 1) * resolution).\n",
"    # Clamp at zero so a reversed time range simply produces no bars.\n",
"    num_bars = max(int((end_time - start_time) // resolution) + 1, 0)\n",
"    range_end = start_time + num_bars * resolution\n",
"\n",
"    bars = np.empty((num_bars, 6))\n",
"    filled = np.zeros(num_bars, dtype=np.bool_)  # True once a bar has received a tick\n",
"\n",
"    for row in range(ticks.shape[0]):\n",
"        tick_time = ticks[row, 0]\n",
"        # Ticks outside the covered range (or with a NaN timestamp) belong to no bar.\n",
"        if not (tick_time >= start_time and tick_time < range_end):\n",
"            continue\n",
"\n",
"        # The min() guards against float rounding right at the end of the last bar.\n",
"        i = min(int((tick_time - start_time) // resolution), num_bars - 1)\n",
"        price = ticks[row, 1]\n",
"        size = ticks[row, 2]\n",
"\n",
"        if filled[i]:\n",
"            if price > bars[i, 1]:\n",
"                bars[i, 1] = price              # high\n",
"            if price < bars[i, 2]:\n",
"                bars[i, 2] = price              # low\n",
"            bars[i, 3] = price                  # close: latest tick seen so far\n",
"            bars[i, 4] += size                  # volume\n",
"        else:\n",
"            filled[i] = True\n",
"            bars[i, 0] = price                  # open: first tick of the bar\n",
"            bars[i, 1] = price\n",
"            bars[i, 2] = price\n",
"            bars[i, 3] = price\n",
"            bars[i, 4] = size\n",
"            bars[i, 5] = start_time + i * resolution  # timestamp for the bar\n",
"\n",
"    # Keep only the bars that actually received ticks.\n",
"    return bars[filled]\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 40,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"<class 'pandas.core.frame.DataFrame'>\n",
|
||
"DatetimeIndex: 190261 entries, 2024-04-22 13:30:00.267711+00:00 to 2024-04-22 19:59:59.987614+00:00\n",
|
||
"Data columns (total 6 columns):\n",
|
||
" # Column Non-Null Count Dtype \n",
|
||
"--- ------ -------------- ----- \n",
|
||
" 0 exchange 190261 non-null object \n",
|
||
" 1 price 190261 non-null float64\n",
|
||
" 2 size 190261 non-null float64\n",
|
||
" 3 id 190261 non-null int64 \n",
|
||
" 4 conditions 190261 non-null object \n",
|
||
" 5 tape 190261 non-null object \n",
|
||
"dtypes: float64(2), int64(1), object(3)\n",
|
||
"memory usage: 10.2+ MB\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# Print the frame summary (index range, column dtypes, non-null counts, memory use)\n",
"# as a reference point before the filtering cell that follows.\n",
"df.info()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 41,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"['C', 'O', '4', 'B', '7', 'V', 'P', 'W', 'U', 'Z', 'F']\n",
|
||
"<class 'pandas.core.frame.DataFrame'>\n",
|
||
"DatetimeIndex: 143751 entries, 2024-04-22 13:30:00.300501+00:00 to 2024-04-22 19:59:59.987614+00:00\n",
|
||
"Data columns (total 6 columns):\n",
|
||
" # Column Non-Null Count Dtype \n",
|
||
"--- ------ -------------- ----- \n",
|
||
" 0 exchange 143751 non-null object \n",
|
||
" 1 price 143751 non-null float64\n",
|
||
" 2 size 143751 non-null float64\n",
|
||
" 3 id 143751 non-null int64 \n",
|
||
" 4 conditions 143751 non-null object \n",
|
||
" 5 tape 143751 non-null object \n",
|
||
"dtypes: float64(2), int64(1), object(3)\n",
|
||
"memory usage: 7.7+ MB\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>exchange</th>\n",
|
||
" <th>price</th>\n",
|
||
" <th>size</th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>conditions</th>\n",
|
||
" <th>tape</th>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>timestamp</th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>2024-04-22 13:30:00.300501+00:00</th>\n",
|
||
" <td>D</td>\n",
|
||
" <td>37.005</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>71675241117014</td>\n",
|
||
" <td>[ , I]</td>\n",
|
||
" <td>A</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2024-04-22 13:30:00.305439+00:00</th>\n",
|
||
" <td>D</td>\n",
|
||
" <td>37.005</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>71675241117496</td>\n",
|
||
" <td>[ , I]</td>\n",
|
||
" <td>A</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2024-04-22 13:30:00.314520+00:00</th>\n",
|
||
" <td>D</td>\n",
|
||
" <td>37.005</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>71675241118034</td>\n",
|
||
" <td>[ , I]</td>\n",
|
||
" <td>A</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2024-04-22 13:30:00.335201+00:00</th>\n",
|
||
" <td>D</td>\n",
|
||
" <td>37.005</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>71675241121369</td>\n",
|
||
" <td>[ , I]</td>\n",
|
||
" <td>A</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2024-04-22 13:30:00.346219+00:00</th>\n",
|
||
" <td>D</td>\n",
|
||
" <td>37.005</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>71675241122389</td>\n",
|
||
" <td>[ , I]</td>\n",
|
||
" <td>A</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2024-04-22 19:59:59.902614+00:00</th>\n",
|
||
" <td>V</td>\n",
|
||
" <td>37.750</td>\n",
|
||
" <td>1100.0</td>\n",
|
||
" <td>56480705310575</td>\n",
|
||
" <td>[ ]</td>\n",
|
||
" <td>A</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2024-04-22 19:59:59.977134+00:00</th>\n",
|
||
" <td>N</td>\n",
|
||
" <td>37.745</td>\n",
|
||
" <td>300.0</td>\n",
|
||
" <td>52983559963478</td>\n",
|
||
" <td>[ ]</td>\n",
|
||
" <td>A</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2024-04-22 19:59:59.977137+00:00</th>\n",
|
||
" <td>N</td>\n",
|
||
" <td>37.740</td>\n",
|
||
" <td>7300.0</td>\n",
|
||
" <td>52983559963696</td>\n",
|
||
" <td>[ ]</td>\n",
|
||
" <td>A</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2024-04-22 19:59:59.978626+00:00</th>\n",
|
||
" <td>V</td>\n",
|
||
" <td>37.750</td>\n",
|
||
" <td>16.0</td>\n",
|
||
" <td>56480706886228</td>\n",
|
||
" <td>[ , I]</td>\n",
|
||
" <td>A</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2024-04-22 19:59:59.987614+00:00</th>\n",
|
||
" <td>N</td>\n",
|
||
" <td>37.745</td>\n",
|
||
" <td>30.0</td>\n",
|
||
" <td>52983559963958</td>\n",
|
||
" <td>[ , I]</td>\n",
|
||
" <td>A</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>143751 rows × 6 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" exchange price size id \\\n",
|
||
"timestamp \n",
|
||
"2024-04-22 13:30:00.300501+00:00 D 37.005 1.0 71675241117014 \n",
|
||
"2024-04-22 13:30:00.305439+00:00 D 37.005 1.0 71675241117496 \n",
|
||
"2024-04-22 13:30:00.314520+00:00 D 37.005 1.0 71675241118034 \n",
|
||
"2024-04-22 13:30:00.335201+00:00 D 37.005 1.0 71675241121369 \n",
|
||
"2024-04-22 13:30:00.346219+00:00 D 37.005 1.0 71675241122389 \n",
|
||
"... ... ... ... ... \n",
|
||
"2024-04-22 19:59:59.902614+00:00 V 37.750 1100.0 56480705310575 \n",
|
||
"2024-04-22 19:59:59.977134+00:00 N 37.745 300.0 52983559963478 \n",
|
||
"2024-04-22 19:59:59.977137+00:00 N 37.740 7300.0 52983559963696 \n",
|
||
"2024-04-22 19:59:59.978626+00:00 V 37.750 16.0 56480706886228 \n",
|
||
"2024-04-22 19:59:59.987614+00:00 N 37.745 30.0 52983559963958 \n",
|
||
"\n",
|
||
" conditions tape \n",
|
||
"timestamp \n",
|
||
"2024-04-22 13:30:00.300501+00:00 [ , I] A \n",
|
||
"2024-04-22 13:30:00.305439+00:00 [ , I] A \n",
|
||
"2024-04-22 13:30:00.314520+00:00 [ , I] A \n",
|
||
"2024-04-22 13:30:00.335201+00:00 [ , I] A \n",
|
||
"2024-04-22 13:30:00.346219+00:00 [ , I] A \n",
|
||
"... ... ... \n",
|
||
"2024-04-22 19:59:59.902614+00:00 [ ] A \n",
|
||
"2024-04-22 19:59:59.977134+00:00 [ ] A \n",
|
||
"2024-04-22 19:59:59.977137+00:00 [ ] A \n",
|
||
"2024-04-22 19:59:59.978626+00:00 [ , I] A \n",
|
||
"2024-04-22 19:59:59.987614+00:00 [ , I] A \n",
|
||
"\n",
|
||
"[143751 rows x 6 columns]"
|
||
]
|
||
},
|
||
"execution_count": 41,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
"# Trade-condition codes to drop, read from the project configuration.\n",
"excludes = cfh.config_handler.get_val('AGG_EXCLUDED_TRADES')\n",
"print(excludes)\n",
"\n",
"# FILTER EXCLUDED TRADES\n",
"# Drop every trade whose 'conditions' entry shares at least one code with\n",
"# `excludes` (the configured list printed above, not a fixed pair of codes).\n",
"# A set turns each per-row check into hash lookups instead of building a\n",
"# NumPy array per row; `or []` keeps all rows if the setting is missing.\n",
"excluded_codes = set(excludes or [])\n",
"is_excluded = df['conditions'].apply(lambda codes: not excluded_codes.isdisjoint(codes))\n",
"df = df[~is_excluded]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 46,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"/var/folders/8p/dwqnp65s0s77jdbm4_6z4vp80000gn/T/ipykernel_52602/3341929382.py:2: DeprecationWarning: parsing timezone aware datetimes is deprecated; this will raise an error in the future\n",
|
||
" structured_array = np.array(list(zip(df.index, df['price'], df['size'])),\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"[('2024-04-22T13:30:00.300501000', 37.005, 1.0e+00)\n",
|
||
" ('2024-04-22T13:30:00.305439000', 37.005, 1.0e+00)\n",
|
||
" ('2024-04-22T13:30:00.314520000', 37.005, 1.0e+00) ...\n",
|
||
" ('2024-04-22T19:59:59.977137000', 37.74 , 7.3e+03)\n",
|
||
" ('2024-04-22T19:59:59.978626000', 37.75 , 1.6e+01)\n",
|
||
" ('2024-04-22T19:59:59.987614000', 37.745, 3.0e+01)]\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"array([('2024-04-22T13:30:00.300501000', 37.005, 1.0e+00),\n",
|
||
" ('2024-04-22T13:30:00.305439000', 37.005, 1.0e+00),\n",
|
||
" ('2024-04-22T13:30:00.314520000', 37.005, 1.0e+00), ...,\n",
|
||
" ('2024-04-22T19:59:59.977137000', 37.74 , 7.3e+03),\n",
|
||
" ('2024-04-22T19:59:59.978626000', 37.75 , 1.6e+01),\n",
|
||
" ('2024-04-22T19:59:59.987614000', 37.745, 3.0e+01)],\n",
|
||
" dtype=[('timestamp', '<M8[ns]'), ('price', '<f8'), ('size', '<f8')])"
|
||
]
|
||
},
|
||
"execution_count": 46,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
"# Pack the ticks into a structured array: timestamp first, then price and size.\n",
"# Fields are filled column by column. Building the array from list(zip(...)) would\n",
"# route every row through a Python tuple and make NumPy parse timezone-aware\n",
"# Timestamps, which is deprecated and announced to become an error.\n",
"tick_dtype = [('timestamp', 'datetime64[ns]'), ('price', 'float'), ('size', 'float')]\n",
"structured_array = np.empty(len(df), dtype=tick_dtype)\n",
"\n",
"# For a timezone-aware index the values are converted to UTC and stored tz-naive.\n",
"structured_array['timestamp'] = df.index.to_numpy(dtype='datetime64[ns]')\n",
"structured_array['price'] = df['price'].to_numpy()\n",
"structured_array['size'] = df['size'].to_numpy()\n",
"\n",
"print(structured_array)\n",
"structured_array"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
"resolution_seconds = 1  # 1 second resolution\n",
"\n",
"# ohlcv_bars indexes its input as a plain 2-D float array [timestamp, price, size]\n",
"# with numeric timestamps, so unpack the structured array into that layout\n",
"# (timestamps as Unix seconds; float64 keeps sub-microsecond precision here).\n",
"tick_seconds = structured_array['timestamp'].astype('int64') / 1e9\n",
"ticks = np.column_stack((tick_seconds, structured_array['price'], structured_array['size']))\n",
"\n",
"# Cover the full range of the ticks, with bar edges aligned to multiples of the\n",
"# resolution. Both bounds are required arguments of ohlcv_bars.\n",
"start_time = int(np.floor(tick_seconds.min()))\n",
"start_time -= start_time % resolution_seconds\n",
"end_time = int(np.floor(tick_seconds.max()))\n",
"ohlcv_data = ohlcv_bars(ticks, start_time, end_time, resolution_seconds)\n",
"\n",
"# Converting the result back to DataFrame for better usability\n",
"ohlcv_df = pd.DataFrame(ohlcv_data, columns=['Open', 'High', 'Low', 'Close', 'Volume', 'Time'])\n",
"ohlcv_df['Time'] = pd.to_datetime(ohlcv_df['Time'], unit='s')  # Convert timestamps back to datetime\n"
|
||
]
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": ".venv",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.10.11"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 2
|
||
}
|