static site pwd protected, load_dotenv moved to config, vectorized aggregator changes

This commit is contained in:
David Brazda
2024-06-04 12:47:48 +02:00
parent 05b7725a25
commit 093fe5dfbc
15 changed files with 49538 additions and 851 deletions

View File

@@ -0,0 +1,410 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Loading trades and vectorized aggregation\n",
"Describes how to fetch trades (remote/cached) and use new vectorized aggregation to aggregate bars of given type (time, volume, dollar) and resolution\n",
"\n",
"`fetch_trades_parallel` enables to fetch trades of given symbol and interval, also can filter conditions and minimum size. return `trades_df`\n",
"`aggregate_trades` acceptss `trades_df` and ressolution and type of bars (VOLUME, TIME, DOLLAR) and return aggregated ohlcv dataframe `ohlcv_df`"
]
},
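{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Minimal usage sketch of the two helpers described above (not executed here;\n",
"# it assumes the symbol, day_start, day_stop and exclude_conditions variables\n",
"# defined in the next cell):\n",
"#\n",
"# trades_df = fetch_trades_parallel(symbol, day_start, day_stop,\n",
"#                                   exclude_conditions=exclude_conditions, minsize=100)\n",
"# ohlcv_df = aggregate_trades(symbol=symbol, trades_df=trades_df,\n",
"#                             resolution=1, type=BarType.TIME)\n"
]
},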
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Activating profile profile1\n",
"</pre>\n"
],
"text/plain": [
"Activating profile profile1\n"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"trades_df-BAC-2024-01-11T09:30:00-2024-01-12T16:00:00.parquet\n",
"trades_df-SPY-2024-01-01T09:30:00-2024-05-14T16:00:00.parquet\n",
"ohlcv_df-BAC-2024-01-11T09:30:00-2024-01-12T16:00:00.parquet\n",
"ohlcv_df-SPY-2024-01-01T09:30:00-2024-05-14T16:00:00.parquet\n"
]
}
],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"from numba import jit\n",
"from alpaca.data.historical import StockHistoricalDataClient\n",
"from v2realbot.config import ACCOUNT1_PAPER_API_KEY, ACCOUNT1_PAPER_SECRET_KEY, DATA_DIR\n",
"from alpaca.data.requests import StockTradesRequest\n",
"from v2realbot.enums.enums import BarType\n",
"import time\n",
"from datetime import datetime\n",
"from v2realbot.utils.utils import parse_alpaca_timestamp, ltp, zoneNY, send_to_telegram, fetch_calendar_data\n",
"import pyarrow\n",
"from v2realbot.loader.aggregator_vectorized import fetch_daily_stock_trades, fetch_trades_parallel, generate_time_bars_nb, aggregate_trades\n",
"import vectorbtpro as vbt\n",
"import v2realbot.utils.config_handler as cfh\n",
"\n",
"vbt.settings.set_theme(\"dark\")\n",
"vbt.settings['plotting']['layout']['width'] = 1280\n",
"vbt.settings.plotting.auto_rangebreaks = True\n",
"# Set the option to display with pagination\n",
"pd.set_option('display.notebook_repr_html', True)\n",
"pd.set_option('display.max_rows', 20) # Number of rows per page\n",
"# pd.set_option('display.float_format', '{:.9f}'.format)\n",
"\n",
"\n",
"#trade filtering\n",
"exclude_conditions = cfh.config_handler.get_val('AGG_EXCLUDED_TRADES') #standard ['C','O','4','B','7','V','P','W','U','Z','F']\n",
"minsize = 100\n",
"\n",
"symbol = \"SPY\"\n",
"#datetime in zoneNY \n",
"day_start = datetime(2024, 1, 1, 9, 30, 0)\n",
"day_stop = datetime(2024, 1, 14, 16, 00, 0)\n",
"day_start = zoneNY.localize(day_start)\n",
"day_stop = zoneNY.localize(day_stop)\n",
"#filename of trades_df parquet, date are in isoformat but without time zone part\n",
"dir = DATA_DIR + \"/notebooks/\"\n",
"#parquet interval cache contains exclude conditions and minsize filtering\n",
"file_trades = dir + f\"trades_df-{symbol}-{day_start.strftime('%Y-%m-%dT%H:%M:%S')}-{day_stop.strftime('%Y-%m-%dT%H:%M:%S')}-{exclude_conditions}-{minsize}.parquet\"\n",
"#file_trades = dir + f\"trades_df-{symbol}-{day_start.strftime('%Y-%m-%dT%H:%M:%S')}-{day_stop.strftime('%Y-%m-%dT%H:%M:%S')}.parquet\"\n",
"file_ohlcv = dir + f\"ohlcv_df-{symbol}-{day_start.strftime('%Y-%m-%dT%H:%M:%S')}-{day_stop.strftime('%Y-%m-%dT%H:%M:%S')}-{exclude_conditions}-{minsize}.parquet\"\n",
"\n",
"#PRINT all parquet in directory\n",
"import os\n",
"files = [f for f in os.listdir(dir) if f.endswith(\".parquet\")]\n",
"for f in files:\n",
" print(f)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"NOT FOUND. Fetching from remote\n"
]
},
{
"ename": "KeyboardInterrupt",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[2], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m trades_df \u001b[38;5;241m=\u001b[39m \u001b[43mfetch_daily_stock_trades\u001b[49m\u001b[43m(\u001b[49m\u001b[43msymbol\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mday_start\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mday_stop\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mexclude_conditions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mexclude_conditions\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mminsize\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mminsize\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mforce_remote\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmax_retries\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m5\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbackoff_factor\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2\u001b[0m trades_df\n",
"File \u001b[0;32m~/Documents/Development/python/v2trading/v2realbot/loader/aggregator_vectorized.py:200\u001b[0m, in \u001b[0;36mfetch_daily_stock_trades\u001b[0;34m(symbol, start, end, exclude_conditions, minsize, force_remote, max_retries, backoff_factor)\u001b[0m\n\u001b[1;32m 198\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m attempt \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mrange\u001b[39m(max_retries):\n\u001b[1;32m 199\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 200\u001b[0m tradesResponse \u001b[38;5;241m=\u001b[39m \u001b[43mclient\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_stock_trades\u001b[49m\u001b[43m(\u001b[49m\u001b[43mstockTradeRequest\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 201\u001b[0m is_empty \u001b[38;5;241m=\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m tradesResponse[symbol]\n\u001b[1;32m 202\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mRemote fetched: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mis_empty\u001b[38;5;132;01m=}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m, start, end)\n",
"File \u001b[0;32m~/Documents/Development/python/v2trading/.venv/lib/python3.10/site-packages/alpaca/data/historical/stock.py:144\u001b[0m, in \u001b[0;36mStockHistoricalDataClient.get_stock_trades\u001b[0;34m(self, request_params)\u001b[0m\n\u001b[1;32m 141\u001b[0m params \u001b[38;5;241m=\u001b[39m request_params\u001b[38;5;241m.\u001b[39mto_request_fields()\n\u001b[1;32m 143\u001b[0m \u001b[38;5;66;03m# paginated get request for market data api\u001b[39;00m\n\u001b[0;32m--> 144\u001b[0m raw_trades \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_data_get\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 145\u001b[0m \u001b[43m \u001b[49m\u001b[43mendpoint_data_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtrades\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 146\u001b[0m \u001b[43m \u001b[49m\u001b[43mendpoint_asset_class\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mstocks\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 147\u001b[0m \u001b[43m \u001b[49m\u001b[43mapi_version\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mv2\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 148\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mparams\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 149\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 151\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_use_raw_data:\n\u001b[1;32m 152\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m raw_trades\n",
"File \u001b[0;32m~/Documents/Development/python/v2trading/.venv/lib/python3.10/site-packages/alpaca/data/historical/stock.py:338\u001b[0m, in \u001b[0;36mStockHistoricalDataClient._data_get\u001b[0;34m(self, endpoint_asset_class, endpoint_data_type, api_version, symbol_or_symbols, limit, page_limit, extension, **kwargs)\u001b[0m\n\u001b[1;32m 335\u001b[0m params[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mlimit\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m actual_limit\n\u001b[1;32m 336\u001b[0m params[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpage_token\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m page_token\n\u001b[0;32m--> 338\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpath\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpath\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mparams\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mapi_version\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mapi_version\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 340\u001b[0m \u001b[38;5;66;03m# TODO: Merge parsing if possible\u001b[39;00m\n\u001b[1;32m 341\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m extension \u001b[38;5;241m==\u001b[39m DataExtensionType\u001b[38;5;241m.\u001b[39mSNAPSHOT:\n",
"File \u001b[0;32m~/Documents/Development/python/v2trading/.venv/lib/python3.10/site-packages/alpaca/common/rest.py:221\u001b[0m, in \u001b[0;36mRESTClient.get\u001b[0;34m(self, path, data, **kwargs)\u001b[0m\n\u001b[1;32m 210\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mget\u001b[39m(\u001b[38;5;28mself\u001b[39m, path: \u001b[38;5;28mstr\u001b[39m, data: Union[\u001b[38;5;28mdict\u001b[39m, \u001b[38;5;28mstr\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m HTTPResult:\n\u001b[1;32m 211\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Performs a single GET request\u001b[39;00m\n\u001b[1;32m 212\u001b[0m \n\u001b[1;32m 213\u001b[0m \u001b[38;5;124;03m Args:\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 219\u001b[0m \u001b[38;5;124;03m dict: The response\u001b[39;00m\n\u001b[1;32m 220\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 221\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_request\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mGET\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpath\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/Documents/Development/python/v2trading/.venv/lib/python3.10/site-packages/alpaca/common/rest.py:129\u001b[0m, in \u001b[0;36mRESTClient._request\u001b[0;34m(self, method, path, data, base_url, api_version)\u001b[0m\n\u001b[1;32m 127\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m retry \u001b[38;5;241m>\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[1;32m 128\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 129\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_one_request\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mopts\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mretry\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 130\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m RetryException:\n\u001b[1;32m 131\u001b[0m time\u001b[38;5;241m.\u001b[39msleep(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_retry_wait)\n",
"File \u001b[0;32m~/Documents/Development/python/v2trading/.venv/lib/python3.10/site-packages/alpaca/common/rest.py:193\u001b[0m, in \u001b[0;36mRESTClient._one_request\u001b[0;34m(self, method, url, opts, retry)\u001b[0m\n\u001b[1;32m 174\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_one_request\u001b[39m(\u001b[38;5;28mself\u001b[39m, method: \u001b[38;5;28mstr\u001b[39m, url: \u001b[38;5;28mstr\u001b[39m, opts: \u001b[38;5;28mdict\u001b[39m, retry: \u001b[38;5;28mint\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28mdict\u001b[39m:\n\u001b[1;32m 175\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Perform one request, possibly raising RetryException in the case\u001b[39;00m\n\u001b[1;32m 176\u001b[0m \u001b[38;5;124;03m the response is 429. Otherwise, if error text contain \"code\" string,\u001b[39;00m\n\u001b[1;32m 177\u001b[0m \u001b[38;5;124;03m then it decodes to json object and returns APIError.\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 191\u001b[0m \u001b[38;5;124;03m dict: The response data\u001b[39;00m\n\u001b[1;32m 192\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 193\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_session\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mopts\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 195\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 196\u001b[0m response\u001b[38;5;241m.\u001b[39mraise_for_status()\n",
"File \u001b[0;32m~/Documents/Development/python/v2trading/.venv/lib/python3.10/site-packages/requests/sessions.py:589\u001b[0m, in \u001b[0;36mSession.request\u001b[0;34m(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)\u001b[0m\n\u001b[1;32m 584\u001b[0m send_kwargs \u001b[38;5;241m=\u001b[39m {\n\u001b[1;32m 585\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtimeout\u001b[39m\u001b[38;5;124m\"\u001b[39m: timeout,\n\u001b[1;32m 586\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mallow_redirects\u001b[39m\u001b[38;5;124m\"\u001b[39m: allow_redirects,\n\u001b[1;32m 587\u001b[0m }\n\u001b[1;32m 588\u001b[0m send_kwargs\u001b[38;5;241m.\u001b[39mupdate(settings)\n\u001b[0;32m--> 589\u001b[0m resp \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msend\u001b[49m\u001b[43m(\u001b[49m\u001b[43mprep\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43msend_kwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 591\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m resp\n",
"File \u001b[0;32m~/Documents/Development/python/v2trading/.venv/lib/python3.10/site-packages/requests/sessions.py:703\u001b[0m, in \u001b[0;36mSession.send\u001b[0;34m(self, request, **kwargs)\u001b[0m\n\u001b[1;32m 700\u001b[0m start \u001b[38;5;241m=\u001b[39m preferred_clock()\n\u001b[1;32m 702\u001b[0m \u001b[38;5;66;03m# Send the request\u001b[39;00m\n\u001b[0;32m--> 703\u001b[0m r \u001b[38;5;241m=\u001b[39m \u001b[43madapter\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msend\u001b[49m\u001b[43m(\u001b[49m\u001b[43mrequest\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 705\u001b[0m \u001b[38;5;66;03m# Total elapsed time of the request (approximately)\u001b[39;00m\n\u001b[1;32m 706\u001b[0m elapsed \u001b[38;5;241m=\u001b[39m preferred_clock() \u001b[38;5;241m-\u001b[39m start\n",
"File \u001b[0;32m~/Documents/Development/python/v2trading/.venv/lib/python3.10/site-packages/requests/adapters.py:486\u001b[0m, in \u001b[0;36mHTTPAdapter.send\u001b[0;34m(self, request, stream, timeout, verify, cert, proxies)\u001b[0m\n\u001b[1;32m 483\u001b[0m timeout \u001b[38;5;241m=\u001b[39m TimeoutSauce(connect\u001b[38;5;241m=\u001b[39mtimeout, read\u001b[38;5;241m=\u001b[39mtimeout)\n\u001b[1;32m 485\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 486\u001b[0m resp \u001b[38;5;241m=\u001b[39m \u001b[43mconn\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43murlopen\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 487\u001b[0m \u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 488\u001b[0m \u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 489\u001b[0m \u001b[43m \u001b[49m\u001b[43mbody\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbody\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 490\u001b[0m \u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 491\u001b[0m \u001b[43m \u001b[49m\u001b[43mredirect\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 492\u001b[0m \u001b[43m \u001b[49m\u001b[43massert_same_host\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 493\u001b[0m \u001b[43m \u001b[49m\u001b[43mpreload_content\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 494\u001b[0m \u001b[43m \u001b[49m\u001b[43mdecode_content\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 495\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmax_retries\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 496\u001b[0m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 497\u001b[0m \u001b[43m \u001b[49m\u001b[43mchunked\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mchunked\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 498\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 500\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (ProtocolError, \u001b[38;5;167;01mOSError\u001b[39;00m) \u001b[38;5;28;01mas\u001b[39;00m err:\n\u001b[1;32m 501\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mConnectionError\u001b[39;00m(err, request\u001b[38;5;241m=\u001b[39mrequest)\n",
"File \u001b[0;32m~/Documents/Development/python/v2trading/.venv/lib/python3.10/site-packages/urllib3/connectionpool.py:703\u001b[0m, in \u001b[0;36mHTTPConnectionPool.urlopen\u001b[0;34m(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)\u001b[0m\n\u001b[1;32m 700\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_prepare_proxy(conn)\n\u001b[1;32m 702\u001b[0m \u001b[38;5;66;03m# Make the request on the httplib connection object.\u001b[39;00m\n\u001b[0;32m--> 703\u001b[0m httplib_response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_make_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 704\u001b[0m \u001b[43m \u001b[49m\u001b[43mconn\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 705\u001b[0m \u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 706\u001b[0m \u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 707\u001b[0m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout_obj\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 708\u001b[0m \u001b[43m \u001b[49m\u001b[43mbody\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbody\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 709\u001b[0m \u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 710\u001b[0m \u001b[43m \u001b[49m\u001b[43mchunked\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mchunked\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 711\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 713\u001b[0m \u001b[38;5;66;03m# If we're going to release the connection in ``finally:``, then\u001b[39;00m\n\u001b[1;32m 714\u001b[0m \u001b[38;5;66;03m# the response doesn't need to know about the connection. Otherwise\u001b[39;00m\n\u001b[1;32m 715\u001b[0m \u001b[38;5;66;03m# it will also try to release it and we'll have a double-release\u001b[39;00m\n\u001b[1;32m 716\u001b[0m \u001b[38;5;66;03m# mess.\u001b[39;00m\n\u001b[1;32m 717\u001b[0m response_conn \u001b[38;5;241m=\u001b[39m conn \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m release_conn \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n",
"File \u001b[0;32m~/Documents/Development/python/v2trading/.venv/lib/python3.10/site-packages/urllib3/connectionpool.py:449\u001b[0m, in \u001b[0;36mHTTPConnectionPool._make_request\u001b[0;34m(self, conn, method, url, timeout, chunked, **httplib_request_kw)\u001b[0m\n\u001b[1;32m 444\u001b[0m httplib_response \u001b[38;5;241m=\u001b[39m conn\u001b[38;5;241m.\u001b[39mgetresponse()\n\u001b[1;32m 445\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mBaseException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 446\u001b[0m \u001b[38;5;66;03m# Remove the TypeError from the exception chain in\u001b[39;00m\n\u001b[1;32m 447\u001b[0m \u001b[38;5;66;03m# Python 3 (including for exceptions like SystemExit).\u001b[39;00m\n\u001b[1;32m 448\u001b[0m \u001b[38;5;66;03m# Otherwise it looks like a bug in the code.\u001b[39;00m\n\u001b[0;32m--> 449\u001b[0m \u001b[43msix\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mraise_from\u001b[49m\u001b[43m(\u001b[49m\u001b[43me\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\n\u001b[1;32m 450\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (SocketTimeout, BaseSSLError, SocketError) \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 451\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_raise_timeout(err\u001b[38;5;241m=\u001b[39me, url\u001b[38;5;241m=\u001b[39murl, timeout_value\u001b[38;5;241m=\u001b[39mread_timeout)\n",
"File \u001b[0;32m<string>:3\u001b[0m, in \u001b[0;36mraise_from\u001b[0;34m(value, from_value)\u001b[0m\n",
"File \u001b[0;32m~/Documents/Development/python/v2trading/.venv/lib/python3.10/site-packages/urllib3/connectionpool.py:444\u001b[0m, in \u001b[0;36mHTTPConnectionPool._make_request\u001b[0;34m(self, conn, method, url, timeout, chunked, **httplib_request_kw)\u001b[0m\n\u001b[1;32m 441\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m:\n\u001b[1;32m 442\u001b[0m \u001b[38;5;66;03m# Python 3\u001b[39;00m\n\u001b[1;32m 443\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 444\u001b[0m httplib_response \u001b[38;5;241m=\u001b[39m \u001b[43mconn\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgetresponse\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 445\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mBaseException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 446\u001b[0m \u001b[38;5;66;03m# Remove the TypeError from the exception chain in\u001b[39;00m\n\u001b[1;32m 447\u001b[0m \u001b[38;5;66;03m# Python 3 (including for exceptions like SystemExit).\u001b[39;00m\n\u001b[1;32m 448\u001b[0m \u001b[38;5;66;03m# Otherwise it looks like a bug in the code.\u001b[39;00m\n\u001b[1;32m 449\u001b[0m six\u001b[38;5;241m.\u001b[39mraise_from(e, \u001b[38;5;28;01mNone\u001b[39;00m)\n",
"File \u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/http/client.py:1375\u001b[0m, in \u001b[0;36mHTTPConnection.getresponse\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1373\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 1374\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 1375\u001b[0m \u001b[43mresponse\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbegin\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1376\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mConnectionError\u001b[39;00m:\n\u001b[1;32m 1377\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mclose()\n",
"File \u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/http/client.py:318\u001b[0m, in \u001b[0;36mHTTPResponse.begin\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 316\u001b[0m \u001b[38;5;66;03m# read until we get a non-100 response\u001b[39;00m\n\u001b[1;32m 317\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[0;32m--> 318\u001b[0m version, status, reason \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_read_status\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 319\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m status \u001b[38;5;241m!=\u001b[39m CONTINUE:\n\u001b[1;32m 320\u001b[0m \u001b[38;5;28;01mbreak\u001b[39;00m\n",
"File \u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/http/client.py:279\u001b[0m, in \u001b[0;36mHTTPResponse._read_status\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 278\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_read_status\u001b[39m(\u001b[38;5;28mself\u001b[39m):\n\u001b[0;32m--> 279\u001b[0m line \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mstr\u001b[39m(\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mreadline\u001b[49m\u001b[43m(\u001b[49m\u001b[43m_MAXLINE\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m)\u001b[49m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124miso-8859-1\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 280\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(line) \u001b[38;5;241m>\u001b[39m _MAXLINE:\n\u001b[1;32m 281\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m LineTooLong(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mstatus line\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
"File \u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/socket.py:705\u001b[0m, in \u001b[0;36mSocketIO.readinto\u001b[0;34m(self, b)\u001b[0m\n\u001b[1;32m 703\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[1;32m 704\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 705\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_sock\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrecv_into\u001b[49m\u001b[43m(\u001b[49m\u001b[43mb\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 706\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m timeout:\n\u001b[1;32m 707\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_timeout_occurred \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n",
"File \u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/ssl.py:1274\u001b[0m, in \u001b[0;36mSSLSocket.recv_into\u001b[0;34m(self, buffer, nbytes, flags)\u001b[0m\n\u001b[1;32m 1270\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m flags \u001b[38;5;241m!=\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[1;32m 1271\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 1272\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnon-zero flags not allowed in calls to recv_into() on \u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m%\u001b[39m\n\u001b[1;32m 1273\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__class__\u001b[39m)\n\u001b[0;32m-> 1274\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread\u001b[49m\u001b[43m(\u001b[49m\u001b[43mnbytes\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbuffer\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1275\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1276\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28msuper\u001b[39m()\u001b[38;5;241m.\u001b[39mrecv_into(buffer, nbytes, flags)\n",
"File \u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/ssl.py:1130\u001b[0m, in \u001b[0;36mSSLSocket.read\u001b[0;34m(self, len, buffer)\u001b[0m\n\u001b[1;32m 1128\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 1129\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m buffer \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m-> 1130\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_sslobj\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mlen\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbuffer\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1131\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1132\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_sslobj\u001b[38;5;241m.\u001b[39mread(\u001b[38;5;28mlen\u001b[39m)\n",
"\u001b[0;31mKeyboardInterrupt\u001b[0m: "
]
}
],
"source": [
"trades_df = fetch_daily_stock_trades(symbol, day_start, day_stop, exclude_conditions=exclude_conditions, minsize=minsize, force_remote=False, max_retries=5, backoff_factor=1)\n",
"trades_df"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"#Either load trades or ohlcv from parquet if exists\n",
"\n",
"#trades_df = fetch_trades_parallel(symbol, day_start, day_stop, exclude_conditions=exclude_conditions, minsize=50, max_workers=20) #exclude_conditions=['C','O','4','B','7','V','P','W','U','Z','F'])\n",
"# trades_df.to_parquet(file_trades, engine='pyarrow', compression='gzip')\n",
"\n",
"trades_df = pd.read_parquet(file_trades,engine='pyarrow')\n",
"ohlcv_df = aggregate_trades(symbol=symbol, trades_df=trades_df, resolution=1, type=BarType.TIME)\n",
"ohlcv_df.to_parquet(file_ohlcv, engine='pyarrow', compression='gzip')\n",
"\n",
"# ohlcv_df = pd.read_parquet(file_ohlcv,engine='pyarrow')\n",
"# trades_df = pd.read_parquet(file_trades,engine='pyarrow')\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#list all files is dir directory with parquet extension\n",
"dir = DATA_DIR + \"/notebooks/\"\n",
"import os\n",
"files = [f for f in os.listdir(dir) if f.endswith(\".parquet\")]\n",
"file_name = \"\"\n",
"ohlcv_df = pd.read_parquet(file_ohlcv,engine='pyarrow')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ohlcv_df"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"# Calculate daily returns\n",
"ohlcv_df['returns'] = ohlcv_df['close'].pct_change().dropna()\n",
"#same as above but pct_change is from 3 datapoints back, but only if it is the same date, else na\n",
"\n",
"\n",
"# Plot the probability distribution curve\n",
"plt.figure(figsize=(10, 6))\n",
"sns.histplot(df['returns'].dropna(), kde=True, stat='probability', bins=30)\n",
"plt.title('Probability Distribution of Daily Returns')\n",
"plt.xlabel('Daily Returns')\n",
"plt.ylabel('Probability')\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.preprocessing import StandardScaler\n",
"from sklearn.linear_model import LogisticRegression\n",
"from sklearn.metrics import accuracy_score\n",
"\n",
"# Define the intervals from 5 to 20 s, returns for each interval\n",
"#maybe use rolling window?\n",
"intervals = range(5, 21, 5)\n",
"\n",
"# Create columns for percentage returns\n",
"rolling_window = 50\n",
"\n",
"# Normalize the returns using rolling mean and std\n",
"for N in intervals:\n",
" column_name = f'returns_{N}'\n",
" rolling_mean = ohlcv_df[column_name].rolling(window=rolling_window).mean()\n",
" rolling_std = ohlcv_df[column_name].rolling(window=rolling_window).std()\n",
" ohlcv_df[f'norm_{column_name}'] = (ohlcv_df[column_name] - rolling_mean) / rolling_std\n",
"\n",
"# Display the dataframe with normalized return columns\n",
"ohlcv_df\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Calculate the sum of the normalized return columns for each row\n",
"ohlcv_df['sum_norm_returns'] = ohlcv_df[[f'norm_returns_{N}' for N in intervals]].sum(axis=1)\n",
"\n",
"# Sort the DataFrame based on the sum of normalized returns in descending order\n",
"df_sorted = ohlcv_df.sort_values(by='sum_norm_returns', ascending=False)\n",
"\n",
"# Display the top rows with the highest sum of normalized returns\n",
"df_sorted\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Drop initial rows with NaN values due to pct_change\n",
"ohlcv_df.dropna(inplace=True)\n",
"\n",
"# Plotting the probability distribution curves\n",
"plt.figure(figsize=(14, 8))\n",
"for N in intervals:\n",
" sns.kdeplot(ohlcv_df[f'returns_{N}'].dropna(), label=f'Returns {N}', fill=True)\n",
"\n",
"plt.title('Probability Distribution of Percentage Returns')\n",
"plt.xlabel('Percentage Return')\n",
"plt.ylabel('Density')\n",
"plt.legend()\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"# Plot the probability distribution curve\n",
"plt.figure(figsize=(10, 6))\n",
"sns.histplot(ohlcv_df['returns'].dropna(), kde=True, stat='probability', bins=30)\n",
"plt.title('Probability Distribution of Daily Returns')\n",
"plt.xlabel('Daily Returns')\n",
"plt.ylabel('Probability')\n",
"plt.show()\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#show only rows from ohlcv_df where returns > 0.005\n",
"ohlcv_df[ohlcv_df['returns'] > 0.0005]\n",
"\n",
"#ohlcv_df[ohlcv_df['returns'] < -0.005]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#ohlcv where index = date 2024-03-13 and between hour 12\n",
"\n",
"a = ohlcv_df.loc['2024-03-13 12:00:00':'2024-03-13 13:00:00']\n",
"a"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ohlcv_df"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"trades_df"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ohlcv_df.info()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"trades_df.to_parquet(\"trades_df-spy-0111-0111.parquett\", engine='pyarrow', compression='gzip')\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"trades_df.to_parquet(\"trades_df-spy-111-0516.parquett\", engine='pyarrow', compression='gzip', allow_truncated_timestamps=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ohlcv_df.to_parquet(\"ohlcv_df-spy-111-0516.parquett\", engine='pyarrow', compression='gzip')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"basic_data = vbt.Data.from_data(vbt.symbol_dict({symbol: ohlcv_df}), tz_convert=zoneNY)\n",
"vbt.settings['plotting']['auto_rangebreaks'] = True\n",
"basic_data.ohlcv.plot()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#access just BCA\n",
"#df_filtered = df.loc[\"BAC\"]"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.11"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@@ -1,316 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Loading trades and vectorized aggregation"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"from numba import jit\n",
"from alpaca.data.historical import StockHistoricalDataClient\n",
"from v2realbot.config import ACCOUNT1_PAPER_API_KEY, ACCOUNT1_PAPER_SECRET_KEY, DATA_DIR\n",
"from alpaca.data.requests import StockTradesRequest\n",
"from v2realbot.enums.enums import BarType\n",
"import time\n",
"\n",
"from datetime import datetime\n",
"from v2realbot.utils.utils import parse_alpaca_timestamp, ltp, zoneNY, send_to_telegram, fetch_calendar_data\n",
"import pyarrow\n",
"from v2realbot.loader.aggregator_vectorized import fetch_daily_stock_trades, fetch_trades_parallel, generate_time_bars_nb, aggregate_trades\n",
"import vectorbtpro as vbt\n",
"\n",
"vbt.settings.set_theme(\"dark\")\n",
"vbt.settings['plotting']['layout']['width'] = 1280\n",
"vbt.settings.plotting.auto_rangebreaks = True\n",
"# Set the option to display with pagination\n",
"pd.set_option('display.notebook_repr_html', True)\n",
"pd.set_option('display.max_rows', 10) # Number of rows per page"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"symbol = \"SPY\"\n",
"#datetime in zoneNY \n",
"day_start = datetime(2024, 5, 15, 9, 30, 0)\n",
"day_stop = datetime(2024, 5, 16, 16, 00, 0)\n",
"day_start = zoneNY.localize(day_start)\n",
"day_stop = zoneNY.localize(day_stop)\n",
"#neslo by zrychlit, kdyz se zobrazuje pomalu Searching cache - nejaky bottle neck?\n",
"df = fetch_trades_parallel(symbol, day_start, day_stop, minsize=50) #exclude_conditions=['C','O','4','B','7','V','P','W','U','Z','F'])\n",
"ohlcv_df = aggregate_trades(symbol=symbol, trades_df=df, resolution=1, type=BarType.TIME)\n",
"#df.info()\n",
"ohlcv_df\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"basic_data = vbt.Data.from_data(vbt.symbol_dict({symbol: ohlcv_df}), tz_convert=zoneNY)\n",
"vbt.settings['plotting']['auto_rangebreaks'] = True\n",
"basic_data.ohlcv.plot()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pickle\n",
"from v2realbot.config import ACCOUNT1_PAPER_API_KEY, ACCOUNT1_PAPER_SECRET_KEY, DATA_DIR\n",
"import gzip\n",
"\n",
"file_path = f\"{DATA_DIR}/tradecache/BAC-1709044200-1709067600.cache.gz\"\n",
"\n",
"with gzip.open(file_path, 'rb') as fp:\n",
" tradesResponse = pickle.load(fp)\n",
"\n",
"tradesResponse"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def convert_dict_to_multiindex_df(tradesResponse):\n",
" # Create a DataFrame for each key and add the key as part of the MultiIndex\n",
" dfs = []\n",
" for key, values in tradesResponse.items():\n",
" df = pd.DataFrame(values)\n",
" # Rename columns\n",
" # Select and order columns explicitly\n",
" #print(df)\n",
" df = df[['t', 'x', 'p', 's', 'i', 'c','z']]\n",
" df.rename(columns={'t': 'timestamp', 'c': 'conditions', 'p': 'price', 's': 'size', 'x': 'exchange', 'z':'tape', 'i':'id'}, inplace=True)\n",
" df['symbol'] = key # Add ticker as a column\n",
" df['timestamp'] = pd.to_datetime(df['timestamp']) # Convert 't' from string to datetime before setting it as an index\n",
" df.set_index(['symbol', 'timestamp'], inplace=True) # Set the multi-level index using both 'ticker' and 't'\n",
" df = df.tz_convert(zoneNY, level='timestamp')\n",
" dfs.append(df)\n",
"\n",
" # Concatenate all DataFrames into a single DataFrame with MultiIndex\n",
" final_df = pd.concat(dfs)\n",
"\n",
" return final_df\n",
"\n",
"# Convert and print the DataFrame\n",
"df = convert_dict_to_multiindex_df(tradesResponse)\n",
"df\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.info()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ohlcv_df.info()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ohlcv_df.info()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ohlcv_df = aggregate_trades(symbol=symbol, trades_df=df, resolution=1000, type=\"dollar\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ohlcv_df.index.strftime('%Y-%m-%d %H').unique()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#ohlcv_df.groupby(ohlcv_df.index.date).size()\n",
"ohlcv_df.head(100)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#access just BCA\n",
"df_filtered = df.loc[\"BAC\"]\n",
"\n",
"df_filtered.info()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_filtered= df_filtered.reset_index()\n",
"ticks = df_filtered[['timestamp', 'price', 'size']].to_numpy()\n",
"ticks\n",
"timestamps = ticks[:, 0]\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_filtered= df_filtered.reset_index()\n",
"ticks = df_filtered[['timestamp', 'price', 'size']].to_numpy()\n",
"\n",
"#timestamp to integer\n",
"# Extract the timestamps column (assuming it's the first column)\n",
"timestamps = ticks[:, 0]\n",
"\n",
"# Convert the timestamps to Unix timestamps in seconds with microsecond precision\n",
"unix_timestamps_s = np.array([ts.timestamp() for ts in timestamps], dtype='float64')\n",
"\n",
"# Replace the original timestamps in the NumPy array with the converted Unix timestamps\n",
"ticks[:, 0] = unix_timestamps_s\n",
"\n",
"#ticks[:, 0] = pd.to_datetime(ticks[:, 0]).astype('int64') // 1_000_000_000 # Convert to Unix timestamp\n",
"ticks\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ticks = ticks.astype(np.float64)\n",
"ticks"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n",
"resolution = 1 # Example resolution of 60 seconds\n",
"ohlcv_bars = generate_time_bars_nb(ticks, resolution)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ohlcv_bars"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Convert the resulting array back to a DataFrame\n",
"columns = ['time', 'open', 'high', 'low', 'close', 'volume', 'trades']\n",
"ohlcv_df = pd.DataFrame(ohlcv_bars, columns=columns)\n",
"ohlcv_df['time'] = pd.to_datetime(ohlcv_df['time'], unit='s')\n",
"ohlcv_df.set_index('time', inplace=True)\n",
"ohlcv_df.index = ohlcv_df.index.tz_localize('UTC').tz_convert(zoneNY)\n",
"#ohlcv_df = ohlcv_df.loc[\"2024-03-1 15:50:00\":\"2024-03-28 13:40:00\"]\n",
"#ohlcv_df.index.strftime('%Y-%m-%d %H').unique()\n",
"\n",
"ohlcv_df"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.11"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load Diff

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

105
research/test1.ipynb Normal file

File diff suppressed because one or more lines are too long

View File

@@ -1,7 +1,9 @@
import os,sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
print(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from alpaca.data.historical import CryptoHistoricalDataClient, StockHistoricalDataClient
import pandas as pd
import numpy as np
from alpaca.data.historical import StockHistoricalDataClient
from alpaca.data.requests import CryptoLatestTradeRequest, StockLatestTradeRequest, StockLatestBarRequest, StockTradesRequest
from alpaca.data.enums import DataFeed
from v2realbot.config import ACCOUNT1_PAPER_API_KEY, ACCOUNT1_PAPER_SECRET_KEY

View File

@@ -0,0 +1,66 @@
import os
from bs4 import BeautifulSoup
import html2text

def convert_html_to_markdown(html_content, link_mapping):
    h = html2text.HTML2Text()
    h.ignore_links = False
    # Update internal links to point to the relevant sections in the Markdown
    soup = BeautifulSoup(html_content, 'html.parser')
    for a in soup.find_all('a', href=True):
        href = a['href']
        if href in link_mapping:
            a['href'] = f"#{link_mapping[href]}"
    return h.handle(str(soup))

def create_link_mapping(root_dir):
    link_mapping = {}
    for subdir, _, files in os.walk(root_dir):
        for file in files:
            if file == "index.html":
                relative_path = os.path.relpath(os.path.join(subdir, file), root_dir)
                chapter_id = relative_path.replace(os.sep, '-').replace('index.html', '')
                link_mapping[relative_path] = chapter_id
                link_mapping[relative_path.replace(os.sep, '/')] = chapter_id  # for URLs with slashes
    return link_mapping

def read_html_files(root_dir, link_mapping):
    markdown_content = []
    for subdir, _, files in os.walk(root_dir):
        relative_path = os.path.relpath(subdir, root_dir)
        if files and any(file == "index.html" for file in files):
            # Add directory as a heading based on its depth
            heading_level = relative_path.count(os.sep) + 1
            markdown_content.append(f"{'#' * heading_level} {relative_path}\n")
        for file in files:
            if file == "index.html":
                file_path = os.path.join(subdir, file)
                with open(file_path, 'r', encoding='utf-8') as f:
                    html_content = f.read()
                soup = BeautifulSoup(html_content, 'html.parser')
                title = soup.title.string if soup.title else "No Title"
                chapter_id = os.path.relpath(file_path, root_dir).replace(os.sep, '-').replace('index.html', '')
                markdown_content.append(f"<a id='{chapter_id}'></a>\n")
                markdown_content.append(f"{'#' * (heading_level + 1)} {title}\n")
                markdown_content.append(convert_html_to_markdown(html_content, link_mapping))
    return "\n".join(markdown_content)

def save_to_markdown_file(content, output_file):
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(content)

def main():
    root_dir = "./v2realbot/static/js/vbt/"
    output_file = "output.md"
    link_mapping = create_link_mapping(root_dir)
    markdown_content = read_html_files(root_dir, link_mapping)
    save_to_markdown_file(markdown_content, output_file)
    print(f"Markdown document created at {output_file}")

if __name__ == "__main__":
    main()
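# Usage sketch (the script filename is hypothetical): assuming the vbt HTML docs
# tree exists at root_dir above, running
#   python convert_vbt_docs.py
# walks the tree and writes the combined Markdown document to output.md.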

View File

@@ -4,6 +4,7 @@ from appdirs import user_data_dir
from pathlib import Path
import os
from collections import defaultdict
from dotenv import load_dotenv
# Global flag to track if the ml module has been imported (solution for long import times of tensorflow)
#the first occurrence of using it will load it globally
_ml_module_loaded = False
@@ -29,6 +30,12 @@ MODEL_DIR = Path(DATA_DIR)/"models"
PROFILING_NEXT_ENABLED = False
PROFILING_OUTPUT_DIR = DATA_DIR
#LOAD DOTENV ENV VARIABLES
if load_dotenv(ENV_FILE, verbose=True) is False:
    raise Exception(f"Error loading .env file {ENV_FILE}")
else:
    print(f"Loaded env variables from file {ENV_FILE}")
#WIP - FILL CONFIGURATION CLASS FOR BACKTESTING
class BT_FILL_CONF:
""""

View File

@@ -18,6 +18,7 @@ import random
from alpaca.data.models import BarSet, QuoteSet, TradeSet
import v2realbot.utils.config_handler as cfh
from v2realbot.enums.enums import BarType
from tqdm import tqdm
""""
Module used for vectorized aggregation of trades.
@@ -28,7 +29,7 @@ Includes fetch (remote/cached) methods and numba aggregator function for TIME BA
def aggregate_trades(symbol: str, trades_df: pd.DataFrame, resolution: int, type: BarType = BarType.TIME):
    """
    Accepts dataframe with trades keyed by symbol. Prepares dataframe to
    numpy and call nNumba optimized aggregator for given bar type. (time/volume/dollar)
    numpy and calls Numba optimized aggregator for given bar type. (time/volume/dollar)
    """
    trades_df = trades_df.loc[symbol]
    trades_df = trades_df.reset_index()
@@ -54,15 +55,37 @@ def aggregate_trades(symbol: str, trades_df: pd.DataFrame, resolution: int, type
    columns = ['time', 'open', 'high', 'low', 'close', 'volume', 'trades']
    if type == BarType.DOLLAR:
        columns.append('amount')
    columns.append('updated')
    if type == BarType.TIME:
        columns.append('vwap')
        columns.append('buyvolume')
        columns.append('sellvolume')
    if type == BarType.VOLUME:
        columns.append('buyvolume')
        columns.append('sellvolume')
    ohlcv_df = pd.DataFrame(ohlcv_bars, columns=columns)
    ohlcv_df['time'] = pd.to_datetime(ohlcv_df['time'], unit='s')
    ohlcv_df['time'] = pd.to_datetime(ohlcv_df['time'], unit='s').dt.tz_localize('UTC').dt.tz_convert(zoneNY)
    #print(ohlcv_df['updated'])
    ohlcv_df['updated'] = pd.to_datetime(ohlcv_df['updated'], unit="s").dt.tz_localize('UTC').dt.tz_convert(zoneNY)
    # Round to microseconds to maintain six decimal places
    ohlcv_df['updated'] = ohlcv_df['updated'].dt.round('us')
    ohlcv_df.set_index('time', inplace=True)
    ohlcv_df.index = ohlcv_df.index.tz_localize('UTC').tz_convert(zoneNY)
    #ohlcv_df.index = ohlcv_df.index.tz_localize('UTC').tz_convert(zoneNY)
    return ohlcv_df
# Function to ensure fractional seconds are present
def ensure_fractional_seconds(timestamp):
    if '.' not in timestamp:
        # Inserting .000000 before the timezone indicator 'Z'
        return timestamp.replace('Z', '.000000Z')
    else:
        return timestamp
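# Illustrative behavior of ensure_fractional_seconds (matches the logic above):
#   '2024-01-11T14:30:05Z'         -> '2024-01-11T14:30:05.000000Z'
#   '2024-01-11T14:30:05.123456Z'  -> returned unchanged (already has fractional seconds)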
def convert_dict_to_multiindex_df(tradesResponse):
    """
    Converts a dictionary from cache or from remote (raw input) to a multiindex dataframe
    with microsecond precision (from nanoseconds in the raw data).
    """
    # Create a DataFrame for each key and add the key as part of the MultiIndex
    dfs = []
@@ -74,7 +97,17 @@ def convert_dict_to_multiindex_df(tradesResponse):
        df = df[['t', 'x', 'p', 's', 'i', 'c','z']]
        df.rename(columns={'t': 'timestamp', 'c': 'conditions', 'p': 'price', 's': 'size', 'x': 'exchange', 'z':'tape', 'i':'id'}, inplace=True)
        df['symbol'] = key # Add ticker as a column
        df['timestamp'] = pd.to_datetime(df['timestamp']) # Convert 't' from string to datetime before setting it as an index
        # Apply the function to ensure all timestamps have fractional seconds
        #consider whether to keep this, or apply it only on a specific to_datetime error
        #possibly add a more efficient approach later, i.e. replacing NaT - https://chatgpt.com/c/d2be6f87-b38f-4050-a1c6-541d100b1474
        df['timestamp'] = df['timestamp'].apply(ensure_fractional_seconds)
        df['timestamp'] = pd.to_datetime(df['timestamp'], errors='coerce') # Convert 't' from string to datetime before setting it as an index
        #Adjust to microsecond precision
        df.loc[df['timestamp'].notna(), 'timestamp'] = df['timestamp'].dt.floor('us')
        df.set_index(['symbol', 'timestamp'], inplace=True) # Set the multi-level index using both 'ticker' and 't'
        df = df.tz_convert(zoneNY, level='timestamp')
        dfs.append(df)
@@ -123,66 +156,6 @@ def dict_to_df(tradesResponse, start, end, exclude_conditions = None, minsize =
        df = df[df['size'] >= minsize]
    return df
#fetches daily stock trades - currently only main session is supported
def fetch_daily_stock_trades_old(symbol, start, end, exclude_conditions = None, minsize = None, force_remote = False, max_retries=5, backoff_factor=1):
    """
    Attempts to fetch stock trades with exponential backoff. Raises an exception if all retries fail.

    :param symbol: The stock symbol to fetch trades for.
    :param start: The start time for the trade data.
    :param end: The end time for the trade data.
    :param max_retries: Maximum number of retries.
    :param backoff_factor: Factor to determine the next sleep time.
    :return: TradesResponse object.
    :raises: ConnectionError if all retries fail.

    We use tradecache only for main session request = 9:30 to 16:00
    """
    use_daily_tradecache = False
    if (start.time() >= time(9, 30) and end.time() <= time(16, 0)):
        use_daily_tradecache = True
        filename_start = zoneNY.localize(datetime.combine(start.date(), time(9, 30)))
        filename_end = zoneNY.localize(datetime.combine(end.date(), time(16, 0)))
        daily_file = "TS" + str(symbol) + '-' + str(int(filename_start.timestamp())) + '-' + str(int(filename_end.timestamp())) + '.cache.gz'
        file_path = DATA_DIR + "/tradecache/" + daily_file
    if use_daily_tradecache and not force_remote and os.path.exists(file_path):
        print("Searching cache: " + daily_file)
        with gzip.open(file_path, 'rb') as fp:
            tradesResponse = pickle.load(fp)
        print("FOUND in CACHE", daily_file)
        #the response is always stored raw (dict); we put it back into a TradeSet, which can also produce a df
        return dict_to_df(tradesResponse, start, end, exclude_conditions, minsize)
    #daily file doesn't exist
    else:
        print("NOT FOUND. Fetching from remote")
        client = StockHistoricalDataClient(ACCOUNT1_PAPER_API_KEY, ACCOUNT1_PAPER_SECRET_KEY, raw_data=False)
        stockTradeRequest = StockTradesRequest(symbol_or_symbols=symbol, start=start, end=end)
        last_exception = None
        for attempt in range(max_retries):
            try:
                tradesResponse = client.get_stock_trades(stockTradeRequest)
                is_empty = not tradesResponse[symbol]
                print(f"Remote fetched: {is_empty=}", start, end)
                #if this is today and the market has not closed yet, don't save the cache; likewise don't cache IEX datapoints
                if use_daily_tradecache and not is_empty:
                    if (start < datetime.now().astimezone(zoneNY) < end):
                        print("not saving trade cache, market still open today")
                    else:
                        with gzip.open(file_path, 'wb') as fp:
                            pickle.dump(tradesResponse, fp)
                        print("Saving to Trade CACHE", file_path)
                return pd.DataFrame() if is_empty else dict_to_df(tradesResponse, start, end)
            except Exception as e:
                print(f"Attempt {attempt + 1} failed: {e}")
                last_exception = e
                time_module.sleep(backoff_factor * (2 ** attempt))
    print("All attempts to fetch data failed.")
    raise ConnectionError(f"Failed to fetch stock trades after {max_retries} retries. Last exception: {str(last_exception)} and {format_exc()}")
def fetch_daily_stock_trades(symbol, start, end, exclude_conditions=None, minsize=None, force_remote=False, max_retries=5, backoff_factor=1):
    #doc for this function
    """
@@ -205,6 +178,7 @@ def fetch_daily_stock_trades(symbol, start, end, exclude_conditions=None, minsiz
    In the future, store the whole day as BAC-20240203.cache.gz and filter either the main session or extended hours from it.
    For now only the main session is stored, as BAC-<open-timestamp>-<close-timestamp>.cache.gz.
    """
    is_same_day = start.date() == end.date()
    # Determine if the requested times fall within the main session
    in_main_session = (time(9, 30) <= start.time() < time(16, 0)) and (time(9, 30) <= end.time() <= time(16, 0))
    file_path = ''
@@ -215,7 +189,7 @@ def fetch_daily_stock_trades(symbol, start, end, exclude_conditions=None, minsiz
daily_file = f"{symbol}-{int(filename_start.timestamp())}-{int(filename_end.timestamp())}.cache.gz"
file_path = f"{DATA_DIR}/tradecache/{daily_file}"
if not force_remote and os.path.exists(file_path):
print("Searching cache: " + daily_file)
print(f"Searching {str(start.date())} cache: " + daily_file)
with gzip.open(file_path, 'rb') as fp:
tradesResponse = pickle.load(fp)
print("FOUND in CACHE", daily_file)
@@ -240,7 +214,7 @@ def fetch_daily_stock_trades(symbol, start, end, exclude_conditions=None, minsiz
else: # Don't save the cache if the market is still open
print("Not saving trade cache, market still open today")
return pd.DataFrame() if is_empty else dict_to_df(tradesResponse, start, end)
return pd.DataFrame() if is_empty else dict_to_df(tradesResponse, start, end, exclude_conditions, minsize)
except Exception as e:
print(f"Attempt {attempt + 1} failed: {e}")
last_exception = e
@@ -250,7 +224,7 @@ def fetch_daily_stock_trades(symbol, start, end, exclude_conditions=None, minsiz
raise ConnectionError(f"Failed to fetch stock trades after {max_retries} retries. Last exception: {str(last_exception)} and {format_exc()}")
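# Hedged usage sketch for the cache-aware fetcher above: one main-session day
# of SPY. Cache hits resolve from DATA_DIR/tradecache/; otherwise the data is
# fetched remotely with retries. The condition codes are illustrative.
# start = zoneNY.localize(datetime(2024, 1, 11, 9, 30, 0))
# end = zoneNY.localize(datetime(2024, 1, 11, 16, 0, 0))
# trades_df = fetch_daily_stock_trades("SPY", start, end,
#                                      exclude_conditions=['C', 'O'], minsize=100)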
def fetch_trades_parallel(symbol, start_date, end_date, exclude_conditions = cfh.config_handler.get_val('AGG_EXCLUDED_TRADES'), minsize = 100, force_remote = False):
def fetch_trades_parallel(symbol, start_date, end_date, exclude_conditions = cfh.config_handler.get_val('AGG_EXCLUDED_TRADES'), minsize = 100, force_remote = False, max_workers=None):
"""
Fetches trades for each day between start_date and end_date during market hours (9:30-16:00) in parallel and concatenates them into a single DataFrame.
@@ -265,12 +239,11 @@ def fetch_trades_parallel(symbol, start_date, end_date, exclude_conditions = cfh
market_open_days = fetch_calendar_data(start_date, end_date)
day_count = len(market_open_days)
print("Contains", day_count, " market days")
max_workers = min(10, max(5, day_count // 2)) # Heuristic: half the days to process, but at least 1 and no more than 10
max_workers = min(10, max(2, day_count // 2)) if max_workers is None else max_workers # Heuristic: half the number of market days, but at least 2 and at most 10
with ThreadPoolExecutor(max_workers=max_workers) as executor:
#for single_date in (start_date + timedelta(days=i) for i in range((end_date - start_date).days + 1)):
for market_day in market_open_days:
for market_day in tqdm(market_open_days, desc="Processing market days"):
#start = datetime.combine(single_date, time(9, 30)) # Market opens at 9:30 AM
#end = datetime.combine(single_date, time(16, 0)) # Market closes at 4:00 PM
@@ -286,14 +259,22 @@ def fetch_trades_parallel(symbol, start_date, end_date, exclude_conditions = cfh
future = executor.submit(fetch_daily_stock_trades, symbol, start, end, exclude_conditions, minsize, force_remote)
futures.append(future)
for future in futures:
for future in tqdm(futures, desc="Fetching data"):
try:
result = future.result()
results.append(result)
except Exception as e:
print(f"Error fetching data for a day: {e}")
return pd.concat(results, ignore_index=False)
# Batch concatenation to improve speed
batch_size = 10
batches = [results[i:i + batch_size] for i in range(0, len(results), batch_size)]
final_df = pd.concat([pd.concat(batch, ignore_index=False) for batch in batches], ignore_index=False)
return final_df
#original version
#return pd.concat(results, ignore_index=False)
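# Hedged usage sketch: fetch two weeks of SPY trades across market days in
# parallel. With max_workers=None the heuristic above applies, e.g. 10 market
# days -> min(10, max(2, 10 // 2)) = 5 workers; per-day DataFrames are then
# concatenated in batches of 10 to keep pd.concat cheap.
# trades_df = fetch_trades_parallel("SPY",
#                                   zoneNY.localize(datetime(2024, 1, 1, 9, 30)),
#                                   zoneNY.localize(datetime(2024, 1, 14, 16, 0)),
#                                   minsize=100)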
@jit(nopython=True)
def generate_dollar_bars_nb(ticks, amount_per_bar):
@@ -332,7 +313,7 @@ def generate_dollar_bars_nb(ticks, amount_per_bar):
# Check if the new tick is from a different day, then close the current bar
if tick_day != current_day:
if trades_count > 0:
ohlcv_bars.append([bar_time, open_price, high_price, low_price, close_price, volume, trades_count, amount_per_bar])
ohlcv_bars.append([bar_time, open_price, high_price, low_price, close_price, volume, trades_count, amount_per_bar, tick_time])
# Reset for the new day using the current tick data
open_price = price
high_price = price
@@ -361,7 +342,7 @@ def generate_dollar_bars_nb(ticks, amount_per_bar):
volume += volume_to_add # Update the volume here before appending and resetting
# Append the partially filled bar to the list
ohlcv_bars.append([bar_time, open_price, high_price, low_price, close_price, volume, trades_count + 1, amount_per_bar])
ohlcv_bars.append([bar_time, open_price, high_price, low_price, close_price, volume, trades_count + 1, amount_per_bar, tick_time])
# Fill the current bar and continue with a new bar
tick_volume -= volume_to_add
@@ -385,7 +366,7 @@ def generate_dollar_bars_nb(ticks, amount_per_bar):
# Add the last bar if it contains any trades
if trades_count > 0:
ohlcv_bars.append([bar_time, open_price, high_price, low_price, close_price, volume, trades_count, amount_per_bar])
ohlcv_bars.append([bar_time, open_price, high_price, low_price, close_price, volume, trades_count, amount_per_bar, tick_time])
return np.array(ohlcv_bars)
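# Hedged usage sketch for the numba dollar-bar builder above. The tick array
# layout (epoch seconds, price, size) follows the indexing in the loop; the
# input and output column names below are illustrative, not fixed by the code.
# ticks = trades_df[['timestamp', 'price', 'size']].to_numpy(dtype=np.float64)
# bars = generate_dollar_bars_nb(ticks, 1_000_000.0)  # ~$1M traded per bar
# ohlcv_df = pd.DataFrame(bars, columns=['time', 'open', 'high', 'low', 'close',
#                                        'volume', 'trades', 'amount', 'updated'])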
@@ -414,6 +395,9 @@ def generate_volume_bars_nb(ticks, volume_per_bar):
trades_count = 0
current_day = np.floor(ticks[0, 0] / 86400) # Calculate the initial day from the first tick timestamp
bar_time = ticks[0, 0] # Initialize bar time with the time of the first tick
buy_volume = 0 # Volume of buy trades
sell_volume = 0 # Volume of sell trades
prev_price = ticks[0, 1] # Initialize previous price for the first tick
for tick in ticks:
tick_time = tick[0]
@@ -424,7 +408,7 @@ def generate_volume_bars_nb(ticks, volume_per_bar):
# Check if the new tick is from a different day, then close the current bar
if tick_day != current_day:
if trades_count > 0:
ohlcv_bars.append([bar_time, open_price, high_price, low_price, close_price, volume, trades_count])
ohlcv_bars.append([bar_time, open_price, high_price, low_price, close_price, volume, trades_count, tick_time, buy_volume, sell_volume])
# Reset for the new day using the current tick data
open_price = price
high_price = price
@@ -435,6 +419,10 @@ def generate_volume_bars_nb(ticks, volume_per_bar):
remaining_volume = volume_per_bar
current_day = tick_day
bar_time = tick_time # Update bar time to the current tick time
buy_volume = 0
sell_volume = 0
# Reset previous tick price (imbalance calculation starts over for each day)
prev_price = price
# Start new bar if needed because of the volume
while tick_volume > 0:
@@ -446,6 +434,13 @@ def generate_volume_bars_nb(ticks, volume_per_bar):
volume += tick_volume
remaining_volume -= tick_volume
trades_count += 1
# Update buy and sell volumes
if price > prev_price:
buy_volume += tick_volume
elif price < prev_price:
sell_volume += tick_volume
tick_volume = 0
else:
# Fill the current bar and continue with a new bar
@@ -453,8 +448,15 @@ def generate_volume_bars_nb(ticks, volume_per_bar):
volume += volume_to_add
tick_volume -= volume_to_add
trades_count += 1
# Update buy and sell volumes
if price > prev_price:
buy_volume += volume_to_add
elif price < prev_price:
sell_volume += volume_to_add
# Append the completed bar to the list
ohlcv_bars.append([bar_time, open_price, high_price, low_price, close_price, volume, trades_count])
ohlcv_bars.append([bar_time, open_price, high_price, low_price, close_price, volume, trades_count, tick_time, buy_volume, sell_volume])
# Reset bar values for the new bar using the current tick data
open_price = price
@@ -464,16 +466,20 @@ def generate_volume_bars_nb(ticks, volume_per_bar):
volume = 0
trades_count = 0
remaining_volume = volume_per_bar
# Increment bar time if splitting a trade
if tick_volume > 0: # if the trade still has remaining volume, set the bar time slightly later
buy_volume = 0
sell_volume = 0
# Increment bar time if splitting a trade
if tick_volume > 0: # If there's remaining volume in the trade, set bar time slightly later
bar_time = tick_time + 1e-6
else:
bar_time = tick_time # otherwise set the bar time to the tick time
bar_time = tick_time # Otherwise, set bar time to the tick time
prev_price = price
# Add the last bar if it contains any trades
if trades_count > 0:
ohlcv_bars.append([bar_time, open_price, high_price, low_price, close_price, volume, trades_count])
ohlcv_bars.append([bar_time, open_price, high_price, low_price, close_price, volume, trades_count, tick_time, buy_volume, sell_volume])
return np.array(ohlcv_bars)
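# Minimal self-contained sketch of the tick-rule classification used in the
# bar builders above: a trade at a higher price than the previous one counts
# as buy volume, a lower price as sell volume, and an unchanged price as
# neither.
def tick_rule_volumes(prices, sizes):
    buy_volume = sell_volume = 0.0
    prev_price = prices[0]
    for price, size in zip(prices, sizes):
        if price > prev_price:
            buy_volume += size
        elif price < prev_price:
            sell_volume += size
        prev_price = price
    return buy_volume, sell_volume

# tick_rule_volumes([10.0, 10.1, 10.1, 10.0], [100, 200, 300, 400])
# -> (200.0, 400.0): the uptick to 10.1 buys 200, the downtick sells 400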
@@ -497,16 +503,30 @@ def generate_time_bars_nb(ticks, resolution):
close_price = 0
volume = 0
trades_count = 0
vwap_cum_volume_price = 0 # Cumulative volume * price
cum_volume = 0 # Cumulative volume for VWAP
buy_volume = 0 # Volume of buy trades
sell_volume = 0 # Volume of sell trades
prev_price = ticks[0, 1] # Initialize previous price for the first tick
prev_day = np.floor(ticks[0, 0] / 86400) # Calculate the initial day from the first tick timestamp
for tick in ticks:
curr_time = tick[0] # raw tick time (not floored to the bar resolution)
tick_time = np.floor(tick[0] / resolution) * resolution
price = tick[1]
tick_volume = tick[2]
tick_day = np.floor(tick_time / 86400) # Calculate the day of the current tick
#if the new tick is from a new day, reset previous tick price (calculating imbalance starts over)
if tick_day != prev_day:
prev_price = price
prev_day = tick_day
# Check if the tick belongs to a new bar
if tick_time != start_time + current_bar_index * resolution:
if current_bar_index >= 0 and trades_count > 0: # Save the previous bar if trades happened
ohlcv_bars.append([start_time + current_bar_index * resolution, open_price, high_price, low_price, close_price, volume, trades_count])
vwap = vwap_cum_volume_price / cum_volume if cum_volume > 0 else 0
ohlcv_bars.append([start_time + current_bar_index * resolution, open_price, high_price, low_price, close_price, volume, trades_count, curr_time, vwap, buy_volume, sell_volume])
# Reset bar values
current_bar_index = int((tick_time - start_time) / resolution)
@@ -515,6 +535,10 @@ def generate_time_bars_nb(ticks, resolution):
low_price = price
volume = 0
trades_count = 0
vwap_cum_volume_price = 0
cum_volume = 0
buy_volume = 0
sell_volume = 0
# Update the OHLCV values for the current bar
high_price = max(high_price, price)
@@ -522,10 +546,21 @@ def generate_time_bars_nb(ticks, resolution):
close_price = price
volume += tick_volume
trades_count += 1
vwap_cum_volume_price += price * tick_volume
cum_volume += tick_volume
# Update buy and sell volumes
if price > prev_price:
buy_volume += tick_volume
elif price < prev_price:
sell_volume += tick_volume
prev_price = price
# Save the last processed bar
if trades_count > 0:
ohlcv_bars.append([start_time + current_bar_index * resolution, open_price, high_price, low_price, close_price, volume, trades_count])
vwap = vwap_cum_volume_price / cum_volume if cum_volume > 0 else 0
ohlcv_bars.append([start_time + current_bar_index * resolution, open_price, high_price, low_price, close_price, volume, trades_count, curr_time, vwap, buy_volume, sell_volume])
return np.array(ohlcv_bars)
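# Hedged usage sketch: two synthetic ticks in the same minute collapse into a
# single 60-second bar whose VWAP is the size-weighted price,
# (10.0*100 + 11.0*300) / 400 = 10.75; the tick rule credits 300 to buy volume.
# ticks = np.array([[1_700_000_000.0, 10.0, 100.0],
#                   [1_700_000_030.0, 11.0, 300.0]])
# bars = generate_time_bars_nb(ticks, 60.0)
# columns per row: bar time, open, high, low, close, volume, trades,
#                  updated (last tick time), vwap, buy volume, sell volume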

View File

@@ -1,8 +1,7 @@
import os,sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
os.environ["KERAS_BACKEND"] = "jax"
from dotenv import load_dotenv
from v2realbot.config import WEB_API_KEY, DATA_DIR, MEDIA_DIRECTORY, LOG_PATH, MODEL_DIR, ENV_FILE
from v2realbot.config import WEB_API_KEY, DATA_DIR, MEDIA_DIRECTORY, LOG_PATH, MODEL_DIR
from alpaca.data.timeframe import TimeFrame, TimeFrameUnit
from datetime import datetime
from rich import print
@@ -12,7 +11,7 @@ import uvicorn
from uuid import UUID
from v2realbot.utils.ilog import get_log_window
from v2realbot.common.model import RunManagerRecord, StrategyInstance, RunnerView, RunRequest, Trade, RunArchive, RunArchiveView, RunArchiveViewPagination, RunArchiveDetail, Bar, RunArchiveChange, TestList, ConfigItem, InstantIndicator, DataTablesRequest, AnalyzerInputs
from fastapi import FastAPI, WebSocket, WebSocketDisconnect, Depends, HTTPException, status, WebSocketException, Cookie, Query
from fastapi import FastAPI, WebSocket, WebSocketDisconnect, Depends, HTTPException, status, WebSocketException, Cookie, Query, Request
from fastapi.responses import FileResponse, StreamingResponse, JSONResponse
from fastapi.staticfiles import StaticFiles
from fastapi.security import HTTPBasic, HTTPBasicCredentials
@@ -36,7 +35,7 @@ from traceback import format_exc
#from v2realbot.reporting.optimizecutoffs import find_optimal_cutoff
import v2realbot.reporting.analyzer as ci
import shutil
from starlette.responses import JSONResponse
from starlette.responses import JSONResponse, HTMLResponse, FileResponse, RedirectResponse
import mlroom
import mlroom.utils.mlutils as ml
from typing import List
@@ -75,14 +74,52 @@ def api_key_auth(api_key: str = Depends(X_API_KEY)):
status_code=status.HTTP_401_UNAUTHORIZED,
detail="Forbidden"
)
def authenticate_user(credentials: HTTPBasicCredentials = Depends(HTTPBasic())):
correct_username = "david"
correct_password = "david"
if credentials.username == correct_username and credentials.password == correct_password:
return True
else:
raise HTTPException(
status_code=status.HTTP_401_UNAUTHORIZED,
detail="Incorrect username or password",
headers={"WWW-Authenticate": "Basic"},
)
app = FastAPI()
root = os.path.dirname(os.path.abspath(__file__))
app.mount("/static", StaticFiles(html=True, directory=os.path.join(root, 'static')), name="static")
#app.mount("/static", StaticFiles(html=True, directory=os.path.join(root, 'static')), name="static")
app.mount("/media", StaticFiles(directory=str(MEDIA_DIRECTORY)), name="media")
#app.mount("/", StaticFiles(html=True, directory=os.path.join(root, 'static')), name="www")
security = HTTPBasic()
@app.get("/static/{path:path}")
async def static_files(request: Request, path: str, authenticated: bool = Depends(authenticate_user)):
root = os.path.dirname(os.path.abspath(__file__))
static_dir = os.path.join(root, 'static')
if not path or path == "/":
file_path = os.path.join(static_dir, 'index.html')
else:
file_path = os.path.join(static_dir, path)
# Check if path is a directory
if os.path.isdir(file_path):
# If it's a directory, try to serve index.html within that directory
index_path = os.path.join(file_path, 'index.html')
if os.path.exists(index_path):
return FileResponse(index_path)
else:
# Optionally, you can return a directory listing or a custom 404 page here
return HTMLResponse("Directory listing not enabled.", status_code=403)
if not os.path.exists(file_path):
raise HTTPException(status_code=404, detail="File not found")
return FileResponse(file_path)
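# Hedged client-side sketch: the /static tree above is now behind HTTP Basic
# auth, so a browser shows the standard credentials prompt and scripts must
# send the Authorization header themselves. Host and port are illustrative.
# import requests
# r = requests.get("http://localhost:8000/static/index.html",
#                  auth=("david", "david"))
# assert r.status_code == 200  # 401 on wrong credentials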
def get_current_username(
credentials: Annotated[HTTPBasicCredentials, Depends(security)]
@@ -104,9 +141,9 @@ async def get_api_key(
return session or api_key
#TODO rework this away from async?
@app.get("/static")
async def get(username: Annotated[str, Depends(get_current_username)]):
return FileResponse("index.html")
# @app.get("/static")
# async def get(username: Annotated[str, Depends(get_current_username)]):
# return FileResponse("index.html")
@app.websocket("/runners/{runner_id}/ws")
async def websocket_endpoint(
@@ -1025,7 +1062,6 @@ for i in cs.db.runners:
i.run_thread.join()
if __name__ == "__main__":
load_dotenv(ENV_FILE)
try:
#TODO: turn this into a standalone thread class in a separate file (a draft already exists on ChatGPT)
#start the log-writer thread (a single writer thread; other threads feed it via a queue)

View File

@@ -94,7 +94,9 @@ class ConfigHandler:
return FillCondition(value)
case "BT_FILL_CONDITION_SELL_LIMIT":
return FillCondition(value)
# Add cases for other enumeration conversions as needed
case "AGG_EXCLUDED_TRADES":
return sorted(value) # convert to a sorted list
# Add cases for other enumeration conversions or transformations as needed
case _:
return value
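# Illustration of the AGG_EXCLUDED_TRADES branch above: returning a sorted
# copy makes the exclusion list order-independent, e.g. for deterministic
# cache file names that embed the list.
# sorted(['C', 'O', '4', 'B', '7'])  ->  ['4', '7', 'B', 'C', 'O']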