{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Test persistence\n", "\n", "Persistence of OHLCV bars and trades, keyed by bar type, resolution, trade filter (excluded trade conditions) and minimum trade size (minsize).\n", "\n", "```\n", "/OHLCV/\n", " ├── {bar_type}/\n", " │ ├── {resolution}/ (e.g. 1s)\n", " │ │ ├── {filtered_trades}-{min_trade_size}/\n", " │ │ │ ├── {day}/\n", " │ │ │ │ └── hashedname.parquet\n", "```" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from v2realbot.tools.loadbatch import load_batch\n", "from v2realbot.utils.utils import zoneNY\n", "import pandas as pd\n", "import numpy as np\n", "import vectorbtpro as vbt\n", "from itables import init_notebook_mode, show\n", "import datetime\n", "from itertools import product\n", "from v2realbot.config import ACCOUNT1_PAPER_API_KEY, ACCOUNT1_PAPER_SECRET_KEY, DATA_DIR\n", "import v2realbot.utils.config_handler as cfh\n", "init_notebook_mode(all_interactive=True)\n", "from v2realbot.enums.enums import BarType\n", "\n", "vbt.settings.set_theme(\"dark\")\n", "vbt.settings['plotting']['layout']['width'] = 1280\n", "vbt.settings.plotting.auto_rangebreaks = True\n", "# Set the option to display with pagination\n", "pd.set_option('display.notebook_repr_html', True)\n", "pd.set_option('display.max_rows', 10) # Number of rows per page\n", "\n", "# Define the market open and close times\n", "market_open = datetime.time(9, 30)\n", "market_close = datetime.time(16, 0)\n", "entry_window_opens = 1\n", "entry_window_closes = 370\n", "\n", "forced_exit_start = 380\n", "forced_exit_end = 390\n", "\n", "#LOAD FROM BATCH\n", "# res, df = load_batch(batch_id=\"f1ac6651\", #138170bc 0fb5043a bde6d0be f1ac6651\n", "# space_resolution_evenly=False,\n", "# indicators_columns=[\"Rsi14\"],\n", "# main_session_only=True,\n", "# verbose = False)\n", "# if res < 0:\n", "# print(\"Error\" + str(res) + str(df))\n", "# df = df[\"bars\"]\n", "\n", "# basic_data = vbt.Data.from_data(vbt.symbol_dict({\"BAC\": df}), 
tz_convert=zoneNY)\n",
    "# #m1_data = basic_data[['Open', 'High', 'Low', 'Close', 'Volume']]\n",
    "# basic_data = basic_data.transform(lambda df: df.between_time('09:30', '16:00'))\n",
    "# #basic_data.info()\n",
    "\n",
    "#LOAD FROM PARQUET\n",
    "# List every parquet file found in the notebooks data directory.\n",
    "# The directory lives in notebooks_dir (not dir) so the builtin dir() is not shadowed,\n",
    "# and paths are built with os.path.join instead of string concatenation.\n",
    "import os\n",
    "notebooks_dir = os.path.join(DATA_DIR, \"notebooks\")\n",
    "files = [f for f in os.listdir(notebooks_dir) if f.endswith(\".parquet\")]\n",
    "print('\\n'.join(map(str, files)))\n",
    "file_name = \"ohlcv_df-BAC-2023-01-01T09_30_00-2024-05-25T16_00_00-47BCFOPUVWZ-100.parquet\"\n",
    "ohlcv_df = pd.read_parquet(os.path.join(notebooks_dir, file_name), engine='pyarrow')\n",
    "\n",
    "#filter ohlcv_df to certain date range (assuming datetime index)\n",
    "#ohlcv_df = ohlcv_df.loc[\"2024-05-14 09:30\":\"2024-05-15 09:35\"]\n",
    "\n",
    "#add vwap column to ohlcv_df\n",
    "#ohlcv_df[\"hlcc4\"] = (ohlcv_df[\"close\"] + ohlcv_df[\"high\"] + ohlcv_df[\"low\"] + ohlcv_df[\"close\"]) / 4\n",
    "\n",
    "# Wrap the frame as vectorbtpro Data under the BAC symbol, converted to the New York time zone\n",
    "basic_data = vbt.Data.from_data(vbt.symbol_dict({\"BAC\": ohlcv_df}), tz_convert=zoneNY)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#basic_data.data[\"BAC\"].info()\n",
    "#ohlcv_df group by week number of rows\n",
    "# ohlcv_df['close'].groupby(pd.Grouper(freq='ME')).mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#trade filtering\n",
    "exclude_conditions = cfh.config_handler.get_val('AGG_EXCLUDED_TRADES') #standard ['C','O','4','B','7','V','P','W','U','Z','F']\n",
    "minsize = 100\n",
    "# Sort the condition codes so the key is canonical (independent of their order in the config)\n",
    "# and matches the sorted form already used in the parquet file name loaded above (47BCFOPUVWZ).\n",
    "exclude_conditions_str = ''.join(sorted(exclude_conditions))\n",
    "exclude_conditions_str"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "basic_data.data[\"BAC\"].info()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Aim is to store\n",
    "OHLCV grouped by symbol, day, resolution\n",
    "and \n",
    "bar type\n",
    "excluded_conditions\n",
    "minsize\n",
    "main session"
   ]
  },
  { "cell_type": "code", "execution_count": 
null, "metadata": {}, "outputs": [],
   "source": [
    "bartype = BarType.TIME\n",
    "resolution = \"1s\"\n",
    "trade_filter = exclude_conditions_str + \"-\" + str(minsize)\n",
    "# Target layout: /OHLCV/{bar_type}/{resolution}/{trade_filter}/, partitioned by day.\n",
    "# Stored in ohlcv_dir (not dir) so the builtin dir() is not shadowed in the notebook.\n",
    "# NOTE(review): the path is absolute from the filesystem root because the DATA_DIR prefix\n",
    "# below is commented out - confirm this is the intended location.\n",
    "ohlcv_dir = \"/OHLCV/\" + bartype + \"/\" + resolution + \"/\" + trade_filter + \"/\"\n",
    "#ohlcv_dir = DATA_DIR + ohlcv_dir\n",
    "basic_data.to_parquet(partition_by=\"day\", keep_groupby_names=False, path_or_buf=ohlcv_dir, mkdir_kwargs=dict(mkdir=True))\n",
    "#partition_by=\"day\",\n",
    "\n",
    "# loads partitioned 1s data spanning 90 days in about 2 s\n",
    "#day_data = vbt.ParquetData.pull(\"BAC\", paths=ohlcv_dir, filters=[(\"group\", \">\", \"2024-01-02\"),(\"group\", \"<=\", \"2024-01-09\")]) #, \n",
    "# day_data[\"2024-05-01\":\"2024-05-14\"].get()\n",
    "\n",
    "# day_data.data[\"BAC\"].info()\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# loads partitioned 1s data spanning 90 days in about 2 s\n",
    "# Only the partitions whose group value lies in 2024-01-02..2024-01-09 (both bounds inclusive) are requested\n",
    "day_data = vbt.ParquetData.pull(\"BAC\", paths=ohlcv_dir, filters=[(\"group\", \">=\", \"2024-01-02\"),(\"group\", \"<=\", \"2024-01-09\")]) #, \n",
    "# day_data[\"2024-05-01\":\"2024-05-14\"].get()\n",
    "\n",
    "day_data.data[\"BAC\"].info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "close = basic_data.close\n",
    "#group close by month (month-end frequency), using pandas grouper\n",
    "#close.groupby(pd.Grouper(freq='ME')).mean()\n",
    "\n",
    "#using Grouper of vectorbtpro\n",
    "#close.vbt.group_by(pd.Grouper(freq='ME')).mean()\n",
    "\n",
    "#basic_data.wrapper.get_columns()\n",
    "# Print these explicitly: a bare expression in the middle of a cell is evaluated but never displayed\n",
    "print(\"freq:\", basic_data.wrapper.get_freq())\n",
    "# vbt.pdir(basic_data.wrapper)\n",
    "# basic_data.wrapper\n",
    "print(\"is_grouped:\", basic_data.wrapper.grouper.is_grouped())\n",
    "\n",
    "vbt.pdir(basic_data.wrapper.grouper)\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Walk the month-end (\"ME\") grouper built from the data index and print each (group, group_idx) pair it yields\n",
    "grouper = basic_data.wrapper.index.vbt.get_grouper(\"ME\")\n",
    "\n",
    "for group, group_idx in grouper:\n",
    "    print(group, group_idx)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Preview the bars pulled back from the partitioned store.\n",
    "# (A bare df here ran before df was assigned and raised NameError on Restart & Run All.)\n",
    "day_data.data[\"BAC\"].head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# converts 1 million rows (6 months of 1s data) to a dict in about 10 s\n",
    "df = day_data.data[\"BAC\"]\n",
    "df_dict = df.to_dict(orient='list')\n",
    "\n",
    "# Convert the index (which is the time) to a list of float timestamps\n",
    "df_dict['time'] = [timestamp.timestamp() for timestamp in df.index]\n",
    "\n",
    "# Show only the first few values per column; echoing df_dict itself would dump every row into the cell output\n",
    "{column: values[:5] for column, values in df_dict.items()}"
   ]
  }
 ],
 "metadata": { "kernelspec": { "display_name": ".venv", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.11" } }, "nbformat": 4, "nbformat_minor": 2 }