
Experiment with file persistence structure

OHLCV and trades persistence with bar type, trade filtering, and minimum trade size (minsize) support; the sketch after the tree below shows how a concrete partition path is assembled.

/OHLCV/
    ├── {bar_type}/                              (e.g. time)
    │   ├── {resolution}/                        (e.g. 1s)
    │   │   ├── {filtered_trades}-{min_trade_size}/
    │   │   │   ├── {day}/
    │   │   │   │   └── hashedname.parquet
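For illustration, a minimal sketch of how one concrete partition path is assembled under this layout. The example values mirror the cells further below (time bars, 1s resolution, the standard excluded-condition string, minsize 100):

In [ ]:
# Sketch: assemble one concrete partition path under the layout above.
# Values mirror the cells below; exclude_conditions_str is the join of
# the standard AGG_EXCLUDED_TRADES list.
bar_type = "time"
resolution = "1s"
exclude_conditions_str = "CO4B7VPWUZF"
min_trade_size = 100
print(f"/OHLCV/{bar_type}/{resolution}/{exclude_conditions_str}-{min_trade_size}/")
# -> /OHLCV/time/1s/CO4B7VPWUZF-100/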
In [ ]:
from v2realbot.tools.loadbatch import load_batch
from v2realbot.utils.utils import zoneNY
import pandas as pd
import numpy as np
import vectorbtpro as vbt
from itables import init_notebook_mode, show
import datetime
from itertools import product
from v2realbot.config import ACCOUNT1_PAPER_API_KEY, ACCOUNT1_PAPER_SECRET_KEY, DATA_DIR
import v2realbot.utils.config_handler as cfh
init_notebook_mode(all_interactive=True)
from v2realbot.enums.enums import BarType

vbt.settings.set_theme("dark")
vbt.settings['plotting']['layout']['width'] = 1280
vbt.settings.plotting.auto_rangebreaks = True
# Set the option to display with pagination
pd.set_option('display.notebook_repr_html', True)
pd.set_option('display.max_rows', 10)  # Number of rows per page

# Define the market open and close times
market_open = datetime.time(9, 30)
market_close = datetime.time(16, 0)
entry_window_opens = 1
entry_window_closes = 370

forced_exit_start = 380
forced_exit_end = 390
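
# Sanity-check sketch (assumption: the window offsets above are minutes
# after the 09:30 open): 1 -> 09:31, 370 -> 15:40, 380 -> 15:50,
# 390 -> 16:00, i.e. the forced exit ends exactly at market close.
_open_dt = datetime.datetime.combine(datetime.date.today(), market_open)
for _minutes in (entry_window_opens, entry_window_closes, forced_exit_start, forced_exit_end):
    print(_minutes, (_open_dt + datetime.timedelta(minutes=_minutes)).time())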

#LOAD FROM BATCH
# res, df = load_batch(batch_id="f1ac6651", #138170bc 0fb5043a  bde6d0be f1ac6651
#                      space_resolution_evenly=False,
#                      indicators_columns=["Rsi14"],
#                      main_session_only=True,
#                      verbose = False)
# if res < 0:
#     print("Error" + str(res) + str(df))
# df = df["bars"]

# basic_data = vbt.Data.from_data(vbt.symbol_dict({"BAC": df}), tz_convert=zoneNY)
# #m1_data = basic_data[['Open', 'High', 'Low', 'Close', 'Volume']]
# basic_data = basic_data.transform(lambda df: df.between_time('09:30', '16:00'))
# #basic_data.info()

#LOAD FROM PARQUET
#list all files with a .parquet extension in the directory
dir = DATA_DIR + "/notebooks/"
import os
files = [f for f in os.listdir(dir) if f.endswith(".parquet")]
print('\n'.join(map(str, files)))
file_name = "ohlcv_df-BAC-2023-01-01T09_30_00-2024-05-25T16_00_00-47BCFOPUVWZ-100.parquet"
ohlcv_df = pd.read_parquet(dir + file_name, engine='pyarrow')

#filter ohlcv_df to certain date range (assuming datetime index)
#ohlcv_df = ohlcv_df.loc["2024-05-14 09:30":"2024-05-15 09:35"]

#add hlcc4 (typical price) column to ohlcv_df
#ohlcv_df["hlcc4"] = (ohlcv_df["close"] + ohlcv_df["high"] + ohlcv_df["low"] + ohlcv_df["close"]) / 4

basic_data = vbt.Data.from_data(vbt.symbol_dict({"BAC": ohlcv_df}), tz_convert=zoneNY)
In [ ]:
#basic_data.data["BAC"].info()
#monthly mean of close (pandas Grouper, month-end frequency)
# ohlcv_df['close'].groupby(pd.Grouper(freq='ME')).mean()
In [ ]:
#trade filtering
exclude_conditions = cfh.config_handler.get_val('AGG_EXCLUDED_TRADES') #standard ['C','O','4','B','7','V','P','W','U','Z','F']
minsize = 100
exclude_conditions_str = ''.join(exclude_conditions)
exclude_conditions_str
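For context, a hypothetical sketch of the filtering this key encodes; the column names `conditions` and `size` are assumptions, not the project's actual aggregation code:

In [ ]:
# Hypothetical sketch of condition/minsize trade filtering (assumed
# columns 'conditions' and 'size'; not the actual aggregator code).
def filter_trades(trades: pd.DataFrame, excluded, minsize):
    # drop trades carrying any excluded condition code, keep trades >= minsize
    keep = ~trades['conditions'].apply(lambda conds: any(c in excluded for c in conds))
    return trades[keep & (trades['size'] >= minsize)]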
In [ ]:
basic_data.data["BAC"].info()

The aim is to store OHLCV data partitioned by symbol, day, resolution, bar type, excluded trade conditions, minimum trade size, and main-session filtering.

In [ ]:
bartype = BarType.TIME
resolution = "1s"
trade_filter = exclude_conditions_str + "-" + str(minsize)
dir = "/OHLCV/" + bartype + "/" + resolution + "/" + trade_filter + "/"
#dir = DATA_DIR + dir
basic_data.to_parquet(partition_by="day", keep_groupby_names=False, path_or_buf=dir, mkdir_kwargs=dict(mkdir=True))
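A quick look at the on-disk layout the write produces; this sketch assumes vectorbt writes one hive-style `group=<day>` directory per partition, which is what the `group` filters in the next cell rely on:

In [ ]:
# Sketch: list the partition directories created above (assumed
# hive-style "group=<day>" names matching the "group" filter column).
for name in sorted(os.listdir(dir))[:5]:
    print(name)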
In [ ]:
# Loads partitioned 1s data spanning 90 days in about 2 s
day_data = vbt.ParquetData.pull("BAC", paths=dir, filters=[("group", ">=", "2024-01-02"), ("group", "<=", "2024-01-09")])
# day_data["2024-05-01":"2024-05-14"].get()

day_data.data["BAC"].info()
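The same filter syntax narrows the pull to a single partition (a sketch, assuming the pyarrow-style filter tuples also accept equality):

In [ ]:
# Sketch: pull a single day's partition using an equality filter.
single_day = vbt.ParquetData.pull("BAC", paths=dir, filters=[("group", "==", "2024-01-02")])
single_day.data["BAC"].info()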
In [ ]:
close = basic_data.close
#group close by month end, using a pandas Grouper
#close.groupby(pd.Grouper(freq='ME')).mean()

#the same grouping via vectorbtpro's accessor
#close.vbt.group_by(pd.Grouper(freq='ME')).mean()

#basic_data.wrapper.get_columns()
basic_data.wrapper.get_freq()
# vbt.pdir(basic_data.wrapper)
# basic_data.wrapper
basic_data.wrapper.grouper.is_grouped()

vbt.pdir(basic_data.wrapper.grouper)
In [ ]:
grouper = basic_data.wrapper.index.vbt.get_grouper("ME")

for group, group_idx in grouper:
    print(group, group_idx)
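As a usage sketch, the (label, positions) pairs can drive an aggregation directly; this assumes each `group_idx` is a positional index array, as the loop above suggests:

In [ ]:
# Sketch: per-month mean close from the grouper's (label, positions) pairs.
# Assumes group_idx is a positional index array into `close`.
{group: close.iloc[group_idx].mean() for group, group_idx in grouper}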
In [ ]:
df
In [ ]:
# Converts ~1 million rows (6 months of 1s data) to a dict in about 10 s
df = day_data.data["BAC"]
df_dict = df.to_dict(orient='list')

# Convert the index (which is the time) to a list of float timestamps
df_dict['time'] = [timestamp.timestamp() for timestamp in df.index]

df_dict
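The per-element loop above accounts for most of that runtime; a vectorized sketch for the same conversion:

In [ ]:
# Sketch: vectorized timestamp conversion. For a tz-aware index, .values
# holds UTC datetime64[ns]; casting to int64 gives nanoseconds since the
# epoch, so dividing by 1e9 matches Timestamp.timestamp().
df_dict['time'] = (df.index.values.astype(np.int64) / 1e9).tolist()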