Files
strategy-lab/research/data/db_loader_saver.ipynb
David Brazda 6b93e53ab1 daily update
2024-10-04 08:15:16 +02:00

4.5 MiB

How to load and save data to remote database

In [19]:
from lightweight_charts import Panel, chart, PlotAccessor
from v2realbot.utils.utils import zoneNY
import pandas as pd
import numpy as np
import vectorbtpro as vbt
# from itables import init_notebook_mode, show
import datetime
from itertools import product
from v2realbot.config import DATA_DIR
from IPython.display import display

# init_notebook_mode(all_interactive=True)

vbt.settings.set_theme("dark")
vbt.settings['plotting']['layout']['width'] = 1280
vbt.settings.plotting.auto_rangebreaks = True
# Set the option to display with pagination
pd.set_option('display.notebook_repr_html', True)
pd.set_option('display.max_rows', 10)  # Number of rows per page

Fetch STOCKS from db

In [20]:
#fetching US-STOCKS ohlcv_1s
from lib.db import Connection
SYMBOL = "BAC"
SCHEMA = "ohlcv_1s" #time based 1s other options ohlcv_vol_200 (volume based ohlcv with resolution of 200), ohlcv_renko_20 (renko with 20 bricks size) ...
DB = "market_data"

con = Connection(db_name=DB, default_schema=SCHEMA, create_db=True)
data = con.pull(symbols=[SYMBOL], schema=SCHEMA,start="2024-08-01", end="2024-09-05", tz_convert='America/New_York')

data.data[SYMBOL].info()
#1month 1s data - 15s - 24MB
100%|##########| 1/1 [00:10<00:00, 10.52s/it, symbol=BAC]
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 288034 entries, 2024-08-01 09:30:00-04:00 to 2024-09-04 15:59:59-04:00
Data columns (total 10 columns):
 #   Column      Non-Null Count   Dtype              
---  ------      --------------   -----              
 0   open        288034 non-null  float64            
 1   high        288034 non-null  float64            
 2   low         288034 non-null  float64            
 3   close       288034 non-null  float64            
 4   volume      288034 non-null  float64            
 5   trades      288034 non-null  float64            
 6   updated     288034 non-null  datetime64[ns, UTC]
 7   vwap        288034 non-null  float64            
 8   buyvolume   288034 non-null  float64            
 9   sellvolume  288034 non-null  float64            
dtypes: datetime64[ns, UTC](1), float64(9)
memory usage: 24.2 MB
In [25]:
data.xloc["2024-08-05":"2024-08-10"].data[SYMBOL].close.lw.plot()

Fetch CRYPTO

In [12]:
#fetching CRYPTO from db from krakenfutures exchange
from lib.db import Connection
SYMBOL = "BTC/USD"
con = Connection(db_name="market_data", default_schema="krakenfutures", create_db=True)
data = con.pull(symbols=[SYMBOL], start="2024-01-01", end="2025-01-01")
100%|##########| 1/1 [00:10<00:00, 10.14s/it, symbol=BTC/USD]

Load data from local CACHE

In [5]:
# Define the market open and close times
market_open = datetime.time(9, 30)
market_close = datetime.time(16, 0)
entry_window_opens = 1
entry_window_closes = 370
forced_exit_start = 380
forced_exit_end = 390

#LOAD FROM local PARQUET cache
#list all files is dir directory with parquet extension
dir = DATA_DIR + "/notebooks/"
import os
files = [f for f in os.listdir(dir) if f.endswith(".parquet")]
print('\n'.join(map(str, files)))
#file_name = "ohlcv_df-BAC-2023-01-01T09_30_00-2024-05-25T15_30_00-47BCFOPUVWZ-100.parquet"
file_name = "ohlcv_df-BAC-2023-01-01T09:30:00-2024-10-02T16:00:00-['4', '7', 'B', 'C', 'F', 'O', 'P', 'U', 'V', 'W', 'Z']-100.parquet"
ohlcv_df = pd.read_parquet(dir+file_name,engine='pyarrow')
#filter ohlcv_df to certain date range (assuming datetime index)
#ohlcv_df = ohlcv_df.loc["2024-02-12 9:30":"2024-02-16 16:00"]

#add vwap column to ohlcv_df
#ohlcv_df["hlcc4"] = (ohlcv_df["close"] + ohlcv_df["high"] + ohlcv_df["low"] + ohlcv_df["close"]) / 4

basic_data = vbt.Data.from_data(vbt.symbol_dict({"BAC": ohlcv_df}), tz_convert=zoneNY)
#ohlcv_df= None
basic_data.wrapper.index.normalize().nunique()
bd = basic_data.transform(lambda df: df[:1000000])
bd.data["BAC"].info()
trades_df-BAC-2024-01-01T09_30_00-2024-05-14T16_00_00-CO4B7VPWUZF-100.parquet
trades_df-BAC-2024-01-11T09:30:00-2024-01-12T16:00:00.parquet
trades_df-SPY-2024-01-01T09:30:00-2024-05-14T16:00:00.parquet
trades_df-BAC-2023-01-01T09_30_00-2024-05-25T16_00_00-47BCFOPUVWZ-100.parquet
ohlcv_df-BAC-2024-01-11T09:30:00-2024-01-12T16:00:00.parquet
trades_df-BAC-2023-01-01T09:30:00-2024-10-02T16:00:00-['4', '7', 'B', 'C', 'F', 'O', 'P', 'U', 'V', 'W', 'Z']-100.parquet
trades_df-BAC-2024-05-15T09_30_00-2024-05-25T16_00_00-47BCFOPUVWZ-100.parquet
ohlcv_df-BAC-2023-01-01T09:30:00-2024-10-02T16:00:00-['4', '7', 'B', 'C', 'F', 'O', 'P', 'U', 'V', 'W', 'Z']-100.parquet
ohlcv_df-BAC-2024-01-01T09_30_00-2024-05-25T16_00_00-47BCFOPUVWZ-100.parquet
ohlcv_df-SPY-2024-01-01T09:30:00-2024-05-14T16:00:00.parquet
ohlcv_df-BAC-2024-01-01T09_30_00-2024-05-14T16_00_00-CO4B7VPWUZF-100.parquet
ohlcv_df-BAC-2023-01-01T09_30_00-2024-05-25T16_00_00-47BCFOPUVWZ-100.parquet
ohlcv_df-BAC-2023-01-01T09_30_00-2024-05-25T15_30_00-47BCFOPUVWZ-100.parquet
Out[5]:
440

Saving to DATABASE

In [5]:
from lib.db import Connection
SYMBOL = "BAC"
SCHEMA = "ohlcv_1s" #time based 1s other options ohlcv_vol_200 (volume based ohlcv with resolution of 200), ohlcv_renko_20 (renko with 20 bricks size) ...
DB = "market_data"

try:
    con = Connection(db_name=DB, default_schema=SCHEMA, create_db=True)
    con.force_rollback_all()  # Force cleanup before attempting to save
    con.save(basic_data,1000)
except Exception as e:
    print(f"Error occurred: {str(e)}")

#63sec to insert remotely 5mio records
In [15]:
basic_data.data["BAC"].info()
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 4549772 entries, 2023-01-03 09:30:01-05:00 to 2024-05-24 15:59:59-04:00
Data columns (total 10 columns):
 #   Column      Dtype                     
---  ------      -----                     
 0   open        float64                   
 1   high        float64                   
 2   low         float64                   
 3   close       float64                   
 4   volume      float64                   
 5   trades      float64                   
 6   updated     datetime64[ns, US/Eastern]
 7   vwap        float64                   
 8   buyvolume   float64                   
 9   sellvolume  float64                   
dtypes: datetime64[ns, US/Eastern](1), float64(9)
memory usage: 381.8 MB

Add resample function to custom columns

Resample to different resolution

In [4]:
from vectorbtpro.utils.config import merge_dicts, Config, HybridConfig
from vectorbtpro import _typing as tp
from vectorbtpro.generic import nb as generic_nb

_feature_config: tp.ClassVar[Config] = HybridConfig(
    {
        "buyvolume": dict(
            resample_func=lambda self, obj, resampler: obj.vbt.resample_apply(
                resampler,
                generic_nb.sum_reduce_nb,
            )
        ),
        "sellvolume": dict(
            resample_func=lambda self, obj, resampler: obj.vbt.resample_apply(
                resampler,
                generic_nb.sum_reduce_nb,
            )
        ),
        "trades": dict(
            resample_func=lambda self, obj, resampler: obj.vbt.resample_apply(
                resampler,
                generic_nb.sum_reduce_nb,
            )
        )
    }
)

basic_data._feature_config = _feature_config
In [5]:
s1data = basic_data[['open', 'high', 'low', 'close', 'volume','vwap','buyvolume','trades','sellvolume']]

s5data = s1data.resample("12s")
s5data = s5data.transform(lambda df: df.between_time('09:30', '16:00').dropna())

t1data = basic_data[['open', 'high', 'low', 'close', 'volume','vwap','buyvolume','trades','sellvolume']].resample("1T")
t1data = t1data.transform(lambda df: df.between_time('09:30', '16:00').dropna())
# t1data.data["BAC"].info()

t30data  = basic_data[['open', 'high', 'low', 'close', 'volume','vwap','buyvolume','trades','sellvolume']].resample("30T")
t30data = t30data.transform(lambda df: df.between_time('09:30', '16:00').dropna())
# t30data.data["BAC"].info()

s1close = s1data.close
t1close = t1data.close
t30close = t30data.close
t30volume = t30data.volume
In [6]:
from lightweight_charts import JupyterChart, chart, Panel, PlotAccessor
s5data.close.lw.plot()

# pane1 = Panel(
# ohlcv=(s5data.ohlcv.get(),))

# # Create the chart with the panel
# ch = chart([pane1], title="Chart", sync=True, session=None, size="s")
In [6]:
s1data.data["BAC"].head()
Out[6]:
<style scoped=""> .dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </style>
open high low close volume vwap buyvolume trades sellvolume
time
2024-02-12 09:30:00-05:00 33.000 33.00 33.00 33.000 800.0 33.000000 0.0 2.0 0.0
2024-02-12 09:30:01-05:00 33.020 33.05 33.00 33.010 267365.0 33.020039 265765.0 25.0 1000.0
2024-02-12 09:30:02-05:00 33.000 33.02 33.00 33.015 1100.0 33.009545 500.0 9.0 600.0
2024-02-12 09:30:03-05:00 33.005 33.08 32.99 33.080 7508.0 33.027898 1970.0 32.0 3638.0
2024-02-12 09:30:05-05:00 33.060 33.06 33.06 33.060 500.0 33.060000 0.0 5.0 100.0
In [7]:
#resample on specific index 
resampler = vbt.Resampler(t30data.index, s1data.index, source_freq="30T", target_freq="1s")
t30close_realigned = t30close.vbt.realign_closing(resampler)

#resample 1min to s
resampler_s = vbt.Resampler(t1data.index, s1data.index, source_freq="1T", target_freq="1s")
t1close_realigned = t1close.vbt.realign_closing(resampler_s)