4.5 MiB
4.5 MiB
How to load and save data to a remote database¶
In [3]:
# Imports and global notebook configuration.
import datetime
from itertools import product

import numpy as np
import pandas as pd
import vectorbtpro as vbt
from IPython.display import display

from lightweight_charts import Panel, chart, PlotDFAccessor
from v2realbot.config import DATA_DIR
from v2realbot.utils.utils import zoneNY

# from itables import init_notebook_mode, show
# init_notebook_mode(all_interactive=True)

# vectorbtpro plotting defaults
vbt.settings.set_theme("dark")
vbt.settings['plotting']['layout']['width'] = 1280
vbt.settings.plotting.auto_rangebreaks = True

# Pandas display options
pd.set_option('display.notebook_repr_html', True)
pd.set_option('display.max_rows', 10)  # Number of rows per page
None Loaded env variables from file None
Activating profile profile1
Fetch STOCKS from db¶
In [20]:
# Fetch US-stocks 1-second OHLCV bars from the remote database.
from lib.db import Connection

SYMBOL = "BAC"
# Time-based 1s schema; other options: ohlcv_vol_200 (volume-based OHLCV with
# resolution of 200), ohlcv_renko_20 (renko with 20 bricks size), ...
SCHEMA = "ohlcv_1s"
DB = "market_data"

con = Connection(db_name=DB, default_schema=SCHEMA, create_db=True)
data = con.pull(symbols=[SYMBOL], schema=SCHEMA, start="2024-08-01", end="2024-09-05", tz_convert='America/New_York')

# 1 month of 1s data: ~15s to fetch, ~24 MB in memory
data.data[SYMBOL].info()
100%|##########| 1/1 [00:10<00:00, 10.52s/it, symbol=BAC]
<class 'pandas.core.frame.DataFrame'> DatetimeIndex: 288034 entries, 2024-08-01 09:30:00-04:00 to 2024-09-04 15:59:59-04:00 Data columns (total 10 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 open 288034 non-null float64 1 high 288034 non-null float64 2 low 288034 non-null float64 3 close 288034 non-null float64 4 volume 288034 non-null float64 5 trades 288034 non-null float64 6 updated 288034 non-null datetime64[ns, UTC] 7 vwap 288034 non-null float64 8 buyvolume 288034 non-null float64 9 sellvolume 288034 non-null float64 dtypes: datetime64[ns, UTC](1), float64(9) memory usage: 24.2 MB
In [25]:
# Plot closing prices for a one-week slice via the lightweight-charts accessor.
week_slice = data.xloc["2024-08-05":"2024-08-10"]
week_slice.data[SYMBOL].close.lw.plot()
Fetch CRYPTO¶
In [12]:
# Fetch crypto data (krakenfutures exchange schema) from the database.
from lib.db import Connection

SYMBOL = "BTC/USD"
con = Connection(db_name="market_data", default_schema="krakenfutures", create_db=True)
data = con.pull(symbols=[SYMBOL], start="2024-01-01", end="2025-01-01")
100%|##########| 1/1 [00:10<00:00, 10.14s/it, symbol=BTC/USD]
Load data from local CACHE¶
In [4]:
import os

# Define the market open and close times
market_open = datetime.time(9, 30)
market_close = datetime.time(16, 0)

# Strategy windows, expressed in minutes from market open — TODO confirm unit with downstream use
entry_window_opens = 1
entry_window_closes = 370
forced_exit_start = 380
forced_exit_end = 390

# LOAD FROM local PARQUET cache: list all parquet files in the cache directory.
# Renamed from `dir` to avoid shadowing the builtin of the same name.
cache_dir = DATA_DIR + "/notebooks/"
files = [f for f in os.listdir(cache_dir) if f.endswith(".parquet")]
print('\n'.join(files))

#file_name = "ohlcv_df-BAC-2023-01-01T09_30_00-2024-05-25T15_30_00-47BCFOPUVWZ-100.parquet"
#file_name = "ohlcv_df-BAC-2023-01-01T09:30:00-2024-10-02T16:00:00-['4', '7', 'B', 'C', 'F', 'O', 'P', 'U', 'V', 'W', 'Z']-100.parquet"
file_name = "ohlcv_df-BAC-2024-10-03T09:30:00-2024-10-16T16:00:00-['4', '7', 'B', 'C', 'F', 'O', 'P', 'U', 'V', 'W', 'Z']-100.parquet"
ohlcv_df = pd.read_parquet(cache_dir + file_name, engine='pyarrow')

# Optionally filter ohlcv_df to a date range (assumes a datetime index):
#ohlcv_df = ohlcv_df.loc["2024-02-12 9:30":"2024-02-16 16:00"]
# Optionally add an hlcc4 price column (high+low+close+close over 4):
#ohlcv_df["hlcc4"] = (ohlcv_df["close"] + ohlcv_df["high"] + ohlcv_df["low"] + ohlcv_df["close"]) / 4

# Wrap the frame as a vectorbtpro Data object in New York time.
basic_data = vbt.Data.from_data(vbt.symbol_dict({"BAC": ohlcv_df}), tz_convert=zoneNY)
basic_data.data["BAC"].info()
trades_df-BAC-2024-01-01T09_30_00-2024-05-14T16_00_00-CO4B7VPWUZF-100.parquet trades_df-BAC-2024-01-11T09:30:00-2024-01-12T16:00:00.parquet trades_df-SPY-2024-01-01T09:30:00-2024-05-14T16:00:00.parquet trades_df-BAC-2023-01-01T09_30_00-2024-05-25T16_00_00-47BCFOPUVWZ-100.parquet ohlcv_df-BAC-2024-01-11T09:30:00-2024-01-12T16:00:00.parquet trades_df-BAC-2024-10-03T09:30:00-2024-10-16T16:00:00-['4', '7', 'B', 'C', 'F', 'O', 'P', 'U', 'V', 'W', 'Z']-100.parquet trades_df-BAC-2023-01-01T09:30:00-2024-10-02T16:00:00-['4', '7', 'B', 'C', 'F', 'O', 'P', 'U', 'V', 'W', 'Z']-100.parquet trades_df-BAC-2024-05-15T09_30_00-2024-05-25T16_00_00-47BCFOPUVWZ-100.parquet ohlcv_df-BAC-2024-10-03T09:30:00-2024-10-16T16:00:00-['4', '7', 'B', 'C', 'F', 'O', 'P', 'U', 'V', 'W', 'Z']-100.parquet ohlcv_df-BAC-2023-01-01T09:30:00-2024-10-02T16:00:00-['4', '7', 'B', 'C', 'F', 'O', 'P', 'U', 'V', 'W', 'Z']-100.parquet ohlcv_df-BAC-2024-01-01T09_30_00-2024-05-25T16_00_00-47BCFOPUVWZ-100.parquet ohlcv_df-SPY-2024-01-01T09:30:00-2024-05-14T16:00:00.parquet ohlcv_df-BAC-2024-01-01T09_30_00-2024-05-14T16_00_00-CO4B7VPWUZF-100.parquet ohlcv_df-BAC-2023-01-01T09_30_00-2024-05-25T16_00_00-47BCFOPUVWZ-100.parquet ohlcv_df-BAC-2023-01-01T09_30_00-2024-05-25T15_30_00-47BCFOPUVWZ-100.parquet <class 'pandas.core.frame.DataFrame'> DatetimeIndex: 114097 entries, 2024-10-03 09:30:00-04:00 to 2024-10-16 15:59:59-04:00 Data columns (total 10 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 open 114097 non-null float64 1 high 114097 non-null float64 2 low 114097 non-null float64 3 close 114097 non-null float64 4 volume 114097 non-null float64 5 trades 114097 non-null float64 6 updated 114097 non-null datetime64[ns, US/Eastern] 7 vwap 114097 non-null float64 8 buyvolume 114097 non-null float64 9 sellvolume 114097 non-null float64 dtypes: datetime64[ns, US/Eastern](1), float64(9) memory usage: 9.6 MB
In [5]:
# Count distinct trading days in the loaded data (normalize() strips the time component).
basic_data.wrapper.index.normalize().nunique()
Out[5]:
10
Saving to DATABASE¶
In [6]:
%load_ext autoreload %autoreload 2 from lib.db import Connection SYMBOL = "BAC" SCHEMA = "ohlcv_1s" #time based 1s other options ohlcv_vol_200 (volume based ohlcv with resolution of 200), ohlcv_renko_20 (renko with 20 bricks size) ... DB = "market_data" try: con = Connection(db_name=DB, default_schema=SCHEMA, create_db=False) #con.force_rollback_all() # Force cleanup before attempting to save con.save(basic_data, chunksize=500) except Exception as e: print(f"Error occurred: {str(e)}") #63sec to insert remotely 5mio records
The autoreload extension is already loaded. To reload it, use: %reload_ext autoreload
In [15]:
# Inspect the loaded frame (dtype/row-count summary of the 1s bars).
basic_data.data["BAC"].info()
<class 'pandas.core.frame.DataFrame'> DatetimeIndex: 4549772 entries, 2023-01-03 09:30:01-05:00 to 2024-05-24 15:59:59-04:00 Data columns (total 10 columns): # Column Dtype --- ------ ----- 0 open float64 1 high float64 2 low float64 3 close float64 4 volume float64 5 trades float64 6 updated datetime64[ns, US/Eastern] 7 vwap float64 8 buyvolume float64 9 sellvolume float64 dtypes: datetime64[ns, US/Eastern](1), float64(9) memory usage: 381.8 MB
Add resample functions for the custom columns (buyvolume, sellvolume, trades) so they are summed correctly when resampling
Resample to different resolution¶
In [4]:
from vectorbtpro.utils.config import merge_dicts, Config, HybridConfig
from vectorbtpro import _typing as tp
from vectorbtpro.generic import nb as generic_nb


def _sum_resample_func(self, obj, resampler):
    """Aggregate a custom feature by summing when resampling (volume-like columns)."""
    return obj.vbt.resample_apply(resampler, generic_nb.sum_reduce_nb)


# The same sum aggregation applies to all three custom columns; build the
# config once instead of copy-pasting an identical lambda per feature.
_feature_config: tp.ClassVar[Config] = HybridConfig(
    {
        feature: dict(resample_func=_sum_resample_func)
        for feature in ("buyvolume", "sellvolume", "trades")
    }
)

basic_data._feature_config = _feature_config
In [5]:
# Columns carried into every resampled view (deduplicated from three copies).
PRICE_COLS = ['open', 'high', 'low', 'close', 'volume', 'vwap', 'buyvolume', 'trades', 'sellvolume']


def _rth_only(data):
    """Keep only regular trading hours (09:30-16:00) and drop empty bars."""
    return data.transform(lambda df: df.between_time('09:30', '16:00').dropna())


s1data = basic_data[PRICE_COLS]

# NOTE(review): despite the name, s5data holds 12-second bars ("12s").
s5data = _rth_only(s1data.resample("12s"))

t1data = _rth_only(basic_data[PRICE_COLS].resample("1T"))
# t1data.data["BAC"].info()

t30data = _rth_only(basic_data[PRICE_COLS].resample("30T"))
# t30data.data["BAC"].info()

# Convenience series used by later cells.
s1close = s1data.close
t1close = t1data.close
t30close = t30data.close
t30volume = t30data.volume
In [6]:
from lightweight_charts import JupyterChart, chart, Panel, PlotAccessor

# Quick interactive plot of the resampled closing prices.
s5data.close.lw.plot()

# Alternative: build a full panel chart.
# pane1 = Panel(
#     ohlcv=(s5data.ohlcv.get(),))
# ch = chart([pane1], title="Chart", sync=True, session=None, size="s")
In [6]:
# Preview the first rows of the 1-second view.
s1data.data["BAC"].head()
Out[6]:
<style scoped="">
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
| open | high | low | close | volume | vwap | buyvolume | trades | sellvolume | |
|---|---|---|---|---|---|---|---|---|---|
| time | |||||||||
| 2024-02-12 09:30:00-05:00 | 33.000 | 33.00 | 33.00 | 33.000 | 800.0 | 33.000000 | 0.0 | 2.0 | 0.0 |
| 2024-02-12 09:30:01-05:00 | 33.020 | 33.05 | 33.00 | 33.010 | 267365.0 | 33.020039 | 265765.0 | 25.0 | 1000.0 |
| 2024-02-12 09:30:02-05:00 | 33.000 | 33.02 | 33.00 | 33.015 | 1100.0 | 33.009545 | 500.0 | 9.0 | 600.0 |
| 2024-02-12 09:30:03-05:00 | 33.005 | 33.08 | 32.99 | 33.080 | 7508.0 | 33.027898 | 1970.0 | 32.0 | 3638.0 |
| 2024-02-12 09:30:05-05:00 | 33.060 | 33.06 | 33.06 | 33.060 | 500.0 | 33.060000 | 0.0 | 5.0 | 100.0 |
In [7]:
# Realign the 30-minute closes onto the 1-second index.
resampler = vbt.Resampler(t30data.index, s1data.index, source_freq="30T", target_freq="1s")
t30close_realigned = t30close.vbt.realign_closing(resampler)

# Same realignment for the 1-minute closes.
resampler_s = vbt.Resampler(t1data.index, s1data.index, source_freq="1T", target_freq="1s")
t1close_realigned = t1close.vbt.realign_closing(resampler_s)