vectorbtdoc

This commit is contained in:
David Brazda
2024-04-16 15:53:51 +02:00
parent 919ddf2238
commit 074b6feaf8
4 changed files with 144885 additions and 1 deletion

research/basic.ipynb (new file, 144566 additions)

File diff suppressed because one or more lines are too long

@@ -347,6 +347,7 @@
 <th>testlist_id</th>
 <th>Running</th>
 <th>RunnerId</th>
+<th>Market</th>
 </tr>
 </thead>
 <tbody></tbody>


@@ -45,7 +45,8 @@ function initialize_runmanagerRecords() {
 {data: 'valid_to', visible: true},
 {data: 'testlist_id', visible: true},
 {data: 'strat_running', visible: true},
 {data: 'runner_id', visible: true},
+{data: 'market', visible: true},
 ],
 paging: true,
 processing: true,


@@ -0,0 +1,316 @@
import matplotlib
import matplotlib.dates as mdates
#matplotlib.use('Agg')  # Set the Matplotlib backend to 'Agg'
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from datetime import datetime
from typing import Tuple, Optional, List
from enum import Enum
import numpy as np
import v2realbot.controller.services as cs
from rich import print
from v2realbot.common.model import AnalyzerInputs
from v2realbot.common.PrescribedTradeModel import TradeDirection, TradeStatus, Trade, TradeStoplossType
from v2realbot.utils.utils import isrising, isfalling, zoneNY, price2dec, safe_get  #, print
from pathlib import Path
from v2realbot.config import WEB_API_KEY, DATA_DIR, MEDIA_DIRECTORY
from v2realbot.enums.enums import RecordType, StartBarAlign, Mode, Account, OrderSide
from io import BytesIO
from v2realbot.utils.historicals import get_historical_bars
from alpaca.data.timeframe import TimeFrame, TimeFrameUnit
from collections import defaultdict, Counter
from scipy.stats import zscore
import vectorbtpro as vbt

# Space same-day bars evenly: each timestamp becomes the previous one plus
# `resolution` seconds; the first bar of a day is left unchanged
def adjust_datetime_iteratively(df, resolution):
    adjusted_times = []
    for i, current_time in enumerate(df.index):
        if i == 0:
            # The first entry is unchanged
            adjusted_times.append(current_time)
            continue
        previous_time = adjusted_times[-1]
        # Check if it's the same day
        if previous_time.date() == current_time.date():
            # Add resolution to the previous datetime
            adjusted_time = previous_time + pd.Timedelta(seconds=resolution)
        else:
            # Different day, leave it as is
            adjusted_time = current_time
        adjusted_times.append(adjusted_time)
    # Update DataFrame index
    df.index = pd.DatetimeIndex(adjusted_times)
    return df
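
# A minimal usage sketch (toy values, not real data): two same-day bars
# 41 seconds apart get re-spaced to exactly `resolution` seconds apart.
# toy = pd.DataFrame(
#     {"Close": [1.0, 2.0]},
#     index=pd.DatetimeIndex(["2024-03-12 09:30:00", "2024-03-12 09:30:41"]),
# )
# toy = adjust_datetime_iteratively(toy, resolution=23)
# print(toy.index)  # second timestamp becomes 09:30:23 (previous + 23s)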

def convert_to_dataframe(ohlcv):
    """
    Convert a dictionary containing OHLCV data into a pandas DataFrame.

    Parameters:
        ohlcv (dict): Dictionary containing OHLCV data.
            It should have keys 'time', 'open', 'high', 'low', 'close', 'volume', 'updated'.
            'time' should be a list of float timestamps.
            'updated' should be a list of Python datetimes in the UTC time zone.

    Returns:
        pd.DataFrame: DataFrame containing the OHLCV data with the index converted to East coast US time.
    """
    # if an 'index' key exists, rename it to 'custom_index' so it does not clash with pandas
    try:
        if ohlcv.get('index', False):
            ohlcv['custom_index'] = ohlcv.pop('index')
    except Exception:
        pass
    # keys that should not be converted to title case
    keys_not_to_upper = ["time", "updated"]
    # Update keys not in the exclusion list
    for key in list(ohlcv.keys()):  # Iterate over a copy of the keys
        if key not in keys_not_to_upper:
            ohlcv[key.title()] = ohlcv.pop(key)
    # Create DataFrame from the dictionary
    df = pd.DataFrame(ohlcv)
    # Convert 'time' to datetime and set as index
    df['time'] = pd.to_datetime(df['time'], unit='s', utc=True)
    df.set_index('time', inplace=True)
    # Convert index to East coast US time zone
    df.index = df.index.tz_convert('US/Eastern')
    if 'updated' in df.columns:
        df['updated'] = pd.to_datetime(df['updated'], unit='s', utc=True)
        df['updated'] = df['updated'].dt.tz_convert('US/Eastern')
    return df
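
# A minimal usage sketch (assumed toy values): the dict shape described in the
# docstring, with float UNIX timestamps; the resulting index is US/Eastern.
# sample = {
#     "time": [1700000000.0, 1700000023.0],
#     "open": [30.0, 30.1], "high": [30.2, 30.3],
#     "low": [29.9, 30.0], "close": [30.1, 30.2],
#     "volume": [1000, 1200],
# }
# sample_df = convert_to_dataframe(sample)
# print(sample_df.index.tz)  # US/Eastern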

def load_batch(runner_ids: List = None, batch_id: str = None, space_resolution_evenly=False, main_session_only=True, merge_ind2bars=True, bars_columns=['Open', 'High', 'Low', 'Close', 'Volume', 'Vwap'], indicators_columns=[]) -> Tuple[int, dict]:
    """Load a batch (all runners from a single batch) into pandas DataFrames.

    Args:
        runner_ids (List, optional): A list of runner identifiers (e.g., stock tickers). Defaults to None.
        batch_id (str, optional): The ID of a specific batch to retrieve. Defaults to None.
        merge_ind2bars (bool, optional): Merge indicators into the bars DataFrame. Defaults to True.
        bars_columns (list, optional): Columns to keep in the bars DataFrame. Defaults to ['Open', 'High', 'Low', 'Close', 'Volume', 'Vwap'].
        indicators_columns (list, optional): Columns to keep in the indicators DataFrame. Defaults to an empty list.
        space_resolution_evenly (bool, optional): If True, alters the index so it is spaced evenly at the batch resolution (in seconds). Defaults to False.
        main_session_only (bool, optional): If True, keeps only bars between 9:30 and 16:00. Defaults to True.

    Returns:
        Tuple[int, dict]: A tuple containing:
            * An integer status code (0 on success, negative on error).
            * A dictionary with keys 'bars', 'indicators' and 'cbar_indicators', each holding a pandas DataFrame (or an error message string on failure).
    """
    if runner_ids is None and batch_id is None:
        return -2, "runner_id or batch_id must be present"
    if batch_id is not None:
        res, runner_ids = cs.get_archived_runnerslist_byBatchID(batch_id)
        if res != 0:
            print(f"no batch {batch_id} found")
            return -1, f"no batch {batch_id} found"
    # DATA PREPARATION
    bars = None
    indicators = None
    cnt = 0
    dfs = dict(bars=[], indicators=[], cbar_indicators=[])
    resolution = None
    for id in runner_ids:
        cnt += 1
        # get runner detail
        res, sada = cs.get_archived_runner_details_byID(id)
        if res != 0:
            print(f"no runner {id} found")
            return -1, f"no runner {id} found"
        if resolution is None:
            resolution = sada["bars"]["resolution"][0]
            print(f"Resolution: {resolution}")
        # add daily bars limited to the required columns; keep 'updated' as the mapping column to indicators
        bars = convert_to_dataframe(sada["bars"])[bars_columns + ["updated"]]
        #bars = bars.loc[:, bars_columns]
        indicators = convert_to_dataframe(sada["indicators"][0])[indicators_columns]
        # join indicators to the bars DataFrame
        if merge_ind2bars:
            # merge: 'time' in indicators corresponds to 'updated' in bars
            bars = bars.reset_index()
            bars = pd.merge(bars, indicators, left_on="updated", right_on="time", how="left")
            bars = bars.set_index("time")
        else:
            dfs["indicators"].append(indicators)
        # drop 'updated' as the mapping column
        #bars = bars.drop("updated", axis=1)
        dfs["bars"].append(bars)
        #indicators = sada["indicators"][0]
        #cbar_indicators = sada["indicators"][1]
    # merge all days into a single DataFrame per key
    for key in dfs:
        if len(dfs[key]) > 0:
            concat_df = pd.concat(dfs[key], axis=0)
            concat_df = concat_df.between_time('9:30', '16:00') if main_session_only else concat_df
            # Count the number of duplicates (excluding the first occurrence)
            num_duplicates = concat_df.index.duplicated().sum()
            if num_duplicates > 0:
                print(f"NOTE: DUPLICATES {num_duplicates}/{len(concat_df)} in {key}. REMOVING.")
                concat_df = concat_df[~concat_df.index.duplicated()]
                num_duplicates = concat_df.index.duplicated().sum()
                print(f"Now there are {num_duplicates}/{len(concat_df)}")
            if space_resolution_evenly and key != "cbar_indicators":
                # Space the datetime index evenly according to the resolution (in seconds)
                concat_df = adjust_datetime_iteratively(concat_df, resolution)
            dfs[key] = concat_df
    return 0, dfs
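
# A usage sketch for the non-merged path (hypothetical runner ids): with
# merge_ind2bars=False the indicators stay in their own DataFrame.
# res, dfs = load_batch(runner_ids=["runner-1", "runner-2"], merge_ind2bars=False)
# if res == 0:
#     bars_df = dfs["bars"]
#     indicators_df = dfs["indicators"]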

if __name__ == "__main__":
    res, df = load_batch(batch_id="e44a5075", space_resolution_evenly=True, indicators_columns=["Rsi14"], main_session_only=False)
    if res < 0:
        print("Error " + str(res) + " " + str(df))
        raise SystemExit(res)
    print(df)
    df = df["bars"]
    print(df.info(), df.head())
    # filter columns
    #columns_to_keep = ['Open', 'High', 'Low', 'Close', 'Volume', 'Vwap']
    #df = df.loc[:, columns_to_keep]
    #df = df.rename(columns={'index': 'custom_index'})
    print(df.info(), df.head(), df.describe())
    # filter times
    #df = df.between_time('9:30', '16:00')
    print(df.info())
    # Set the frequency to 23 seconds
    #df.index.freq = pd.tseries.offsets.Second(23)
    # Check the frequency of the index
    # Resample and aggregate the data
    # resampled_df = df.resample('23S').agg({
    #     'open': 'first',
    #     'high': 'max',
    #     'low': 'min',
    #     'close': 'last',
    #     'volume': 'sum'
    # })
    #df.index.freq = pd.infer_freq(df.index)
    #print(df.index.freq)
    # Set the frequency of the index explicitly - if a standard one (like 1T) exists; otherwise a custom frequency would be used
    #df.index.freq = pd.date_range(start=df.index[0], periods=len(df), freq='23S')
    print(df.info())
    vbt.settings.set_theme("dark")
    vbt.settings['plotting']['layout']['width'] = 1280
    vbt.settings.plotting.auto_rangebreaks = True
    # load into a vbt Data object with the symbol as the column key
    bar_data = vbt.Data.from_data({"BAC": df}, tz_convert="US/Eastern")
    print(bar_data)
    print(bar_data.close)
    print(bar_data.data["BAC"]["Rsi14"])
    bar_data.data["BAC"]["Rsi14"].vbt.plot().show()
    print(bar_data["Rsi14"])
    # OHLCV plot (2x1 subplots)
    bar_data.data["BAC"].vbt.ohlcv.plot().show()
    # create 3x1 subplots (OHLCV + RSI)
    # fig = vbt.make_subplots(rows=3, cols=1)
    # bar_data.data["BAC"].vbt.ohlcv.plot(add_trace_kwargs=dict(row=1, col=1), fig=fig)
    # bar_data.data["BAC"]["Rsi14"].vbt.plot(add_trace_kwargs=dict(row=3, col=1), fig=fig)
    # fig.show()
    # create subplots with an alternate Y axis - RSI overlay
    fig1 = vbt.make_subplots(specs=[[{"secondary_y": True}]])
    bar_data.data["BAC"]["Close"].vbt.plot(add_trace_kwargs=dict(secondary_y=False), fig=fig1)
    bar_data.data["BAC"].vbt.plot(add_trace_kwargs=dict(secondary_y=True), fig=fig1)
    fig1.show()
    puv_df = bar_data.data["BAC"]
    bar_data23s = bar_data[["Open", "High", "Low", "Close", "Volume"]]
    print(bar_data23s)
    # resample with pandas, then wrap the result back into a vbt Data object
    resampled_46s = bar_data23s.get().resample("46s").agg({
        "Open": "first",
        "High": "max",
        "Low": "min",
        "Close": "last",
        "Volume": "sum"
    })
    print(resampled_46s)
    bar_data46s = vbt.Data.from_data({"BAC": resampled_46s}, tz_convert="US/Eastern")
    res_data = bar_data46s.data["BAC"]
    #bar_data23s.data["BAC"].ptable()
    #bar_data23s = bar_data.resample("23S")
    print(bar_data46s)
    print(bar_data46s.close)
    vbt.settings.plotting.auto_rangebreaks = True
    bar_data46s.data["BAC"].vbt.ohlcv.plot().show()
    # TARGET DAYS - a single day or a range
    # Target Date
    #target_date = pd.to_datetime('2023-10-12', tz='US/Eastern')
    # Date Range
    start_date = pd.to_datetime('2024-03-12')
    #end_date = pd.to_datetime('2023-10-14')
    new_data = bar_data.transform(lambda d: d[d.index.date == start_date.date()])
    #range: filtered_data = data[(data.index >= start_date) & (data.index <= end_date)]
    print(new_data)
    new_data.data["BAC"].vbt.ohlcv.plot().show()
    # Filtering by RANGE or DAY
    # filtered_data = data[(data.index >= start_date) & (data.index <= end_date)]
    # filtered_data = data[data.index.date == target_date.date()]
    # custom aggregation
    # ohlcv_agg = pd.DataFrame({
    #     'Open': df.resample('1T')['Open'].first(),
    #     'High': df.resample('1T')['High'].max(),
    #     'Low': df.resample('1T')['Low'].min(),
    #     'Close': df.resample('1T')['Close'].last(),
    #     'Volume': df.resample('1T')['Volume'].sum()
    # })
    # Define a custom frequency with a timedelta of 23 seconds
    # custom_frequency = pd.tseries.offsets.DateOffset(seconds=23)
    # # Create a new DataFrame with the desired frequency
    # new_index = pd.date_range(start=df.index[0], end=df.index[-1], freq=custom_frequency)
    # new_df = pd.DataFrame(index=new_index)
    # # Reindex the DataFrame
    # df = df.reindex(new_df.index)
    # # Now you can check the frequency of the index
    # print(df.index.freq)