vectorbtdoc
144566 research/basic.ipynb Normal file
File diff suppressed because one or more lines are too long
@@ -347,6 +347,7 @@
    <th>testlist_id</th>
    <th>Running</th>
    <th>RunnerId</th>
    <th>Market</th>
  </tr>
</thead>
<tbody></tbody>
@@ -45,7 +45,8 @@ function initialize_runmanagerRecords() {
      {data: 'valid_to', visible: true},
      {data: 'testlist_id', visible: true},
      {data: 'strat_running', visible: true},
      {data: 'runner_id', visible: true},
      {data: 'market', visible: true},
    ],
    paging: true,
    processing: true,
316 v2realbot/tools/loadbatch.py Normal file
@@ -0,0 +1,316 @@
import matplotlib
import matplotlib.dates as mdates
#matplotlib.use('Agg') # Set the Matplotlib backend to 'Agg'
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from datetime import datetime
from enum import Enum
import numpy as np
import v2realbot.controller.services as cs
from rich import print
from v2realbot.common.model import AnalyzerInputs
from v2realbot.common.PrescribedTradeModel import TradeDirection, TradeStatus, Trade, TradeStoplossType
from v2realbot.utils.utils import isrising, isfalling, zoneNY, price2dec, safe_get #, print
from pathlib import Path
from v2realbot.config import WEB_API_KEY, DATA_DIR, MEDIA_DIRECTORY
from v2realbot.enums.enums import RecordType, StartBarAlign, Mode, Account, OrderSide
from io import BytesIO
from v2realbot.utils.historicals import get_historical_bars
from alpaca.data.timeframe import TimeFrame, TimeFrameUnit
from collections import defaultdict, Counter
from scipy.stats import zscore
from typing import Tuple, Optional, List
import vectorbtpro as vbt


# Make the datetime index evenly spaced within a day: every timestamp after the
# first becomes the previous (adjusted) timestamp plus `resolution` seconds
# (e.g. 23 seconds for 23 s bars); the first bar of each day keeps its original time.
def adjust_datetime_iteratively(df, resolution):
    adjusted_times = []
    for i, current_time in enumerate(df.index):
        if i == 0:
            # The first entry is unchanged
            adjusted_times.append(current_time)
            continue

        previous_time = adjusted_times[-1]
        # Check if it's the same day
        if previous_time.date() == current_time.date():
            # Add resolution to the previous datetime
            adjusted_time = previous_time + pd.Timedelta(seconds=resolution)
        else:
            # Different day, leave it as is
            adjusted_time = current_time

        adjusted_times.append(adjusted_time)

    # Update DataFrame index
    df.index = pd.DatetimeIndex(adjusted_times)
    return df
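
# A minimal sketch of the effect (hypothetical timestamps, kept commented so it does
# not run on import): three same-day bars with an irregular gap come out evenly
# spaced at the given resolution.
# idx = pd.DatetimeIndex(["2024-03-12 09:30:00", "2024-03-12 09:30:23",
#                         "2024-03-12 09:31:05"], tz="US/Eastern")
# example = pd.DataFrame({"Close": [1.0, 2.0, 3.0]}, index=idx)
# adjust_datetime_iteratively(example, 23)
# # index becomes 09:30:00, 09:30:23, 09:30:46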


def convert_to_dataframe(ohlcv):
    """
    Convert a dictionary containing OHLCV data into a pandas DataFrame.

    Parameters:
        ohlcv (dict): Dictionary containing OHLCV data.
            It should have keys 'time', 'open', 'high', 'low', 'close', 'volume', 'updated'.
            'time' should be a list of float timestamps.
            'updated' should be a list of Python datetimes in the UTC time zone.

    Returns:
        pd.DataFrame: DataFrame containing the OHLCV data with the index converted to US East Coast time.
    """

    # If a key named 'index' exists, rename it to 'custom_index' so it does not clash with pandas
    try:
        if ohlcv.get('index', False):
            ohlcv['custom_index'] = ohlcv.pop('index')
    except Exception:
        pass

    # keys that should not get an uppercase first letter
    keys_not_to_upper = ["time", "updated"]

    # Title-case the keys not in the exclusion list (e.g. 'open' -> 'Open')
    for key in list(ohlcv.keys()):  # Iterate over a copy of the keys
        if key not in keys_not_to_upper:
            ohlcv[key.title()] = ohlcv.pop(key)

    # Create DataFrame from the dictionary
    df = pd.DataFrame(ohlcv)

    # Convert 'time' to datetime and set as index
    df['time'] = pd.to_datetime(df['time'], unit='s', utc=True)
    df.set_index('time', inplace=True)
    # Convert the index to the US/Eastern time zone
    df.index = df.index.tz_convert('US/Eastern')
    if 'updated' in df.columns:
        df['updated'] = pd.to_datetime(df['updated'], unit='s', utc=True)
        df['updated'] = df['updated'].dt.tz_convert('US/Eastern')

    return df
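
# A minimal sketch of the expected input (hypothetical values): float epoch seconds
# in 'time', UTC datetimes in 'updated'; the result has title-cased OHLCV columns
# and a tz-aware US/Eastern index.
# ohlcv = {
#     "time": [1710253800.0, 1710253823.0],
#     "open": [55.1, 55.2], "high": [55.3, 55.4],
#     "low": [55.0, 55.1], "close": [55.2, 55.3],
#     "volume": [1200, 900],
#     "updated": [datetime(2024, 3, 12, 14, 30, 23), datetime(2024, 3, 12, 14, 30, 46)],
# }
# convert_to_dataframe(ohlcv)  # -> columns Open/High/Low/Close/Volume/updated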


def load_batch(runner_ids: List = None, batch_id: str = None, space_resolution_evenly=False, main_session_only=True, merge_ind2bars=True, bars_columns=['Open', 'High', 'Low', 'Close', 'Volume', 'Vwap'], indicators_columns=[]) -> Tuple[int, dict]:
    """Load a batch (all runners from a single batch) into pandas DataFrames.

    Args:
        runner_ids (List, optional): A list of runner identifiers (e.g., stock tickers). Defaults to None.
        batch_id (str, optional): The ID of a specific batch to retrieve. Defaults to None.
        space_resolution_evenly (bool, optional): If True, the index is adjusted so it is spaced evenly at the bar resolution reported by the runner. Defaults to False.
        main_session_only (bool, optional): If True, keep only bars between 9:30 and 16:00. Defaults to True.
        merge_ind2bars (bool, optional): Merge indicators into the bars DataFrame. Defaults to True.
        bars_columns (list, optional): List of columns to keep in the bars DataFrame. Defaults to ['Open', 'High', 'Low', 'Close', 'Volume', 'Vwap'].
        indicators_columns (list, optional): List of columns to keep in the indicators DataFrame. Defaults to an empty list.

    Returns:
        Tuple[int, dict]: A tuple containing:
            * A status code: 0 on success, negative on error.
            * A dictionary with keys 'bars', 'indicators' and 'cbar_indicators', each holding a pandas DataFrame (or an error message string on failure).
    """

    if runner_ids is None and batch_id is None:
        return -2, "runner_id or batch_id must be present"

    if batch_id is not None:
        res, runner_ids = cs.get_archived_runnerslist_byBatchID(batch_id)

        if res != 0:
            print(f"no batch {batch_id} found")
            return -1, f"no batch {batch_id} found"

    # DATA PREPARATION
    bars = None
    indicators = None
    cnt = 0
    dfs = dict(bars=[], indicators=[], cbar_indicators=[])
    resolution = None
    for id in runner_ids:
        cnt += 1
        # get runner detail
        res, sada = cs.get_archived_runner_details_byID(id)
        if res != 0:
            print(f"no runner {id} found")
            return -1, f"no runner {id} found"

        if resolution is None:
            resolution = sada["bars"]["resolution"][0]
            print(f"Resolution : {resolution}")

        # add daily bars limited to the required columns; keep 'updated' since it maps bars to indicators
        bars = convert_to_dataframe(sada["bars"])[bars_columns + ["updated"]]
        #bars = bars.loc[:, bars_columns]

        indicators = convert_to_dataframe(sada["indicators"][0])[indicators_columns]

        # join indicators to the bars dataframe
        if merge_ind2bars:
            # merge: 'time' in indicators corresponds to 'updated' in bars
            bars = bars.reset_index()
            bars = pd.merge(bars, indicators, left_on="updated", right_on="time", how="left")
            bars = bars.set_index("time")
        else:
            dfs["indicators"].append(indicators)

        # drop 'updated' as the mapping column
        #bars = bars.drop("updated", axis=1)
        dfs["bars"].append(bars)

        #indicators = sada["indicators"][0]
        #cbar_indicators = sada["indicators"][1]

    # merge all days into a single df
    for key in dfs:
        if len(dfs[key]) > 0:
            concat_df = pd.concat(dfs[key], axis=0)
            concat_df = concat_df.between_time('9:30', '16:00') if main_session_only else concat_df

            # Count the number of duplicates (excluding the first occurrence)
            num_duplicates = concat_df.index.duplicated().sum()

            if num_duplicates > 0:
                print(f"NOTE: DUPLICATES {num_duplicates}/{len(concat_df)} in {key}. REMOVING.")
                concat_df = concat_df[~concat_df.index.duplicated()]

                num_duplicates = concat_df.index.duplicated().sum()
                print(f"Now there are {num_duplicates}/{len(concat_df)}")

            if space_resolution_evenly and key != "cbar_indicators":
                # Adjust the datetime index so it is evenly spaced at `resolution` seconds
                concat_df = adjust_datetime_iteratively(concat_df, resolution)

            dfs[key] = concat_df
    return 0, dfs
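
# A minimal consumption sketch (hypothetical call; assumes merge_ind2bars=True, the
# default, so requested indicators land directly in the bars frame):
# res, dfs = load_batch(batch_id="e44a5075", indicators_columns=["Rsi14"])
# if res == 0:
#     bars = dfs["bars"]  # OHLCV + Vwap + updated + Rsi14, US/Eastern index
#     data = vbt.Data.from_data({"BAC": bars}, tz_convert="US/Eastern")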


if __name__ == "__main__":
    res, df = load_batch(batch_id="e44a5075", space_resolution_evenly=True, indicators_columns=["Rsi14"], main_session_only=False)
    if res < 0:
        print("Error " + str(res) + " " + str(df))
        raise SystemExit(res)
    print(df)
    df = df["bars"]
    print(df.info(), df.head())
    #filter columns
    #columns_to_keep = ['Open', 'High', 'Low', 'Close', 'Volume', 'Vwap']
    #df = df.loc[:, columns_to_keep]
    #df = df.rename(columns={'index': 'custom_index'})
    print(df.info(), df.head(), df.describe())
    #filter times
    #df = df.between_time('9:30', '16:00')
    print(df.info())

    # Set the frequency to 23 seconds
    #df.index.freq = pd.tseries.offsets.Second(23)
    # Check the frequency of the index

    # Resample and aggregate the data
    # resampled_df = df.resample('23S').agg({
    #     'open': 'first',
    #     'high': 'max',
    #     'low': 'min',
    #     'close': 'last',
    #     'volume': 'sum'
    # })

    #df.index.freq = pd.infer_freq(df.index)
    #print(df.index.freq)

    # Set the frequency of the index explicitly - works only when a standard frequency (e.g. 1T) fits; otherwise a custom frequency has to be used
    #df.index.freq = pd.date_range(start=df.index[0], periods=len(df), freq='23S')

    print(df.info())

    vbt.settings.set_theme("dark")
    vbt.settings['plotting']['layout']['width'] = 1280
    vbt.settings.plotting.auto_rangebreaks = True
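    # Note on the setting above (my reading, worth verifying against the vectorbtpro
    # docs): auto_rangebreaks makes time-axis plots skip ranges with no bars, such
    # as overnight and weekend gaps, instead of drawing long empty stretches.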

    # load the symbol into vbt, symbol as column
    bar_data = vbt.Data.from_data({"BAC": df}, tz_convert="US/Eastern")
    print(bar_data)
    print(bar_data.close)

    print(bar_data.data["BAC"]["Rsi14"])
    bar_data.data["BAC"]["Rsi14"].vbt.plot().show()
    print(bar_data["Rsi14"])


    # ohlcv plot (subplot 2x1)
    bar_data.data["BAC"].vbt.ohlcv.plot().show()

    # create two subplots 3x1 (ohlcv + RSI)
    # fig = vbt.make_subplots(rows=3, cols=1)
    # bar_data.data["BAC"].vbt.ohlcv.plot(add_trace_kwargs=dict(row=1, col=1), fig=fig)
    # bar_data.data["BAC"]["Rsi14"].vbt.plot(add_trace_kwargs=dict(row=3, col=1), fig=fig)
    # fig.show()

    # create subplots with an alternate Y axis - RSI overlay
    fig1 = vbt.make_subplots(specs=[[{"secondary_y": True}]])
    bar_data.data["BAC"]["Close"].vbt.plot(add_trace_kwargs=dict(secondary_y=False), fig=fig1)
    bar_data.data["BAC"]["Rsi14"].vbt.plot(add_trace_kwargs=dict(secondary_y=True), fig=fig1)
    fig1.show()

    puv_df = bar_data.data["BAC"]  # keep the original per-symbol frame

    bar_data23s = bar_data[["Open", "High", "Low", "Close", "Volume"]]
    print(bar_data23s)
    # resample: pull the raw frame out of vbt and downsample it with pandas
    bar_data46s_df = bar_data23s.get().resample("46s").agg({
        "Open": "first",
        "High": "max",
        "Low": "min",
        "Close": "last",
        "Volume": "sum"
    })

    print(bar_data46s_df)
    # wrap the resampled frame back into a vbt.Data object so .data and .close work again
    bar_data46s = vbt.Data.from_data({"BAC": bar_data46s_df}, tz_convert="US/Eastern")
    res_data = bar_data46s.data["BAC"]
    #bar_data23s.data["BAC"].ptable()
    #bar_data23s = bar_data.resample("23S")
    print(bar_data46s)
    print(bar_data46s.close)
    vbt.settings.plotting.auto_rangebreaks = True
    bar_data46s.data["BAC"].vbt.ohlcv.plot().show()
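
    # Why those aggregators (standard OHLCV downsampling, not vbt-specific): each
    # 46 s bucket takes the first Open, the max High, the min Low, the last Close
    # and the summed Volume of the 23 s bars inside it, so no range or volume is lost.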

    # TARGET DAYS - only one day or a range
    # Target Date
    #target_date = pd.to_datetime('2023-10-12', tz='US/Eastern')

    # Date Range
    start_date = pd.to_datetime('2024-03-12')
    #end_date = pd.to_datetime('2023-10-14')

    new_data = bar_data.transform(lambda df: df[df.index.date == start_date.date()])
    #range: filtered_data = data[(data.index >= start_date) & (data.index <= end_date)]

    print(new_data)
    new_data.data["BAC"].vbt.ohlcv.plot().show()


    # Filtering RANGE or DAY
    # filtered_data = data[(data.index >= start_date) & (data.index <= end_date)]
    # filtered_data = data[data.index.date == target_date.date()]
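
    # A date-range variant of the same transform pattern (hypothetical dates; the
    # date() comparison sidesteps tz-aware vs naive timestamp issues):
    # start, end = pd.to_datetime('2024-03-12'), pd.to_datetime('2024-03-14')
    # ranged = bar_data.transform(
    #     lambda d: d[(d.index.date >= start.date()) & (d.index.date <= end.date())])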


    # custom aggregation
    # ohlcv_agg = pd.DataFrame({
    #     'Open': df.resample('1T')['Open'].first(),
    #     'High': df.resample('1T')['High'].max(),
    #     'Low': df.resample('1T')['Low'].min(),
    #     'Close': df.resample('1T')['Close'].last(),
    #     'Volume': df.resample('1T')['Volume'].sum()
    # })

    # Define a custom frequency with a timedelta of 23 seconds
    # custom_frequency = pd.tseries.offsets.DateOffset(seconds=23)

    # # Create a new DataFrame with the desired frequency
    # new_index = pd.date_range(start=df.index[0], end=df.index[-1], freq=custom_frequency)
    # new_df = pd.DataFrame(index=new_index)

    # # Reindex the DataFrame
    # df = df.reindex(new_df.index)

    # # Now you can check the frequency of the index
    # print(df.index.freq)