First LSTM draft

David Brazda
2023-09-26 15:52:33 +02:00
parent b2365cc318
commit 940348412f
11 changed files with 827 additions and 15 deletions

testy/ml/test.py Normal file

@@ -0,0 +1,79 @@
import numpy as np
from sklearn.preprocessing import StandardScaler
from keras.models import Sequential
from keras.layers import LSTM, Dense
from v2realbot.controller.services import get_archived_runner_details_byID
from v2realbot.common.model import RunArchiveDetail
import json
runner_id = "838e918e-9be0-4251-a968-c13c83f3f173"
result = None
res, set = get_archived_runner_details_byID(runner_id)
if res == 0:
print("ok")
else:
print("error",res,set)
bars = set["bars"]
indicators = set["indicators"]
#print("bars",bars)
#print("indicators",indicators)
def scale_and_transform_data(bars, indicators):
    """Scales and transforms the `bars` and `indicators` dictionaries for use in an RNN time-series prediction model.
    Args:
        bars: A dictionary containing OHLCV values and a timestamp.
        indicators: A dictionary containing additional indicators and a timestamp.
    Returns:
        (X, y) tuples for the training, validation, and test sets.
    """
    # Combine the two dictionaries
    #combined_data = {**bars, **indicators}
    bar_data = np.column_stack((bars["time"], bars['high'], bars['low'], bars['volume'], bars['close'], bars['open']))
    # Scale the data
    scaler = StandardScaler()
    scaled_data = scaler.fit_transform(bar_data)
    # Create windows of 100 steps; the target is the scaled close (column 4) of the following bar
    sequences = []
    targets = []
    for i in range(len(scaled_data) - 100):
        sequences.append(scaled_data[i:i + 100])
        targets.append(scaled_data[i + 100, 4])
    sequences = np.asarray(sequences)
    targets = np.asarray(targets)
    # Split the data into training, validation, and test sets (80/10/10, unshuffled)
    n_train = int(len(sequences) * 0.8)
    n_val = int(len(sequences) * 0.9)
    train = (sequences[:n_train], targets[:n_train])
    val = (sequences[n_train:n_val], targets[n_train:n_val])
    test = (sequences[n_val:], targets[n_val:])
    return train, val, test
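# Usage sketch (illustrative numbers, not part of the original draft): with
# 1000 bars there are 900 windows of shape (100, 6), split 720/90/90:
# (X_tr, y_tr), (X_va, y_va), (X_te, y_te) = scale_and_transform_data(bars, indicators)
# X_tr.shape == (720, 100, 6); y_tr.shape == (720,)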
#Scale and transform the data
(X_train, y_train), (X_val, y_val), (X_test, y_test) = scale_and_transform_data(bars, indicators)
# The stacked windows are already 3D: (samples, time steps, features)
# Define the RNN model
model = Sequential()
model.add(LSTM(128, input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(Dense(1))
# Compile the model
model.compile(loss='mse', optimizer='adam')
# Train the model on the sequence data, validating on the held-out split
model.fit(X_train, y_train, epochs=100, validation_data=(X_val, y_val))
# Make a prediction for the next data point
prediction = model.predict(X_test[-1:])
# Print the prediction
print(prediction)

testy/ml/test2.py Normal file

@@ -0,0 +1,81 @@
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import LSTM, Dense
import matplotlib.pyplot as plt
from v2realbot.controller.services import get_archived_runner_details_byID
from v2realbot.common.model import RunArchiveDetail
# Sample data (replace this with your actual OHLCV data)
bars = {
    'time': [1, 2, 3, 4, 5],
    'high': [10, 11, 12, 13, 14],
    'low': [8, 9, 7, 6, 8],
    'volume': [1000, 1200, 900, 1100, 1300],
    'close': [9, 10, 11, 12, 13],
    'open': [9, 10, 8, 8, 8],
    'resolution': [1, 1, 1, 1, 1]
}
indicators = {
    'time': [1, 2, 3, 4, 5],
    'fastslope': [90, 95, 100, 110, 115],
    'ema': [1000, 1200, 900, 1100, 1300]
}
# Features and target
ohlc_features = ['high', 'low', 'volume', 'open', 'close']
indicator_features = ['fastslope']
target = 'close'
# Prepare the data for bars and indicators
bar_data = np.column_stack([bars[feature] for feature in ohlc_features])
indicator_data = np.column_stack([indicators[feature] for feature in indicator_features])
combined_data = np.column_stack([bar_data, indicator_data])
target_data = np.column_stack([bars[target]])
print(f"{combined_data=}")
print(f"{target_data=}")
# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(combined_data, target_data, test_size=0.25, random_state=42)
# Standardize the data with separate scalers for features and target
scalerX = StandardScaler()
scalerY = StandardScaler()
X_train = scalerX.fit_transform(X_train)
y_train = scalerY.fit_transform(y_train)
# Reshape the input data for LSTM to have an additional dimension for the number of time steps
X_train = X_train.reshape((X_train.shape[0], 1, X_train.shape[1]))
# Define the input shape of the LSTM layer dynamically based on the reshaped X_train value
input_shape = (X_train.shape[1], X_train.shape[2])
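# Shape sketch (added illustration, not in the original draft): the reshape
# adds a single time-step axis, e.g. (3, 6) -> (3, 1, 6), so input_shape is (1, 6).
assert np.zeros((3, 6)).reshape((3, 1, 6)).shape == (3, 1, 6)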
# Build the LSTM model
model = Sequential()
model.add(LSTM(128, input_shape=input_shape))
model.add(Dense(1))
# Compile the model
model.compile(loss='mse', optimizer='adam')
# Train the model
model.fit(X_train, y_train, epochs=500)
# Evaluate the model on the test set
# Scale and reshape the test data to the same structure the model was trained on
X_test = scalerX.transform(X_test)
X_test = X_test.reshape((X_test.shape[0], 1, X_test.shape[1]))
y_pred = model.predict(X_test)
y_pred = scalerY.inverse_transform(y_pred)
mse = mean_squared_error(y_test, y_pred)
print('Test MSE:', mse)
# Plot the predicted vs. actual close prices
plt.plot(y_test, label='Actual')
plt.plot(y_pred, label='Predicted')
plt.legend()
plt.show()


@@ -0,0 +1,208 @@
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from keras.models import Sequential, load_model
from keras.layers import LSTM, Dense
import matplotlib.pyplot as plt
from v2realbot.controller.services import get_archived_runner_details_byID
from v2realbot.common.model import RunArchiveDetail
from v2realbot.config import DATA_DIR
from v2realbot.utils.utils import slice_dict_lists
from collections import defaultdict
from operator import itemgetter
from joblib import dump, load
#BASE FOR A TRAINING SCRIPT for creating models
# TODO (later move this into a GUI)
#split the individual functional blocks into modules
#add training from a list of runners (or add it to RUN and start those runners first)
#TODO
#binary target
#random search and grid search
#TODO
#do the same for trend only, via a binary target, sigmoid and cross-entropy
#e.g. whether the next 3 bars rise (0-1)
def create_sequences(combined_data, target_data, seq, target_steps):
    """Creates sequences of the given length seq with a target N steps in the future.
    Args:
        combined_data: A list of combined data.
        target_data: A list of target data.
        seq: The sequence length.
        target_steps: The number of steps in the future to target.
    Returns:
        A list of X sequences and a list of y targets.
    """
    X_train = []
    y_train = []
    for i in range(len(combined_data) - seq - target_steps):
        X_train.append(combined_data[i:i + seq])
        y_train.append(target_data[i + seq + target_steps])
    return X_train, y_train
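# Windowing sketch (illustrative numbers, not part of the original draft):
# 100 rows with seq=10 and target_steps=5 give 100 - 10 - 5 = 85 windows of
# shape (10, n_features), each paired with the target 15 rows after the window start.
_X_demo, _y_demo = create_sequences(np.zeros((100, 3)), np.zeros((100, 1)), seq=10, target_steps=5)
assert len(_X_demo) == 85 and np.shape(_X_demo[0]) == (10, 3)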
# Sample data (replace this with your actual OHLCV data)
bars = {
    'time': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
    'high': [10, 11, 12, 13, 14, 10, 11, 12, 13, 14, 10, 11, 12, 13, 14],
    'low': [8, 9, 7, 6, 8, 8, 9, 7, 6, 8, 8, 9, 7, 6, 8],
    'volume': [1000, 1200, 900, 1100, 1300, 1000, 1200, 900, 1100, 1300, 1000, 1200, 900, 1100, 1300],
    'close': [9, 10, 11, 12, 13, 9, 10, 11, 12, 13, 9, 10, 11, 12, 13],
    'open': [9, 10, 8, 8, 8, 9, 10, 8, 8, 8, 9, 10, 8, 8, 8],
    'resolution': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
}
indicators = {
    'time': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
    'fastslope': [90, 95, 100, 110, 115, 90, 95, 100, 110, 115, 90, 95, 100, 110, 115],
    'fsdelta': [90, 95, 100, 110, 115, 90, 95, 100, 110, 115, 90, 95, 100, 110, 115],
    'fastslope2': [90, 95, 100, 110, 115, 90, 95, 100, 110, 115, 90, 95, 100, 110, 115],
    'ema': [1000, 1200, 900, 1100, 1300, 1000, 1200, 900, 1100, 1300, 1000, 1200, 900, 1100, 1300]
}
#LOADING
runner_id = "838e918e-9be0-4251-a968-c13c83f3f173"
result = None
res, sada = get_archived_runner_details_byID(runner_id)
if res == 0:
    print("ok")
else:
    print("error", res, sada)
bars = sada["bars"]
indicators = sada["indicators"][0]
# Basic settings
testlist_id = ""
ohlc_features = ['time', 'high', 'low', 'volume', 'open', 'close', 'trades', 'vwap']
indicator_features = ['samebarslope', 'fastslope', 'fsdelta', 'fastslope2', 'fsdelta2']
features = ["time", "high", "low", "volume", "open", "close", "trades", "vwap", "samebarslope", "fastslope", "fsdelta", "fastslope2", "fsdelta2"]
#TODO this is the linear prediction mode; add BINARY support
#for binary, the target will be either an existing indicator or one created on the fly
target = 'vwap'
#how many bars into the future to predict
target_steps = 5
name = "model1"
seq = 10
epochs = 500
features.sort()
# Prepare the data for bars and indicators
bar_data = np.column_stack([bars[feature] for feature in features if feature in bars])
indicator_data = np.column_stack([indicators[feature] for feature in features if feature in indicators])
combined_data = np.column_stack([bar_data, indicator_data])
###print(combined_data)
target_data = np.column_stack([bars[target]])
#print(target_data)
#for LSTM scaling before sequencing
# Standardize the data
scalerX = StandardScaler()
scalerY = StandardScaler()
combined_data = scalerX.fit_transform(combined_data)
target_data = scalerY.fit_transform(target_data)
# Create sequences of seq elements and define the target prediction horizon
X_train, y_train = create_sequences(combined_data, target_data, seq=seq, target_steps=target_steps)
#print("X_train", X_train)
#print("y_train", y_train)
X_complete = np.array(X_train.copy())
Y_complete = np.array(y_train.copy())
X_train = np.array(X_train)
y_train = np.array(y_train)
# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X_train, y_train, test_size=0.20, shuffle=False) #random_state=42)
# Define the input shape of the LSTM layer dynamically based on the reshaped X_train value
input_shape = (X_train.shape[1], X_train.shape[2])
# Build the LSTM model
model = Sequential()
model.add(LSTM(128, input_shape=input_shape))
model.add(Dense(1))
# Compile the model
model.compile(loss='mse', optimizer='adam')
# Train the model
model.fit(X_train, y_train, epochs=epochs)
#save the model
#model.save(DATA_DIR+'/my_model.keras')
#model = load_model(DATA_DIR+'/my_model.keras')
dump(scalerX, DATA_DIR+'/'+name+'scalerX.pkl')
dump(scalerY, DATA_DIR+'/'+name+'scalerY.pkl')
dump(model, DATA_DIR+'/'+name+'.pkl')
model = load(DATA_DIR+'/'+ name +'.pkl')
scalerX: StandardScaler = load(DATA_DIR+'/'+ name +'scalerX.pkl')
scalerY: StandardScaler = load(DATA_DIR+'/'+ name +'scalerY.pkl')
#LIVE PREDICTION - IMAGINE THIS HAPPENS LIVE
# Get the live data
# Prepare the data for bars and indicators
#assume ohlc_features and indicator_features remain the same
#get the last seq items of the respective bars and indicators
lastNbars = slice_dict_lists(bars, seq)
lastNindicators = slice_dict_lists(indicators, seq)
print("lastNbars", lastNbars)
print("lastNindicators", lastNindicators)
bar_data = np.column_stack([lastNbars[feature] for feature in features if feature in lastNbars])
indicator_data = np.column_stack([lastNindicators[feature] for feature in features if feature in lastNindicators])
combined_live_data = np.column_stack([bar_data, indicator_data])
print("combined_live_data",combined_live_data)
combined_live_data = scalerX.transform(combined_live_data)
#scaler = StandardScaler()
combined_live_data = np.array(combined_live_data)
#convert to a 3D array:
# 1 - the number of samples in the array
# 2 - the sequence length
# 3 - the number of features in the data
combined_live_data = combined_live_data.reshape((1, seq, combined_live_data.shape[1]))
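# Shape check (added illustration, not in the original draft): one sample,
# seq time steps, all features.
assert combined_live_data.ndim == 3 and combined_live_data.shape[0] == 1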
# Make a prediction
prediction = model(combined_live_data, training=False)
#prediction = prediction.reshape((1, 1))
# Convert the prediction back to the original scale
prediction = scalerY.inverse_transform(prediction)
print("prediction for last value", float(prediction))
#TEST PREDICTIONS
# Evaluate the model on the complete sequence set
#note: in production the test set must be scaled separately
#X_test = scalerX.transform(X_test)
#predictions over the complete data
X_complete = model.predict(X_complete)
X_complete = scalerY.inverse_transform(X_complete)
#targets of the complete data
Y_complete = scalerY.inverse_transform(Y_complete)
#mse = mean_squared_error(Y_complete, X_complete)
#print('Test MSE:', mse)
# Plot the predicted vs. actual close prices
plt.plot(Y_complete, label='Actual')
plt.plot(X_complete, label='Predicted')
plt.legend()
plt.show()
# To make a prediction, we simply feed the model a sequence of seq elements and it predicts the value target_steps bars ahead. For example:
# sequence = combined_data[0:seq].reshape((1, seq, -1))
# prediction = model.predict(sequence)


@@ -6,23 +6,31 @@ from v2realbot.enums.enums import RecordType, StartBarAlign, Mode, Account, Orde
from v2realbot.indicators.indicators import ema, natr, roc
from v2realbot.indicators.oscillators import rsi
from v2realbot.common.PrescribedTradeModel import Trade, TradeDirection, TradeStatus, TradeStoplossType
from v2realbot.utils.utils import ltp, isrising, isfalling,trunc,AttributeDict, zoneNY, price2dec, print, safe_get, round2five, is_open_rush, is_close_rush, is_still, is_window_open, eval_cond_dict, crossed_down, crossed_up, crossed, is_pivot, json_serial, pct_diff, create_new_bars
from v2realbot.utils.utils import ltp, isrising, isfalling,trunc,AttributeDict, zoneNY, price2dec, print, safe_get, round2five, is_open_rush, is_close_rush, is_still, is_window_open, eval_cond_dict, crossed_down, crossed_up, crossed, is_pivot, json_serial, pct_diff, create_new_bars, slice_dict_lists
from v2realbot.utils.directive_utils import get_conditions_from_configuration
from v2realbot.common.model import SLHistory
from datetime import datetime, timedelta
from v2realbot.config import KW
from v2realbot.config import KW, DATA_DIR
from uuid import uuid4
#import random
import json
import numpy as np
#from icecream import install, ic
#from rich import print
from rich import print as printanyway
from threading import Event
from msgpack import packb, unpackb
import asyncio
import os
from traceback import format_exc
from collections import defaultdict
from joblib import load
#WIP - possibly move into another module
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from keras.models import Sequential, load_model
from keras.layers import LSTM, Dense
print(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
""""
@@ -135,7 +143,7 @@ def next(data, state: StrategyState):
#this function builds conditions (either for AND/OR) from the working dict
def evaluate_directive_conditions_old(work_dict, cond_type):
    #used for nots, reverse condition for not_ keywords
    #used for reversing "not" kw conditions
    def rev(kw, condition):
        if directive.endswith(kw):
            return not condition
@@ -322,6 +330,10 @@ def next(data, state: StrategyState):
#if MA is required
MA_length = safe_get(options, "MA_length", None)
active = safe_get(options, 'active', True)
if not active:
    return
def is_time_to_run():
    # on_confirmed_only = true (def. False)
    # start_at_bar_index = 2 (def. None)
@@ -524,6 +536,66 @@ def next(data, state: StrategyState):
val = pct_diff(num1=float(source1_series[-1]),num2=float(source2_series[-1]))
return 0, val
#model - the loaded model instance
#seq - the input sequence length
#TODO optimize, possibly move into a separate module
def get_model_prediction(model: Sequential, scalerX: StandardScaler, scalerY: StandardScaler, features, seq, use_bars):
    lastNbars = slice_dict_lists(state.bars, seq, True)
    lastNindicators = slice_dict_lists(state.indicators, seq, False)
    indicator_data = np.column_stack([lastNindicators[feature] for feature in features if feature in lastNindicators])
    if use_bars:
        bar_data = np.column_stack([lastNbars[feature] for feature in features if feature in lastNbars])
        combined_live_data = np.column_stack([bar_data, indicator_data])
    else:
        combined_live_data = indicator_data
    combined_live_data = scalerX.transform(combined_live_data)
    combined_live_data = np.array(combined_live_data)
    #convert to a 3D array:
    # 1 - the number of samples in the array
    # 2 - the sequence length
    # 3 - the number of features in the data
    combined_live_data = combined_live_data.reshape((1, seq, combined_live_data.shape[1]))
    #prediction = model.predict(combined_live_data, verbose=0)
    prediction = model(combined_live_data, training=False)
    # Convert the prediction back to the original scale
    return float(scalerY.inverse_transform(prediction))
def model(params):
    funcName = "model"
    if params is None:
        return -2, "params required"
    name = safe_get(params, "name", None)
    seq = safe_get(params, "seq", None)
    use_bars = safe_get(params, "use_bars", True)
    if seq is not None and len(state.bars["close"]) < seq:
        return 0, 0
        #return -2, f"too soon - not enough data for seq {seq=}"
    features = safe_get(params, "features", None)
    if name is None or features is None:
        return -2, "name/features required"
    #ensure the ordering is the same as in the model (tbd: move into an object shared with training)
    features.sort()
    #time goes first
    if "time" in features:
        features.remove("time")
        features.insert(0, "time")
    if name not in state.vars.loaded_models:
        return -2, "model not loaded"
    if name not in state.vars.loaded_scalersX or name not in state.vars.loaded_scalersY:
        return -2, "scaler X or Y not loaded"
    try:
        return 0, get_model_prediction(state.vars.loaded_models[name], state.vars.loaded_scalersX[name], state.vars.loaded_scalersY[name], features, seq, use_bars)
    except Exception as e:
        printanyway(str(e) + format_exc())
        return -2, str(e) + format_exc()
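# Illustrative directive params for the model indicator (hypothetical values,
# not taken from the original commit):
# {"name": "model1", "seq": 10, "use_bars": True,
#  "features": ["time", "close", "vwap", "fastslope"]}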
#indicator that can be based on any bar parameter (index, high, open, close, trades, volume, etc.)
def barparams(params):
    funcName = "barparams"
@@ -2096,7 +2168,15 @@ def init(state: StrategyState):
#for the custom type we initialize its variables
state.vars.indicators[indname]["last_run_time"] = None
state.vars.indicators[indname]["last_run_index"] = None
if option == "subtype":
    if value == "model":
        #load the model
        modelname = safe_get(indsettings["cp"], 'name', None)
        if modelname is not None:
            state.vars.loaded_models[modelname] = load(DATA_DIR+'/'+ modelname +'.pkl')
            state.vars.loaded_scalersX[modelname] = load(DATA_DIR+'/'+ modelname +'scalerX.pkl')
            state.vars.loaded_scalersY[modelname] = load(DATA_DIR+'/'+ modelname +'scalerY.pkl')
            printanyway(f"model {modelname} and scalers loaded")
#TODO mainly: move the exit dicts into INIT, they are evaluated every tick
def intialize_directive_conditions():
@@ -2207,7 +2287,10 @@ def init(state: StrategyState):
state.vars.limitka_price=0
state.vars.jevylozeno=0
state.vars.blockbuy = 0
#models
state.vars.loaded_models = {}
state.vars.loaded_scalersX = {}
state.vars.loaded_scalersY = {}
#state.cbar_indicators['ivwap'] = []
state.cbar_indicators['tick_price'] = []
state.cbar_indicators['tick_volume'] = []

v2realbot/LSTMtrain.py Normal file

@@ -0,0 +1,331 @@
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from keras.models import Sequential, load_model
from keras.layers import LSTM, Dense
import matplotlib.pyplot as plt
from v2realbot.controller.services import get_archived_runner_details_byID
from v2realbot.common.model import RunArchiveDetail
from v2realbot.config import DATA_DIR
from v2realbot.utils.utils import slice_dict_lists
from collections import defaultdict
from operator import itemgetter
from joblib import dump, load
#BASE FOR A TRAINING SCRIPT for creating models
# TODO
# support for a BINARY TARGET
# hyperparameter support (activation functions such as sigmoid, etc.)
# move all config vars into a cfg object
# add identifiers such as last day's close, today's open, etc.
# random search and grid search
# add model metadata (e.g. trained on (runners+period), training data settings, number of epochs, hyperparameters, config attributes, etc.) - maybe persist in the db
# add some versioning
# move into a GUI and modules
# move prepare data into an imported function so it can be reused in the predict part of the strategy and the model
#settings do not have to be maintained twice. The same goes for the feature-transformation settings.
#TODO MODEL IDEAS
#binary trend identification, e.g. whether the next 3 bars rise (0-1)
#focus on models with output 0-1 or -1 to 1
# Sample data (replace this with your actual OHLCV data)
bars = {
    'time': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
    'high': [10, 11, 12, 13, 14, 10, 11, 12, 13, 14, 10, 11, 12, 13, 14],
    'low': [8, 9, 7, 6, 8, 8, 9, 7, 6, 8, 8, 9, 7, 6, 8],
    'volume': [1000, 1200, 900, 1100, 1300, 1000, 1200, 900, 1100, 1300, 1000, 1200, 900, 1100, 1300],
    'close': [9, 10, 11, 12, 13, 9, 10, 11, 12, 13, 9, 10, 11, 12, 13],
    'open': [9, 10, 8, 8, 8, 9, 10, 8, 8, 8, 9, 10, 8, 8, 8],
    'resolution': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
}
indicators = {
    'time': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
    'fastslope': [90, 95, 100, 110, 115, 90, 95, 100, 110, 115, 90, 95, 100, 110, 115],
    'fsdelta': [90, 95, 100, 110, 115, 90, 95, 100, 110, 115, 90, 95, 100, 110, 115],
    'fastslope2': [90, 95, 100, 110, 115, 90, 95, 100, 110, 115, 90, 95, 100, 110, 115],
    'ema': [1000, 1200, 900, 1100, 1300, 1000, 1200, 900, 1100, 1300, 1000, 1200, 900, 1100, 1300]
}
# Basic settings
testlist_id = ""
runner_ids = ["838e918e-9be0-4251-a968-c13c83f3f173", "c11c5cae-05f8-4b0a-aa4d-525ddac81684"]
features = ["time", "high", "low", "volume", "open", "close", "trades", "vwap", "samebarslope", "fastslope", "fsdelta", "fastslope2", "fsdelta2"]
#TODO this is the linear prediction mode; add BINARY support
#for binary, the target will be either an existing indicator or one created on the fly
#the model can also work without bars, i.e. indicators only
use_bars = True
target = 'fastslope2'
#how many bars into the future to predict
target_steps = 5
name = "model1"
seq = 10
epochs = 200
#the cross-day identifier is time (the resolution value is used to remove sequences that span days)
#using crossday_sequence assumes that time is among the features
resolution = 1
crossday_sequence = False
#whether the model also learns across days (across runner/day data); if not, cross-day sequences are removed
#implemented via an auxiliary identifier (runner)
#ensure the ordering
features.sort()
#time goes first
if "time" in features:
    features.remove("time")
    features.insert(0, "time")
def merge_dicts(dict_list):
    # Initialize an empty merged dictionary
    merged_dict = {}
    # Iterate through the dictionaries in the list
    for i, d in enumerate(dict_list):
        for key, value in d.items():
            if key in merged_dict:
                merged_dict[key] += value
            else:
                # copy the list so that += does not mutate the caller's data
                merged_dict[key] = list(value)
    #TODO insert an element identifying the runner
    return merged_dict
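# Merge sketch (added illustration, not in the original draft): per-key lists
# are concatenated in runner order.
assert merge_dicts([{"close": [1, 2]}, {"close": [3]}]) == {"close": [1, 2, 3]}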
# # Initialize the merged dictionary with the first dictionary in the list
# merged_dict = dict_list[0].copy()
# merged_dict["index"] = []
# # Iterate through the remaining dictionaries and concatenate their lists
# for i, d in enumerate(dict_list[1:]):
# merged_dict["index"] =
# for key, value in d.items():
# if key in merged_dict:
# merged_dict[key] += value
# else:
# merged_dict[key] = value
# return merged_dict
def load_runner(runner_id):
    res, sada = get_archived_runner_details_byID(runner_id)
    if res == 0:
        print("ok")
    else:
        print("error", res, sada)
    bars = sada["bars"]
    indicators = sada["indicators"][0]
    return bars, indicators
def prepare_data(bars, indicators, features, target) -> tuple[np.ndarray, np.ndarray]:
    #create SOURCE DATA with features
    # bars and indicators dictionaries and features as input
    indicator_data = np.column_stack([indicators[feature] for feature in features if feature in indicators])
    if len(bars) > 0:
        bar_data = np.column_stack([bars[feature] for feature in features if feature in bars])
        combined_day_data = np.column_stack([bar_data, indicator_data])
    else:
        combined_day_data = indicator_data
    #create TARGET DATA
    try:
        target_base = bars[target]
    except KeyError:
        target_base = indicators[target]
    target_day_data = np.column_stack([target_base])
    return combined_day_data, target_day_data
def load_runners_as_list(runner_ids: list, use_bars: bool):
    """Loads all runner data (bars, indicators) for runner_ids into lists of dicts.
    Args:
        runner_ids: A list of runner_ids.
        use_bars: Whether to also use bars, or just indicators.
    Returns:
        A tuple (barslist, indicatorslist) - lists with one dictionary per runner.
    """
    barslist = []
    indicatorslist = []
    for runner_id in runner_ids:
        bars, indicators = load_runner(runner_id)
        if use_bars:
            barslist.append(bars)
        indicatorslist.append(indicators)
    return barslist, indicatorslist
def create_sequences(combined_data, target_data, seq, target_steps, crossday_sequence = True):
    """Creates sequences of the given length seq with a target N steps in the future.
    Args:
        combined_data: A list of combined data.
        target_data: A list of target data.
        seq: The sequence length.
        target_steps: The number of steps in the future to target.
        crossday_sequence: Whether to create sequences across days (runners).
    Returns:
        A list of X sequences and a list of y targets.
    """
    X_train = []
    y_train = []
    last_delta = None
    for i in range(len(combined_data) - seq - target_steps):
        if last_delta is None:
            last_delta = 2 * (combined_data[i + seq + target_steps, 0] - combined_data[i, 0])
        curr_delta = combined_data[i + seq + target_steps, 0] - combined_data[i, 0]
        #if the time span of this sequence is significantly (2x) larger than the previous one, it crosses a day boundary
        #print(f"standard sequence start {combined_data[i, 0]} end {combined_data[i + seq + target_steps, 0]} delta: {curr_delta}")
        if crossday_sequence is False and curr_delta > last_delta:
            print(f"sequence discarded. Start {combined_data[i, 0]} end {combined_data[i + seq + target_steps, 0]}")
            continue
        X_train.append(combined_data[i:i + seq])
        y_train.append(target_data[i + seq + target_steps])
        last_delta = 2 * (combined_data[i + seq + target_steps, 0] - combined_data[i, 0])
    return np.array(X_train), np.array(y_train)
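# Cross-day filter sketch (illustrative data, not in the original draft):
# column 0 is time; the jump from 4 to 100 marks a day boundary, so windows
# spanning it are discarded when crossday_sequence=False.
_t = np.array([1, 2, 3, 4, 100, 101, 102, 103], dtype=float)
_demo = np.column_stack([_t, _t])
_Xd, _yd = create_sequences(_demo, _demo[:, :1], seq=2, target_steps=1, crossday_sequence=False)
assert len(_Xd) == 2  # only windows that stay inside one "day" survive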
barslist, indicatorslist = load_runners_as_list(runner_ids, use_bars)
#merge all the data together
bars = merge_dicts(barslist)
indicators = merge_dicts(indicatorslist)
print(f"{len(indicators)}")
print(f"{len(bars)}")
source_data, target_data = prepare_data(bars, indicators, features, target)
# Set the printing threshold to print only the first and last 10 rows of the array
np.set_printoptions(threshold=10)
print("source_data", source_data, "shape", np.shape(source_data))
# Standardize the data
scalerX = StandardScaler()
scalerY = StandardScaler()
#FITTING THE SCALER also fixes the number of FEATURES !!
source_data = scalerX.fit_transform(source_data)
target_data = scalerY.fit_transform(target_data)
#print("source_data shape",np.shape(source_data))
# Create sequences of seq elements and define the target prediction horizon
X_train, y_train = create_sequences(source_data, target_data, seq=seq, target_steps=target_steps, crossday_sequence=crossday_sequence)
#X_train (6205, 10, 14)
print("X_train", np.shape(X_train))
X_complete = np.array(X_train.copy())
Y_complete = np.array(y_train.copy())
X_train = np.array(X_train)
y_train = np.array(y_train)
# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X_train, y_train, test_size=0.20, shuffle=False) #random_state=42)
#print(np.shape(X_train))
# Define the input shape of the LSTM layer dynamically based on the reshaped X_train value
input_shape = (X_train.shape[1], X_train.shape[2])
# Build the LSTM model
model = Sequential()
model.add(LSTM(128, input_shape=input_shape))
model.add(Dense(1))
# Compile the model
model.compile(loss='mse', optimizer='adam')
# Train the model
model.fit(X_train, y_train, epochs=epochs)
#save the model
#model.save(DATA_DIR+'/my_model.keras')
#model = load_model(DATA_DIR+'/my_model.keras')
dump(scalerX, DATA_DIR+'/'+name+'scalerX.pkl')
dump(scalerY, DATA_DIR+'/'+name+'scalerY.pkl')
dump(model, DATA_DIR+'/'+name+'.pkl')
model = load(DATA_DIR+'/'+ name +'.pkl')
scalerX: StandardScaler = load(DATA_DIR+'/'+ name +'scalerX.pkl')
scalerY: StandardScaler = load(DATA_DIR+'/'+ name +'scalerY.pkl')
#LIVE PREDICTION - IMAGINE THIS HAPPENS LIVE
# Get the live data
# Prepare the data for bars and indicators
#assume ohlc_features and indicator_features remain the same
#get the last seq items of the respective bars and indicators
#remove the runner indicator if present
if "runner" in indicators:
    del indicators["runner"]
    print("runner key deleted from indicators")
if "runner" in features:
    features.remove("runner")
    print("runner removed from features")
lastNbars = slice_dict_lists(bars, seq)
lastNindicators = slice_dict_lists(indicators, seq)
print("lastNbars", lastNbars)
print("lastNindicators", lastNindicators)
indicator_data = np.column_stack([lastNindicators[feature] for feature in features if feature in lastNindicators])
if use_bars:
    bar_data = np.column_stack([lastNbars[feature] for feature in features if feature in lastNbars])
    combined_live_data = np.column_stack([bar_data, indicator_data])
else:
    combined_live_data = indicator_data
print("combined_live_data",combined_live_data)
combined_live_data = scalerX.transform(combined_live_data)
#scaler = StandardScaler()
combined_live_data = np.array(combined_live_data)
#convert to a 3D array:
# 1 - the number of samples in the array
# 2 - the sequence length
# 3 - the number of features in the data
combined_live_data = combined_live_data.reshape((1, seq, combined_live_data.shape[1]))
# Make a prediction
prediction = model(combined_live_data, training=False)
#prediction = prediction.reshape((1, 1))
# Convert the prediction back to the original scale
prediction = scalerY.inverse_transform(prediction)
print("prediction for last value", float(prediction))
#TEST PREDICTIONS
# Evaluate the model on the complete sequence set
#note: in production the test set must be scaled separately
#X_test = scalerX.transform(X_test)
#predictions over the complete data
X_complete = model.predict(X_complete)
X_complete = scalerY.inverse_transform(X_complete)
#targets of the complete data
Y_complete = scalerY.inverse_transform(Y_complete)
mse = mean_squared_error(Y_complete, X_complete)
print('MSE over the complete data set:', mse)
# Plot the predicted vs. actual close prices
plt.plot(Y_complete, label='Actual')
plt.plot(X_complete, label='Predicted')
plt.legend()
plt.show()
# To make a prediction, we simply feed the model a sequence of seq elements and it predicts the value target_steps bars ahead. For example:
# sequence = combined_data[0:seq].reshape((1, seq, -1))
# prediction = model.predict(sequence)


@@ -668,6 +668,7 @@ def archive_runner(runner: Runner, strat: StrategyInstance, inter_batch_params:
#file for development: ouptut_metriky_tradeList.py
results_metrics = populate_metrics_output_directory(strat, inter_batch_params)
runArchive: RunArchive = RunArchive(id = runner.id,
strat_id = runner.strat_id,
name=runner.run_name,
@@ -936,7 +937,7 @@ def get_testlists():
testlists = []
for row in rows:
    print(row)
    #print(row)
    testlist = TestList(id=row[0], name=row[1], dates=json.loads(row[2]))
    testlists.append(testlist)


@@ -1,7 +0,0 @@
word = "buy_if_not_something"
if word.endswith("something") and word[:-len] == "not_":
    print("Word meets the condition.")
else:
    print("Word does not meet the condition.")


@@ -431,6 +431,11 @@ class Strategy:
if self.rtqueue is not None:
    self.rtqueue.put("break")
#get rid of attributes that hold references to the models
self.state.vars["loaded_models"] = {}
self.state.vars["loaded_scalersX"] = {}
self.state.vars["loaded_scalersY"] = {}
#call remove streamer on the loader - there may be other running strategies that use the loader
#alternatively create a shared loader or a dedicated loader
#if shared, call remove


@@ -27,8 +27,39 @@ from collections import deque
import numpy as np
def slice_dict_lists(d, last_item, to_tmstp = False):
    """Slices every list in the dictionary to its last last_item items.
    Args:
        d: A dictionary.
        last_item: The number of items to keep at the end of each list.
        to_tmstp: For the "time" key, convert datetimes to timestamps if required.
    Returns:
        A new dictionary with the sliced lists.
    """
    sliced_d = {}
    for key in d.keys():
        if key == "time" and to_tmstp:
            sliced_d[key] = [datetime.timestamp(t) for t in d[key][-last_item:]]
        else:
            sliced_d[key] = d[key][-last_item:]
    return sliced_d
# keys_set = set(keys)
# sliced_d = {}
# for key, value in d.items():
# if key in keys_set and isinstance(value, list):
# if key == "time" and to_tmstp:
# sliced_d[key] = [datetime.timestamp(t) for t in value[-last_item:]]
# else:
# sliced_d[key] = value[-last_item:]
# return sliced_d
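# Usage sketch (illustrative values, not part of the original commit):
# slice_dict_lists({"close": [1, 2, 3], "volume": [10, 20, 30]}, 2)
# -> {"close": [2, 3], "volume": [20, 30]}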
#WIP
def create_new_bars(bars, new_resolution):
"""Creates new bars dictionary in the new resolution.
"""WIP - Creates new bars dictionary in the new resolution.
Args:
bars: A dictionary representing ohlcv bars.