diff --git a/testy/ml/test.py b/testy/ml/test.py
new file mode 100644
index 0000000..332a52c
--- /dev/null
+++ b/testy/ml/test.py
@@ -0,0 +1,79 @@
+import numpy as np
+from sklearn.preprocessing import StandardScaler
+from keras.models import Sequential
+from keras.layers import LSTM, Dense
+from v2realbot.controller.services import get_archived_runner_details_byID
+from v2realbot.common.model import RunArchiveDetail
+import json
+
+runner_id = "838e918e-9be0-4251-a968-c13c83f3f173"
+result = None
+res, detail = get_archived_runner_details_byID(runner_id)
+if res == 0:
+    print("ok")
+else:
+    print("error", res, detail)
+
+bars = detail["bars"]
+indicators = detail["indicators"]
+#print("bars",bars)
+#print("indicators",indicators)
+
+def scale_and_transform_data(bars, indicators):
+    """Scales and transforms the `bars` and `indicators` dictionaries for use in an RNN time-series prediction model.
+
+    Args:
+        bars: A dictionary containing OHLCV values and a timestamp.
+        indicators: A dictionary containing additional indicators and a timestamp.
+
+    Returns:
+        A tuple containing the scaled training, validation, and test sequences.
+    """
+
+    # Combine the two dictionaries (indicators are not used yet)
+    #combined_data = {**bars, **indicators}
+    bar_data = np.column_stack((bars["time"], bars['high'], bars['low'], bars['volume'], bars['close'], bars['open']))
+
+    # Scale the data
+    scaler = StandardScaler()
+    scaled_data = scaler.fit_transform(bar_data)
+
+    # Create overlapping sequences of 100 steps
+    sequences = []
+    for i in range(len(scaled_data) - 100):
+        sequence = scaled_data[i:i + 100]
+        sequences.append(sequence)
+
+    # Split the data into training (80%), validation (10%), and test (10%) sets
+    train_sequences = sequences[:int(len(sequences) * 0.8)]
+    val_sequences = sequences[int(len(sequences) * 0.8):int(len(sequences) * 0.9)]
+    test_sequences = sequences[int(len(sequences) * 0.9):]
+
+    return train_sequences, val_sequences, test_sequences
+
+#Scale and transform the data
+train_sequences, val_sequences, test_sequences = scale_and_transform_data(bars, indicators)
+
+# Convert the sequences to NumPy arrays; shape is already (samples, 100, 6)
+train_sequences_array = np.asarray(train_sequences)
+test_sequences_array = np.asarray(test_sequences)
+
+# The sequences carry no separate labels, so use the first 99 steps as input
+# and the (scaled) close of the last step - column 4 - as the target
+X_train = train_sequences_array[:, :-1, :]
+y_train = train_sequences_array[:, -1, 4]
+
+# Define the RNN model
+model = Sequential()
+model.add(LSTM(128, input_shape=(X_train.shape[1], X_train.shape[2])))
+model.add(Dense(1))
+
+# Compile the model
+model.compile(loss='mse', optimizer='adam')
+
+# Train the model on the sequence data
+model.fit(X_train, y_train, epochs=100)
+
+# Make a prediction for the next data point (scaled close)
+prediction = model.predict(test_sequences_array[-1:, :-1, :])
+
+# Print the prediction
+print(prediction)
diff --git a/testy/ml/test2.py b/testy/ml/test2.py
new file mode 100644
index 0000000..85528e0
--- /dev/null
+++ b/testy/ml/test2.py
@@ -0,0 +1,81 @@
+import numpy as np
+from sklearn.preprocessing import StandardScaler
+from sklearn.metrics import mean_squared_error
+from sklearn.model_selection import train_test_split
+from keras.models import Sequential
+from keras.layers import LSTM, Dense
+import matplotlib.pyplot as plt
+from v2realbot.controller.services import get_archived_runner_details_byID
+from v2realbot.common.model import RunArchiveDetail
+
+# Sample data (replace this with your actual OHLCV data)
+bars = {
+    'time': [1, 2, 3, 4, 5],
+    'high': [10, 11, 12, 13, 14],
+    'low': [8, 9, 7, 6, 8],
+    'volume': [1000, 1200, 900, 1100, 1300],
+    'close': [9, 10, 11, 12, 13],
+    'open': [9, 10, 8, 8, 8],
+    'resolution': [1, 1, 1, 1, 1]
+}
+
+indicators = {
+    'time': [1, 2, 3, 4, 5],
+    'fastslope': [90, 95, 100, 110, 115],
+    'ema': [1000, 1200, 900, 1100, 1300]
+}
+
+# Features and target
+ohlc_features = ['high', 'low', 'volume', 'open', 'close']
+indicator_features = ['fastslope']
+target = 'close'
+
+# Prepare the data for bars and indicators
+bar_data = np.column_stack([bars[feature] for feature in ohlc_features])
+indicator_data = np.column_stack([indicators[feature] for feature in indicator_features])
+combined_data = np.column_stack([bar_data, indicator_data])
+target_data = np.column_stack([bars[target]])
+
+print(f"{combined_data=}")
+print(f"{target_data=}")
+# Split the data into training and test sets
+X_train, X_test, y_train, y_test = train_test_split(combined_data, target_data, test_size=0.25, random_state=42)
+
+# Standardize the data - separate scalers for features and target, so the
+# target predictions can be inverse-transformed independently
+scalerX = StandardScaler()
+scalerY = StandardScaler()
+X_train = scalerX.fit_transform(X_train)
+y_train = scalerY.fit_transform(y_train)
+
+# Reshape the input data for LSTM to have an additional dimension for the number of time steps
+X_train = X_train.reshape((X_train.shape[0], 1, X_train.shape[1]))
+
+# Define the input shape of the LSTM layer dynamically based on the reshaped X_train value
+input_shape = (X_train.shape[1], X_train.shape[2])
+
+# Build the LSTM model
+model = Sequential()
+model.add(LSTM(128, input_shape=input_shape))
+model.add(Dense(1))
+
+# Compile the model
+model.compile(loss='mse', optimizer='adam')
+
+# Train the model
+model.fit(X_train, y_train, epochs=500)
+
+# Evaluate the model on the test set
+# Scale and reshape the test data to the same structure the model was trained on
+X_test = scalerX.transform(X_test)
+X_test = X_test.reshape((X_test.shape[0], 1, X_test.shape[1]))
+y_pred = model.predict(X_test)
+y_pred = scalerY.inverse_transform(y_pred)
+mse = mean_squared_error(y_test, y_pred)
+print('Test MSE:', mse)
+
+# Plot the predicted vs. actual close prices
+plt.plot(y_test, label='Actual')
+plt.plot(y_pred, label='Predicted')
+plt.legend()
+plt.show()
\ No newline at end of file
diff --git a/testy/ml/test3_LSTMwindow.py b/testy/ml/test3_LSTMwindow.py
new file mode 100644
index 0000000..dc824a9
--- /dev/null
+++ b/testy/ml/test3_LSTMwindow.py
@@ -0,0 +1,208 @@
+import numpy as np
+from sklearn.preprocessing import StandardScaler
+from sklearn.metrics import mean_squared_error
+from sklearn.model_selection import train_test_split
+from keras.models import Sequential, load_model
+from keras.layers import LSTM, Dense
+import matplotlib.pyplot as plt
+from v2realbot.controller.services import get_archived_runner_details_byID
+from v2realbot.common.model import RunArchiveDetail
+from v2realbot.config import DATA_DIR
+from v2realbot.utils.utils import slice_dict_lists
+from collections import defaultdict
+from operator import itemgetter
+from joblib import dump, load
+
+
+#BASE FOR A TRAINING SCRIPT for creating models
+# TODO (eventually move this into the GUI)
+#split the individual functional blocks into modules
+#add training from a list of runners (or add it to RUN and start those runners first)
+#TODO
+#binary target
+#random search and grid search
+
+#TODO
+#do the same for trend only, via a binary target, sigmoid and cross-entropy
+#e.g. whether the next 3 bars rise (0-1)
+
+def create_sequences(combined_data, target_data, seq, target_steps):
+    """Creates sequences of given length seq and a target N steps in the future.
+
+    Args:
+        combined_data: A list of combined data.
+        target_data: A list of target data.
+        seq: The sequence length.
+        target_steps: The number of steps in the future to target.
+
+    Returns:
+        A list of X sequences and a list of y sequences.
+    """
+
+    X_train = []
+    y_train = []
+    for i in range(len(combined_data) - seq - target_steps):
+        X_train.append(combined_data[i:i + seq])
+        y_train.append(target_data[i + seq + target_steps])
+
+    return X_train, y_train
+
+# Sample data (replace this with your actual OHLCV data)
+bars = {
+    'time': [1, 2, 3, 4, 5,6,7,8,9,10,11,12,13,14,15],
+    'high': [10, 11, 12, 13, 14,10, 11, 12, 13, 14,10, 11, 12, 13, 14],
+    'low': [8, 9, 7, 6, 8,8, 9, 7, 6, 8,8, 9, 7, 6, 8],
+    'volume': [1000, 1200, 900, 1100, 1300,1000, 1200, 900, 1100, 1300,1000, 1200, 900, 1100, 1300],
+    'close': [9, 10, 11, 12, 13,9, 10, 11, 12, 13,9, 10, 11, 12, 13],
+    'open': [9, 10, 8, 8, 8,9, 10, 8, 8, 8,9, 10, 8, 8, 8],
+    'resolution': [1, 1, 1, 1, 1,1, 1, 1, 1, 1,1, 1, 1, 1, 1]
+}
+
+indicators = {
+    'time': [1, 2, 3, 4, 5,6,7,8,9,10,11,12,13,14,15],
+    'fastslope': [90, 95, 100, 110, 115,90, 95, 100, 110, 115,90, 95, 100, 110, 115],
+    'fsdelta': [90, 95, 100, 110, 115,90, 95, 100, 110, 115,90, 95, 100, 110, 115],
+    'fastslope2': [90, 95, 100, 110, 115,90, 95, 100, 110, 115,90, 95, 100, 110, 115],
+    'ema': [1000, 1200, 900, 1100, 1300,1000, 1200, 900, 1100, 1300,1000, 1200, 900, 1100, 1300]
+}
+
+#LOADING
+runner_id = "838e918e-9be0-4251-a968-c13c83f3f173"
+result = None
+res, sada = get_archived_runner_details_byID(runner_id)
+if res == 0:
+    print("ok")
+else:
+    print("error",res,sada)
+
+bars = sada["bars"]
+indicators = sada["indicators"][0]
+
+# Basic settings
+testlist_id = ""
+ohlc_features = ['time','high', 'low', 'volume', 'open', 'close', 'trades', 'vwap']
+indicator_features = ['samebarslope', 'fastslope','fsdelta', 'fastslope2', 'fsdelta2']
+
+features = ["time","high","low","volume","open","close", "trades", "vwap","samebarslope", "fastslope","fsdelta", "fastslope2", "fsdelta2"]
+#TODO this is the linear prediction mode - add BINARY support
+#for binary, the target will either be an existing indicator or will be created on the fly
+target = 'vwap'
+#predict how many bars in the future
+target_steps = 5
+name = "model1"
+seq = 10
+epochs = 500
+
+features.sort()
+# Prepare the data for bars and indicators
+bar_data = np.column_stack([bars[feature] for feature in features if feature in bars])
+indicator_data = np.column_stack([indicators[feature] for feature in features if feature in indicators])
+combined_data = np.column_stack([bar_data, indicator_data])
+###print(combined_data)
+target_data = np.column_stack([bars[target]])
+#print(target_data)
+#for LSTM, scaling happens before sequencing
+# Standardize the data
+scalerX = StandardScaler()
+scalerY = StandardScaler()
+combined_data = scalerX.fit_transform(combined_data)
+target_data = scalerY.fit_transform(target_data)
+
+# Create sequences of seq elements and define the target prediction horizon
+X_train, y_train = create_sequences(combined_data, target_data, seq=seq, target_steps=target_steps)
+
+#print("X_train", X_train)
+#print("y_train", y_train)
+X_complete = np.array(X_train.copy())
+Y_complete = np.array(y_train.copy())
+X_train = np.array(X_train)
+y_train = np.array(y_train)
+
+# Split the data into training and test sets
+X_train, X_test, y_train, y_test = train_test_split(X_train, y_train, test_size=0.20, shuffle=False) #random_state=42)
+
+# Define the input shape of the LSTM layer dynamically based on the reshaped X_train value
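+# For example (editor's note - illustrative numbers): with seq=10 and the 13
+# features above, X_train.shape == (num_sequences, 10, 13), so input_shape == (10, 13)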
+input_shape = (X_train.shape[1], X_train.shape[2])
+
+# Build the LSTM model
+model = Sequential()
+model.add(LSTM(128, input_shape=input_shape))
+model.add(Dense(1))
+
+# Compile the model
+model.compile(loss='mse', optimizer='adam')
+
+# Train the model
+model.fit(X_train, y_train, epochs=epochs)
+
+#save the model and scalers
+#model.save(DATA_DIR+'/my_model.keras')
+#model = load_model(DATA_DIR+'/my_model.keras')
+dump(scalerX, DATA_DIR+'/'+name+'scalerX.pkl')
+dump(scalerY, DATA_DIR+'/'+name+'scalerY.pkl')
+dump(model, DATA_DIR+'/'+name+'.pkl')
+
+model = load(DATA_DIR+'/'+ name +'.pkl')
+scalerX: StandardScaler = load(DATA_DIR+'/'+ name +'scalerX.pkl')
+scalerY: StandardScaler = load(DATA_DIR+'/'+ name +'scalerY.pkl')
+
+#LIVE PREDICTION - IMAGINE THIS HAPPENS LIVE
+# Get the live data
+# Prepare the data for bars and indicators
+
+#assume ohlc_features and indicator_features remain the same
+
+#get the last seq items of the respective bars and indicators
+lastNbars = slice_dict_lists(bars, seq)
+lastNindicators = slice_dict_lists(indicators, seq)
+print("lastNbars", lastNbars)
+print("lastNindicators",lastNindicators)
+
+bar_data = np.column_stack([lastNbars[feature] for feature in features if feature in lastNbars])
+indicator_data = np.column_stack([lastNindicators[feature] for feature in features if feature in lastNindicators])
+combined_live_data = np.column_stack([bar_data, indicator_data])
+print("combined_live_data",combined_live_data)
+combined_live_data = scalerX.transform(combined_live_data)
+#scaler = StandardScaler()
+
+combined_live_data = np.array(combined_live_data)
+
+#convert to a 3D array of shape (1, seq, num_features):
+# 1st dim - the number of samples in the array
+# 2nd dim - the sequence length
+# 3rd dim - the number of features in the data
+combined_live_data = combined_live_data.reshape((1, seq, combined_live_data.shape[1]))
+
+
+# Make a prediction
+prediction = model(combined_live_data, training=False)
+#prediction = prediction.reshape((1, 1))
+# Convert the prediction back to the original scale
+prediction = scalerY.inverse_transform(prediction)
+
+print("prediction for last value", float(prediction))
+
+#TEST PREDICTIONS
+# Evaluate the model on the complete data set
+#note: in production, scale the test set separately
+#X_test = scalerX.transform(X_test)
+#predictions over the complete data
+X_complete = model.predict(X_complete)
+X_complete = scalerY.inverse_transform(X_complete)
+
+#targets of the complete data
+Y_complete = scalerY.inverse_transform(Y_complete)
+#mse = mean_squared_error(y_test, y_pred)
+#print('Test MSE:', mse)
+
+# Plot the predicted vs. actual target values
+plt.plot(Y_complete, label='Actual')
+plt.plot(X_complete, label='Predicted')
+plt.legend()
+plt.show()
+
+# To make a prediction, feed the model one scaled sequence of seq elements reshaped
+# to (1, seq, num_features); it predicts the (scaled) target target_steps bars ahead:
+# sequence = combined_data[0:seq].reshape((1, seq, combined_data.shape[1]))
+# prediction = scalerY.inverse_transform(model.predict(sequence))
diff --git a/v2realbot/ENTRY_ClassicSL_v01.py b/v2realbot/ENTRY_ClassicSL_v01.py
index 8644b69..0db1288 100644
--- a/v2realbot/ENTRY_ClassicSL_v01.py
+++ b/v2realbot/ENTRY_ClassicSL_v01.py
@@ -6,23 +6,31 @@ from v2realbot.enums.enums import RecordType, StartBarAlign, Mode, Account, Orde
 from v2realbot.indicators.indicators import ema, natr, roc
 from v2realbot.indicators.oscillators import rsi
 from v2realbot.common.PrescribedTradeModel import Trade, TradeDirection, TradeStatus, TradeStoplossType
-from v2realbot.utils.utils import ltp, isrising, isfalling,trunc,AttributeDict, zoneNY, price2dec, print, safe_get, round2five, is_open_rush, is_close_rush, is_still, is_window_open, eval_cond_dict, crossed_down, crossed_up, crossed, is_pivot, json_serial, pct_diff, create_new_bars
+from v2realbot.utils.utils import ltp, isrising, isfalling,trunc,AttributeDict, zoneNY, price2dec, print, safe_get, round2five, is_open_rush, is_close_rush, is_still, is_window_open, eval_cond_dict, crossed_down, crossed_up, crossed, is_pivot, json_serial, pct_diff, create_new_bars, slice_dict_lists
 from v2realbot.utils.directive_utils import get_conditions_from_configuration
 from v2realbot.common.model import SLHistory
 from datetime import datetime, timedelta
-from v2realbot.config import KW
+from v2realbot.config import KW, DATA_DIR
 from uuid import uuid4
 #import random
 import json
 import numpy as np
 #from icecream import install, ic
-#from rich import print
+from rich import print as printanyway
 from threading import Event
 from msgpack import packb, unpackb
 import asyncio
 import os
 from traceback import format_exc
 from collections import defaultdict
+from joblib import load
+
+#WIP - possibly move to a separate module
+from sklearn.preprocessing import StandardScaler
+from sklearn.metrics import mean_squared_error
+from sklearn.model_selection import train_test_split
+from keras.models import Sequential, load_model
+from keras.layers import LSTM, Dense
 print(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 """"
@@ -135,7 +143,7 @@ def next(data, state: StrategyState):
 
     #funkce vytvori podminky (bud pro AND/OR) z pracovniho dict
     def evaluate_directive_conditions_old(work_dict, cond_type):
-        #used for nots, reverse condition for not_ keywords
+        #used for reversing "not" kw conditions
         def rev(kw, condition):
             if directive.endswith(kw):
                 return not condition
@@ -322,6 +330,10 @@ def next(data, state: StrategyState):
         #if MA is required
         MA_length = safe_get(options, "MA_length", None)
 
+        active = safe_get(options, 'active', True)
+        if not active:
+            return
+
         def is_time_to_run():
             # on_confirmed_only = true (def. False)
             # start_at_bar_index = 2 (def. None)
@@ -524,6 +536,66 @@ def next(data, state: StrategyState):
             val = pct_diff(num1=float(source1_series[-1]),num2=float(source2_series[-1]))
             return 0, val
 
+
+        #model - the loaded model instance
+        #seq - the input sequence length
+        #TODO optimize, possibly move to a separate module
+        def get_model_prediction(model: Sequential, scalerX: StandardScaler, scalerY: StandardScaler, features, seq, use_bars):
+            lastNbars = slice_dict_lists(state.bars, seq, True)
+            lastNindicators = slice_dict_lists(state.indicators, seq, False)
+            indicator_data = np.column_stack([lastNindicators[feature] for feature in features if feature in lastNindicators])
+            if use_bars:
+                bar_data = np.column_stack([lastNbars[feature] for feature in features if feature in lastNbars])
+                combined_live_data = np.column_stack([bar_data, indicator_data])
+            else:
+                combined_live_data = indicator_data
+            combined_live_data = scalerX.transform(combined_live_data)
+            combined_live_data = np.array(combined_live_data)
+            #convert to a 3D array of shape (1, seq, num_features):
+            # 1st dim - the number of samples in the array
+            # 2nd dim - the sequence length
+            # 3rd dim - the number of features in the data
+            combined_live_data = combined_live_data.reshape((1, seq, combined_live_data.shape[1]))
+            #prediction = model.predict(combined_live_data, verbose=0)
+            prediction = model(combined_live_data, training=False)
+
+            # Convert the prediction back to the original scale
+            return float(scalerY.inverse_transform(prediction))
+
+        def model(params):
+            funcName = "model"
+            if params is None:
+                return -2, "params required"
+            name = safe_get(params, "name", None)
+            seq = safe_get(params, "seq", None)
+            use_bars = safe_get(params, "use_bars", True)
+            if seq is not None and len(state.bars["close"]) < seq:
+                return 0, 0
+                #return -2, f"too soon - not enough data for seq {seq=}"
+            features = safe_get(params, "features", None)
+            if name is None or features is None:
+                return -2, "name/features required"
+
+            #ensure the same feature order as in the model (tbd move to an object shared with training)
+            features.sort()
+            #time goes first
+            if "time" in features:
+                features.remove("time")
+                features.insert(0, "time")
+
+            if name not in state.vars.loaded_models:
+                return -2, "model not loaded"
+
+            #either scaler missing means we cannot predict
+            if name not in state.vars.loaded_scalersX or name not in state.vars.loaded_scalersY:
+                return -2, "scaler X or Y not loaded"
+
+            try:
+                return 0, get_model_prediction(state.vars.loaded_models[name],state.vars.loaded_scalersX[name],state.vars.loaded_scalersY[name],features,seq, use_bars)
+            except Exception as e:
+                printanyway(str(e)+format_exc())
+                return -2, str(e)+format_exc()
+
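+        # Hypothetical usage sketch (editor's note - the params values below are
+        # illustrative only; real values come from the strategy configuration):
+        #   status, val = model({"name": "model1", "seq": 10, "features": ["time", "close", "vwap"]})
+        #   -> (0, prediction) on success, (-2, error message) otherwise
+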
+        #indicator allowing to be based on any bar parameter (index, high, open, close, trades, volume, etc.)
+        def barparams(params):
+            funcName = "barparams"
@@ -2096,7 +2168,15 @@ def init(state: StrategyState):
             #pro typ custom inicializujeme promenne
             state.vars.indicators[indname]["last_run_time"] = None
             state.vars.indicators[indname]["last_run_index"] = None
-
+            if option == "subtype":
+                if value == "model":
+                    #load the model and its scalers
+                    modelname = safe_get(indsettings["cp"], 'name', None)
+                    if modelname is not None:
+                        state.vars.loaded_models[modelname] = load(DATA_DIR+'/'+ modelname +'.pkl')
+                        state.vars.loaded_scalersX[modelname] = load(DATA_DIR+'/'+ modelname +'scalerX.pkl')
+                        state.vars.loaded_scalersY[modelname] = load(DATA_DIR+'/'+ modelname +'scalerY.pkl')
+                        printanyway(f"model {modelname} and scalers loaded")
 #TODO hlavne tedy do INITu dat exit dict, ty jsou evaluovane kazdy tick
     def intialize_directive_conditions():
@@ -2207,7 +2287,10 @@ def init(state: StrategyState):
     state.vars.limitka_price=0
     state.vars.jevylozeno=0
    state.vars.blockbuy = 0
-
+    #models
+    state.vars.loaded_models = {}
+    state.vars.loaded_scalersX = {}
+    state.vars.loaded_scalersY = {}
 #state.cbar_indicators['ivwap'] = []
     state.cbar_indicators['tick_price'] = []
     state.cbar_indicators['tick_volume'] = []
diff --git a/v2realbot/LSTMtrain.py b/v2realbot/LSTMtrain.py
new file mode 100644
index 0000000..6d5ed5e
--- /dev/null
+++ b/v2realbot/LSTMtrain.py
@@ -0,0 +1,331 @@
+import numpy as np
+from sklearn.preprocessing import StandardScaler
+from sklearn.metrics import mean_squared_error
+from sklearn.model_selection import train_test_split
+from keras.models import Sequential, load_model
+from keras.layers import LSTM, Dense
+import matplotlib.pyplot as plt
+from v2realbot.controller.services import get_archived_runner_details_byID
+from v2realbot.common.model import RunArchiveDetail
+from v2realbot.config import DATA_DIR
+from v2realbot.utils.utils import slice_dict_lists
+from collections import defaultdict
+from operator import itemgetter
+from joblib import dump, load
+
+
+#BASE FOR A TRAINING SCRIPT for creating models
+# TODO
+# support for a BINARY TARGET
+# support for hyperparameters (activation functions such as sigmoid etc.)
+# move all config vars into a cfg object
+# add identifiers such as last day's close, today's open etc.
+# random search and grid search
+# add model metadata (e.g. trained on (runners+period), training data settings, number of epochs, hyperparameters, config attributes etc.) - maybe persist it in the db
+# add some versioning
+# move into the GUI and modules
+# move data preparation into an imported function, so it can be used in the predict part of the strategy
+#and does not have to be kept in sync with the model settings; same for the feature-engineering settings
+
+
+#TODO MODEL IDEAS
+#binary trend identification, e.g. whether the next 3 bars rise (0-1)
+#focus on models with output 0-1 or -1 to 1
+
+
+# Sample data (replace this with your actual OHLCV data)
+bars = {
+    'time': [1, 2, 3, 4, 5,6,7,8,9,10,11,12,13,14,15],
+    'high': [10, 11, 12, 13, 14,10, 11, 12, 13, 14,10, 11, 12, 13, 14],
+    'low': [8, 9, 7, 6, 8,8, 9, 7, 6, 8,8, 9, 7, 6, 8],
+    'volume': [1000, 1200, 900, 1100, 1300,1000, 1200, 900, 1100, 1300,1000, 1200, 900, 1100, 1300],
+    'close': [9, 10, 11, 12, 13,9, 10, 11, 12, 13,9, 10, 11, 12, 13],
+    'open': [9, 10, 8, 8, 8,9, 10, 8, 8, 8,9, 10, 8, 8, 8],
+    'resolution': [1, 1, 1, 1, 1,1, 1, 1, 1, 1,1, 1, 1, 1, 1]
+}
+
+indicators = {
+    'time': [1, 2, 3, 4, 5,6,7,8,9,10,11,12,13,14,15],
+    'fastslope': [90, 95, 100, 110, 115,90, 95, 100, 110, 115,90, 95, 100, 110, 115],
+    'fsdelta': [90, 95, 100, 110, 115,90, 95, 100, 110, 115,90, 95, 100, 110, 115],
+    'fastslope2': [90, 95, 100, 110, 115,90, 95, 100, 110, 115,90, 95, 100, 110, 115],
+    'ema': [1000, 1200, 900, 1100, 1300,1000, 1200, 900, 1100, 1300,1000, 1200, 900, 1100, 1300]
+}
+
+
+# Basic settings
+testlist_id = ""
+runner_ids = ["838e918e-9be0-4251-a968-c13c83f3f173","c11c5cae-05f8-4b0a-aa4d-525ddac81684"]
+features = ["time","high","low","volume","open","close", "trades", "vwap","samebarslope", "fastslope","fsdelta", "fastslope2", "fsdelta2"]
+#TODO this is the linear prediction mode - add BINARY support
+#for binary, the target will either be an existing indicator or will be created on the fly
+
+#the model can also be built without bars, i.e. indicators only
+use_bars = True
+target = 'fastslope2'
+#predict how many bars in the future
+target_steps = 5
+name = "model1"
+seq = 10
+epochs = 200
+
+
+#the crossday identifier is time (the resolution value is used to remove sequences that span days)
+#using crossday_sequence assumes that time is among the features
+resolution = 1
+crossday_sequence = False
+#whether the model also learns cross-day (across runner/day data); if not, cross-day sequences are removed
+#realized via an auxiliary identifier (runner)
+
+#ensure feature order
+features.sort()
+#time goes first
+if "time" in features:
+    features.remove("time")
+    features.insert(0, "time")
+
+def merge_dicts(dict_list):
+    # Initialize an empty merged dictionary
+    merged_dict = {}
+
+    # Iterate through the dictionaries in the list and concatenate their lists per key
+    for i,d in enumerate(dict_list):
+        for key, value in d.items():
+            if key in merged_dict:
+                merged_dict[key] += value
+            else:
+                merged_dict[key] = value
+        #TODO insert an element identifying the runner
+
+    return merged_dict
+
+    # # Initialize the merged dictionary with the first dictionary in the list
+    # merged_dict = dict_list[0].copy()
+    # merged_dict["index"] = []
+
+    # # Iterate through the remaining dictionaries and concatenate their lists
+    # for i, d in enumerate(dict_list[1:]):
+    #     merged_dict["index"] =
+    #     for key, value in d.items():
+    #         if key in merged_dict:
+    #             merged_dict[key] += value
+    #         else:
+    #             merged_dict[key] = value
+
+    # return merged_dict
+
+def load_runner(runner_id):
+    res, sada = get_archived_runner_details_byID(runner_id)
+    if res == 0:
+        print("ok")
+    else:
+        print("error",res,sada)
+
+    bars = sada["bars"]
+    indicators = sada["indicators"][0]
+    return bars, indicators
+
+def prepare_data(bars, indicators, features, target) -> tuple[np.array, np.array]:
+    #create SOURCE DATA with features
+    # bars and indicators dictionary and features as input
+    indicator_data = np.column_stack([indicators[feature] for feature in features if feature in indicators])
+    if len(bars)>0:
+        bar_data = np.column_stack([bars[feature] for feature in features if feature in bars])
+        combined_day_data = np.column_stack([bar_data,indicator_data])
+    else:
+        combined_day_data = indicator_data
+
+    #create TARGET DATA
+    try:
+        target_base = bars[target]
+    except KeyError:
+        target_base = indicators[target]
+    target_day_data = np.column_stack([target_base])
+    return combined_day_data, target_day_data
+
+def load_runners_as_list(runner_ids: list, use_bars: bool):
+    """Loads all runner data (bars, indicators) for runner_ids into lists of dicts.
+
+    Args:
+        runner_ids: A list of runner_ids.
+        use_bars: Whether to load bars as well or just indicators.
+
+    Returns:
+        tuple (barslist, indicatorslist) - lists with one dictionary per runner.
+    """
+    barslist = []
+    indicatorslist = []
+    for runner_id in runner_ids:
+        bars, indicators = load_runner(runner_id)
+        if use_bars:
+            barslist.append(bars)
+        indicatorslist.append(indicators)
+
+    return barslist, indicatorslist
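+
+# Editor's sketch (hypothetical IDs) of how the helpers above compose:
+#   barslist, indicatorslist = load_runners_as_list(["id1", "id2"], use_bars=True)
+#   bars = merge_dicts(barslist)            # per-key list concatenation
+#   X, y = prepare_data(bars, merge_dicts(indicatorslist), features, target)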
+
+def create_sequences(combined_data, target_data, seq, target_steps, crossday_sequence = True):
+    """Creates sequences of given length seq and a target N steps in the future.
+
+    Args:
+        combined_data: A list of combined data.
+        target_data: A list of target data.
+        seq: The sequence length.
+        target_steps: The number of steps in the future to target.
+        crossday_sequence: Whether to create sequences across days (runners) as well.
+
+    Returns:
+        A list of X sequences and a list of y sequences.
+    """
+    X_train = []
+    y_train = []
+    last_delta = None
+    for i in range(len(combined_data) - seq - target_steps):
+        if last_delta is None:
+            last_delta = 2*(combined_data[i + seq + target_steps, 0] - combined_data[i, 0])
+
+        curr_delta = combined_data[i + seq + target_steps, 0] - combined_data[i, 0]
+        #if the end-of-sequence time span is significantly (2x) larger than the previous one, the sequence crosses a day boundary
+        #print(f"standard start {combined_data[i, 0]} end {combined_data[i + seq + target_steps, 0]} delta: {curr_delta}")
+        if crossday_sequence is False and curr_delta > last_delta:
+            print(f"sequence discarded. Start {combined_data[i, 0]} end {combined_data[i + seq + target_steps, 0]}")
+            continue
+        X_train.append(combined_data[i:i + seq])
+        y_train.append(target_data[i + seq + target_steps])
+        last_delta = 2*(combined_data[i + seq + target_steps, 0] - combined_data[i, 0])
+    return np.array(X_train), np.array(y_train)
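+
+# For example (editor's note - illustrative numbers): with seq=10 and target_steps=5,
+# sample i uses rows i..i+9 of combined_data as X and row i+15 of target_data as y;
+# with crossday_sequence=False a sequence is dropped when its start-to-end time span
+# exceeds twice the previous sample's span (i.e. it crosses a day/runner boundary)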
features") + +lastNbars = slice_dict_lists(bars, seq) +lastNindicators = slice_dict_lists(indicators, seq) +print("last5bars", lastNbars) +print("last5indicators",lastNindicators) + +indicator_data = np.column_stack([lastNindicators[feature] for feature in features if feature in lastNindicators]) +if use_bars: + bar_data = np.column_stack([lastNbars[feature] for feature in features if feature in lastNbars]) + combined_live_data = np.column_stack([bar_data, indicator_data]) +else: + combined_live_data = indicator_data +print("combined_live_data",combined_live_data) +combined_live_data = scalerX.transform(combined_live_data) +#scaler = StandardScaler() + +combined_live_data = np.array(combined_live_data) + +#converts to 3D array +# 1 number of samples in the array. +# 2 represents the sequence length. +# 3 represents the number of features in the data. +combined_live_data = combined_live_data.reshape((1, seq, combined_live_data.shape[1])) + + +# Make a prediction +prediction = model(combined_live_data, training=False) +#prediction = prediction.reshape((1, 1)) +# Convert the prediction back to the original scale +prediction = scalerY.inverse_transform(prediction) + +print("prediction for last value", float(prediction)) + +#TEST PREDICATIONS +# Evaluate the model on the test set +#pozor testovaci sadu na produkc scalovat samostatne +#X_test = scalerX.transform(X_test) +#predikce nad testovacimi daty +X_complete = model.predict(X_complete) +X_complete = scalerY.inverse_transform(X_complete) + +#target testovacim dat +Y_complete = scalerY.inverse_transform(Y_complete) +mse = mean_squared_error(Y_complete, X_complete) +print('Test MSE:', mse) + +# Plot the predicted vs. actual close prices +plt.plot(Y_complete, label='Actual') +plt.plot(X_complete, label='Predicted') +plt.legend() +plt.show() + +# To make a prediction, we can simply feed the model a sequence of 5 elements and it will predict the next element. 
+
+# To make a prediction, feed the model one scaled sequence of seq elements reshaped
+# to (1, seq, num_features); it predicts the (scaled) target target_steps bars ahead:
+# sequence = source_data[0:seq].reshape((1, seq, source_data.shape[1]))
+# prediction = scalerY.inverse_transform(model.predict(sequence))
diff --git a/v2realbot/controller/services.py b/v2realbot/controller/services.py
index 6e65038..ac7b378 100644
--- a/v2realbot/controller/services.py
+++ b/v2realbot/controller/services.py
@@ -668,6 +668,7 @@ def archive_runner(runner: Runner, strat: StrategyInstance, inter_batch_params:
     #file pro vyvoj: ouptut_metriky_tradeList.py
     results_metrics = populate_metrics_output_directory(strat, inter_batch_params)
 
+
     runArchive: RunArchive = RunArchive(id = runner.id,
         strat_id = runner.strat_id,
         name=runner.run_name,
@@ -936,7 +937,7 @@ def get_testlists():
 
     testlists = []
     for row in rows:
-        print(row)
+        #print(row)
         testlist = TestList(id=row[0], name=row[1], dates=json.loads(row[2]))
         testlists.append(testlist)
 
diff --git a/v2realbot/slicingtest.py b/v2realbot/slicingtest.py
deleted file mode 100644
index f359609..0000000
--- a/v2realbot/slicingtest.py
+++ /dev/null
@@ -1,7 +0,0 @@
-word = "buy_if_not_something"
-
-
-if word.endswith("something") and word[:-len] == "not_":
-    print("Word meets the condition.")
-else:
-    print("Word does not meet the condition.")
\ No newline at end of file
diff --git a/v2realbot/strategy/__pycache__/base.cpython-310.pyc b/v2realbot/strategy/__pycache__/base.cpython-310.pyc
index c6f382e..8ed5849 100644
Binary files a/v2realbot/strategy/__pycache__/base.cpython-310.pyc and b/v2realbot/strategy/__pycache__/base.cpython-310.pyc differ
diff --git a/v2realbot/strategy/base.py b/v2realbot/strategy/base.py
index e0ac7d1..7fb9d6b 100644
--- a/v2realbot/strategy/base.py
+++ b/v2realbot/strategy/base.py
@@ -431,6 +431,11 @@ class Strategy:
         if self.rtqueue is not None:
             self.rtqueue.put("break")
 
+        #get rid of attributes that hold references to the models
+        self.state.vars["loaded_models"] = {}
+        self.state.vars["loaded_scalersX"] = {}
+        self.state.vars["loaded_scalersY"] = {}
+
         #zavolame na loaderu remove streamer - mohou byt dalsi bezici strategie, ktery loader vyuzivaji
         #pripadne udelat shared loader a nebo dedicated loader
         #pokud je shared tak volat remove
diff --git a/v2realbot/utils/__pycache__/utils.cpython-310.pyc b/v2realbot/utils/__pycache__/utils.cpython-310.pyc
index 137554b..46de4a5 100644
Binary files a/v2realbot/utils/__pycache__/utils.cpython-310.pyc and b/v2realbot/utils/__pycache__/utils.cpython-310.pyc differ
diff --git a/v2realbot/utils/utils.py b/v2realbot/utils/utils.py
index 188bff4..51d195d 100644
--- a/v2realbot/utils/utils.py
+++ b/v2realbot/utils/utils.py
@@ -27,8 +27,39 @@ from collections import deque
 
 import numpy as np
 
+def slice_dict_lists(d, last_item, to_tmstp = False):
+    """Slices every list in the dictionary to its last last_item items.
+
+    Args:
+        d: A dictionary.
+        last_item: The number of items to keep at the end of each list.
+        to_tmstp: If True, convert "time" elements from datetime to timestamp.
+
+    Returns:
+        A new dictionary with the sliced lists.
+    """
+    sliced_d = {}
+    for key in d.keys():
+        if key == "time" and to_tmstp:
+            sliced_d[key] = [datetime.timestamp(t) for t in d[key][-last_item:]]
+        else:
+            sliced_d[key] = d[key][-last_item:]
+    return sliced_d
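+
+# A quick sketch of the behaviour (editor's note - hypothetical values):
+#   slice_dict_lists({"time": [dt1, dt2, dt3], "close": [1, 2, 3]}, 2)
+#   -> {"time": [dt2, dt3], "close": [2, 3]}   (datetimes become timestamps when to_tmstp=True)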
+
+
+# keys_set = set(keys)
+# sliced_d = {}
+# for key, value in d.items():
+#     if key in keys_set and isinstance(value, list):
+#         if key == "time" and to_tmstp:
+#             sliced_d[key] = [datetime.timestamp(t) for t in value[-last_item:]]
+#         else:
+#             sliced_d[key] = value[-last_item:]
+#     return sliced_d
+
+#WIP
 def create_new_bars(bars, new_resolution):
-    """Creates new bars dictionary in the new resolution.
+    """WIP - Creates new bars dictionary in the new resolution.
 
     Args:
         bars: A dictionary representing ohlcv bars.