retreat commit
This commit is contained in:
@ -2,331 +2,276 @@ import numpy as np
|
||||
from sklearn.preprocessing import StandardScaler
|
||||
from sklearn.metrics import mean_squared_error
|
||||
from sklearn.model_selection import train_test_split
|
||||
from keras.models import Sequential, load_model
|
||||
import v2realbot.ml.mlutils as mu
|
||||
from keras.layers import LSTM, Dense
|
||||
import matplotlib.pyplot as plt
|
||||
from v2realbot.controller.services import get_archived_runner_details_byID
|
||||
from v2realbot.common.model import RunArchiveDetail
|
||||
from v2realbot.config import DATA_DIR
|
||||
from v2realbot.utils.utils import slice_dict_lists
|
||||
from collections import defaultdict
|
||||
from operator import itemgetter
|
||||
from joblib import dump, load
|
||||
from v2realbot.ml.ml import ModelML
|
||||
from v2realbot.enums.enums import PredOutput, Source, TargetTRFM
|
||||
# from collections import defaultdict
|
||||
# from operator import itemgetter
|
||||
from joblib import load
|
||||
|
||||
# region Notes
|
||||
|
||||
#ZAKLAD PRO TRAINING SCRIPT na vytvareni model
|
||||
#ZAKLAD PRO TRAINING SCRIPT na vytvareni model u
|
||||
# TODO
|
||||
# podpora pro BINARY TARGET
|
||||
# podpora hyperpamaetru (activ.funkce sigmoid atp.)
|
||||
# udelat vsechny config vars do cfg objektu
|
||||
# vyuzit distribuovane prostredi - nebo aspon vlastni VM
|
||||
# dopracovat identifikatory typu lastday close, todays open atp.
|
||||
# random SEARCG a grid search
|
||||
# dopracovat denni identifikatory typu lastday close, todays open atp.
|
||||
# random SEARCH a grid search
|
||||
# udelat nejaka model metadata (napr, trenovano na (runners+obdobi), nastaveni treningovych dat, počet epoch, hyperparametry, config atribu atp.) - mozna persistovat v db
|
||||
# udelat nejake verzovani
|
||||
# predelat do GUI a modulu
|
||||
# prepare data do importovane funkce, aby bylo mozno pouzit v predict casti ve strategii a nemuselo se porad udrzovat
|
||||
#s nastavenim modelu. To stejne i s nastavenim upravy features
|
||||
|
||||
# vyuzit VectorBT na dohledani optimalizovanych parametru napr. pro buy,sell atp. Vyuzit podobne API na pripravu dat jako model.
|
||||
# EVAL MODEL - umoznit vektorové přidání indikátoru do runneru (např. predikce v modulu, vectorBT, optimalizace atp) - vytvorit si na to API, podobne co mam, nacte runner, transformuje, sekvencuje, provede a pak zpetne transformuje a prida jako dalsi indikator. Lze pak použít i v gui.
|
||||
# nove tlacitko "Display model prediction" na urovni archrunnera, které
|
||||
# - má volbu model + jestli zobrazit jen predictionu jako novy indikator nebo i mse from ytarget (nutny i target)
|
||||
# po spusteni pak:
|
||||
# - zkonztoluje jestli runner ma indikatory,ktere odpovidaji features modelu (bar_ftrs, ind_ftrs, optional i target)
|
||||
# - vektorově doplní predictionu (transformuje data, udela predictionu a Y transformuje zpet)
|
||||
# - vysledek (jako nove indikatory) implantuje do runnerdetailu a zobrazi
|
||||
# podivat se na dalsi parametry kerasu, napr. false positive atp.
|
||||
# podivat se jeste na rozdil mezi vectorovou predikci a skalarni - proc je nekdy rozdil, odtrasovat - pripadne pogooglit
|
||||
# odtrasovat, nekde je sum (zkusit si oboji v jednom skriptu a porovnat)
|
||||
|
||||
#TODO NAPADY Na modely
|
||||
#binary identifikace trendu napr. pokud nasledujici 3 bary rostou (0-1)
|
||||
#soustredit se na modely s vystupem 0-1 nebo -1 až 1
|
||||
#1.binary identifikace trendu napr. pokud nasledujici 3 bary rostou (0-1) nebo nasledujici bary roste momentum
|
||||
#2.soustredit se na modely s vystupem 0-1 nebo -1 až 1
|
||||
#3.Vyzkouset jeden model, ktery by identifikoval trendy v obou smerech - -1 pro klesani a 1 pro stoupání.
|
||||
#4.vyzkouset zda model vytvoreny z casti dne nebude funkcni na druhe casti (on the fly daily models)
|
||||
#5.zkusit modely s a bez time (prizpusobit tomu kod v ModelML - zejmena jak na crossday sekvence) - mozna ze zecatku dat aspon pryc z indikatoru?
|
||||
# Dat vsechny zbytecne features pryc, nechat tam jen ty podstatne - attention, tak cílím.
|
||||
#6. zkusit vyuzit tickprice v nejaekm modelu, pripadne pak dalsi CBAR indikatory . vymslet tickbased features
|
||||
#7. zkusit jako features nevyuzit standardni ceny, ale pouze indikatory reprezentujici chovani (fastslope,samebarslope,volume,tradencnt)
|
||||
#8. relativni OHLC - model pouzivajici (jen) bary, ale misto hodnot ohlc udelat features reprezentujici vztahy(pomery) mezi temito velicinami. tzn. relativni ohlc
|
||||
#9. jiny pristup by byl ucit model na konkretnich chunkach, ktere chci aby mi identifikoval. Např. určité úseky. Vymyslet. Buď nyni jako test intervaly, ale v budoucnu to treba jen nejak oznacit a poslat k nauceni. Pripadne pak udelat nejaky vycuc.
|
||||
#10. mozna správným výběrem targetu, můžu taky naučit jen určité věci. Specializace. Stačí když se jednou dvakrát denně aktivuje.
|
||||
# 11. udelat si go IN model, ktery pomuze strategii generovat vstup - staci jen aby mel trochu lepsi edge nez conditiony, o zbytek se postara logika strategie
|
||||
# 12. model pro neagregované nebo jen filtroné či velmi lehce agregované trady?
|
||||
|
||||
#DULEZITE
|
||||
# soustredit se v modelech na predikci nasledujici hodnoty, ideálně nějaký vektor ukazující směr (např. 0 - 1, kde nula nebude růst, 1 - bude růst strmě)
|
||||
# pro predikcí nějakého většího trendu, zkusti více modelů na různých rozlišení, každý ukazuje
|
||||
# hodnotu na svém rozlišení a jeho kombinace mi může určit vstup. Zkusit zda by nešel i jeden model.
|
||||
# Každopádně se soustředit
|
||||
# 1) na další hodnotu (tzn. vstupy musí být bezprostředně ovlivňující tuto (samebasrlope, atp.))
|
||||
# 2) její výše ukazuje směr na tomto rozlišení
|
||||
# 3) ideálně se učit z každého baru, tzn. cílová hodnota musí být známá u každého baru
|
||||
# (binary ne, potřebuju linární vektor) - i když 1 a 0 target v závislosti na stoupání a klesání by mohla být ok,
|
||||
# ale asi příliš restriktivní, spíš bych tam mohl dát jak moc. Tzn. +0.32, -0.04. Učilo by se to míru stoupání.
|
||||
# Tu míru tam potřebuju zachovanou.
|
||||
# pak si muzu rict, když je urcite pravdepodobnost, ze to bude stoupat (tzn. dalsi hodnota) na urovni 1,2,3 - tak jduvstup
|
||||
# zkusit na nejnižší úrovni i předvídat CBARy, směr dalšího ticku. Vyzkoušet.
|
||||
|
||||
##TODO - doma
|
||||
#bar_features a ind_features do dokumentace SL classic, stejne tak conditional indikator a mathop indikator
|
||||
#TODO - co je třeba vyvinout
|
||||
# GENERATOR test intervalu (vstup name, note, od,do,step)
|
||||
# napsat API, doma pak simple GUI
|
||||
# vyuziti ATR (jako hranice historickeho rozsahu) - atr-up, atr-down
|
||||
# nakreslit v grafu atru = close+atr, atrd = close-atr
|
||||
# pripadne si vypocet atr nejak customizovat, prip. ruzne multiplikatory pro high low, pripadne si to vypocist podle sebe
|
||||
# vyuziti:
|
||||
# pro prekroceni nejake lajny, napr. ema nebo yesterdayclose
|
||||
# - k identifikaci ze se pohybuje v jejim rozsahu
|
||||
# - proste je to buffer, ktery musi byt prekonan, aby byla urcita akce
|
||||
# pro learning pro vypocet conditional parametru (1,0,-1) prekroceni napr. dailyopen, yesterdayclose, gapclose
|
||||
# kde 1 prekroceno, 0 v rozsahu (atr), -1 prekroceno dolu - to pomuze uceni
|
||||
# vlastni supertrend strateige
|
||||
# zaroven moznost vyuzit klouzave či parametrizovane atr, které se na základě
|
||||
# určitých parametrů bude samo upravovat a cíleně vybočovat z KONTRA frekvencí, např. randomizovaný multiplier nebo nejak jinak ovlivneny minulým
|
||||
# v indikatorech vsude kde je odkaz ma source jako hodnotu tak defaultne mit moznost uvest lookback, napr. bude treba porovnavat nejak cenu vs predposledni hodnotu ATRka (nechat az vyvstane pozadavek)
|
||||
# zacit doma na ATRku si postavit supertrend, viz pinescript na ploše
|
||||
|
||||
|
||||
# Sample data (replace this with your actual OHLCV data)
|
||||
bars = {
|
||||
'time': [1, 2, 3, 4, 5,6,7,8,9,10,11,12,13,14,15],
|
||||
'high': [10, 11, 12, 13, 14,10, 11, 12, 13, 14,10, 11, 12, 13, 14],
|
||||
'low': [8, 9, 7, 6, 8,8, 9, 7, 6, 8,8, 9, 7, 6, 8],
|
||||
'volume': [1000, 1200, 900, 1100, 1300,1000, 1200, 900, 1100, 1300,1000, 1200, 900, 1100, 1300],
|
||||
'close': [9, 10, 11, 12, 13,9, 10, 11, 12, 13,9, 10, 11, 12, 13],
|
||||
'open': [9, 10, 8, 8, 8,9, 10, 8, 8, 8,9, 10, 8, 8, 8],
|
||||
'resolution': [1, 1, 1, 1, 1,1, 1, 1, 1, 1,1, 1, 1, 1, 1]
|
||||
}
|
||||
#TODO - obecne vylepsovaky
|
||||
# 1. v GUI graf container do n-TABů, mozna i draggable order, zaviratelne na Xko (innerContainer)
|
||||
# 2. mit mozna specialni mod na pripravu dat (agreg+indikator, tzn. vse jen bez vstupů) - můžu pak zapracovat víc vectorové doplňování dat
|
||||
# TOTO:: mozna by postacil vypnout backtester (tzn. no trades) - a projet jen indikatory. mozna by slo i vectorove optimalizovat.
|
||||
# indikatory by se mohli predsunout pred next a next by se vubec nemusel volat (jen nekompatibilita s predch.strategiemi)
|
||||
# 3. kombinace fastslope na fibonacci delkach (1,2,3,5..) jako dobry vstup pro ML
|
||||
# 4. podivat se na attention based LSTM zda je v kerasu implementace
|
||||
# do grafu přidat togglovatelné hranice barů určitých rozlišení - což mi jen udělá čáry Xs od sebe (dobré pro navrhování)
|
||||
# 5. vymyslet optimalizovane vyuziti modelu na produkci (nejak mit zkompilovane, aby to bylo raketově pro skalár) - nyní to backtest zpomalí 4x
|
||||
# 6. CONVNETS for time series forecasting - small 1D convnets can offer a fast alternative to RNNs for simple tasks such as text classification and timeseries forecasting.
|
||||
# zkusit small conv1D pro identifikaci víření před trendem, např. jen 6 barů - identifikovat dobře target, musí jít o tutovku na targetu
|
||||
# pro covnet zkusit cbar price, volume a time. Třeba to zachytí víření (ripples)
|
||||
# Další oblasti k predikci jsou ripples, vlnky - předzvěst nějakého mocnějšího pohybu. A je pravda, že předtím se mohou objevit nějaké indicie. Ty zkus zachytit.
|
||||
# Do runner_headers pridat bt_from, bt_to - pro razeni order_by, aby se runnery vzdy vraceli vzestupne dle data (pro machine l)
|
||||
|
||||
indicators = {
|
||||
'time': [1, 2, 3, 4, 5,6,7,8,9,10,11,12,13,14,15],
|
||||
'fastslope': [90, 95, 100, 110, 115,90, 95, 100, 110, 115,90, 95, 100, 110, 115],
|
||||
'fsdelta': [90, 95, 100, 110, 115,90, 95, 100, 110, 115,90, 95, 100, 110, 115],
|
||||
'fastslope2': [90, 95, 100, 110, 115,90, 95, 100, 110, 115,90, 95, 100, 110, 115],
|
||||
'ema': [1000, 1200, 900, 1100, 1300,1000, 1200, 900, 1100, 1300,1000, 1200, 900, 1100, 1300]
|
||||
}
|
||||
#TODO
|
||||
# vyvoj modelů workflow s LSTMtrain.py
|
||||
# 1) POC - pouze zde ve skriptu, nad 1-2 runnery, okamžité zobrazení v plotu,
|
||||
# optimalizace zakl. features a hyperparams. Zobrazit i u binary nejak cenu.
|
||||
# 2) REALITY CHECK - trening modelu na batchi test intervalu, overeni ve strategii v BT, zobrazeni predikce v RT chartu
|
||||
# 3) FINAL TRAINING
|
||||
# testovani predikce
|
||||
|
||||
|
||||
# Zakladni nastaveni
|
||||
testlist_id = ""
|
||||
runner_ids = ["838e918e-9be0-4251-a968-c13c83f3f173","c11c5cae-05f8-4b0a-aa4d-525ddac81684"]
|
||||
features = ["time","high","low","volume","open","close", "trades", "vwap","samebarslope", "fastslope","fsdelta", "fastslope2", "fsdelta2"]
|
||||
#TODO toto je linearni prediction mod, dodelat podporu BINARY
|
||||
#TODO tady
|
||||
# train model
|
||||
# - train data- batch nebo runners
|
||||
# - test data - batch or runners (s cim porovnavat/validovat)
|
||||
# - vyber architektury
|
||||
# - soucast skriptu muze byt i porovnavacka pripadne nejaky search optimalnich parametru
|
||||
|
||||
#lstmtrain - podporit jednotlive kroky vyse
|
||||
#modelML - udelat lepsi PODMINKY
|
||||
#frontend? ma cenu? asi ano - GUI na model - new - train/retrain-change
|
||||
# (vymyslet jak v gui chytře vybírat arch modelu a hyperparams, loss, optim - treba nejaka templata?)
|
||||
# mozna ciselnik architektur s editačním polem pro kód -jen pár řádků(.add, .compile) přidat v editoru
|
||||
# vymyslet jak to udělat pythonově
|
||||
#testlist generator api
|
||||
|
||||
# endregion
|
||||
|
||||
#if null,the validation is made on 10% of train data
|
||||
#runnery pro testovani
|
||||
validation_runners = ["a38fc269-8df3-4374-9506-f0280d798854"]
|
||||
|
||||
#u binary bude target bud hotovy indikator a nebo jej vytvorit on the fly
|
||||
cfg = ModelML(name="model1",
|
||||
version = "0.1",
|
||||
note = None,
|
||||
pred_output=PredOutput.LINEAR,
|
||||
input_sequences = 10,
|
||||
use_bars = True,
|
||||
bar_features = ["volume","trades"],
|
||||
ind_features = ["slope20", "ema20","emaFast","samebarslope","fastslope","fastslope4"],
|
||||
target='target', #referencni hodnota pro target - napr pro graf
|
||||
target_reference='vwap',
|
||||
train_target_steps=3,
|
||||
train_target_transformation=TargetTRFM.KEEPVAL,
|
||||
train_runner_ids = ["08b7f96e-79bc-4849-9142-19d5b28775a8"],
|
||||
train_batch_id = None,
|
||||
train_epochs = 10,
|
||||
train_remove_cross_sequences = True,
|
||||
)
|
||||
|
||||
#model muze byt take bez barů, tzn. jen indikatory
|
||||
use_bars = True
|
||||
target = 'fastslope2'
|
||||
#predict how many bars in the future
|
||||
target_steps = 5
|
||||
name = "model1"
|
||||
seq = 10
|
||||
epochs = 200
|
||||
#TODO toto cele dat do TRAIN metody - vcetne pripadneho loopu a podpory API
|
||||
|
||||
test_size = None
|
||||
|
||||
#crossday identifier je time (hodnota resolution je pouzita ne odstraneni sekvenci skrz dny)
|
||||
#predpoklad pouziti je crossday_sequence je time ve features
|
||||
resolution = 1
|
||||
crossday_sequence = False
|
||||
#zda se model uci i crosseday (skrz runner/day data). Pokud ne, pak se crossday sekvence odstrani
|
||||
#realizovano pomoci pomocneho identifikatoru (runner)
|
||||
#kdyz neplnime vstup, automaticky se loaduje training data z nastaveni classy
|
||||
source_data, target_data, rows_in_day = cfg.load_data()
|
||||
|
||||
#zajistime poradi
|
||||
features.sort()
|
||||
#cas na prvnim miste
|
||||
if "time" in features:
|
||||
features.remove("time")
|
||||
features.insert(0, "time")
|
||||
if len(target_data) == 0:
|
||||
raise Exception("target is empty - required for TRAINING - check target column name")
|
||||
|
||||
def merge_dicts(dict_list):
|
||||
# Initialize an empty merged dictionary
|
||||
merged_dict = {}
|
||||
np.set_printoptions(threshold=10,edgeitems=5)
|
||||
#print("source_data", source_data)
|
||||
#print("target_data", target_data)
|
||||
print("rows_in_day", rows_in_day)
|
||||
source_data = cfg.scalerX.fit_transform(source_data)
|
||||
|
||||
# Iterate through the dictionaries in the list
|
||||
for i,d in enumerate(dict_list):
|
||||
for key, value in d.items():
|
||||
if key in merged_dict:
|
||||
merged_dict[key] += value
|
||||
else:
|
||||
merged_dict[key] = value
|
||||
#vlozime element s idenitfikaci runnera
|
||||
#TODO mozna vyhodit to UNTR
|
||||
#TODO asi vyhodit i target reference a vymyslet jinak
|
||||
|
||||
return merged_dict
|
||||
#vytvořeni sekvenci po vstupních sadách (např. 10 barů) - výstup 3D např. #X_train (6205, 10, 14)
|
||||
#doplneni transformace target data
|
||||
X_train, y_train, y_train_ref = cfg.create_sequences(combined_data=source_data,
|
||||
target_data=target_data,
|
||||
remove_cross_sequences=cfg.train_remove_cross_sequences,
|
||||
rows_in_day=rows_in_day)
|
||||
|
||||
# # Initialize the merged dictionary with the first dictionary in the list
|
||||
# merged_dict = dict_list[0].copy()
|
||||
# merged_dict["index"] = []
|
||||
#zobrazime si transformovany target a jeho referncni sloupec
|
||||
#ZHOMOGENIZOVAT OSY
|
||||
plt.plot(y_train, label='Transf target')
|
||||
plt.plot(y_train_ref, label='Ref target')
|
||||
plt.plot()
|
||||
plt.legend()
|
||||
plt.show()
|
||||
|
||||
# # Iterate through the remaining dictionaries and concatenate their lists
|
||||
# for i, d in enumerate(dict_list[1:]):
|
||||
# merged_dict["index"] =
|
||||
# for key, value in d.items():
|
||||
# if key in merged_dict:
|
||||
# merged_dict[key] += value
|
||||
# else:
|
||||
# merged_dict[key] = value
|
||||
|
||||
# return merged_dict
|
||||
|
||||
def load_runner(runner_id):
    """Load the bars and indicators of one archived runner.

    Args:
        runner_id: Identifier handed to ``get_archived_runner_details_byID``.

    Returns:
        Tuple ``(bars, indicators)``: ``bars`` is ``sada["bars"]`` and
        ``indicators`` is the first element of ``sada["indicators"]``.

    Raises:
        RuntimeError: If the lookup returns a non-zero result code.
    """
    res, sada = get_archived_runner_details_byID(runner_id)
    if res != 0:
        # The original only printed the error and then indexed ``sada`` anyway,
        # which surfaced as an unrelated TypeError/KeyError further down.
        # NOTE(review): presumably ``sada`` carries the error description in
        # this case - confirm against the service implementation.
        print("error", res, sada)
        raise RuntimeError(f"cannot load runner {runner_id}: {res} {sada}")

    print("ok")
    bars = sada["bars"]
    indicators = sada["indicators"][0]
    return bars, indicators
|
||||
|
||||
def prepare_data(bars, indicators, features, target) -> tuple[np.ndarray, np.ndarray]:
    """Build the source (feature) matrix and the target column.

    Bar columns come first, followed by indicator columns, each group in the
    order given by ``features``. A feature name present in both dictionaries
    contributes two columns (one from each).

    Args:
        bars: Dict mapping bar column name to a list of values; may be empty
            when the model uses indicators only.
        indicators: Dict mapping indicator name to a list of values.
        features: Names of the columns to use as model inputs.
        target: Name of the target column; looked up in ``bars`` first and
            then in ``indicators``.

    Returns:
        Tuple ``(combined_day_data, target_day_data)`` - the stacked feature
        columns and the target stacked as a single column.

    Raises:
        ValueError: If none of ``features`` exists in ``bars`` or ``indicators``.
        KeyError: If ``target`` exists in neither dictionary.
    """
    # create SOURCE DATA with features
    bar_columns = [bars[feature] for feature in features if feature in bars]
    indicator_columns = [indicators[feature] for feature in features if feature in indicators]

    # Stack both groups in one call: np.column_stack([]) raises, so stacking the
    # groups separately (as before) crashed whenever one group matched nothing,
    # e.g. a feature set made of bar columns only.
    columns = bar_columns + indicator_columns
    if not columns:
        raise ValueError("none of the requested features is present in bars or indicators")
    combined_day_data = np.column_stack(columns)

    # create TARGET DATA
    try:
        target_base = bars[target]
    except KeyError:
        target_base = indicators[target]
    target_day_data = np.column_stack([target_base])
    return combined_day_data, target_day_data
|
||||
|
||||
def load_runners_as_list(runner_ids: list, use_bars: bool):
    """Load bars and indicators for every runner in ``runner_ids``.

    Args:
        runner_ids: Runner identifiers, loaded in the given order.
        use_bars: When true the bars of each runner are collected as well;
            otherwise the returned bars list stays empty.

    Returns:
        Tuple ``(barslist, indicatorslist)`` - lists holding one dictionary
        per runner.
    """
    # Load everything first, then split the (bars, indicators) pairs.
    loaded = [load_runner(single_id) for single_id in runner_ids]
    indicatorslist = [runner_indicators for _, runner_indicators in loaded]
    barslist = [runner_bars for runner_bars, _ in loaded] if use_bars else []
    return barslist, indicatorslist
|
||||
|
||||
def create_sequences(combined_data, target_data, seq, target_steps, crossday_sequence = True):
    """Cut the data into input windows of length ``seq`` with a future target.

    For a window starting at row ``i`` the input is ``combined_data[i:i + seq]``
    and the target is ``target_data[i + seq + target_steps]``.

    Args:
        combined_data: 2D array of features; column 0 is used to measure the
            span of each window (the surrounding script places time there).
        target_data: Target values aligned row by row with ``combined_data``.
        seq: The sequence (window) length.
        target_steps: Number of rows past the end of the window at which the
            target is taken.
        crossday_sequence: Whether to keep sequences that span days (runners).
            When ``False``, a window whose column-0 span exceeds twice the span
            of the last accepted window is dropped.

    Returns:
        Tuple ``(X, y)`` of numpy arrays built from the accepted windows.
    """
    windows = []
    targets = []
    last_delta = None
    for start in range(len(combined_data) - seq - target_steps):
        target_idx = start + seq + target_steps
        span = combined_data[target_idx, 0] - combined_data[start, 0]

        # The very first window defines the initial reference span.
        if last_delta is None:
            last_delta = 2 * span

        # A span markedly larger (2x) than the previous one means the window
        # crosses a day boundary.
        if crossday_sequence is False and span > last_delta:
            print(f"sekvence vyrazena. Zacatek {combined_data[start, 0]} konec {combined_data[target_idx, 0]}")
            continue

        windows.append(combined_data[start:start + seq])
        targets.append(target_data[target_idx])
        last_delta = 2 * span

    return np.array(windows), np.array(targets)
|
||||
|
||||
barslist, indicatorslist = load_runners_as_list(runner_ids, use_bars)
|
||||
|
||||
#zmergujeme vsechny data dohromady
|
||||
bars = merge_dicts(barslist)
|
||||
indicators = merge_dicts(indicatorslist)
|
||||
print(f"{len(indicators)}")
|
||||
print(f"{len(bars)}")
|
||||
source_data, target_data = prepare_data(bars, indicators, features, target)
|
||||
|
||||
# Summarize arrays with more than 10 elements when printing (only the leading and trailing items are shown)
|
||||
np.set_printoptions(threshold=10)
|
||||
print("source_data", source_data, "shape", np.shape(source_data))
|
||||
|
||||
# Standardize the data
|
||||
scalerX = StandardScaler()
|
||||
scalerY = StandardScaler()
|
||||
#FIT SCALER také fixuje počet FEATURES !!
|
||||
source_data = scalerX.fit_transform(source_data)
|
||||
target_data = scalerY.fit_transform(target_data)
|
||||
|
||||
#print("source_data shape",np.shape(source_data))
|
||||
|
||||
# Create sequences of seq elements and define the target prediction horizon
|
||||
X_train, y_train = create_sequences(source_data, target_data, seq=seq, target_steps=target_steps, crossday_sequence=crossday_sequence)
|
||||
|
||||
#X_train (6205, 10, 14)
|
||||
print("X_train", np.shape(X_train))
|
||||
print("After sequencing")
|
||||
print("source:X_train", np.shape(X_train))
|
||||
print("target:y_train", np.shape(y_train))
|
||||
print("target:", y_train)
|
||||
y_train = y_train.reshape(-1, 1)
|
||||
|
||||
X_complete = np.array(X_train.copy())
|
||||
Y_complete = np.array(y_train.copy())
|
||||
X_train = np.array(X_train)
|
||||
y_train = np.array(y_train)
|
||||
|
||||
# Split the data into training and test sets
|
||||
X_train, X_test, y_train, y_test = train_test_split(X_train, y_train, test_size=0.20, shuffle=False) #random_state=42)
|
||||
#target scaluji az po transformaci v create sequence -narozdil od X je stejny shape
|
||||
y_train = cfg.scalerY.fit_transform(y_train)
|
||||
|
||||
|
||||
if len(validation_runners) == 0:
|
||||
test_size = 0.10
|
||||
# Split the data into training and test sets - kazdy vstupni pole rozdeli na dve
|
||||
#nechame si takhle rozdelit i referencni sloupec
|
||||
X_train, X_test, y_train, y_test, y_train_ref, y_test_ref = train_test_split(X_train, y_train, y_train_ref, test_size=test_size, shuffle=False) #random_state=42)
|
||||
|
||||
print("Splittig the data")
|
||||
|
||||
print("X_train", np.shape(X_train))
|
||||
print("X_test", np.shape(X_test))
|
||||
print("y_train", np.shape(y_train))
|
||||
print("y_test", np.shape(y_test))
|
||||
print("y_test_ref", np.shape(y_test_ref))
|
||||
print("y_train_ref", np.shape(y_train_ref))
|
||||
|
||||
#print(np.shape(X_train))
|
||||
# Define the input shape of the LSTM layer dynamically based on the reshaped X_train value
|
||||
input_shape = (X_train.shape[1], X_train.shape[2])
|
||||
|
||||
# Build the LSTM model
|
||||
model = Sequential()
|
||||
model.add(LSTM(128, input_shape=input_shape))
|
||||
model.add(Dense(1))
|
||||
|
||||
#cfg.model = Sequential()
|
||||
cfg.model.add(LSTM(128, input_shape=input_shape))
|
||||
cfg.model.add(Dense(1, activation="relu"))
|
||||
#activation: Gelu, relu, elu, sigmoid...
|
||||
# Compile the model
|
||||
model.compile(loss='mse', optimizer='adam')
|
||||
cfg.model.compile(loss='mse', optimizer='adam')
|
||||
#loss: mse, binary_crossentropy
|
||||
|
||||
# Train the model
|
||||
model.fit(X_train, y_train, epochs=epochs)
|
||||
cfg.model.fit(X_train, y_train, epochs=cfg.train_epochs)
|
||||
|
||||
#save the model
|
||||
#model.save(DATA_DIR+'/my_model.keras')
|
||||
#model = load_model(DATA_DIR+'/my_model.keras')
|
||||
dump(scalerX, DATA_DIR+'/'+name+'scalerX.pkl')
|
||||
dump(scalerY, DATA_DIR+'/'+name+'scalerY.pkl')
|
||||
dump(model, DATA_DIR+'/'+name+'.pkl')
|
||||
cfg.save()
|
||||
|
||||
model = load(DATA_DIR+'/'+ name +'.pkl')
|
||||
scalerX: StandardScaler = load(DATA_DIR+'/'+ name +'scalerX.pkl')
|
||||
scalerY: StandardScaler = load(DATA_DIR+'/'+ name +'scalerY.pkl')
|
||||
#TBD db layer
|
||||
cfg: ModelML = mu.load_model(cfg.name, cfg.version)
|
||||
|
||||
#LIVE PREDICTION - IMAGINE THIS HAPPENS LIVE
|
||||
# Get the live data
|
||||
# Prepare the data for bars and indicators
|
||||
# region Live predict
|
||||
#EVALUATE SIM LIVE - PREDICT SCALAR - based on last X items
|
||||
barslist, indicatorslist = cfg.load_runners_as_list(runner_id_list=["67b51211-d353-44d7-a58a-5ae298436da7"])
|
||||
#zmergujeme vsechny data dohromady
|
||||
bars = mu.merge_dicts(barslist)
|
||||
indicators = mu.merge_dicts(indicatorslist)
|
||||
cfg.validate_available_features(bars, indicators)
|
||||
#VSTUPEM JE standardni pole v strategii
|
||||
value = cfg.predict(bars, indicators)
|
||||
print("prediction for LIVE SIM:", value)
|
||||
# endregion
|
||||
|
||||
#assume ohlc_features and indicator_features remain the same
|
||||
#EVALUATE TEST DATA - VECTOR BASED
|
||||
#pokud mame eval runners pouzijeme ty, jinak bereme cast z testovacich dat
|
||||
if len(validation_runners) > 0:
|
||||
source_data, target_data, rows_in_day = cfg.load_data(runners_ids=validation_runners)
|
||||
source_data = cfg.scalerX.fit_transform(source_data)
|
||||
X_test, y_test, y_test_ref = cfg.create_sequences(combined_data=source_data, target_data=target_data,remove_cross_sequences=True, rows_in_day=rows_in_day)
|
||||
|
||||
#prepnout ZDE pokud testovat cely bundle - jinak testujeme jen neznama
|
||||
#X_test = X_complete
|
||||
#y_test = Y_complete
|
||||
|
||||
#get last 5 items of respective indicators
|
||||
X_test = cfg.model.predict(X_test)
|
||||
X_test = cfg.scalerY.inverse_transform(X_test)
|
||||
|
||||
#mazeme runner indikator pokud tu je
|
||||
if "runner" in indicators:
|
||||
del indicators["runner"]
|
||||
print("runner key deleted from indicators")
|
||||
|
||||
if "runner" in features:
|
||||
features.remove("runner")
|
||||
print("runner removed from features")
|
||||
|
||||
lastNbars = slice_dict_lists(bars, seq)
|
||||
lastNindicators = slice_dict_lists(indicators, seq)
|
||||
print("last5bars", lastNbars)
|
||||
print("last5indicators",lastNindicators)
|
||||
|
||||
indicator_data = np.column_stack([lastNindicators[feature] for feature in features if feature in lastNindicators])
|
||||
if use_bars:
|
||||
bar_data = np.column_stack([lastNbars[feature] for feature in features if feature in lastNbars])
|
||||
combined_live_data = np.column_stack([bar_data, indicator_data])
|
||||
else:
|
||||
combined_live_data = indicator_data
|
||||
print("combined_live_data",combined_live_data)
|
||||
combined_live_data = scalerX.transform(combined_live_data)
|
||||
#scaler = StandardScaler()
|
||||
|
||||
combined_live_data = np.array(combined_live_data)
|
||||
|
||||
#converts to 3D array
|
||||
# 1 number of samples in the array.
|
||||
# 2 represents the sequence length.
|
||||
# 3 represents the number of features in the data.
|
||||
combined_live_data = combined_live_data.reshape((1, seq, combined_live_data.shape[1]))
|
||||
|
||||
|
||||
# Make a prediction
|
||||
prediction = model(combined_live_data, training=False)
|
||||
#prediction = prediction.reshape((1, 1))
|
||||
# Convert the prediction back to the original scale
|
||||
prediction = scalerY.inverse_transform(prediction)
|
||||
|
||||
print("prediction for last value", float(prediction))
|
||||
|
||||
#TEST PREDICTIONS
|
||||
# Evaluate the model on the test set
|
||||
#pozor testovaci sadu na produkc scalovat samostatne
|
||||
#X_test = scalerX.transform(X_test)
|
||||
#predikce nad testovacimi daty
|
||||
X_complete = model.predict(X_complete)
|
||||
X_complete = scalerY.inverse_transform(X_complete)
|
||||
|
||||
#target testovacim dat
|
||||
Y_complete = scalerY.inverse_transform(Y_complete)
|
||||
mse = mean_squared_error(Y_complete, X_complete)
|
||||
#target testovacim dat proc tu je reshape? y_test.reshape(-1, 1)
|
||||
y_test = cfg.scalerY.inverse_transform(y_test)
|
||||
#celkovy mean? nebo spis vector pro graf?
|
||||
mse = mean_squared_error(y_test, X_test)
|
||||
print('Test MSE:', mse)
|
||||
|
||||
# Plot the predicted vs. actual close prices
|
||||
plt.plot(Y_complete, label='Actual')
|
||||
plt.plot(X_complete, label='Predicted')
|
||||
# Plot the predicted vs. actual
|
||||
plt.plot(y_test, label='Actual')
|
||||
plt.plot(X_test, label='Predicted')
|
||||
#TODO zde nejak vymyslet jinou pricelinu - jako lightweight chart
|
||||
plt.plot(y_test_ref, label='reference column - price')
|
||||
plt.plot()
|
||||
plt.legend()
|
||||
plt.show()
|
||||
|
||||
# To make a prediction, we can simply feed the model a sequence of 5 elements and it will predict the next element. For example, to predict the close price for the 6th time period, we would feed the model the following sequence:
|
||||
|
||||
# sequence = combined_data[0:5]
|
||||
# prediction = model.predict(sequence)
|
||||
|
||||
Reference in New Issue
Block a user