diff --git a/requirements.txt b/requirements.txt index 1bf72ea..22fb813 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,9 +1,12 @@ +absl-py==2.0.0 alpaca==1.0.0 alpaca-py==0.7.1 altair==4.2.2 anyio==3.6.2 appdirs==1.4.4 +appnope==0.1.3 asttokens==2.2.1 +astunparse==1.6.3 attrs==22.2.0 better-exceptions==0.3.3 bleach==6.0.0 @@ -13,6 +16,8 @@ certifi==2022.12.7 chardet==5.1.0 charset-normalizer==3.0.1 click==8.1.3 +colorama==0.4.6 +comm==0.1.4 contourpy==1.0.7 cycler==0.11.0 dash==2.9.1 @@ -20,35 +25,70 @@ dash-bootstrap-components==1.4.1 dash-core-components==2.0.0 dash-html-components==2.0.0 dash-table==5.0.0 +dateparser==1.1.8 decorator==5.1.1 +defusedxml==0.7.1 +dill==0.3.7 entrypoints==0.4 +exceptiongroup==1.1.3 executing==1.2.0 fastapi==0.95.0 Flask==2.2.3 +flatbuffers==23.5.26 fonttools==4.39.0 +fpdf2==2.7.6 +gast==0.4.0 gitdb==4.0.10 GitPython==3.1.31 +google-auth==2.23.0 +google-auth-oauthlib==1.0.0 +google-pasta==0.2.0 +grpcio==1.58.0 h11==0.14.0 +h5py==3.9.0 icecream==2.1.3 idna==3.4 +imageio==2.31.6 importlib-metadata==6.1.0 +ipython==8.17.2 +ipywidgets==8.1.1 itsdangerous==2.1.2 +jedi==0.19.1 +Jinja2==3.1.2 +joblib==1.3.2 jsonschema==4.17.3 +jupyterlab-widgets==3.0.9 +keras==2.13.1 kiwisolver==1.4.4 +libclang==16.0.6 +llvmlite==0.39.1 Markdown==3.4.3 markdown-it-py==2.2.0 MarkupSafe==2.1.2 +matplotlib==3.8.2 +matplotlib-inline==0.1.6 mdurl==0.1.2 +mlroom @ git+https://github.com/drew2323/mlroom.git@967b1e3b5071854910ea859eca68bf0c3e67f951 +mplfinance==0.12.10b0 msgpack==1.0.4 +mypy-extensions==1.0.0 newtulipy==0.4.6 -numpy==1.24.2 +numba==0.56.4 +numpy==1.23.5 +oauthlib==3.2.2 +opt-einsum==3.3.0 packaging==23.0 pandas==1.5.3 param==1.13.0 +parso==0.8.3 +pexpect==4.8.0 Pillow==9.4.0 plotly==5.13.1 +prompt-toolkit==3.0.39 proto-plus==1.22.2 protobuf==3.20.3 +ptyprocess==0.7.0 +pure-eval==0.2.2 pyarrow==11.0.0 pyasn1==0.4.8 pyasn1-modules==0.2.8 @@ -56,41 +96,65 @@ pyct==0.5.0 pydantic==1.10.5 pydeck==0.8.0 Pygments==2.14.0 +pyinstrument==4.5.3 Pympler==1.0.1 pyparsing==3.0.9 pyrsistent==0.19.3 pysos==1.3.0 python-dateutil==2.8.2 python-dotenv==1.0.0 +python-multipart==0.0.6 pytz==2022.7.1 pytz-deprecation-shim==0.1.0.post0 pyviz-comms==2.2.1 PyYAML==6.0 -requests==2.28.2 +regex==2023.10.3 +requests==2.31.0 +requests-oauthlib==1.3.1 rich==13.3.1 rsa==4.9 +schedule==1.2.1 +scikit-learn==1.3.1 +scipy==1.11.2 seaborn==0.12.2 semver==2.13.0 six==1.16.0 smmap==5.0.0 sniffio==1.3.0 sseclient-py==1.7.2 +stack-data==0.6.3 starlette==0.26.1 streamlit==1.20.0 structlog==23.1.0 +TA-Lib==0.4.28 tenacity==8.2.2 +tensorboard==2.13.0 +tensorboard-data-server==0.7.1 +tensorflow==2.13.0 +tensorflow-estimator==2.13.0 +tensorflow-io-gcs-filesystem==0.34.0 +termcolor==2.3.0 +threadpoolctl==3.2.0 +tinydb==4.7.1 +tinydb-serialization==2.1.0 +tinyflux==0.4.0 toml==0.10.2 tomli==2.0.1 toolz==0.12.0 tornado==6.2 tqdm==4.65.0 +traitlets==5.13.0 typing_extensions==4.5.0 tzdata==2023.2 tzlocal==4.3 urllib3==1.26.14 uvicorn==0.21.1 +-e git+https://github.com/drew2323/v2trading.git@d38bf0600fbadbffba78ae23625eaecd1febc7f4#egg=v2realbot validators==0.20.0 +wcwidth==0.2.9 webencodings==0.5.1 websockets==10.4 Werkzeug==2.2.3 +widgetsnbextension==4.0.9 +wrapt==1.15.0 zipp==3.15.0 diff --git a/res_pred_act.png b/res_pred_act.png new file mode 100644 index 0000000..b77e9ea Binary files /dev/null and b/res_pred_act.png differ diff --git a/res_target.png b/res_target.png new file mode 100644 index 0000000..76ee56e Binary files /dev/null and b/res_target.png differ diff --git a/run.sh b/run.sh index 
bcb9231..b4c1cf2 100755
--- a/run.sh
+++ b/run.sh
@@ -26,12 +26,27 @@ PYTHON_TO_USE="python3"
 
 #----END EDITABLE VARS-------
 
+# Additions for handling strat.log backup
+HISTORY_DIR="$HOME/stratlogs"
+TIMESTAMP=$(date +"%Y%m%d-%H%M%S")
+LOG_FILE="strat.log"
+# e.g. ~/stratlogs/20240315-093001_strat.log
+BACKUP_LOG_FILE="$HISTORY_DIR/${TIMESTAMP}_$LOG_FILE"
+
 # If virtualenv specified & exists, using that version of python instead.
 if [ -d "$VIRTUAL_ENV_DIR" ]; then
     PYTHON_TO_USE="$VIRTUAL_ENV_DIR/bin/python"
 fi
 
 start() {
+    # Check and create history directory if it doesn't exist
+    [ ! -d "$HISTORY_DIR" ] && mkdir -p "$HISTORY_DIR"
+
+    # Check if strat.log exists and back it up
+    if [ -f "$LOG_FILE" ]; then
+        mv "$LOG_FILE" "$BACKUP_LOG_FILE"
+        echo "Backed up log to $BACKUP_LOG_FILE"
+    fi
+
     if [ ! -e "$OUTPUT_PID_PATH/$OUTPUT_PID_FILE" ]; then
         nohup "$PYTHON_TO_USE" ./$SCRIPT_TO_EXECUTE_PLUS_ARGS > strat.log 2>&1 &
         echo $! > "$OUTPUT_PID_PATH/$OUTPUT_PID_FILE"
         echo "Started $SCRIPT_TO_EXECUTE_PLUS_ARGS @ Process: $!"
diff --git a/tested_runner.png b/tested_runner.png
new file mode 100644
index 0000000..2162db4
Binary files /dev/null and b/tested_runner.png differ
diff --git a/testy/archive/interpolace.py b/testy/archive/interpolace.py
index 60adcaf..8d67af7 100644
--- a/testy/archive/interpolace.py
+++ b/testy/archive/interpolace.py
@@ -1,12 +1,14 @@
 import scipy.interpolate as spi
 import matplotlib.pyplot as plt
+import numpy as np
 
+# x = [2, 4, 6, 8, 10, 12, 14, 16, 18, 20]
+# y = [4, 7, 11, 16, 22, 29, 38, 49, 63, 80]
-x = [2, 4, 6, 8, 10, 12, 14, 16, 18, 20]
-y = [4, 7, 11, 16, 22, 29, 38, 49, 63, 80]
-
-
-y_interp = spi.interp1d(x, y)
+val = 10
+new = np.interp(val, [0, 50, 100], [0, 1, 2])
+print(new)  # -> 0.2 (val=10 sits 1/5 of the way along the 0->0, 50->1 segment)
+# y_interp = spi.interp1d(x, y)
 
 #find y-value associated with x-value of 13
 #print(y_interp(13))
diff --git a/v2realbot/ENTRY_ClassicSL_v01.py b/v2realbot/ENTRY_ClassicSL_v01.py
index bb6b053..1e5f7a4 100644
--- a/v2realbot/ENTRY_ClassicSL_v01.py
+++ b/v2realbot/ENTRY_ClassicSL_v01.py
@@ -167,6 +167,12 @@ def init(state: StrategyState):
     #history_datetime_from = zoneNY.localize(cal_dates[0].open)
     history_datetime_from = cal_dates[0].open
 
+    #store today's market close
+    #used for automatic strategy shutdown
+    #TODO optionally make this configurable via a parameter
+    state.today_market_close = cal_dates[-1].close
+
+
     # Find the previous market day
     history_datetime_to = None
     for session in reversed(cal_dates):
diff --git a/v2realbot/LSTMevalrunner.py b/v2realbot/LSTMevalrunner.py
deleted file mode 100644
index 399b164..0000000
--- a/v2realbot/LSTMevalrunner.py
+++ /dev/null
@@ -1,102 +0,0 @@
-import numpy as np
-from sklearn.preprocessing import StandardScaler
-from sklearn.metrics import mean_squared_error
-from sklearn.model_selection import train_test_split
-import v2realbot.ml.mlutils as mu
-from keras.layers import LSTM, Dense
-import matplotlib.pyplot as plt
-from v2realbot.ml.ml import ModelML
-from v2realbot.enums.enums import PredOutput, Source, TargetTRFM
-from v2realbot.controller.services import get_archived_runner_details_byID, update_archive_detail
-# from collections import defaultdict
-# from operator import itemgetter
-from joblib import load
-
-#TODO - DO API
-# v ml atomicke api pro evaluaci (runneru, batche)
-# v services: model.add_vector_prediction_to_archrunner_as_new_indicator (vrátí v podstate obohacený archDetail) - nebo i ukládat do db?
uvidime -# v rest api prevolani -# db support: zatim jen ciselnik modelu + jeho zakladni nastaveni, obrabeci api, load modelu zatim z file - -cfg: ModelML = mu.load_model("model1", "0.1") - - -#EVALUATE SPECIFIC RUNNER - VECTOR BASED (toto dat do samostatne API pripadne pak udelat nadstavnu na batch a runners) -#otestuje model na neznamem runnerovi, seznamu runneru nebo batch_id - - - -runner_id = "a38fc269-8df3-4374-9506-f0280d798854" -save_new_ind = True -source_data, target_data, rows_in_day = cfg.load_data(runners_ids=[runner_id]) - -if len(rows_in_day) > 1: - #pro vis se cela tato sluzba volat v loopu - raise Exception("Vytvareni indikatoru dostupne zatim jen pro jeden runner") - -#scalujeme X -source_data = cfg.scalerX.fit_transform(source_data) - -#tady si vyzkousim i skrz vice runneru -X_eval, y_eval, y_eval_ref = cfg.create_sequences(combined_data=source_data, target_data=target_data,remove_cross_sequences=True, rows_in_day=rows_in_day) - -#toto nutne? -X_eval = np.array(X_eval) -y_eval = np.array(y_eval) -y_eval_ref = np.array(y_eval_ref) -#scaluji target - nemusis -#y_eval = cfg.scalerY.fit_transform(y_eval) - -X_eval = cfg.model.predict(X_eval) -X_eval = cfg.scalerY.inverse_transform(X_eval) -print("po predikci x_eval shape", X_eval.shape) - -#pokud mame dostupnou i target v runneru, pak pridame porovnavaci indikator -difference_mse = None -if len(y_eval) > 0: - #TODO porad to pliva 1 hodnotu - difference_mse = mean_squared_error(y_eval, X_eval,multioutput="raw_values") - -print("ted mam tedy dva nove sloupce") -print("X_eval", X_eval.shape) -if difference_mse is not None: - print("difference_mse", difference_mse.shape) -print(f"zplostime je, dopredu pridame {cfg.input_sequences-1} a dozadu nic") -#print(f"a melo by nam to celkem dat {len(bars['time'])}") -#tohle pak nejak doladit, ale vypada to good -#plus do druheho indikatoru pridat ten difference_mse - -#TODO jeste je posledni hodnota predikce nejak OFF (2.52... ) - podivat se na to -#TODO na produkci srovnat se skutecnym BT predictem (mozna zde bude treba seq-1) - -# prvni predikce nejspis uz bude na desítce -ind_pred = list(np.concatenate([np.zeros(cfg.input_sequences-1), X_eval.ravel()])) -print(ind_pred) -print(len(ind_pred)) -print("tada") -#ted k nim pridame - -if save_new_ind: - #novy ind ulozime do archrunnera (na produkci nejspis jen show) - res, sada = get_archived_runner_details_byID(runner_id) - if res == 0: - print("ok") - else: - print("error",res,sada) - raise Exception(f"error loading runner {runner_id} : {res} {sada}") - - sada["indicators"][0]["pred_added"] = ind_pred - - req, res = update_archive_detail(runner_id, sada) - print(f"indicator pred_added was ADDED to {runner_id}") - - -# Plot the predicted vs. 
actual -plt.plot(y_eval, label='Target') -plt.plot(X_eval, label='Predicted') -#TODO zde nejak vymyslet jinou pricelinu - jako lightweight chart -if difference_mse is not None: - plt.plot(difference_mse, label='diference') - plt.plot(y_eval_ref, label='reference column - vwap') -plt.plot() -plt.legend() -plt.show() diff --git a/v2realbot/LSTMtrain.py b/v2realbot/LSTMtrain.py deleted file mode 100644 index f3d6b2e..0000000 --- a/v2realbot/LSTMtrain.py +++ /dev/null @@ -1,278 +0,0 @@ -import numpy as np -from sklearn.preprocessing import StandardScaler -from sklearn.metrics import mean_squared_error -from sklearn.model_selection import train_test_split -import v2realbot.ml.mlutils as mu -from keras.layers import LSTM, Dense -import matplotlib.pyplot as plt -from v2realbot.ml.ml import ModelML -from v2realbot.enums.enums import PredOutput, Source, TargetTRFM -# from collections import defaultdict -# from operator import itemgetter -from joblib import load - -# region Notes - -#ZAKLAD PRO TRAINING SCRIPT na vytvareni model u -# TODO -# podpora pro BINARY TARGET -# podpora hyperpamaetru (activ.funkce sigmoid atp.) -# vyuzit distribuovane prostredi - nebo aspon vlastni VM -# dopracovat denni identifikatory typu lastday close, todays open atp. -# random SEARCH a grid search -# udelat nejaka model metadata (napr, trenovano na (runners+obdobi), nastaveni treningovych dat, počet epoch, hyperparametry, config atribu atp.) - mozna persistovat v db -# udelat nejake verzovani -# predelat do GUI a modulu -# vyuzit VectorBT na dohledani optimalizovanych parametru napr. pro buy,sell atp. Vyuzit podobne API na pripravu dat jako model. -# EVAL MODEL - umoznit vektorové přidání indikátoru do runneru (např. predikce v modulu, vectorBT, optimalizace atp) - vytvorit si na to API, podobne co mam, nacte runner, transformuje, sekvencuje, provede a pak zpetne transformuje a prida jako dalsi indikator. Lze pak použít i v gui. -# nove tlacitko "Display model prediction" na urovni archrunnera, které -# - má volbu model + jestli zobrazit jen predictionu jako novy indikator nebo i mse from ytarget (nutny i target) -# po spusteni pak: -# - zkonztoluje jestli runner ma indikatory,ktere odpovidaji features modelu (bar_ftrs, ind_ftrs, optional i target) -# - vektorově doplní predictionu (transformuje data, udela predictionu a Y transformuje zpet) -# - vysledek (jako nove indikatory) implantuje do runnerdetailu a zobrazi -# podivat se na dalsi parametry kerasu, napr. false positive atp. -# podivat se jeste na rozdil mezi vectorovou predikci a skalarni - proc je nekdy rozdil, odtrasovat - pripadne pogooglit -# odtrasovat, nekde je sum (zkusit si oboji v jednom skriptu a porovnat) - -#TODO NAPADY Na modely -#1.binary identifikace trendu napr. pokud nasledujici 3 bary rostou (0-1) nebo nasledujici bary roste momentum -#2.soustredit se na modely s vystupem 0-1 nebo -1 až 1 -#3.Vyzkouset jeden model, ktery by identifikoval trendy v obou smerech - -1 pro klesani a 1 pro stoupání. -#4.vyzkouset zda model vytvoreny z casti dne nebude funkcni na druhe casti (on the fly daily models) -#5.zkusit modely s a bez time (prizpusobit tomu kod v ModelML - zejmena jak na crossday sekvence) - mozna ze zecatku dat aspon pryc z indikatoru? -# Dat vsechny zbytecne features pryc, nechat tam jen ty podstatne - attention, tak cílím. -#6. zkusit vyuzit tickprice v nejaekm modelu, pripadne pak dalsi CBAR indikatory . vymslet tickbased features -#7. 
zkusit jako features nevyuzit standardni ceny, ale pouze indikatory reprezentujici chovani (fastslope,samebarslope,volume,tradencnt) -#8. relativni OHLC - model pouzivajici (jen) bary, ale misto hodnot ohlc udelat features reprezentujici vztahy(pomery) mezi temito velicinami. tzn. relativni ohlc -#9. jiny pristup by byl ucit model na konkretnich chunkach, ktere chci aby mi identifikoval. Např. určité úseky. Vymyslet. Buď nyni jako test intervaly, ale v budoucnu to treba jen nejak oznacit a poslat k nauceni. Pripadne pak udelat nejaky vycuc. -#10. mozna správným výběrem targetu, můžu taky naučit jen určité věci. Specializace. Stačí když se jednou dvakrát denně aktivuje. -# 11. udelat si go IN model, ktery pomuze strategii generovat vstup - staci jen aby mel trochu lepsi edge nez conditiony, o zbytek se postara logika strategie -# 12. model pro neagregované nebo jen filtroné či velmi lehce agregované trady? - tickprice -# 13. jako featury pouzit Fourierovo transformaci, na sekundovem baru nebo tickprice - -#DULEZITE -# soustredit se v modelech na predikci nasledujici hodnoty, ideálně nějaký vektor ukazující směr (např. 0 - 1, kde nula nebude růst, 1 - bude růst strmě) -# pro predikcí nějakého většího trendu, zkusti více modelů na různých rozlišení, každý ukazuje -# hodnotu na svém rozlišení a jeho kombinace mi může určit vstup. Zkusit zda by nešel i jeden model. -# Každopádně se soustředit -# 1) na další hodnotu (tzn. vstupy musí být bezprostředně ovlivňující tuto (samebasrlope, atp.)) -# 2) její výše ukazuje směr na tomto rozlišení -# 3) ideálně se učit z každého baru, tzn. cílová hodnota musí být známá u každého baru -# (binary ne, potřebuju linární vektor) - i když 1 a 0 target v závislosti na stoupání a klesání by mohla být ok, -# ale asi příliš restriktivní, spíš bych tam mohl dát jak moc. Tzn. +0.32, -0.04. Učilo by se to míru stoupání. -# Tu míru tam potřebuju zachovanou. -# pak si muzu rict, když je urcite pravdepodobnost, ze to bude stoupat (tzn. dalsi hodnota) na urovni 1,2,3 - tak jduvstup -# zkusit na nejnižší úrovni i předvídat CBARy, směr dalšího ticku. Vyzkoušet. - -##TODO - doma -#bar_features a ind_features do dokumentace SL classic, stejne tak conditional indikator a mathop indikator -#TODO - co je třeba vyvinout -# GENERATOR test intervalu (vstup name, note, od,do,step) -# napsat API, doma pak simple GUI -# vyuziti ATR (jako hranice historickeho rozsahu) - atr-up, atr-down -# nakreslit v grafu atru = close+atr, atrd = close-atr -# pripadne si vypocet atr nejak customizovat, prip. ruzne multiplikatory pro high low, pripadne si to vypocist podle sebe -# vyuziti: -# pro prekroceni nejake lajny, napr. ema nebo yesterdayclose -# - k identifikaci ze se pohybuje v jejim rozsahu -# - proste je to buffer, ktery musi byt prekonan, aby byla urcita akce -# pro learning pro vypocet conditional parametru (1,0,-1) prekroceni napr. dailyopen, yesterdayclose, gapclose -# kde 1 prekroceno, 0 v rozsahu (atr), -1 prekroceno dolu - to pomuze uceni -# vlastni supertrend strateige -# zaroven moznost vyuzit klouzave či parametrizovane atr, které se na základě -# určitých parametrů bude samo upravovat a cíleně vybočovat z KONTRA frekvencí, např. randomizovaný multiplier nebo nejak jinak ovlivneny minulým -# v indikatorech vsude kde je odkaz ma source jako hodnotu tak defaultne mit moznost uvest lookback, napr. 
bude treba porovnavat nejak cenu vs predposledni hodnotu ATRka (nechat az vyvstane pozadavek) -# zacit doma na ATRku si postavit supertrend, viz pinescript na ploše - - -#TODO - obecne vylepsovaky -# 1. v GUI graf container do n-TABů, mozna i draggable order, zaviratelne na Xko (innerContainer) -# 2. mit mozna specialni mod na pripravu dat (agreg+indikator, tzn. vse jen bez vstupů) - můžu pak zapracovat víc vectorové doplňování dat -# TOTO:: mozna by postacil vypnout backtester (tzn. no trades) - a projet jen indikatory. mozna by slo i vectorove optimalizovat. -# indikatory by se mohli predsunout pred next a next by se vubec nemusel volat (jen nekompatibilita s predch.strategiemi) -# 3. kombinace fastslope na fibonacci delkach (1,2,3,5..) jako dobry vstup pro ML -# 4. podivat se na attention based LSTM zda je v kerasu implementace -# do grafu přidat togglovatelné hranice barů určitých rozlišení - což mi jen udělá čáry Xs od sebe (dobré pro navrhování) -# 5. vymyslet optimalizovane vyuziti modelu na produkci (nejak mit zkompilovane, aby to bylo raketově pro skalár) - nyní to backtest zpomalí 4x -# 6. CONVNETS for time series forecasting - small 1D convnets can offer a fast alternative to RNNs for simple tasks such as text classification and timeseries forecasting. -# zkusit small conv1D pro identifikaci víření před trendem, např. jen 6 barů - identifikovat dobře target, musí jít o tutovku na targetu -# pro covnet zkusit cbar price, volume a time. Třeba to zachytí víření (ripples) -# Další oblasti k predikci jsou ripples, vlnky - předzvěst nějakého mocnějšího pohybu. A je pravda, že předtím se mohou objevit nějaké indicie. Ty zkus zachytit. -# Do runner_headers pridat bt_from, bt_to - pro razeni order_by, aby se runnery vzdy vraceli vzestupne dle data (pro machine l) - -#TODO -# vyvoj modelů workflow s LSTMtrain.py -# 1) POC - pouze zde ve skriptu, nad 1-2 runnery, okamžité zobrazení v plotu, -# optimalizace zakl. features a hyperparams. Zobrazit i u binary nejak cenu. -# 2) REALITY CHECK - trening modelu na batchi test intervalu, overeni ve strategii v BT, zobrazeni predikce v RT chartu -# 3) FINAL TRAINING -# testovani predikce - - -#TODO tady -# train model -# - train data- batch nebo runners -# - test data - batch or runners (s cim porovnavat/validovat) -# - vyber architektury -# - soucast skriptu muze byt i porovnavacka pripadne nejaky search optimalnich parametru - -#lstmtrain - podporit jednotlive kroky vyse -#modelML - udelat lepsi PODMINKY -#frontend? ma cenu? asi ano - GUI na model - new - train/retrain-change -# (vymyslet jak v gui chytře vybírat arch modelu a hyperparams, loss, optim - treba nejaka templata?) 
-# mozna ciselnik architektur s editačním polem pro kód -jen pár řádků(.add, .compile) přidat v editoru -# vymyslet jak to udělat pythonově -#testlist generator api - -# endregion - -#if null,the validation is made on 10% of train data -#runnery pro testovani -validation_runners = ["a38fc269-8df3-4374-9506-f0280d798854"] - -#u binary bude target bud hotovy indikator a nebo jej vytvorit on the fly -cfg = ModelML(name="model1", - version = "0.1", - note = None, - pred_output=PredOutput.LINEAR, - input_sequences = 10, - use_bars = True, - bar_features = ["volume","trades"], - ind_features = ["slope20", "ema20","emaFast","samebarslope","fastslope","fastslope4"], - target='target', #referencni hodnota pro target - napr pro graf - target_reference='vwap', - train_target_steps=3, - train_target_transformation=TargetTRFM.KEEPVAL, - train_runner_ids = ["08b7f96e-79bc-4849-9142-19d5b28775a8"], - train_batch_id = None, - train_epochs = 10, - train_remove_cross_sequences = True, - ) - -#TODO toto cele dat do TRAIN metody - vcetne pripadneho loopu a podpory API - -test_size = None - -#kdyz neplnime vstup, automaticky se loaduje training data z nastaveni classy -source_data, target_data, rows_in_day = cfg.load_data() - -if len(target_data) == 0: - raise Exception("target is empty - required for TRAINING - check target column name") - -np.set_printoptions(threshold=10,edgeitems=5) -#print("source_data", source_data) -#print("target_data", target_data) -print("rows_in_day", rows_in_day) -source_data = cfg.scalerX.fit_transform(source_data) - -#TODO mozna vyhodit to UNTR -#TODO asi vyhodit i target reference a vymyslet jinak - -#vytvořeni sekvenci po vstupních sadách (např. 10 barů) - výstup 3D např. #X_train (6205, 10, 14) -#doplneni transformace target data -X_train, y_train, y_train_ref = cfg.create_sequences(combined_data=source_data, - target_data=target_data, - remove_cross_sequences=cfg.train_remove_cross_sequences, - rows_in_day=rows_in_day) - -#zobrazime si transformovany target a jeho referncni sloupec -#ZHOMOGENIZOVAT OSY -plt.plot(y_train, label='Transf target') -plt.plot(y_train_ref, label='Ref target') -plt.plot() -plt.legend() -plt.show() - -print("After sequencing") -print("source:X_train", np.shape(X_train)) -print("target:y_train", np.shape(y_train)) -print("target:", y_train) -y_train = y_train.reshape(-1, 1) - -X_complete = np.array(X_train.copy()) -Y_complete = np.array(y_train.copy()) -X_train = np.array(X_train) -y_train = np.array(y_train) - -#target scaluji az po transformaci v create sequence -narozdil od X je stejny shape -y_train = cfg.scalerY.fit_transform(y_train) - - -if len(validation_runners) == 0: - test_size = 0.10 -# Split the data into training and test sets - kazdy vstupni pole rozdeli na dve -#nechame si takhle rozdelit i referencni sloupec -X_train, X_test, y_train, y_test, y_train_ref, y_test_ref = train_test_split(X_train, y_train, y_train_ref, test_size=test_size, shuffle=False) #random_state=42) - -print("Splittig the data") - -print("X_train", np.shape(X_train)) -print("X_test", np.shape(X_test)) -print("y_train", np.shape(y_train)) -print("y_test", np.shape(y_test)) -print("y_test_ref", np.shape(y_test_ref)) -print("y_train_ref", np.shape(y_train_ref)) - -#print(np.shape(X_train)) -# Define the input shape of the LSTM layer dynamically based on the reshaped X_train value -input_shape = (X_train.shape[1], X_train.shape[2]) - -# Build the LSTM model -#cfg.model = Sequential() -cfg.model.add(LSTM(128, input_shape=input_shape)) -cfg.model.add(Dense(1, 
activation="relu"))
-#activation: Gelu, relu, elu, sigmoid...
-# Compile the model
-cfg.model.compile(loss='mse', optimizer='adam')
-#loss: mse, binary_crossentropy
-
-# Train the model
-cfg.model.fit(X_train, y_train, epochs=cfg.train_epochs)
-
-#save the model
-cfg.save()
-
-#TBD db layer
-cfg: ModelML = mu.load_model(cfg.name, cfg.version)
-
-# region Live predict
-#EVALUATE SIM LIVE - PREDICT SCALAR - based on last X items
-barslist, indicatorslist = cfg.load_runners_as_list(runner_id_list=["67b51211-d353-44d7-a58a-5ae298436da7"])
-#zmergujeme vsechny data dohromady
-bars = mu.merge_dicts(barslist)
-indicators = mu.merge_dicts(indicatorslist)
-cfg.validate_available_features(bars, indicators)
-#VSTUPEM JE standardni pole v strategii
-value = cfg.predict(bars, indicators)
-print("prediction for LIVE SIM:", value)
-# endregion
-
-#EVALUATE TEST DATA - VECTOR BASED
-#pokud mame eval runners pouzijeme ty, jinak bereme cast z testovacich dat
-if len(validation_runners) > 0:
-    source_data, target_data, rows_in_day = cfg.load_data(runners_ids=validation_runners)
-    source_data = cfg.scalerX.fit_transform(source_data)
-    X_test, y_test, y_test_ref = cfg.create_sequences(combined_data=source_data, target_data=target_data,remove_cross_sequences=True, rows_in_day=rows_in_day)
-
-#prepnout ZDE pokud testovat cely bundle - jinak testujeme jen neznama
-#X_test = X_complete
-#y_test = Y_complete
-
-X_test = cfg.model.predict(X_test)
-X_test = cfg.scalerY.inverse_transform(X_test)
-
-#target testovacim dat proc tu je reshape? y_test.reshape(-1, 1)
-y_test = cfg.scalerY.inverse_transform(y_test)
-#celkovy mean? nebo spis vector pro graf?
-mse = mean_squared_error(y_test, X_test)
-print('Test MSE:', mse)
-
-# Plot the predicted vs. actual
-plt.plot(y_test, label='Actual')
-plt.plot(X_test, label='Predicted')
-#TODO zde nejak vymyslet jinou pricelinu - jako lightweight chart
-plt.plot(y_test_ref, label='reference column - price')
-plt.plot()
-plt.legend()
-plt.show()
diff --git a/v2realbot/config.py b/v2realbot/config.py
index 1905ee0..4a7a668 100644
--- a/v2realbot/config.py
+++ b/v2realbot/config.py
@@ -52,6 +52,7 @@ COUNT_API_REQUESTS = False
 #stratvars that cannot be changed in gui
 STRATVARS_UNCHANGEABLES = ['pendingbuys', 'blockbuy', 'jevylozeno', 'limitka']
 DATA_DIR = user_data_dir("v2realbot")
+MODEL_DIR = Path(DATA_DIR)/"models"
 #BT DELAYS
 #profiling
 PROFILING_NEXT_ENABLED = False
diff --git a/v2realbot/controller/services.py b/v2realbot/controller/services.py
index 76dfa9a..4c2d5eb 100644
--- a/v2realbot/controller/services.py
+++ b/v2realbot/controller/services.py
@@ -519,7 +519,8 @@ def batch_run_manager(id: UUID, runReq: RunRequest, rundays: list[RunDay]):
             print("Datum do", day.end)
             runReq.bt_from = day.start
             runReq.bt_to = day.end
-            runReq.note = f"{first_frm}-{last_frm} Batch {batch_id} #{cnt}/{cnt_max} {weekdayfilter_string} {day.name} N:{day.note} {note_from_run_request}"
+            #note: the GUI parses the batch header from this string
+            runReq.note = f"{first_frm}-{last_frm} Batch {batch_id} #{cnt}/{cnt_max} {weekdayfilter_string} {day.name} {day.note if day.note is not None else ''} N: {note_from_run_request}"
 
             #protoze jsme v ridicim vlaknu, poustime za sebou jednotlive stratiny v synchronnim modu
             res, id_val = run_stratin(id=id, runReq=runReq, synchronous=True, inter_batch_params=inter_batch_params)
diff --git a/v2realbot/enums/enums.py b/v2realbot/enums/enums.py
index d0f4095..9130c13 100644
--- a/v2realbot/enums/enums.py
+++ b/v2realbot/enums/enums.py
@@ -60,6 +60,7 @@ class RecordType(str, Enum):
     BAR = "bar"
     CBAR = "cbar"
     CBARVOLUME = "cbarvolume"
+    CBARDOLLAR = "cbardollar"
     CBARRENKO = "cbarrenko"
     TRADE = "trade"
 
diff --git a/v2realbot/loader/aggregator.py b/v2realbot/loader/aggregator.py
index 5d0abd6..216bd4c 100644
--- a/v2realbot/loader/aggregator.py
+++ b/v2realbot/loader/aggregator.py
@@ -178,14 +178,30 @@ class TradeAggregator:
         #     return
         # else:
         #     pass
-        if self.rectype in (RecordType.BAR, RecordType.CBAR):
-            return await self.calculate_time_bar(data, symbol)
 
-        if self.rectype == RecordType.CBARVOLUME:
-            return await self.calculate_volume_bar(data, symbol)
 
-        if self.rectype == RecordType.CBARRENKO:
-            return await self.calculate_renko_bar(data, symbol)
+        match self.rectype:
+            case RecordType.BAR | RecordType.CBAR:
+                return await self.calculate_time_bar(data, symbol)
+
+            case RecordType.CBARVOLUME:
+                return await self.calculate_volume_bar(data, symbol)
+
+            case RecordType.CBARDOLLAR:
+                return await self.calculate_dollar_bar(data, symbol)
+
+            case RecordType.CBARRENKO:
+                return await self.calculate_renko_bar(data, symbol)
 
     async def calculate_time_bar(self, data, symbol):
         #print("barstart",datetime.fromtimestamp(self.bar_start))
@@ -551,6 +567,179 @@ class TradeAggregator:
         else:
             return []
 
+    #WIP - review this code and test it
+    async def calculate_dollar_bar(self, data, symbol):
+        """
+        Aggregates DOLLAR BARS.
+        Main variables:
+        - self.openedBar (dict) = stateful, holds the active unconfirmed bar
+        - confirmedBars (list) = stateless, holds confirmed bars that are flushed at the end of the function
+        """
+        #volume_bucket = 10000 #daily MA volume over 30 days divided by 50 - move to config
+        dollar_bucket = self.resolution
+        #confirmed bars, ready to be returned
+        confirmedBars = []
+        #confirms the existing bar and queues it for return
+        def confirm_existing():
+            self.openedBar['confirmed'] = 1
+            self.openedBar['vwap'] = self.vwaphelper / self.openedBar['volume']
+            self.vwaphelper = 0
+
+            #store the start of the confirmed bar
+            #self.lastBarConfirmed = self.openedBar['time']
+
+            self.openedBar['updated'] = data['t']
+            confirmedBars.append(deepcopy(self.openedBar))
+            self.openedBar = None
+            #TBD bump the time by a microsecond after each confirmation (for GUI display)
+            #data['t'] = data['t'] + 0.000001
+
+        #init a new unconfirmed bar - the bucket size was checked by the caller
+        def initialize_unconfirmed(size):
+            #initialize for a new bar
+            self.vwaphelper += (data['p'] * size)
+            self.barindex +=1
+            self.openedBar = {
+                "close": data['p'],
+                "high": data['p'],
+                "low": data['p'],
+                "open": data['p'],
+                "volume": size,
+                "trades": 1,
+                "hlcc4": data['p'],
+                "confirmed": 0,
+                "time": datetime.fromtimestamp(data['t']),
+                "updated": data['t'],
+                "vwap": data['p'],
+                "index": self.barindex,
+                "resolution":dollar_bucket
+                }
+
+        def update_unconfirmed(size):
+            #compute vwap - requires the previous values
+            self.vwaphelper += (data['p'] * size)
+            self.openedBar['updated'] = data['t']
+            self.openedBar['close'] = data['p']
+            self.openedBar['high'] = max(self.openedBar['high'],data['p'])
+            self.openedBar['low'] = min(self.openedBar['low'],data['p'])
+            self.openedBar['volume'] = self.openedBar['volume'] + size
+            self.openedBar['trades'] = self.openedBar['trades'] + 1
+            self.openedBar['vwap'] = self.vwaphelper / self.openedBar['volume']
+            #revisit this rounding
+            self.openedBar['hlcc4'] = round((self.openedBar['high']+self.openedBar['low']+self.openedBar['close']+self.openedBar['close'])/4,3)
+
+        #init new - confirmed
+        def initialize_confirmed(size):
+            #store the start of the confirmed bar
+            #self.lastBarConfirmed = datetime.fromtimestamp(data['t'])
+            self.barindex +=1
+            confirmedBars.append({
+                "close": data['p'],
+                "high": data['p'],
+                "low": data['p'],
+                "open": data['p'],
+                "volume": size,
+                "trades": 1,
+                "hlcc4":data['p'],
+                "confirmed": 1,
+                "time": datetime.fromtimestamp(data['t']),
+                "updated": data['t'],
+                "vwap": data['p'],
+                "index": self.barindex,
+                "resolution": dollar_bucket
+                })
+
+        #current trade dollar value
+        trade_dollar_val = int(data['s'])*float(data['p'])
+
+        #an open bar exists and this trade fits into it
+        if self.openedBar is not None and trade_dollar_val + self.openedBar['volume']*self.openedBar['close'] < dollar_bucket:
+            #the trade fits into the current bar (i.e. the bucket is not exceeded)
+            #update the existing unconfirmed bar
+            update_unconfirmed(int(data['s']))
+        #it does not fit, or no bar is open yet
+        else:
+            #1) a bar is already open - top it up to the bucket size and confirm it
+            if self.openedBar is not None:
+
+                #top it up with the remainder (bucket_left is the remaining volume)
+                opened_bar_dollar_val = self.openedBar['volume']*self.openedBar['close']
+                bucket_left = int((dollar_bucket - opened_bar_dollar_val)/float(data['p']))
+                # - update and confirm the bar
+                update_unconfirmed(bucket_left)
+                confirm_existing()
+
+                #the remaining size goes to further processing
+                data['s'] = int(data['s']) - bucket_left
+                #bump the time by a microsecond
+                data['t'] = round((data['t']) + 0.000001,6)
+
+            #2) the rest fits into a single new bar
+            if int(data['s'])*float(data['p']) < dollar_bucket:
+                #create a new unconfirmed bar
+                initialize_unconfirmed(int(data['s']))
+            #it does not fit - create 1 to N more bars (the last one unconfirmed)
+            else:
+                # >>> for i in range(0, 550, 500):
+                # ...     print(i)
+                # ...
+                # 0
+                # 500
+
+                #create fully confirmed buckets (as many as fit completely)
+                for size in range(int(dollar_bucket/float(data['p'])), int(data['s']), int(dollar_bucket/float(data['p']))):
+                    #size must stay an integer share count, as in the other bar types
+                    initialize_confirmed(int(dollar_bucket/float(data['p'])))
+                    #bump the time by a microsecond
+                    data['t'] = round((data['t']) + 0.000001,6)
+                    #creates a complete full bucket with the same prices and size
+                    #and appends it to the return list
+
+                #if there is a remainder, turn it into an unconfirmed bar
+                zbytek = int(data['s'])*float(data['p']) % dollar_bucket
+
+                #create the unconfirmed bar from the remainder
+                if zbytek > 0:
+                    #convert back to volume
+                    zbytek = int(zbytek/float(data['p']))
+                    initialize_unconfirmed(zbytek)
+                    #creates a new open bar with size zbytek
+
+        #is the price unchanged since the previous trade? an unconfirmed cbar is only returned on a price change
+        if self.last_price == data['p']:
+            self.diff_price = False
+        else:
+            self.diff_price = True
+        self.last_price = data['p']
+
+        if float(data['t']) - float(self.lasttimestamp) < GROUP_TRADES_WITH_TIMESTAMP_LESS_THAN:
+            self.trades_too_close = True
+        else:
+            self.trades_too_close = False
+
+        #store as the previous value (this is how open and close are detected)
+        self.lasttimestamp = data['t']
+        self.iterace += 1
+        # print(self.iterace, data)
+
+        #if there are confirmed bars, FLUSH them together with any open bar (one was likely created after them)
+        if len(confirmedBars) > 0:
+            return_set = confirmedBars + ([self.openedBar] if self.openedBar is not None else [])
+            confirmedBars = []
+            return return_set
+
+        #no confirmed bars - FLUSH the open CBARDOLLAR bar; price change is ignored, but machine-gun bursts are not forwarded (unless they produce a confirmed bar)
+        if self.openedBar is not None and self.rectype == RecordType.CBARDOLLAR:
+
+            #we also let the same price through (needed for MYSELL), but machine-gun bursts were blocking, i.e. trades closer than GROUP_TRADES_WITH_TIMESTAMP_LESS_THAN (1 ms)
+            #if self.diff_price is True:
+            if self.trades_too_close is False:
+                return [self.openedBar]
+            else:
+                return []
+        else:
+            return []
+
+
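Review note: a minimal, self-contained sketch of the splitting arithmetic the hunk above implements. split_trade and all numbers are invented for illustration and are not part of the patch; the sketch is idealized and skips the int-rounding corner cases the WIP code still has to settle.

def split_trade(size, price, open_dollar_val, dollar_bucket):
    """How many bars one trade confirms, and which share remainder stays unconfirmed."""
    bars = 0
    # top up and confirm the currently open bar first, if the trade overflows it
    if open_dollar_val > 0 and open_dollar_val + size * price >= dollar_bucket:
        top_up = int((dollar_bucket - open_dollar_val) / price)  # shares that close the open bar
        size -= top_up
        bars += 1
    per_bar = int(dollar_bucket / price)  # shares per fully confirmed bar
    bars += size // per_bar
    remainder = size % per_bar            # shares left in an unconfirmed bar
    return bars, remainder

# 1200 shares @ $50 against a $10,000 bucket, with $4,000 already in the open bar:
# 120 shares top up and confirm the open bar, five full 200-share bars follow,
# and 80 shares remain as the new unconfirmed bar.
print(split_trade(1200, 50.0, 4000.0, 10_000))  # -> (6, 80)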
 
     async def calculate_renko_bar(self, data, symbol):
         """"
         Agreguje RENKO BARS - dle brick size
diff --git a/v2realbot/main.py b/v2realbot/main.py
index 8f79e37..dd8f0ba 100644
--- a/v2realbot/main.py
+++ b/v2realbot/main.py
@@ -1,11 +1,11 @@
 import os,sys
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from v2realbot.config import WEB_API_KEY, DATA_DIR, MEDIA_DIRECTORY, LOG_FILE
+from v2realbot.config import WEB_API_KEY, DATA_DIR, MEDIA_DIRECTORY, LOG_FILE, MODEL_DIR
 from alpaca.data.timeframe import TimeFrame, TimeFrameUnit
 from datetime import datetime
 import os
 from rich import print
-from fastapi import FastAPI, Depends, HTTPException, status
+from fastapi import FastAPI, Depends, HTTPException, status, File, UploadFile
 from fastapi.security import APIKeyHeader
 import uvicorn
 from uuid import UUID
@@ -455,6 +455,16 @@ def _delete_archived_runners_byIDs(runner_ids: list[UUID]):
     elif res < 0:
         raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"Error: {res}:{id}")
 
+#get runners list based on batch_id
+@app.get("/archived_runners/batch/{batch_id}", dependencies=[Depends(api_key_auth)])
+def _get_archived_runnerslist_byBatchID(batch_id: str) -> list[UUID]:
+    res, set = cs.get_archived_runnerslist_byBatchID(batch_id)
+    if res == 0:
+        return set
+    else:
+        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="No data found")
+
+
 #delete archive runner from header and detail
 @app.delete("/archived_runners/batch/{batch_id}", dependencies=[Depends(api_key_auth)], status_code=status.HTTP_200_OK)
 def _delete_archived_runners_byBatchID(batch_id: str):
@@ -763,6 +773,44 @@ def delete_item(item_id: int) -> dict:
 
 # endregion
 
+#model section
+#UPLOAD MODEL
+@app.post("/model/upload_model", dependencies=[Depends(api_key_auth)])
+async def upload_model(file: UploadFile = File(...)):
+    # Specify the directory to save the file
+    #save_directory = DATA_DIR+'/models/'
+    save_directory = MODEL_DIR
+
+    os.makedirs(save_directory, exist_ok=True)
+
+    # Extract just the filename, discarding any path information
+    base_filename = os.path.basename(file.filename)
+    file_path = os.path.join(save_directory, base_filename)
+
+    # Save the uploaded file
+    with open(file_path, "wb") as buffer:
+        while True:
+            data = await file.read(1024) # Read in chunks
+            if not data:
+                break
+            buffer.write(data)
+
+    print(f"saved to {file_path=} file:{base_filename=}")
+
+    return {"filename": base_filename, "location": file_path}
+
+#LIST MODELS
+@app.get("/model/list-models", dependencies=[Depends(api_key_auth)])
+def list_models():
+    #models_directory = DATA_DIR + '/models/'
+    models_directory = MODEL_DIR
+    # Ensure the directory exists
+    if not os.path.exists(models_directory):
+        return {"error": "Models directory does not exist."}
+
+    # List all files in the directory
+    model_files = os.listdir(models_directory)
+    return {"models": model_files}
 
 # Thread function to insert data from the queue into the database
 def insert_queue2db():
diff --git a/v2realbot/ml/ml.py b/v2realbot/ml/ml.py
deleted file mode 100644
index 39cfa93..0000000
--- a/v2realbot/ml/ml.py
+++ /dev/null
@@ -1,389 +0,0 @@
-# from sklearn.preprocessing import StandardScaler
-# # from keras.models import Sequential
-# from v2realbot.enums.enums import PredOutput, Source, TargetTRFM
-# from v2realbot.config import DATA_DIR
-# from joblib import dump
-# # import v2realbot.ml.mlutils as mu
-# from v2realbot.utils.utils import slice_dict_lists
-# import numpy as np
-# from copy import deepcopy
-# import v2realbot.controller.services as cs
-# #Basic classes for machine learning
-# #drzi model a jeho zakladni nastaveni
-
-# #Sample Data
-# sample_bars = {
-#     'time': [1, 2, 3, 4, 5,6,7,8,9,10,11,12,13,14,15],
-#     'high': [10, 11, 12, 13, 14,10, 11, 12, 13, 14,10, 11, 12, 13, 14],
-#     'low': [8, 9, 7, 6, 8,8, 9, 7, 6, 8,8, 9, 7, 6, 8],
-#     'volume': [1000, 1200, 900, 1100, 1300,1000, 1200, 900, 1100, 1300,1000, 1200, 900, 1100, 1300],
-#     'close': [9, 10, 11, 12, 13,9, 10, 11, 12, 13,9, 10, 11, 12, 13],
-#     'open': [9, 10, 8, 8, 8,9, 10, 8, 8, 8,9, 10, 8, 8, 8],
-#     'resolution': [1, 1, 1, 1, 1,1, 1, 1, 1, 1,1, 1, 1, 1, 1]
-# }
-
-# sample_indicators = {
-#     'time': [1, 2, 3, 4, 5,6,7,8,9,10,11,12,13,14,15],
-#     'fastslope': [90, 95, 100, 110, 115,90, 95, 100, 110, 115,90, 95, 100, 110, 115],
-#     'fsdelta': [90, 95, 100, 110, 115,90, 95, 100, 110, 115,90, 95, 100, 110, 115],
-#     'fastslope2': [90, 95, 100, 110, 115,90, 95, 100, 110, 115,90, 95, 100, 110, 115],
-#     'ema': [1000, 1200, 900, 1100, 1300,1000, 1200, 900, 1100, 1300,1000, 1200, 900, 1100, 1300]
-# }
-
-# #Trida, která drzi instanci ML modelu a jeho konfigurace
-# #take se pouziva jako nastroj na pripravu dat pro train a predikci
-# #pozor samotna data trida neobsahuje, jen konfiguraci a pak samotny model
-# class ModelML:
-#     def __init__(self, name: str,
-#                  pred_output: PredOutput,
-#                  bar_features: list,
-#                  ind_features: list,
-#                  input_sequences: int,
-#                  target: str,
-#                  target_reference: str,
-#                  train_target_steps: int, #train
-#                  train_target_transformation: TargetTRFM, #train
-#                  train_epochs: int, #train
-#                  train_runner_ids: list = None, #train
-#                  train_batch_id: str = None, #train
-#                  version: str = "1",
-#                  note : str = None,
-#                  use_bars: bool = True,
-#                  train_remove_cross_sequences: bool = False, #train
-#                  #standardne StandardScaler
-#                  scalerX: StandardScaler = StandardScaler(),
-#                  scalerY: StandardScaler = StandardScaler(),
-#                  model, #Sequential = Sequential()
-#                  )-> None:
-
-#         self.name = name
-#         self.version = version
-#         self.note = note
-#         self.pred_output: PredOutput = pred_output
-#         #model muze byt take bez barů, tzn.
jen indikatory -# self.use_bars = use_bars -# #zajistime poradi -# bar_features.sort() -# ind_features.sort() -# self.bar_features = bar_features -# self.ind_features = ind_features -# if (train_runner_ids is None or len(train_runner_ids) == 0) and train_batch_id is None: -# raise Exception("train_runner_ids nebo train_batch_id musi byt vyplnene") -# self.train_runner_ids = train_runner_ids -# self.train_batch_id = train_batch_id -# #target cílový sloupec, který je používám přímo nebo transformován na binary -# self.target = target -# self.target_reference = target_reference -# self.train_target_steps = train_target_steps -# self.train_target_transformation = train_target_transformation -# self.input_sequences = input_sequences -# self.train_epochs = train_epochs -# #keep cross sequences between runners -# self.train_remove_cross_sequences = train_remove_cross_sequences -# self.scalerX = scalerX -# self.scalerY = scalerY -# self.model = model - -# def save(self): -# filename = mu.get_full_filename(self.name,self.version) -# dump(self, filename) -# print(f"model {self.name} save") - -# #create X data with features -# def column_stack_source(self, bars, indicators, verbose = 1) -> np.array: -# #create SOURCE DATA with features -# # bars and indicators dictionary and features as input -# poradi_sloupcu_inds = [feature for feature in self.ind_features if feature in indicators] -# indicator_data = np.column_stack([indicators[feature] for feature in self.ind_features if feature in indicators]) - -# if len(bars)>0: -# bar_data = np.column_stack([bars[feature] for feature in self.bar_features if feature in bars]) -# poradi_sloupcu_bars = [feature for feature in self.bar_features if feature in bars] -# if verbose == 1: -# print("poradi sloupce v source_data", str(poradi_sloupcu_bars + poradi_sloupcu_inds)) -# combined_day_data = np.column_stack([bar_data,indicator_data]) -# else: -# combined_day_data = indicator_data -# if verbose == 1: -# print("poradi sloupce v source_data", str(poradi_sloupcu_inds)) -# return combined_day_data - -# #create TARGET(Y) data -# def column_stack_target(self, bars, indicators) -> np.array: -# target_base = [] -# target_reference = [] -# try: -# try: -# target_base = bars[self.target] -# except KeyError: -# target_base = indicators[self.target] -# try: -# target_reference = bars[self.target_reference] -# except KeyError: -# target_reference = indicators[self.target_reference] -# except KeyError: -# pass -# target_day_data = np.column_stack([target_base, target_reference]) -# return target_day_data - -# def load_runners_as_list(self, runner_id_list = None, batch_id = None): -# """Loads all runners data (bars, indicators) for given runners into list of dicts. - -# List of runners/train_batch_id may be provided, or self.train_runner_ids/train_batch_id is taken instead. 
- -# Returns: -# tuple (barslist, indicatorslist,) - lists with dictionaries for each runner -# """ -# if runner_id_list is not None: -# runner_ids = runner_id_list -# print("loading runners for ",str(runner_id_list)) -# elif batch_id is not None: -# print("Loading runners for train_batch_id:", batch_id) -# res, runner_ids = cs.get_archived_runnerslist_byBatchID(batch_id) -# elif self.train_batch_id is not None: -# print("Loading runners for TRAINING BATCH self.train_batch_id:", self.train_batch_id) -# res, runner_ids = cs.get_archived_runnerslist_byBatchID(self.train_batch_id) -# #pripadne bereme z listu runneru -# else: -# runner_ids = self.train_runner_ids -# print("loading runners for TRAINING runners ",str(self.train_runner_ids)) - - -# barslist = [] -# indicatorslist = [] -# ind_keys = None -# for runner_id in runner_ids: -# bars, indicators = mu.load_runner(runner_id) -# print(f"runner:{runner_id}") -# if self.use_bars: -# barslist.append(bars) -# print(f"bars keys {len(bars)} lng {len(bars[self.bar_features[0]])}") -# indicatorslist.append(indicators) -# print(f"indi keys {len(indicators)} lng {len(indicators[self.ind_features[0]])}") -# if ind_keys is not None and ind_keys != len(indicators): -# raise Exception("V runnerech musi byt stejny pocet indikatoru") -# else: -# ind_keys = len(indicators) - -# return barslist, indicatorslist - -# #toto nejspis rozdelit na TRAIN mod (kdy ma smysl si brat nataveni napr. remove cross) -# def create_sequences(self, combined_data, target_data = None, remove_cross_sequences: bool = False, rows_in_day = None): -# """Creates sequences of given length seq and optionally target N steps in the future. - -# Returns X(source) a Y(transformed target) - vrací take Y_untransformed - napr. referencni target column pro zobrazeni v grafu (napr. cenu) - -# Volby pro transformaci targetu: -# - KEEPVAL (keep value as is) -# - KEEPVAL_MOVE(keep value, move target N steps in the future) - -# další na zámysl (nejspíš ale data budu připravovat ve stratu a využívat jen KEEPy nahoře) -# - BINARY_prefix - sloupec založený na podmínce, výsledek je 0,1 -# - BINARY_TREND RISING - podmínka založena, že v target columnu stoupají/klesají po target N steps -# (podvarianty BINARY TREND RISING(0-1), FALLING(0-1), BOTH(-1 - )) -# - BINARY_READY - předpřipravený sloupec(vytvořený ve strategii jako indikator), stačí jen posunout o target step -# - BINARY_READY_POSUNUTY - předpřipraveny sloupec (již posunutýo o target M) - stačí brát as is - -# Args: -# combined_data: A list of combined data. -# target_data: A list of target data (0-target,1-target ref.column) -# remove_cross_sequences: If to remove crossday sequences -# rows_in_day: helper dict to remove crossday sequences -# return_untr: whether to return untransformed reference column - -# Returns: -# A list of X sequences and a list of y sequences. 
-# """ - -# if remove_cross_sequences is True and rows_in_day is None: -# raise Exception("To remove crossday sequences, rows_in_day param required.") - -# if target_data is not None and len(target_data) > 0: -# target_data_untr = target_data[:,1] -# target_data = target_data[:,0] -# else: -# target_data_untr = [] -# target_data = [] - -# X_train = [] -# y_train = [] -# y_untr = [] -# #comb data shape (4073, 13) -# #target shape (4073, 1) -# print("Start Sequencing") -# #range sekvence podle toho jestli je pozadovan MOVE nebo NE -# if self.train_target_transformation == TargetTRFM.KEEPVAL_MOVE: -# right_offset = self.input_sequences + self.train_target_steps -# else: -# right_offset= self.input_sequences -# for i in range(len(combined_data) - right_offset): - -# #take neresime cross sekvence kdyz neni vyplneni target nebo neni vyplnena rowsinaday -# if remove_cross_sequences is True and not self.is_same_day(i,i + right_offset, rows_in_day): -# print(f"sekvence vyrazena. NEW Zacatek {combined_data[i, 0]} konec {combined_data[i + right_offset, 0]}") -# continue - -# #pridame sekvenci -# X_train.append(combined_data[i:i + self.input_sequences]) - -# #target hodnotu bude ponecha (na radku mame jiz cilovy target) -# #nebo vezme hodnotu z N(train_target_steps) baru vpredu a da jako target k radku -# #je rizeno nastavenim right_offset vyse -# if target_data is not None and len(target_data) > 0: -# y_train.append(target_data[i + right_offset]) - -# #udela binary transformaci targetu -# # elif self.target_transformation == TargetTRFM.BINARY_TREND_UP: -# # #mini loop od 0 do počtu target steps - zda jsou successively rising -# # #radeji budu resit vizualne conditional indikatorem pri priprave dat -# # rising = False -# # for step in range(0,self.train_target_steps): -# # if target_data[i + self.input_sequences + step] < target_data[i + self.input_sequences + step + 1]: -# # rising = True -# # else: -# # rising = False -# # break -# # y_train.append([1] if rising else [0]) -# # #tato zakomentovana varianta porovnava jen cenu ted a cenu na target baru -# # #y_train.append([1] if target_data[i + self.input_sequences] < target_data[i + self.input_sequences + self.train_target_steps] else [0]) -# if target_data is not None and len(target_data) > 0: -# y_untr.append(target_data_untr[i + self.input_sequences]) -# return np.array(X_train), np.array(y_train), np.array(y_untr) - -# def is_same_day(self, idx_start, idx_end, rows_in_day): -# """Helper for sequencing enables to recognize if the start/end index are from the same day. - -# Used for sequences to remove cross runner(day) sequences. - -# Args: -# idx_start: Start index -# idx_end: End index -# rows_in_day: 1D array containing number of rows(bars,inds) for each day. -# Cumsumed defines edges where each day ends. [10,30,60] - -# Returns: -# A boolean - -# refactor to vectors if possible -# i_b, i_e -# podm_pole = i_b= pole -# [10,30,60] -# """ -# for i in rows_in_day: -# #jde o polozku na pomezi - vyhazujeme -# if idx_start < i and idx_end >= i: -# return False -# if idx_start < i and idx_end < i: -# return True -# return None - -# #vytvori X a Y data z nastaveni self -# #pro vybrane runnery stahne data, vybere sloupce dle faature a target -# #a vrátí jako sloupce v numpy poli -# #zaroven vraci i rows_in_day pro nasledny sekvencing -# def load_data(self, runners_ids: list = None, batch_id: list = None, source: Source = Source.RUNNERS): -# """Service to load data for the model. Can be used for training or for vector prediction. 
- -# If input data are not provided, it will get the value from training model configuration (train_runners_ids, train_batch_id) - -# Args: -# runner_ids: -# batch_id: -# source: To load sample data. - -# Returns: -# source_data,target_data,rows_in_day -# """ -# rows_in_day = [] -# indicatorslist = [] -# #bud natahneme samply -# if source == Source.SAMPLES: -# if self.use_bars: -# bars = sample_bars -# else: -# bars = {} -# indicators = sample_indicators -# indicatorslist.append(indicators) -# #nebo dotahneme pozadovane runnery -# else: -# #nalodujeme vsechny runnery jako listy (bud z runnerids nebo dle batchid) -# barslist, indicatorslist = self.load_runners_as_list(runner_id_list=runners_ids, batch_id=batch_id) -# #nerozumim -# bl = deepcopy(barslist) -# il = deepcopy(indicatorslist) -# #a zmergujeme jejich data dohromady -# bars = mu.merge_dicts(bl) -# indicators = mu.merge_dicts(il) - -# #zaroven vytvarime pomocny list, kde stale drzime pocet radku per day (pro nasledny sekvencing) -# #zatim nad indikatory - v budoucnu zvazit, kdyby jelo neco jen nad barama -# for i, val in enumerate(indicatorslist): -# #pro prvni klic z indikatoru pocteme cnt -# pocet = len(indicatorslist[i][self.ind_features[0]]) -# print("pro runner vkladame pocet", pocet) -# rows_in_day.append(pocet) - -# rows_in_day = np.array(rows_in_day) -# rows_in_day = np.cumsum(rows_in_day) -# print("celkove pole rows_in_day(cumsum):", rows_in_day) - -# print("Data LOADED.") -# print(f"number of indicators {len(indicators)}") -# print(f"number of bar elements{len(bars)}") -# print(f"ind list length {len(indicators['time'])}") -# print(f"bar list length {len(bars['time'])}") - -# self.validate_available_features(bars, indicators) - -# print("Preparing FEATURES") -# source_data, target_data = self.stack_bars_indicators(bars, indicators) -# return source_data, target_data, rows_in_day - -# def validate_available_features(self, bars, indicators): -# for k in self.bar_features: -# if not k in bars.keys(): -# raise Exception(f"Missing bar feature {k}") - -# for k in self.ind_features: -# if not k in indicators.keys(): -# raise Exception(f"Missing ind feature {k}") - -# def stack_bars_indicators(self, bars, indicators): -# print("Stacking dicts to numpy") -# print("Source - X") -# source_data = self.column_stack_source(bars, indicators) -# print("shape", np.shape(source_data)) -# print("Target - Y", self.target) -# target_data = self.column_stack_target(bars, indicators) -# print("shape", np.shape(target_data)) - -# return source_data, target_data - -# #pomocna sluzba, ktera provede vsechny transformace a inverzni scaling a vyleze z nej predikce -# #vstupem je standardni format ve strategii (state.bars, state.indicators) -# #vystupem je jedna hodnota -# def predict(self, bars, indicators) -> float: -# #oriznuti podle seqence - pokud je nastaveno v modelu -# lastNbars = slice_dict_lists(bars, self.input_sequences) -# lastNindicators = slice_dict_lists(indicators, self.input_sequences) -# # print("last5bars", lastNbars) -# # print("last5indicators",lastNindicators) - -# combined_live_data = self.column_stack_source(lastNbars, lastNindicators, verbose=0) -# #print("combined_live_data",combined_live_data) -# combined_live_data = self.scalerX.transform(combined_live_data) -# combined_live_data = np.array(combined_live_data) -# #print("last 5 values combined data shape", np.shape(combined_live_data)) - -# #converts to 3D array -# # 1 number of samples in the array. -# # 2 represents the sequence length. 
-# # 3 represents the number of features in the data. -# combined_live_data = combined_live_data.reshape((1, self.input_sequences, combined_live_data.shape[1])) - -# # Make a prediction -# prediction = self.model(combined_live_data, training=False) -# #prediction = prediction.reshape((1, 1)) -# # Convert the prediction back to the original scale -# prediction = self.scalerY.inverse_transform(prediction) -# return float(prediction) diff --git a/v2realbot/ml/mlutils.py b/v2realbot/ml/mlutils.py deleted file mode 100644 index 5207e69..0000000 --- a/v2realbot/ml/mlutils.py +++ /dev/null @@ -1,55 +0,0 @@ -import numpy as np -# import v2realbot.controller.services as cs -from joblib import load -from v2realbot.config import DATA_DIR - -def get_full_filename(name, version = "1"): - return DATA_DIR+'/models/'+name+'_v'+version+'.pkl' - -def load_model(name, version = "1"): - filename = get_full_filename(name, version) - return load(filename) - -#pomocne funkce na manipulaci s daty - -def merge_dicts(dict_list): - # Initialize an empty merged dictionary - merged_dict = {} - - # Iterate through the dictionaries in the list - for i,d in enumerate(dict_list): - for key, value in d.items(): - if key in merged_dict: - merged_dict[key] += value - else: - merged_dict[key] = value - #vlozime element s idenitfikaci runnera - - return merged_dict - - # # Initialize the merged dictionary with the first dictionary in the list - # merged_dict = dict_list[0].copy() - # merged_dict["index"] = [] - - # # Iterate through the remaining dictionaries and concatenate their lists - # for i, d in enumerate(dict_list[1:]): - # merged_dict["index"] = - # for key, value in d.items(): - # if key in merged_dict: - # merged_dict[key] += value - # else: - # merged_dict[key] = value - - # return merged_dict - -def load_runner(runner_id): - res, sada = cs.get_archived_runner_details_byID(runner_id) - if res == 0: - print("ok") - else: - print("error",res,sada) - raise Exception(f"error loading runner {runner_id} : {res} {sada}") - - bars = sada["bars"] - indicators = sada["indicators"][0] - return bars, indicators diff --git a/v2realbot/reporting/analyzer/WIP_daily_profit_distribution.py b/v2realbot/reporting/analyzer/WIP_daily_profit_distribution.py new file mode 100644 index 0000000..4794a93 --- /dev/null +++ b/v2realbot/reporting/analyzer/WIP_daily_profit_distribution.py @@ -0,0 +1,104 @@ +import matplotlib +import matplotlib.dates as mdates +matplotlib.use('Agg') # Set the Matplotlib backend to 'Agg' +import matplotlib.pyplot as plt +from matplotlib.ticker import MaxNLocator +import seaborn as sns +import pandas as pd +from datetime import datetime +from typing import List +from enum import Enum +import numpy as np +import v2realbot.controller.services as cs +from rich import print +from v2realbot.common.model import AnalyzerInputs +from v2realbot.common.PrescribedTradeModel import TradeDirection, TradeStatus, Trade, TradeStoplossType +from v2realbot.utils.utils import isrising, isfalling,zoneNY, price2dec, safe_get#, print +from pathlib import Path +from v2realbot.config import WEB_API_KEY, DATA_DIR, MEDIA_DIRECTORY +from v2realbot.enums.enums import RecordType, StartBarAlign, Mode, Account, OrderSide +from io import BytesIO +from v2realbot.utils.historicals import get_historical_bars +from alpaca.data.timeframe import TimeFrame, TimeFrameUnit +from collections import defaultdict +from scipy.stats import zscore +from io import BytesIO +from v2realbot.reporting.load_trades import load_trades +from typing import Tuple, 
Optional, List
+from traceback import format_exc
+
+def daily_profit_distribution(runner_ids: list = None, batch_id: str = None, stream: bool = False):
+    try:
+        res, trades, days_cnt = load_trades(runner_ids, batch_id)
+        if res != 0:
+            raise Exception("Error in loading trades")
+
+        #print(trades)
+
+        # Convert list of Trade objects to DataFrame
+        trades_df = pd.DataFrame([t.__dict__ for t in trades if t.status == "closed"])
+
+        # Ensure 'exit_time' is a datetime and convert it to the NY timezone
+        trades_df['exit_time'] = pd.to_datetime(trades_df['exit_time']).dt.tz_convert(zoneNY)
+        trades_df['date'] = trades_df['exit_time'].dt.date
+
+        daily_profit = trades_df.groupby(['date', 'direction']).profit.sum().unstack(fill_value=0)
+        #print("dp",daily_profit)
+        daily_cumulative_profit = trades_df.groupby('date').profit.sum().cumsum()
+
+        # Create the plot
+        fig, ax1 = plt.subplots(figsize=(10, 6))
+
+        # Bar chart for daily profit composition
+        daily_profit.plot(kind='bar', stacked=True, ax=ax1, color=['green', 'red'], zorder=2)
+        ax1.set_ylabel('Daily Profit')
+        ax1.set_xlabel('Date')
+        #ax1.xaxis.set_major_locator(MaxNLocator(10))
+
+        # Line chart for cumulative daily profit
+        #ax2 = ax1.twinx()
+        #print(daily_cumulative_profit)
+        #print(daily_cumulative_profit.index)
+        #ax2.plot(daily_cumulative_profit.index, daily_cumulative_profit, color='yellow', linestyle='-', linewidth=2, zorder=3)
+        #ax2.set_ylabel('Cumulative Profit')
+
+        # Setting the secondary y-axis range dynamically based on cumulative profit values
+        # ax2.set_ylim(daily_cumulative_profit.min() - (daily_cumulative_profit.std() * 2),
+        #              daily_cumulative_profit.max() + (daily_cumulative_profit.std() * 2))
+
+        # Dark mode settings
+        ax1.set_facecolor('black')
+        # ax1.grid(True)
+        #ax2.set_facecolor('black')
+        fig.patch.set_facecolor('black')
+        ax1.tick_params(colors='white')
+        #ax2.tick_params(colors='white')
+        # ax1.xaxis_date()
+        # ax1.xaxis.set_major_formatter(mdates.DateFormatter('%d.%m.', tz=zoneNY))
+        ax1.tick_params(axis='x', rotation=45)
+
+        # Footer
+        footer_text = f'Days Count: {days_cnt} | Parameters: {{"runner_ids": {len(runner_ids) if runner_ids is not None else None}, "batch_id": {batch_id}, "stream": {stream}}}'
+        plt.figtext(0.5, 0.01, footer_text, wrap=True, horizontalalignment='center', fontsize=8, color='white')
+
+        # Save or stream the plot
+        if stream:
+            img_stream = BytesIO()
+            plt.savefig(img_stream, format='png', bbox_inches='tight', facecolor=fig.get_facecolor(), edgecolor='none')
+            img_stream.seek(0)
+            plt.close(fig)
+            return (0, img_stream)
+        else:
+            plt.savefig(f'{__name__}.png', bbox_inches='tight', facecolor=fig.get_facecolor(), edgecolor='none')
+            plt.close(fig)
+            return (0, None)
+
+    except Exception as e:
+        # Detailed error reporting
+        return (-1, str(e) + format_exc())
+
+# Local debugging
+if __name__ == '__main__':
+    batch_id = "6f9b012c"
+    res, val = daily_profit_distribution(batch_id=batch_id)
+    print(res, val)
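Review note: a quick illustration of the frame this function hands to the stacked bar chart, using toy trades with invented values (not part of the patch; real rows carry TradeDirection values rather than plain strings). Each direction becomes a column, which is what lets plot(kind='bar', stacked=True, color=['green', 'red']) colour the long and short contributions separately.

import pandas as pd

trades_df = pd.DataFrame({
    "date": ["2024-01-02", "2024-01-02", "2024-01-03"],
    "direction": ["long", "short", "long"],
    "profit": [120.0, -40.0, 35.0],
})
# sum per day and direction, then pivot directions into columns
daily_profit = trades_df.groupby(["date", "direction"]).profit.sum().unstack(fill_value=0)
print(daily_profit)
# direction    long  short
# date
# 2024-01-02  120.0  -40.0
# 2024-01-03   35.0    0.0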
= "optimalcutoff.png",steps:int = 50): +def find_optimal_cutoff(runner_ids: list = None, batch_id: str = None, stream: bool = False, mode:str="absolute", rem_outliers:bool = False, z_score_threshold:int = 3, file: str = "optimalcutoff.png",steps:int = 50): #TODO dopracovat drawdown a minimalni a maximalni profity nikoliv cumulovane, zamyslet se #TODO list of runner_ids @@ -115,7 +115,11 @@ def find_optimal_cutoff(runner_ids: list = None, batch_id: str = None, stream: b for trade in trades: if trade.status == TradeStatus.CLOSED and trade.exit_time: day = trade.exit_time.date() - daily_cumulative_profits[day].append(trade.profit) + if mode == "absolute": + daily_cumulative_profits[day].append(trade.profit) + #relative profit + else: + daily_cumulative_profits[day].append(trade.rel_profit) for day in daily_cumulative_profits: daily_cumulative_profits[day] = np.cumsum(daily_cumulative_profits[day]) @@ -131,7 +135,7 @@ def find_optimal_cutoff(runner_ids: list = None, batch_id: str = None, stream: b for day, profits in cumulative_profits.items(): if len(profits) > 0: day_z_score = z_scores[list(cumulative_profits.keys()).index(day)] - if abs(day_z_score) < 3: # Adjust threshold as needed + if abs(day_z_score) < z_score_threshold: # Adjust threshold as needed filtered_profits[day] = profits return filtered_profits @@ -145,26 +149,25 @@ def find_optimal_cutoff(runner_ids: list = None, batch_id: str = None, stream: b # profit_range = (0, max_profit) if max_profit > 0 else (0, 0) # loss_range = (min_profit, 0) if min_profit < 0 else (0, 0) + if mode == "absolute": # OPT2 Calculate profit_range and loss_range based on all cumulative profits - all_cumulative_profits = np.concatenate([profits for profits in daily_cumulative_profits.values()]) - max_cumulative_profit = np.max(all_cumulative_profits) - min_cumulative_profit = np.min(all_cumulative_profits) - profit_range = (0, max_cumulative_profit) if max_cumulative_profit > 0 else (0, 0) - loss_range = (min_cumulative_profit, 0) if min_cumulative_profit < 0 else (0, 0) + all_cumulative_profits = np.concatenate([profits for profits in daily_cumulative_profits.values()]) + max_cumulative_profit = np.max(all_cumulative_profits) + min_cumulative_profit = np.min(all_cumulative_profits) + profit_range = (0, max_cumulative_profit) if max_cumulative_profit > 0 else (0, 0) + loss_range = (min_cumulative_profit, 0) if min_cumulative_profit < 0 else (0, 0) + else: + #for relative - hardcoded + profit_range = (0, 1) # Adjust based on your data + loss_range = (-1, 0) - print("Calculated ranges", profit_range, loss_range) + print("Ranges", profit_range, loss_range) num_points = steps # Adjust for speed vs accuracy profit_cutoffs = np.linspace(*profit_range, num_points) loss_cutoffs = np.linspace(*loss_range, num_points) - # OPT 3Statically define ranges for loss and profit cutoffs - # profit_range = (0, 1000) # Adjust based on your data - # loss_range = (-1000, 0) - # num_points = 20 # Adjust for speed vs accuracy - profit_cutoffs = np.linspace(*profit_range, num_points) - loss_cutoffs = np.linspace(*loss_range, num_points) total_profits_matrix = np.zeros((len(profit_cutoffs), len(loss_cutoffs))) @@ -207,12 +210,12 @@ def find_optimal_cutoff(runner_ids: list = None, batch_id: str = None, stream: b } plt.rcParams.update(params) plt.figure(figsize=(10, 8)) - sns.heatmap(total_profits_matrix, xticklabels=np.rint(loss_cutoffs).astype(int), yticklabels=np.rint(profit_cutoffs).astype(int), cmap="viridis") + sns.heatmap(total_profits_matrix, 
@@ -207,12 +210,12 @@
     }
     plt.rcParams.update(params)
     plt.figure(figsize=(10, 8))
-    sns.heatmap(total_profits_matrix, xticklabels=np.rint(loss_cutoffs).astype(int), yticklabels=np.rint(profit_cutoffs).astype(int), cmap="viridis")
+    sns.heatmap(total_profits_matrix, xticklabels=np.rint(loss_cutoffs).astype(int) if mode == "absolute" else np.around(loss_cutoffs, decimals=3), yticklabels=np.rint(profit_cutoffs).astype(int) if mode == "absolute" else np.around(profit_cutoffs, decimals=3), cmap="viridis")
     plt.xticks(rotation=90)  # Rotate x-axis labels to be vertical
     plt.yticks(rotation=0)  # Keep y-axis labels horizontal
     plt.gca().invert_yaxis()
     plt.gca().invert_xaxis()
-    plt.suptitle(f"Total Profit for Combinations of Profit/Loss Cutoffs ({cnt_max})", fontsize=16)
+    plt.suptitle(f"Total {mode} Profit for Profit/Loss Cutoffs ({cnt_max})", fontsize=16)
     plt.title(f"Optimal Profit Cutoff: {optimal_profit_cutoff:.2f}, Optimal Loss Cutoff: {optimal_loss_cutoff:.2f}, Max Profit: {max_profit:.2f}", fontsize=10)
     plt.xlabel("Loss Cutoff")
     plt.ylabel("Profit Cutoff")
@@ -236,8 +239,8 @@ if __name__ == '__main__':
     # id_list = ["e8938b2e-8462-441a-8a82-d823c6a025cb"]
     # generate_trading_report_image(runner_ids=id_list)
     batch_id = "c76b4414"
-    vstup = AnalyzerInputs(**params)
-    res, val = find_optimal_cutoff(batch_id=batch_id, file="optimal_cutoff_vectorized.png",steps=20)
+    #vstup = AnalyzerInputs(**params)
+    res, val = find_optimal_cutoff(batch_id=batch_id, mode="relative", z_score_threshold=2, file="optimal_cutoff_vectorized.png",steps=20)
     #res, val = find_optimal_cutoff(batch_id=batch_id, rem_outliers=True, file="optimal_cutoff_vectorized_nooutliers.png")
     print(res,val)
\ No newline at end of file
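One detail in the heatmap change above: the tick labels follow the mode. Absolute cutoffs are dollar-scale, so np.rint(...).astype(int) keeps them readable as whole numbers, while relative cutoffs live in a narrow fractional band and need np.around(..., decimals=3). A quick illustration with assumed ranges:

import numpy as np

# absolute mode: dollar-scale grid -> integer labels
print(np.rint(np.linspace(-800, 0, 3)).astype(int))    # [-800 -400    0]
# relative mode: fractional grid -> keep three decimals
print(np.around(np.linspace(-0.5, 0, 3), decimals=3))  # [-0.5  -0.25  0.  ]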
+ print(f"no batch {batch_id} found") + return -1, f"no batch {batch_id} found" + + trades = [] + cnt_max = len(runner_ids) + cnt = 0 + #zatim zjistujeme start a end z min a max dni - jelikoz muze byt i seznam runner_ids a nejenom batch + end_date = None + start_date = None + for id in runner_ids: + cnt += 1 + #get runner + res, sada =cs.get_archived_runner_header_byID(id) + if res != 0: + print(f"no runner {id} found") + return -1, f"no runner {id} found" + + #print("archrunner") + #print(sada) + + if cnt == 1: + start_date = sada.bt_from if sada.mode in [Mode.BT,Mode.PREP] else sada.started + if cnt == cnt_max: + end_date = sada.bt_to if sada.mode in [Mode.BT or Mode.PREP] else sada.stopped + # Parse trades + + trades_dicts = sada.metrics["prescr_trades"] + + for trade_dict in trades_dicts: + trade_dict['last_update'] = datetime.fromtimestamp(trade_dict.get('last_update')).astimezone(zoneNY) if trade_dict['last_update'] is not None else None + trade_dict['entry_time'] = datetime.fromtimestamp(trade_dict.get('entry_time')).astimezone(zoneNY) if trade_dict['entry_time'] is not None else None + trade_dict['exit_time'] = datetime.fromtimestamp(trade_dict.get('exit_time')).astimezone(zoneNY) if trade_dict['exit_time'] is not None else None + trades.append(Trade(**trade_dict)) + + #print(trades) + + # symbol = sada.symbol + # #hour bars for backtested period + # print(start_date,end_date) + # bars= get_historical_bars(symbol, start_date, end_date, TimeFrame.Hour) + # print("bars for given period",bars) + # """Bars a dictionary with the following keys: + # * high: A list of high prices + # * low: A list of low prices + # * volume: A list of volumes + # * close: A list of close prices + # * hlcc4: A list of HLCC4 indicators + # * open: A list of open prices + # * time: A list of times in UTC (ISO 8601 format) + # * trades: A list of number of trades + # * resolution: A list of resolutions (all set to 'D') + # * confirmed: A list of booleans (all set to True) + # * vwap: A list of VWAP indicator + # * updated: A list of booleans (all set to True) + # * index: A list of integers (from 0 to the length of the list of daily bars) + # """ + + # Filter to only use trades with status 'CLOSED' + closed_trades = [trade for trade in trades if trade.status == TradeStatus.CLOSED] + + #print(closed_trades) + + if len(closed_trades) == 0: + return -1, "image generation no closed trades" + + # # Group trades by date and calculate daily profits + # trades_by_day = defaultdict(list) + # for trade in trades: + # if trade.status == TradeStatus.CLOSED and trade.exit_time: + # trade_day = trade.exit_time.date() + # trades_by_day[trade_day].append(trade) + + # Precompute daily cumulative profits + daily_cumulative_profits = defaultdict(list) + for trade in trades: + if trade.status == TradeStatus.CLOSED and trade.exit_time: + day = trade.exit_time.date() + daily_cumulative_profits[day].append(trade.profit) + + for day in daily_cumulative_profits: + daily_cumulative_profits[day] = np.cumsum(daily_cumulative_profits[day]) + + + if rem_outliers: + # Remove outliers based on z-scores + def remove_outliers(cumulative_profits): + all_profits = [profit[-1] for profit in cumulative_profits.values() if len(profit) > 0] + z_scores = zscore(all_profits) + print(z_scores) + filtered_profits = {} + for day, profits in cumulative_profits.items(): + if len(profits) > 0: + day_z_score = z_scores[list(cumulative_profits.keys()).index(day)] + if abs(day_z_score) < z_score_threshold: # Adjust threshold as needed + filtered_profits[day] 
+
+    # OPT1 Dynamically calculate profit_range and loss_range - based on eod daily profit
+    # all_final_profits = [profits[-1] for profits in daily_cumulative_profits.values() if len(profits) > 0]
+    # max_profit = max(all_final_profits)
+    # min_profit = min(all_final_profits)
+    # profit_range = (0, max_profit) if max_profit > 0 else (0, 0)
+    # loss_range = (min_profit, 0) if min_profit < 0 else (0, 0)
+
+    # OPT2 Calculate profit_range and loss_range based on all cumulative profits
+    all_cumulative_profits = np.concatenate([profits for profits in daily_cumulative_profits.values()])
+    max_cumulative_profit = np.max(all_cumulative_profits)
+    min_cumulative_profit = np.min(all_cumulative_profits)
+    profit_range = (0, max_cumulative_profit) if max_cumulative_profit > 0 else (0, 0)
+    loss_range = (min_cumulative_profit, 0) if min_cumulative_profit < 0 else (0, 0)
+
+    print("Calculated ranges", profit_range, loss_range)
+
+    num_points = steps  # Adjust for speed vs accuracy
+    profit_cutoffs = np.linspace(*profit_range, num_points)
+    loss_cutoffs = np.linspace(*loss_range, num_points)
+
+    # OPT3 Statically define ranges for loss and profit cutoffs
+    # profit_range = (0, 1000)  # Adjust based on your data
+    # loss_range = (-1000, 0)
+    # num_points = 20  # Adjust for speed vs accuracy
+
+    total_profits_matrix = np.zeros((len(profit_cutoffs), len(loss_cutoffs)))
+
+    for i, profit_cutoff in enumerate(profit_cutoffs):
+        for j, loss_cutoff in enumerate(loss_cutoffs):
+            total_profit = 0
+            for daily_profit in daily_cumulative_profits.values():
+                cutoff_index = np.where((daily_profit >= profit_cutoff) | (daily_profit <= loss_cutoff))[0]
+                if cutoff_index.size > 0:
+                    total_profit += daily_profit[cutoff_index[0]]
+                else:
+                    total_profit += daily_profit[-1] if daily_profit.size > 0 else 0
+            total_profits_matrix[i, j] = total_profit
+
+    # Find the optimal combination
+    optimal_idx = np.unravel_index(total_profits_matrix.argmax(), total_profits_matrix.shape)
+    optimal_profit_cutoff = profit_cutoffs[optimal_idx[0]]
+    optimal_loss_cutoff = loss_cutoffs[optimal_idx[1]]
+    max_profit = total_profits_matrix[optimal_idx]
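Recovering the optimal pair from the filled matrix is a two-step numpy idiom: argmax() gives a position in the flattened array, and np.unravel_index maps it back to (row, column), i.e. to indices into profit_cutoffs and loss_cutoffs. A minimal example:

import numpy as np

profit_cutoffs = np.linspace(0, 100, 2)   # rows:    [0, 100]
loss_cutoffs = np.linspace(-50, 0, 2)     # columns: [-50, 0]
total_profits_matrix = np.array([[1.0, 5.0],
                                 [9.0, 2.0]])

optimal_idx = np.unravel_index(total_profits_matrix.argmax(), total_profits_matrix.shape)
print(optimal_idx)                        # (1, 0)
print(profit_cutoffs[optimal_idx[0]])     # 100.0
print(loss_cutoffs[optimal_idx[1]])       # -50.0
print(total_profits_matrix[optimal_idx])  # 9.0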
+
+    # Plotting
+    # Setting up dark mode for the plots
+    plt.style.use('dark_background')
+
+    # Optionally, you can further customize colors, labels, and axes
+    params = {
+        'axes.titlesize': 9,
+        'axes.labelsize': 8,
+        'xtick.labelsize': 9,
+        'ytick.labelsize': 9,
+        'axes.labelcolor': '#a9a9a9',  #a1a3aa'
+        'axes.facecolor': '#121722',  #'#0e0e0e', #202020', # Dark background for plot area
+        'axes.grid': False,  # Turn off the grid globally
+        'grid.color': 'gray',  # If the grid is on, set grid line color
+        'grid.linestyle': '--',  # Grid line style
+        'grid.linewidth': 1,
+        'xtick.color': '#a9a9a9',
+        'ytick.color': '#a9a9a9',
+        'axes.edgecolor': '#a9a9a9'
+    }
+    plt.rcParams.update(params)
+    plt.figure(figsize=(10, 8))
+    sns.heatmap(total_profits_matrix, xticklabels=np.rint(loss_cutoffs).astype(int), yticklabels=np.rint(profit_cutoffs).astype(int), cmap="viridis")
+    plt.xticks(rotation=90)  # Rotate x-axis labels to be vertical
+    plt.yticks(rotation=0)  # Keep y-axis labels horizontal
+    plt.gca().invert_yaxis()
+    plt.gca().invert_xaxis()
+    plt.suptitle(f"Total Profit for Combinations of Profit/Loss Cutoffs ({cnt_max})", fontsize=16)
+    plt.title(f"Optimal Profit Cutoff: {optimal_profit_cutoff:.2f}, Optimal Loss Cutoff: {optimal_loss_cutoff:.2f}, Max Profit: {max_profit:.2f}", fontsize=10)
+    plt.xlabel("Loss Cutoff")
+    plt.ylabel("Profit Cutoff")
+
+    if stream is False:
+        plt.savefig(file)
+        plt.close()
+        print(f"Optimal Profit Cutoff(rem_outliers:{rem_outliers}): {optimal_profit_cutoff}, Optimal Loss Cutoff: {optimal_loss_cutoff}, Max Profit: {max_profit}")
+        return 0, None
+    else:
+        # Return the image as a BytesIO stream
+        img_stream = BytesIO()
+        plt.savefig(img_stream, format='png')
+        plt.close()
+        img_stream.seek(0)  # Rewind the stream to the beginning
+        return 0, img_stream
+
+# Example usage
+# trades = [list of Trade objects]
+if __name__ == '__main__':
+    # id_list = ["e8938b2e-8462-441a-8a82-d823c6a025cb"]
+    # generate_trading_report_image(runner_ids=id_list)
+    batch_id = "c76b4414"
+    #vstup = AnalyzerInputs(**params)  # params is not defined at module scope
+    res, val = find_optimal_cutoff(batch_id=batch_id, file="optimal_cutoff_vectorized.png",steps=20)
+    #res, val = find_optimal_cutoff(batch_id=batch_id, rem_outliers=True, file="optimal_cutoff_vectorized_nooutliers.png")
+
+    print(res,val)
\ No newline at end of file
diff --git a/v2realbot/reporting/analyzer/summarize_trade_metrics.py b/v2realbot/reporting/analyzer/summarize_trade_metrics.py
new file mode 100644
index 0000000..2bec57c
--- /dev/null
+++ b/v2realbot/reporting/analyzer/summarize_trade_metrics.py
@@ -0,0 +1,129 @@
+import matplotlib
+import matplotlib.dates as mdates
+matplotlib.use('Agg')  # Set the Matplotlib backend to 'Agg'
+import matplotlib.pyplot as plt
+from matplotlib.ticker import MaxNLocator
+import seaborn as sns
+import pandas as pd
+from datetime import datetime
+from enum import Enum
+import numpy as np
+import v2realbot.controller.services as cs
+from rich import print
+from v2realbot.common.model import AnalyzerInputs
+from v2realbot.common.PrescribedTradeModel import TradeDirection, TradeStatus, Trade, TradeStoplossType
+from v2realbot.utils.utils import isrising, isfalling, zoneNY, price2dec, safe_get  #, print
+from pathlib import Path
+from v2realbot.config import WEB_API_KEY, DATA_DIR, MEDIA_DIRECTORY
+from v2realbot.enums.enums import RecordType, StartBarAlign, Mode, Account, OrderSide
+from io import BytesIO
+from v2realbot.utils.historicals import get_historical_bars
+from alpaca.data.timeframe import TimeFrame, TimeFrameUnit
+from collections import defaultdict
+from scipy.stats import zscore
+from v2realbot.reporting.load_trades import load_trades
+from typing import Tuple, Optional, List
+from traceback import format_exc
+
+
+def summarize_trade_metrics(runner_ids: list = None, batch_id: str = None, stream: bool = False):
+    try:
+        res, trades, days_cnt = load_trades(runner_ids, batch_id)
+        if res != 0:
+            raise Exception("Error in loading trades")
+
+        closed_trades = [trade for trade in trades if trade.status == "closed"]
+
+        # Calculate metrics
+        metrics = calculate_metrics(closed_trades)
+
+        # Generate and process image
+        img_stream = generate_table_image(metrics)
+
+        # Add footer to image
+        #img_stream = add_footer_to_image(img_stream, days_cnt, runner_ids, batch_id, stream)
+
+        # Output handling
+        if stream:
+            img_stream.seek(0)
+            return (0, img_stream)
+        else:
+            with open(f'summarize_trade_metrics_{batch_id}.png', 'wb') as f:
+                f.write(img_stream.getbuffer())
+            return (0, None)
+
+    except Exception as e:
+        # Detailed error reporting
+        return (-1, str(e) + format_exc())
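Like the analyzers above, summarize_trade_metrics returns a (status, payload) tuple, with a BytesIO PNG as the payload when stream=True. A minimal sketch of serving that through FastAPI follows; the endpoint path and app wiring are assumptions for illustration, since the actual routes are not part of this diff:

from fastapi import FastAPI
from fastapi.responses import StreamingResponse

from v2realbot.reporting.analyzer.summarize_trade_metrics import summarize_trade_metrics

app = FastAPI()  # hypothetical app; the real service wiring lives elsewhere

@app.get("/reports/trade-metrics/{batch_id}")
def trade_metrics_report(batch_id: str):
    res, payload = summarize_trade_metrics(batch_id=batch_id, stream=True)
    if res != 0:
        # On failure the payload carries the error text plus traceback
        return {"error": payload}
    return StreamingResponse(payload, media_type="image/png")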
+
+def calculate_metrics(closed_trades):
+    if not closed_trades:
+        return {}
+
+    total_profit = sum(trade.profit for trade in closed_trades)
+    max_profit = max(trade.profit for trade in closed_trades)
+    min_profit = min(trade.profit for trade in closed_trades)
+    total_trades = len(closed_trades)
+    long_trades = sum(1 for trade in closed_trades if trade.direction == "long")
+    short_trades = sum(1 for trade in closed_trades if trade.direction == "short")
+
+    # Daily Metrics Calculation
+    trades_by_day = {}
+    for trade in closed_trades:
+        day = trade.entry_time.date() if trade.entry_time else None
+        if day:
+            trades_by_day.setdefault(day, []).append(trade)
+
+    # Guard against division by zero when no closed trade carries an entry_time
+    days = max(len(trades_by_day), 1)
+    avg_trades_per_day = sum(len(trades) for trades in trades_by_day.values()) / days
+    avg_long_trades_per_day = sum(sum(1 for trade in trades if trade.direction == "long") for trades in trades_by_day.values()) / days
+    avg_short_trades_per_day = sum(sum(1 for trade in trades if trade.direction == "short") for trades in trades_by_day.values()) / days
+
+    return {
+        "Average Profit": total_profit / total_trades,
+        "Maximum Profit": max_profit,
+        "Minimum Profit": min_profit,
+        "Total Number of Trades": total_trades,
+        "Number of Long Trades": long_trades,
+        "Number of Short Trades": short_trades,
+        "Average Trades per Day": avg_trades_per_day,
+        "Average Long Trades per Day": avg_long_trades_per_day,
+        "Average Short Trades per Day": avg_short_trades_per_day
+    }
+
+def generate_table_image(metrics):
+    fig, ax = plt.subplots(figsize=(10, 6))
+    ax.axis('tight')
+    ax.axis('off')
+
+    # Convert metrics to a 2D array where each row is a list
+    cell_text = [[value] for value in metrics.values()]
+
+    # Convert dict keys to a list for row labels
+    row_labels = list(metrics.keys())
+
+    ax.table(cellText=cell_text,
+             rowLabels=row_labels,
+             loc='center')
+
+    plt.subplots_adjust(left=0.2, top=0.8)
+    plt.title("Trade Metrics Summary", color='white')
+
+    img_stream = BytesIO()
+    plt.savefig(img_stream, format='png', bbox_inches='tight', pad_inches=0.1, facecolor='black')
+    plt.close(fig)
+    return img_stream
+
+def add_footer_to_image(img_stream, days_cnt, runner_ids, batch_id, stream):
+    # Implementation for adding a footer to the image,
+    # e.g. with PIL (Python Imaging Library) or another image processing library.
+    # Left as a placeholder for now.
+    pass
+
+# Local debugging
+if __name__ == '__main__':
+    batch_id = "73ad1866"
+    res, val = summarize_trade_metrics(batch_id=batch_id)
+    print(res, val)
diff --git a/v2realbot/static/index.html b/v2realbot/static/index.html
index 94d5971..6033364 100644
--- a/v2realbot/static/index.html
+++ b/v2realbot/static/index.html
@@ -225,7 +225,7 @@
-