This commit is contained in:
David Brazda
2024-10-02 13:54:29 +02:00
parent 538c739c59
commit 7188b2d003
9 changed files with 2064 additions and 373 deletions

View File

@ -1,4 +1,8 @@
alpaca-py==0.30.1 aiodns==3.2.0
aiohappyeyeballs==2.4.3
aiohttp==3.10.8
aiosignal==1.3.1
alpaca-py==0.18.1
annotated-types==0.7.0 annotated-types==0.7.0
anyio==4.6.0 anyio==4.6.0
appdirs==1.4.4 appdirs==1.4.4
@ -8,18 +12,21 @@ argon2-cffi-bindings==21.2.0
arrow==1.3.0 arrow==1.3.0
asttokens==2.4.1 asttokens==2.4.1
async-lru==2.0.4 async-lru==2.0.4
async-timeout==4.0.3
attrs==24.2.0 attrs==24.2.0
babel==2.16.0 babel==2.16.0
beautifulsoup4==4.12.3 beautifulsoup4==4.12.3
bleach==6.1.0 bleach==6.1.0
blinker==1.8.2 blinker==1.8.2
bottle==0.12.25 bottle==0.12.25
ccxt==4.4.10
certifi==2024.8.30 certifi==2024.8.30
cffi==1.17.1 cffi==1.17.1
charset-normalizer==3.3.2 charset-normalizer==3.3.2
click==8.1.7 click==8.1.7
comm==0.2.2 comm==0.2.2
contourpy==1.3.0 contourpy==1.3.0
cryptography==43.0.1
cycler==0.12.1 cycler==0.12.1
dash==2.17.1 dash==2.17.1
dash-bootstrap-components==1.6.0 dash-bootstrap-components==1.6.0
@ -40,6 +47,7 @@ Flask==3.0.3
fonttools==4.53.1 fonttools==4.53.1
fqdn==1.5.1 fqdn==1.5.1
frozendict==2.4.4 frozendict==2.4.4
frozenlist==1.4.1
greenlet==3.0.3 greenlet==3.0.3
h11==0.14.0 h11==0.14.0
html5lib==1.1 html5lib==1.1
@ -53,8 +61,10 @@ ipykernel==6.29.5
ipython==8.27.0 ipython==8.27.0
ipywidgets==8.1.5 ipywidgets==8.1.5
isoduration==20.11.0 isoduration==20.11.0
itables==2.2.1 itables==2.0.1
itsdangerous==2.2.0 itsdangerous==2.2.0
jax==0.4.23
jaxlib==0.4.23
jedi==0.19.1 jedi==0.19.1
Jinja2==3.1.4 Jinja2==3.1.4
joblib==1.4.2 joblib==1.4.2
@ -64,18 +74,18 @@ jsonschema==4.23.0
jsonschema-specifications==2023.12.1 jsonschema-specifications==2023.12.1
jupyter-events==0.10.0 jupyter-events==0.10.0
jupyter-lsp==2.2.5 jupyter-lsp==2.2.5
jupyter_client==8.6.2 jupyter_client==8.6.1
jupyter_core==5.7.2 jupyter_core==5.7.2
jupyter_server==2.14.2 jupyter_server==2.14.0
jupyter_server_terminals==0.5.3 jupyter_server_terminals==0.5.3
jupyterlab==4.2.5 jupyterlab==4.1.8
jupyterlab_pygments==0.3.0 jupyterlab_pygments==0.3.0
jupyterlab_server==2.27.3 jupyterlab_server==2.27.1
jupyterlab_widgets==3.0.13 jupyterlab_widgets==3.0.13
kiwisolver==1.4.5 kiwisolver==1.4.5
korean-lunar-calendar==0.3.1 korean-lunar-calendar==0.3.1
lightweight_charts @ git+https://github.com/drew2323/lightweight-charts-python.git@2b9f238a4242d958bc863b6209bf6444786477c5 lightweight_charts @ git+https://github.com/drew2323/lightweight-charts-python.git@35f029714b23c18abe791b90a85447e959c72258
llvmlite==0.43.0 llvmlite==0.39.1
lxml==5.3.0 lxml==5.3.0
markdown-it-py==3.0.0 markdown-it-py==3.0.0
MarkupSafe==2.1.5 MarkupSafe==2.1.5
@ -83,7 +93,9 @@ matplotlib==3.9.2
matplotlib-inline==0.1.7 matplotlib-inline==0.1.7
mdurl==0.1.2 mdurl==0.1.2
mistune==3.0.2 mistune==3.0.2
ml_dtypes==0.5.0
msgpack==1.0.8 msgpack==1.0.8
multidict==6.1.0
multitasking==0.0.11 multitasking==0.0.11
mypy-extensions==1.0.0 mypy-extensions==1.0.0
nbclient==0.10.0 nbclient==0.10.0
@ -92,12 +104,13 @@ nbformat==5.10.4
nest-asyncio==1.6.0 nest-asyncio==1.6.0
notebook==7.2.2 notebook==7.2.2
notebook_shim==0.2.4 notebook_shim==0.2.4
numba==0.60.0 numba==0.56.4
numpy==1.23.5 numpy==1.23.5
opt_einsum==3.4.0
orjson==3.10.7 orjson==3.10.7
overrides==7.7.0 overrides==7.7.0
packaging==24.1 packaging==24.1
pandas==2.2.2 pandas==2.2.1
pandas_market_calendars==4.4.1 pandas_market_calendars==4.4.1
pandocfilters==1.5.1 pandocfilters==1.5.1
parso==0.8.4 parso==0.8.4
@ -110,9 +123,11 @@ prometheus_client==0.21.0
prompt_toolkit==3.0.47 prompt_toolkit==3.0.47
proxy-tools==0.1.0 proxy-tools==0.1.0
psutil==6.0.0 psutil==6.0.0
psycopg2==2.9.9
ptyprocess==0.7.0 ptyprocess==0.7.0
pure_eval==0.2.3 pure_eval==0.2.3
pyarrow==17.0.0 pyarrow==11.0.0
pycares==4.4.0
pycparser==2.22 pycparser==2.22
pydantic==2.8.2 pydantic==2.8.2
pydantic_core==2.20.1 pydantic_core==2.20.1
@ -142,7 +157,7 @@ rich==13.8.0
rpds-py==0.20.0 rpds-py==0.20.0
schedule==1.2.2 schedule==1.2.2
scikit-learn==1.5.1 scikit-learn==1.5.1
scipy==1.14.1 scipy==1.11.2
seaborn==0.13.2 seaborn==0.13.2
Send2Trash==1.8.3 Send2Trash==1.8.3
six==1.16.0 six==1.16.0
@ -164,20 +179,21 @@ tornado==6.4.1
tqdm==4.66.5 tqdm==4.66.5
traitlets==5.14.3 traitlets==5.14.3
tulipy==0.4.0 tulipy==0.4.0
types-python-dateutil==2.9.0.20240906 types-python-dateutil==2.9.0.20240316
typing_extensions==4.12.2 typing_extensions==4.9.0
tzdata==2024.1 tzdata==2024.1
tzlocal==5.2 tzlocal==5.2
uri-template==1.3.0 uri-template==1.3.0
urllib3==2.2.2 urllib3==2.2.2
v2realbot @ git+https://github.com/drew2323/v2trading.git@700461033f2b816eeb1ee3a14b930f181cf57308 v2realbot @ git+https://github.com/drew2323/v2trading.git@700461033f2b816eeb1ee3a14b930f181cf57308
vectorbtpro @ file:///Users/davidbrazda/Desktop/vectorbtpro-2024.2.22-py3-none-any.whl#sha256=d85431edc7c9df69886ad0b6ee2b32d3aaa40cdc0b456f1c8fa6aa26f01d2be3 vectorbtpro @ file:///Users/davidbrazda/Downloads/vectorbt.pro-develop
wcwidth==0.2.13 wcwidth==0.2.13
webcolors==24.8.0 webcolors==24.8.0
webencodings==0.5.1 webencodings==0.5.1
websocket-client==1.8.0 websocket-client==1.8.0
websockets==13.0.1 websockets==11.0.3
Werkzeug==3.0.4 Werkzeug==3.0.4
widgetsnbextension==4.0.13 widgetsnbextension==4.0.9
yarl==1.13.1
yfinance==0.2.43 yfinance==0.2.43
zipp==3.20.1 zipp==3.20.1

74
research/dbinsertdata.py Normal file
View File

@ -0,0 +1,74 @@
"""Saves data from Kraken Futures, combining BTC and USD settled markets"""
import logging
from logging.config import fileConfig
from vectorbtpro import pd, vbt
from ext_lib.db import db_connect
from ext_lib.util import find_earliest_date
# Module-level configuration. Everything below runs at import time, including
# the database connection made by db_connect().

# NOTE(review): EXCHANGE is passed below both as the CCXT `exchange` setting and
# as the SQL schema name. "1s_OHLCV" reads like a schema name rather than an
# exchange id (the module docstring mentions Kraken Futures) - confirm.
EXCHANGE = "1s_OHLCV"
# Symbols pulled one by one in main() and merged into a single series.
SYMBOLS = ("BTC/USD:BTC", "BTC/USD:USD")
# Candle timeframe; also used in main() as the step added to the last stored timestamp.
RESOLUTION = "1s"
# NOTE(review): the database is named "ohlcv_1m" while RESOLUTION is "1s" - confirm this is intended.
DB_ENGINE = db_connect("ohlcv_1m")
# Table/symbol name under which the merged data is stored.
DB_SYMBOL = "BTC/USD"
# File-based logging configuration is disabled; basicConfig below is used instead.
#fileConfig("logging.ini", disable_existing_loggers=False)
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
handlers=[logging.StreamHandler()]
)
log = logging.getLogger("main")
# Defaults applied to CCXT pulls made through vbt.CCXTData.
vbt.CCXTData.set_custom_settings(exchange=EXCHANGE, timeframe=RESOLUTION, limit=6000000)
# Settings stored for the engine named "pg", which wraps DB_ENGINE; SQLData is then told to use it.
vbt.SQLData.set_engine_settings(engine_name="pg", engine=DB_ENGINE, schema=EXCHANGE, populate_=True, chunksize=1000)
vbt.SQLData.set_custom_settings(engine_name="pg", schema=EXCHANGE)
def main():
    """Pull new candles for every symbol in SYMBOLS, merge them and save them to the DB.

    Steps:
        1. Make sure the EXCHANGE schema exists.
        2. If the DB_SYMBOL table already exists, resume right after its last
           stored "Open time"; otherwise look up the earliest available date
           per symbol.
        3. Pull each symbol, convert the BTC-settled volume to BTC, and combine
           the symbols by timestamp (prices averaged, volumes summed).
        4. Drop the last (incomplete) row and save the rest.
    """
    vbt.SQLData.create_schema(EXCHANGE)

    db_last_tstamp = None
    # TODO: figure out if it's possible to avoid using tables directly, but rather symbols
    if vbt.SQLData.has_table(DB_SYMBOL, schema=EXCHANGE):
        db_last_tstamp = vbt.SQLData.get_last_row_number(DB_SYMBOL, row_number_column="Open time")

    dfs = []
    for symbol in SYMBOLS:
        if db_last_tstamp is None:
            start = find_earliest_date(symbol, EXCHANGE)
        else:
            # Resume one bar after the last row already stored
            start = db_last_tstamp + pd.Timedelta(RESOLUTION)
        log.info("Start date for %s is %s", symbol, start)

        # Get data
        df = vbt.CCXTData.pull(symbol, exchange=EXCHANGE, timeframe=RESOLUTION, start=start).get()

        # If symbol is BTC/USD:BTC, convert volume from USD to BTC using the close price
        if symbol == "BTC/USD:BTC":
            df["Volume"] = df["Volume"].div(df["Close"])
        dfs.append(df)

    # Combine data from two symbols (last row is incomplete so removed)
    concatenated_data = pd.concat(dfs, axis=0)
    final_data = (
        concatenated_data.groupby(concatenated_data.index)
        .agg({"Open": "mean", "High": "mean", "Low": "mean", "Close": "mean", "Volume": "sum"})
        .iloc[:-1]
    )

    # Nothing complete to store yet: skip the write instead of saving an empty frame
    if final_data.empty:
        log.info("No new complete rows to save")
        return

    data = vbt.SQLData.from_data({DB_SYMBOL: final_data})

    # TODO: use custom method to prevent duplicate timestamps
    log.info("Saving to DB")
    vbt.SQLDataSaver(data).save_data(method="multi")


if __name__ == "__main__":
    main()

View File

39
research/ext_lib/db.py Normal file
View File

@ -0,0 +1,39 @@
"""Database-related stuff"""
import os
from urllib.parse import quote_plus

import sqlalchemy
from sqlalchemy import create_engine, text
# Connection settings. Each value can be overridden with an environment variable
# of the same name; the literals below are only fallbacks.
DB_USER = os.environ.get("DB_USER", "postgres")
# SECURITY: this fallback password is committed to version control. Rotate it and
# supply the real value through the DB_PW environment variable instead.
DB_PW = os.environ.get("DB_PW", "KzsZ0wz5tp2wUIPM")
DB_HOST = os.environ.get("DB_HOST", "db.stratlab.dev")
DB_PORT = int(os.environ.get("DB_PORT", "30543"))
# The password is URL-encoded so characters such as "@" or "/" cannot break the URL.
DB_URL = f"postgresql://{DB_USER}:{quote_plus(DB_PW)}@{DB_HOST}:{DB_PORT}"
# Keep the startup diagnostic but never echo the password.
print(f"postgresql://{DB_USER}:***@{DB_HOST}:{DB_PORT}")
# Stricter TLS settings, currently disabled:
#DB_ARGS = {"sslmode": "verify-full", "sslrootcert": "system"}
# Extra keyword arguments handed to the DB driver through create_engine(connect_args=...).
DB_ARGS = {}
def db_connect(db_name: str) -> sqlalchemy.engine.Engine:
    """Connect to DB. Create it if it doesn't exist

    A probe connection is opened to ``db_name``. If that raises
    ``OperationalError`` (for any reason, not only a missing database), the
    database is created through a connection to the ``postgres`` database.

    Args:
        db_name: name of the database to connect to, and to create when the
            probe connection fails

    Returns:
        An engine bound to ``db_name``.

    Raises:
        Errors raised while connecting to the ``postgres`` database or while
        running ``CREATE DATABASE`` propagate to the caller.
    """
    engine = create_engine(f"{DB_URL}/{db_name}", connect_args=DB_ARGS)
    try:
        # Open and immediately release a connection just to verify the DB is reachable
        with engine.connect():
            pass
    except sqlalchemy.exc.OperationalError:
        # Database doesn't exist, create it. CREATE DATABASE cannot run inside a
        # transaction, hence the AUTOCOMMIT isolation level.
        admin_engine = create_engine(
            f"{DB_URL}/postgres", isolation_level="AUTOCOMMIT", connect_args=DB_ARGS
        )
        try:
            # Identifiers cannot be bound as parameters, so let the dialect quote
            # the name where needed instead of interpolating it raw.
            quoted_name = admin_engine.dialect.identifier_preparer.quote(db_name)
            with admin_engine.connect() as conn:
                conn.execute(text(f"CREATE DATABASE {quoted_name}"))
        finally:
            # The maintenance engine is only needed for this one statement
            admin_engine.dispose()
    return engine
# list exchanges
# list symbols
# first date
# last date
# get data
# save data

49
research/ext_lib/util.py Normal file
View File

@ -0,0 +1,49 @@
import logging
from datetime import datetime, timedelta
from ccxt.base.errors import ExchangeNotAvailable
from vectorbtpro import pd, tp, vbt
log = logging.getLogger(__name__)
def find_earliest_date(symbol: str, exchange: str, **kwargs) -> tp.Optional[pd.Timestamp]:
    """Locate the earliest date with data by bisecting over whole days.

    Wraps ``vbt.CCXTData.find_earliest_date`` and treats an
    ``ExchangeNotAvailable`` error during a probe the same as "no data found".

    Args:
        symbol: The trading symbol to query
        exchange: The exchange to query
        **kwargs: Extra arguments forwarded to ``CCXTData.find_earliest_date``.
            ``start`` (default 2010-01-01) and ``end`` (default now) are
            consumed here as the initial search bounds.

    Returns:
        tp.Optional[pd.Timestamp]: Result of the final lookup, or None when that
        lookup raises ``ExchangeNotAvailable``
    """
    log.info("Searching for earliest date for %s", symbol)
    lower = pd.Timestamp(kwargs.pop("start", datetime(2010, 1, 1))).floor("D")
    upper = pd.Timestamp(kwargs.pop("end", datetime.now())).floor("D")
    one_day = timedelta(days=1)

    while lower < upper:
        log.info("Trying %s to %s range", lower, upper)
        midpoint = (lower + (upper - lower) // 2).floor("D")
        try:
            probe = vbt.CCXTData.find_earliest_date(
                symbol, exchange=exchange, start=midpoint, end=upper, limit=10, **kwargs
            )
        except ExchangeNotAvailable:
            # An unavailable exchange is handled exactly like an empty result
            probe = None

        if probe:
            # Something was found from midpoint onwards: keep looking in the earlier half
            upper = midpoint
        else:
            # Nothing from midpoint onwards: continue with the later half
            lower = midpoint + one_day

    # NOTE(review): with day-floored bounds the loop appears to exit with
    # lower == upper, so this lookup covers a zero-length range - confirm that
    # CCXTData.find_earliest_date handles that as intended.
    try:
        return vbt.CCXTData.find_earliest_date(
            symbol, exchange=exchange, start=lower, end=upper, **kwargs
        )
    except ExchangeNotAvailable as err:
        log.error("ExchangeNotAvailable error encountered at final step... Error: %s", err)
        return None

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long