This commit is contained in:
David Brazda
2024-10-02 13:54:29 +02:00
parent 538c739c59
commit 7188b2d003
9 changed files with 2064 additions and 373 deletions

View File

39
research/ext_lib/db.py Normal file
View File

@ -0,0 +1,39 @@
"""Database-related stuff"""
import os

import sqlalchemy
from sqlalchemy import create_engine, text
# Connection settings. Each value can be overridden through an environment
# variable of the same name; the literals are only fallbacks so that existing
# setups keep working unchanged.
DB_USER = os.environ.get("DB_USER", "postgres")
# SECURITY: this fallback password is committed to version control -- rotate it
# and supply DB_PW through the environment instead.
DB_PW = os.environ.get("DB_PW", "KzsZ0wz5tp2wUIPM")
DB_HOST = os.environ.get("DB_HOST", "db.stratlab.dev")
DB_PORT = int(os.environ.get("DB_PORT", "30543"))
# Server-level URL without a database name; db_connect() appends "/<db_name>".
DB_URL = f"postgresql://{DB_USER}:{DB_PW}@{DB_HOST}:{DB_PORT}"
# Show the connection target without leaking the password to stdout/logs.
print(f"postgresql://{DB_USER}:***@{DB_HOST}:{DB_PORT}")
# Extra keyword arguments passed as create_engine(connect_args=...).
#DB_ARGS = {"sslmode": "verify-full", "sslrootcert": "system"}
DB_ARGS = {}
def db_connect(db_name: str) -> sqlalchemy.engine.Engine:
    """Connect to DB. Create it if it doesn't exist

    Args:
        db_name: name of the database to connect to (created when missing)

    Returns:
        Engine bound to ``db_name`` on the server described by ``DB_URL``.

    Raises:
        Any error raised while connecting to the ``postgres`` maintenance
        database or while executing ``CREATE DATABASE`` propagates to the caller.
    """
    # create_engine() is lazy; nothing touches the network until connect().
    engine = create_engine(f"{DB_URL}/{db_name}", connect_args=DB_ARGS)
    try:
        # Probe the database. The context manager returns the connection to
        # the pool instead of leaking it.
        with engine.connect():
            pass
    except sqlalchemy.exc.OperationalError:
        # Assume the database doesn't exist and create it. CREATE DATABASE
        # cannot run inside a transaction, hence AUTOCOMMIT.
        admin_engine = create_engine(
            f"{DB_URL}/postgres", isolation_level="AUTOCOMMIT", connect_args=DB_ARGS
        )
        try:
            # Identifiers cannot be sent as bind parameters, so quote the name
            # with the dialect's identifier rules. This prevents SQL injection
            # and keeps mixed-case names from being folded to lower case.
            quoted_name = admin_engine.dialect.identifier_preparer.quote(db_name)
            with admin_engine.connect() as conn:
                conn.execute(text(f"CREATE DATABASE {quoted_name}"))
        finally:
            # The maintenance engine is single-use; release its pool.
            admin_engine.dispose()
    return engine
# list exchanges
# list symbols
# first date
# last date
# get data
# save data

49
research/ext_lib/util.py Normal file
View File

@ -0,0 +1,49 @@
import logging
from datetime import datetime, timedelta
from ccxt.base.errors import ExchangeNotAvailable
from vectorbtpro import pd, tp, vbt
log = logging.getLogger(__name__)
def find_earliest_date(symbol: str, exchange: str, **kwargs) -> tp.Optional[pd.Timestamp]:
    """Binary-search wrapper around CCXTData.find_earliest_date that tolerates ExchangeNotAvailable

    The day-aligned window [start, end] is halved repeatedly: each round probes
    the upper half (midpoint to end) and narrows towards the earlier side when
    the probe returns a date, or towards the later side when it returns nothing
    or the exchange raises ExchangeNotAvailable.

    Args:
        symbol: The trading symbol to query
        exchange: The exchange to query
        **kwargs: Additional arguments to pass to the find_earliest_date method.
            ``start`` (default 2010-01-01) and ``end`` (default now) are consumed
            here as the initial search window.

    Returns:
        tp.Optional[pd.Timestamp]: The earliest available date if found, otherwise None
    """
    log.info("Searching for earliest date for %s", symbol)
    lower = pd.Timestamp(kwargs.pop("start", datetime(2010, 1, 1))).floor("D")
    upper = pd.Timestamp(kwargs.pop("end", datetime.now())).floor("D")
    one_day = timedelta(days=1)

    while lower < upper:
        log.info("Trying %s to %s range", lower, upper)
        midpoint = (lower + (upper - lower) // 2).floor("D")
        try:
            probe = vbt.CCXTData.find_earliest_date(
                symbol, exchange=exchange, start=midpoint, end=upper, limit=10, **kwargs
            )
        except ExchangeNotAvailable:
            # Treated the same as "nothing found" for this probe.
            probe = None

        if probe:
            # Probe succeeded: continue in the earlier half.
            upper = midpoint
        else:
            # Nothing usable from midpoint onwards: continue in the later half.
            lower = midpoint + one_day

    # The window has collapsed; lower should now be the earliest date with data.
    try:
        return vbt.CCXTData.find_earliest_date(
            symbol, exchange=exchange, start=lower, end=upper, **kwargs
        )
    except ExchangeNotAvailable as err:
        log.error("ExchangeNotAvailable error encountered at final step... Error: %s", err)
        return None