8.4 KiB
8.4 KiB
In [ ]:
import pandas as pd import vectorbtpro as vbt
Acquiring Forex Data from Dukascopy¶
For acquiring historical market data from Dukascopy, I used this nodejs package called dukascopy-node.
The following are the commands I used to download M1 (1-minute) data for the following symbols:
npx dukascopy-node -i audnzd -p ask -from 2019-01-01 to 2022-12-31 -t m1 -v true -f csv npx dukascopy-node -i audnzd -p bid -from 2019-01-01 to 2022-12-31 -t m1 -v true -f csv npx dukascopy-node -i eurgbp -p ask -from 2019-01-01 to 2022-12-31 -t m1 -v true -f csv npx dukascopy-node -i eurgbp -p bid -from 2019-01-01 to 2022-12-31 -t m1 -v true -f csv npx dukascopy-node -i gbpjpy -p ask -from 2019-01-01 to 2022-12-31 -t m1 -v true -f csv npx dukascopy-node -i gbpjpy -p bid -from 2019-01-01 to 2022-12-31 -t m1 -v true -f csv npx dukascopy-node -i usdjpy -p ask -from 2019-01-01 to 2022-12-31 -t m1 -v true -f csv npx dukascopy-node -i usdjpy -p bid -from 2019-01-01 to 2022-12-31 -t m1 -v true -f csv npx dukascopy-node -i usdcad -p ask -from 2019-01-01 to 2022-12-31 -t m1 -v true -f csv npx dukascopy-node -i usdcad -p bid -from 2019-01-01 to 2022-12-31 -t m1 -v true -f csv npx dukascopy-node -i eurusd -p ask -from 2019-01-01 to 2022-12-31 -t m1 -v true -f csv npx dukascopy-node -i eurusd -p bid -from 2019-01-01 to 2022-12-31 -t m1 -v true -f csv npx dukascopy-node -i audusd -p ask -from 2019-01-01 to 2022-12-31 -t m1 -v true -f csv npx dukascopy-node -i audusd -p bid -from 2019-01-01 to 2022-12-31 -t m1 -v true -f csv npx dukascopy-node -i gbpusd -p ask -from 2019-01-01 to 2022-12-31 -t m1 -v true -f csv npx dukascopy-node -i gbpusd -p bid -from 2019-01-01 to 2022-12-31 -t m1 -v true -f csv
The free 1m data provided by Dukascopy has some missing bars; for data-quality auditing, one should validate it against other, preferably paid, data sources.
In [ ]:
def read_bid_ask_data(ask_file: str, bid_file: str, set_time_index=False) -> pd.DataFrame:
    """Read and combine the bid & ask CSV files of Dukascopy historical
    market data into a single OHLCV dataframe.

    Parameters
    ----------
    ask_file : str
        Path to the ask-side CSV (columns: timestamp, open, high, low, close, volume).
    bid_file : str
        Path to the bid-side CSV (same layout as the ask file).
    set_time_index : bool, default False
        When True, set the parsed 'time' column as the dataframe index and
        return only the OHLCV columns.

    Returns
    -------
    pd.DataFrame
        Mid-price open/close, bid/ask-envelope high/low, and combined plus
        per-side volumes.
    """
    # NOTE: the deprecated `infer_datetime_format` kwarg was removed — the
    # timestamp is parsed explicitly via pd.to_datetime below.
    df_ask = pd.read_csv(ask_file)
    df_bid = pd.read_csv(bid_file)

    # BUGFIX: suffixes follow pd.merge's (left, right) order, i.e. bid first.
    # The original ('_ask', '_bid') labelled bid columns as ask and vice versa,
    # silently swapping volume_bid / volume_ask in the output.
    merged_df = pd.merge(df_bid, df_ask, on='timestamp', suffixes=('_bid', '_ask'))

    # Mid prices for open/close; envelope across both sides for high/low.
    merged_df['open'] = (merged_df['open_ask'] + merged_df['open_bid']) / 2.0
    merged_df['close'] = (merged_df['close_ask'] + merged_df['close_bid']) / 2.0
    merged_df['high'] = merged_df[['high_ask', 'high_bid']].max(axis=1)
    # BUGFIX: low is the minimum across both sides (original used .max()).
    merged_df['low'] = merged_df[['low_ask', 'low_bid']].min(axis=1)
    merged_df['volume'] = merged_df['volume_bid'] + merged_df['volume_ask']

    # Drop zero-volume rows (market-closed bars present in Dukascopy exports).
    merged_df = merged_df[merged_df["volume"] > 0.0].reset_index(drop=True)

    # dukascopy-node writes epoch milliseconds in the 'timestamp' column.
    merged_df['time'] = pd.to_datetime(merged_df['timestamp'], unit='ms')
    merged_df.drop(columns=["timestamp"], inplace=True)

    final_cols = ['time', 'open', 'high', 'low', 'close', 'volume', 'volume_bid', 'volume_ask']
    if set_time_index:
        merged_df = merged_df.set_index("time")
        return merged_df[final_cols[1:]]
    return merged_df[final_cols].reset_index(drop=True)
In [ ]:
### DataFrame slicing based on nr. of rows of a 1m dataframe
def slice_df_by_1m_rows(df: pd.DataFrame, nr_days_to_slice: int) -> pd.DataFrame:
    """Return the most recent `nr_days_to_slice` days of a 1-minute dataframe.

    BUGFIX: the original overwrote `nr_days_to_slice` with a hard-coded
    365 * 1440 rows, so the caller's argument was silently ignored.

    Parameters
    ----------
    df : pd.DataFrame
        A dataframe sampled at 1-minute frequency (one row per minute).
    nr_days_to_slice : int
        Number of most-recent days to keep.

    Returns
    -------
    pd.DataFrame
        The trailing slice, with a fresh RangeIndex.
    """
    mins_per_day = 24 * 60
    nr_rows = nr_days_to_slice * mins_per_day
    return df.iloc[-nr_rows:].reset_index(drop=True)
In [ ]:
## File names of the bid/ask CSVs downloaded from Dukascopy, keyed by symbol.
## All downloads start 2019-01-01; most end 2023-01-13, while EURGBP and
## GBPAUD were fetched a few days later and end 2023-01-16.
_end_dates = {
    "GBPUSD": "2023-01-13",
    "EURUSD": "2023-01-13",
    "AUDUSD": "2023-01-13",
    "USDCAD": "2023-01-13",
    "USDJPY": "2023-01-13",
    "GBPJPY": "2023-01-13",
    "EURGBP": "2023-01-16",
    "GBPAUD": "2023-01-16",
}
bid_ask_files = {
    symbol: {
        "Bid": f"{symbol.lower()}-m1-bid-2019-01-01-{end_date}.csv",
        "Ask": f"{symbol.lower()}-m1-ask-2019-01-01-{end_date}.csv",
    }
    for symbol, end_date in _end_dates.items()
}
In [ ]:
## Write every symbol into one HDF5 file, using the symbol as the HDF5 key.
folder_path = "/Users/john.doe/Documents/Dukascopy_Historical_Data/"
output_file_path = "/Users/john.doe/Documents/vbtpro_tuts_private/data/MultiAsset_OHLCV_3Y_m1.h5"

for symbol, files in bid_ask_files.items():
    print(f'\n{symbol}')
    # Build the absolute paths of the per-side CSVs for this symbol.
    ask_csv_file = folder_path + files["Ask"]
    bid_csv_file = folder_path + files["Bid"]
    print("ASK File PATH:", ask_csv_file, '\nBID File PATH:', bid_csv_file)
    # Combine bid/ask into OHLCV (time-indexed) and append under this key.
    ohlcv_df = read_bid_ask_data(ask_csv_file, bid_csv_file, set_time_index=True)
    ohlcv_df.to_hdf(output_file_path, key=symbol)
Acquiring Crypto Data¶
In [ ]:
## Acquire multi-asset 1m crypto data from Binance using the vbt wrapper.
crypto_symbols = ["BTCUSDT", "ETHUSDT", "BNBUSDT", "XRPUSDT", "ADAUSDT"]
data = vbt.BinanceData.fetch(
    crypto_symbols,
    start="2019-01-01 UTC",
    end="2022-12-01 UTC",
    timeframe="1m",
)

## Save the acquired data locally for persistence.
data.to_hdf("/Users/john.doe/Documents/vbtpro_tuts_private/data/Binance_MultiAsset_OHLCV_3Y_m1.h5")