{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import vectorbtpro as vbt"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### Acquiring Forex Data from Dukascopy\n",
"For acquiring historical market data from Dukascopy, I used this nodejs package called [`dukascopy-node`](https://github.com/Leo4815162342/dukascopy-node).\n",
"\n",
"The following are the commands I used to download `M1` (1-minute) data for the following symbols:\n",
"\n",
"```bash\n",
"npx dukascopy-node -i audnzd -p ask -from 2019-01-01 -to 2022-12-31 -t m1 -v true -f csv\n",
"npx dukascopy-node -i audnzd -p bid -from 2019-01-01 -to 2022-12-31 -t m1 -v true -f csv\n",
"\n",
"npx dukascopy-node -i eurgbp -p ask -from 2019-01-01 -to 2022-12-31 -t m1 -v true -f csv\n",
"npx dukascopy-node -i eurgbp -p bid -from 2019-01-01 -to 2022-12-31 -t m1 -v true -f csv\n",
"\n",
"npx dukascopy-node -i gbpjpy -p ask -from 2019-01-01 -to 2022-12-31 -t m1 -v true -f csv\n",
"npx dukascopy-node -i gbpjpy -p bid -from 2019-01-01 -to 2022-12-31 -t m1 -v true -f csv\n",
"\n",
"npx dukascopy-node -i usdjpy -p ask -from 2019-01-01 -to 2022-12-31 -t m1 -v true -f csv\n",
"npx dukascopy-node -i usdjpy -p bid -from 2019-01-01 -to 2022-12-31 -t m1 -v true -f csv\n",
"\n",
"npx dukascopy-node -i usdcad -p ask -from 2019-01-01 -to 2022-12-31 -t m1 -v true -f csv\n",
"npx dukascopy-node -i usdcad -p bid -from 2019-01-01 -to 2022-12-31 -t m1 -v true -f csv\n",
"\n",
"npx dukascopy-node -i eurusd -p ask -from 2019-01-01 -to 2022-12-31 -t m1 -v true -f csv\n",
"npx dukascopy-node -i eurusd -p bid -from 2019-01-01 -to 2022-12-31 -t m1 -v true -f csv\n",
"\n",
"npx dukascopy-node -i audusd -p ask -from 2019-01-01 -to 2022-12-31 -t m1 -v true -f csv\n",
"npx dukascopy-node -i audusd -p bid -from 2019-01-01 -to 2022-12-31 -t m1 -v true -f csv\n",
"\n",
"npx dukascopy-node -i gbpusd -p ask -from 2019-01-01 -to 2022-12-31 -t m1 -v true -f csv\n",
"npx dukascopy-node -i gbpusd -p bid -from 2019-01-01 -to 2022-12-31 -t m1 -v true -f csv\n",
"```\n",
"The free `1m` data provided by Dukascopy has some gaps, so its quality should be audited by validating it against\n",
"another, preferably paid, data source."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def read_bid_ask_data(ask_file : str, bid_file : str, set_time_index = False) -> pd.DataFrame:\n",
"    \"\"\"Read Dukascopy bid and ask CSV files and combine them into a single OHLCV dataframe.\n",
"\n",
"    Both files must contain the columns `timestamp` (epoch milliseconds), `open`, `high`,\n",
"    `low`, `close` and `volume`. Rows are matched on `timestamp` with an inner join, so\n",
"    candles present in only one of the files are dropped.\n",
"\n",
"    Args:\n",
"        ask_file: Path of the CSV file holding the ask-side candles.\n",
"        bid_file: Path of the CSV file holding the bid-side candles.\n",
"        set_time_index: If True, the result is indexed by `time`; otherwise `time` is a\n",
"            regular column and the index is a fresh 0..n-1 range.\n",
"\n",
"    Returns:\n",
"        DataFrame with `open`/`close` as the bid-ask mid price, `high` as the highest high,\n",
"        `low` as the lowest low, `volume` as the summed volume, plus `volume_bid` and\n",
"        `volume_ask`. Candles whose summed volume is zero are removed.\n",
"    \"\"\"\n",
"    df_ask = pd.read_csv(ask_file)\n",
"    df_bid = pd.read_csv(bid_file)\n",
"    # Suffixes are applied in (left, right) order, so the bid frame must receive `_bid`.\n",
"    merged_df = pd.merge(df_bid, df_ask, on='timestamp', suffixes=('_bid', '_ask'))\n",
"    merged_df['open'] = (merged_df['open_ask'] + merged_df['open_bid']) / 2.0\n",
"    merged_df['close'] = (merged_df['close_ask'] + merged_df['close_bid']) / 2.0\n",
"    merged_df['high'] = merged_df[['high_ask','high_bid']].max(axis=1)\n",
"    # The combined low is the lower of the two sides.\n",
"    merged_df['low'] = merged_df[['low_ask','low_bid']].min(axis=1)\n",
"    merged_df['volume'] = merged_df['volume_bid'] + merged_df['volume_ask']\n",
"\n",
"    # Keep only candles with some volume on either side.\n",
"    merged_df = merged_df[merged_df['volume'] > 0.0].reset_index(drop=True)\n",
"    ## Case when we downloaded Dukascopy historical market data from node package: dukascopy-node\n",
"    ## (timestamps are epoch milliseconds)\n",
"    merged_df['time'] = pd.to_datetime(merged_df['timestamp'], unit = 'ms')\n",
"\n",
"    final_cols = ['time','open','high','low','close','volume','volume_bid','volume_ask']\n",
"\n",
"    if set_time_index:\n",
"        return merged_df.set_index('time')[final_cols[1:]]\n",
"    return merged_df[final_cols]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"### DataFrame Slicing based on nr. of rows on 1m dataframe\n",
"def slice_df_by_1m_rows(df : pd.DataFrame, nr_days_to_slice : int):\n",
"    \"\"\"Return the most recent `nr_days_to_slice` days of a 1-minute dataframe.\n",
"\n",
"    The slice is taken by row count, using 24 * 60 rows per day, and always ends at the\n",
"    last row of `df`.\n",
"\n",
"    Args:\n",
"        df: Dataframe ordered from oldest to most recent row.\n",
"        nr_days_to_slice: Number of days to keep, counted back from the last row.\n",
"\n",
"    Returns:\n",
"        The trailing rows of `df` with the index reset to 0..n-1. The whole frame is\n",
"        returned when it holds fewer rows than requested; 0 days gives an empty frame.\n",
"\n",
"    Raises:\n",
"        ValueError: If `nr_days_to_slice` is negative.\n",
"    \"\"\"\n",
"    if nr_days_to_slice < 0:\n",
"        raise ValueError('nr_days_to_slice must be non-negative')\n",
"    mins_per_day = 24 * 60\n",
"    # The row count comes from the caller-supplied number of days.\n",
"    nr_rows_to_slice = nr_days_to_slice * mins_per_day\n",
"    # An explicit start offset keeps 0 days empty (`iloc[-0:]` would return every row).\n",
"    start_row = max(len(df) - nr_rows_to_slice, 0)\n",
"    return df.iloc[start_row:].reset_index(drop = True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"## Bid / Ask CSV file names downloaded from Dukascopy, keyed by symbol.\n",
"## Names follow `<symbol>-m1-<side>-2019-01-01-<end date>.csv`; only the end date varies between symbols.\n",
"bid_ask_files = {\n",
"    symbol: {\n",
"        side.capitalize(): f\"{symbol.lower()}-m1-{side}-2019-01-01-{end_date}.csv\"\n",
"        for side in (\"bid\", \"ask\")\n",
"    }\n",
"    for symbol, end_date in (\n",
"        (\"GBPUSD\", \"2023-01-13\"),\n",
"        (\"EURUSD\", \"2023-01-13\"),\n",
"        (\"AUDUSD\", \"2023-01-13\"),\n",
"        (\"USDCAD\", \"2023-01-13\"),\n",
"        (\"USDJPY\", \"2023-01-13\"),\n",
"        (\"GBPJPY\", \"2023-01-13\"),\n",
"        (\"EURGBP\", \"2023-01-16\"),\n",
"        (\"GBPAUD\", \"2023-01-16\"),\n",
"    )\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"## Combine each symbol's bid/ask CSV pair and store it in one shared HDF5 file, one key per symbol\n",
"folder_path = \"/Users/john.doe/Documents/Dukascopy_Historical_Data/\"\n",
"output_file_path = \"/Users/john.doe/Documents/vbtpro_tuts_private/data/MultiAsset_OHLCV_3Y_m1.h5\"\n",
"for symbol, csv_names in bid_ask_files.items():\n",
"    print(f'\\n{symbol}')\n",
"    # The file names are relative to the download folder.\n",
"    ask_csv_file = f\"{folder_path}{csv_names['Ask']}\"\n",
"    bid_csv_file = f\"{folder_path}{csv_names['Bid']}\"\n",
"    print(\"ASK File PATH:\",ask_csv_file,'\\nBID File PATH:',bid_csv_file)\n",
"    df = read_bid_ask_data(ask_csv_file, bid_csv_file, set_time_index = True)\n",
"    df.to_hdf(output_file_path, key=symbol)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### Acquiring Crypto Data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"## Download 1-minute data for several Binance pairs through the vbt data wrapper\n",
"data = vbt.BinanceData.fetch(\n",
"    [\n",
"        \"BTCUSDT\",\n",
"        \"ETHUSDT\",\n",
"        \"BNBUSDT\",\n",
"        \"XRPUSDT\",\n",
"        \"ADAUSDT\",\n",
"    ],\n",
"    timeframe=\"1m\",\n",
"    start=\"2019-01-01 UTC\",\n",
"    end=\"2022-12-01 UTC\",\n",
")\n",
"\n",
"## Persist the downloaded data to a local HDF5 file\n",
"data.to_hdf(\"/Users/john.doe/Documents/vbtpro_tuts_private/data/Binance_MultiAsset_OHLCV_3Y_m1.h5\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "vbt",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.6"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "553d3b352623cb609a2efe4df91242fdc89d5ebcee56d9279e2aa2c11b529c13"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}