11 KiB
This code processes and analyzes equity options data, extracting volatility curves for specific options chains. It reads options data from CSV files, stores them in an Arctic database, and then queries the database to retrieve options chains for analysis. The code includes functions to read the options data, filter it based on specific criteria, and query it for expiration dates. Finally, it plots the implied volatility curves for the options, visualizing how volatility changes with different strikes and expiration dates.
import os import glob import matplotlib.pyplot as plt import datetime as dt
import arcticdb as adb import pandas as pd
Initialize an Arctic database connection and get the "options" library
arctic = adb.Arctic("lmdb://equity_options") lib = arctic.get_library("options", create_if_missing=True)
Define a function to read options chains from CSV files
def read_chains(fl): """Reads options chains from a CSV file Parameters ---------- fl : str Path to the CSV file Returns ------- df : pd.DataFrame Dataframe containing the options chains """ # Read the CSV file, set the "date" column as the index, and convert to datetime df = ( pd .read_csv(fl) .set_index("date") ) df.index = pd.to_datetime(df.index) return df
Read all CSV files from the "rut-eod" directory and store their data in the Arctic database
files = glob.glob(os.path.join("rut-eod", "*.csv")) for fl in files: chains = read_chains(fl) chains.option_expiration = pd.to_datetime(chains.option_expiration) underlyings = chains.symbol.unique() for underlying in underlyings: df = chains[chains.symbol == underlying] adb_sym = f"options/{underlying}" adb_fcn = lib.update if lib.has_symbol(adb_sym) else lib.write adb_fcn(adb_sym, df)
Read data for the "RUT" underlying from the Arctic database
rut = lib.read("options/RUT").data
Print the minimum and maximum dates from the "RUT" data
rut.index.min(), rut.index.max()
Print information about the "RUT" dataframe
rut.info()
Define a function to read the volatility curve for a specific options chain
def read_vol_curve(as_of_date, underlying, expiry, delta_low, delta_high): """Reads the volatility curve for specific options Parameters ---------- as_of_date : pd.Timestamp The date for which to read the data underlying : str The underlying asset symbol expiry : pd.Timestamp The expiration date of the options delta_low : float The lower bound of the delta range delta_high : float The upper bound of the delta range Returns ------- df : pd.DataFrame Dataframe containing the volatility curve """ # Build a query to filter options data based on expiration date and delta range q = adb.QueryBuilder() filter = ( (q["option_expiration"] == expiry) & ( ( (q["delta"] >= delta_low) & (q["delta"] <= delta_high) ) | ( (q["delta"] >= -delta_high) & (q["delta"] <= -delta_low) ) ) ) q = ( q[filter] .groupby("strike") .agg({"iv": "mean"}) ) # Read the filtered data from the Arctic database return lib.read( f"options/{underlying}", date_range=(as_of_date, as_of_date), query_builder=q ).data
Define a function to query expiration dates for options
def query_expirations(as_of_date, underlying, dte=30): """Queries expiration dates for options Parameters ---------- as_of_date : pd.Timestamp The date for which to query the data underlying : str The underlying asset symbol dte : int, optional Days to expiration threshold, by default 30 Returns ------- expirations : pd.Index Index containing the expiration dates """ # Build a query to filter options data based on expiration date threshold q = adb.QueryBuilder() filter = (q.option_expiration > as_of_date + dt.timedelta(days=dte)) q = q[filter].groupby("option_expiration").agg({"volume": "sum"}) # Read the filtered data from the Arctic database and sort by expiration date return ( lib .read( f"options/{underlying}", date_range=(as_of_date, as_of_date), query_builder=q ) .data .sort_index() .index )
Define input parameters for querying and plotting the volatility curves
as_of_date = pd.Timestamp("2013-06-03") expiry = pd.Timestamp("2013-06-22") underlying = "RUT" dte = 30 delta_low = 0.05 delta_high = 0.50
Query expiration dates for the given underlying asset and date
expiries = query_expirations(as_of_date, underlying, dte)
Plot the implied volatility curves for the retrieved expiration dates
_, ax = plt.subplots(1, 1) cmap = plt.get_cmap("rainbow", len(expiries)) format_kw = {"linewidth": 0.5, "alpha": 0.85} for i, expiry in enumerate(expiries): curve = read_vol_curve( as_of_date, underlying, expiry, delta_low, delta_high ) ( curve .sort_index() .plot( ax=ax, y="iv", label=expiry.strftime("%Y-%m-%d"), grid=True, color=cmap(i), **format_kw ) ) ax.set_ylabel("implied volatility") ax.legend(loc="upper right", framealpha=0.7)
PyQuant News is where finance practitioners level up with Python for quant finance, algorithmic trading, and market data analysis. Looking to get started? Check out the fastest growing, top-selling course to get started with Python for quant finance. For educational purposes. Not investment advise. Use at your own risk.
