#!/usr/bin/env python
# coding: utf-8

# # Simple example on using Instrumental Variables method for estimation
#
# Simulates a dataset in which `ability` confounds the effect of `education`
# on `income` and `voucher` shifts `education` only, then estimates the effect
# of education on income twice: with statsmodels' two-stage least squares and
# with DoWhy's instrumental-variable estimator.

# In[1]:

import os
import sys

import numpy as np
import pandas as pd
import patsy as ps
from statsmodels.sandbox.regression.gmm import IV2SLS

# The path is extended *before* importing dowhy so the import can resolve from
# three directories up (presumably a repository checkout -- confirm layout).
sys.path.append(os.path.abspath("../../../"))
from dowhy import CausalModel

# In[2]:

n_points = 1000

# Structural coefficients of the simulated data-generating process.
education_ability = 1
education_voucher = 0.5
income_ability = 2
income_education = 4  # the effect the estimators below should recover

# NOTE: the draws are unseeded, so the estimates vary from run to run.
# confounder: enters both the education and the income equations
ability = np.random.normal(0, 3, size=n_points)
# instrument: enters the education equation only
voucher = np.random.normal(2, 1, size=n_points)
# treatment
education = (
    np.random.normal(5, 1, size=n_points)
    + education_ability * ability
    + education_voucher * voucher
)
# outcome
income = (
    np.random.normal(10, 3, size=n_points)
    + income_ability * ability
    + income_education * education
)

# build dataset: one row per observation, one column per variable
data = np.stack([ability, education, income, voucher]).T
df = pd.DataFrame(data, columns=['ability', 'education', 'income', 'voucher'])

# In[3]:

# IV2SLS takes (endog, exog, instrument): the dependent variable, the
# regressors of the outcome equation, and the instruments for them.
# `ability` is deliberately left out of both formulas.
outcome, regressors = ps.dmatrices("income ~ education", data=df)
instruments = ps.dmatrix("voucher", data=df)
m = IV2SLS(outcome, regressors, instruments).fit()
# Printed explicitly: outside a notebook a bare `m.summary()` shows nothing.
print(m.summary())

# In[4]:

model = CausalModel(
    data=df,
    treatment='education',
    outcome='income',
    common_causes=['ability'],
    instruments=['voucher'],
)

identified_estimand = model.identify_effect()

estimate = model.estimate_effect(
    identified_estimand,
    method_name="iv.instrumental_variable",
    test_significance=True,
)
print(estimate)