#!/usr/bin/env python
# coding: utf-8

# # DoWhy example on IHDP (Infant Health and Development Program) dataset

# In[1]:


# importing required libraries
import os
import sys

# Make the repository checkout three directories up importable, so the
# `dowhy` imports below can resolve without an installed package.
sys.path.append(os.path.abspath("../../../"))
import dowhy
from dowhy import CausalModel
import pandas as pd
import numpy as np


# ### Loading Data

# In[2]:


data = pd.read_csv(
    "https://raw.githubusercontent.com/AMLab-Amsterdam/CEVAE/master/datasets/IHDP/csv/ihdp_npci_1.csv",
    header=None,
)

# The CSV has no header row: name the five leading columns explicitly and the
# remaining 25 covariate columns x1..x25.
covariates = ["x" + str(i) for i in range(1, 26)]
col = ["treatment", "y_factual", "y_cfactual", "mu0", "mu1"] + covariates
data.columns = col
data = data.astype({"treatment": 'bool'}, copy=False)
data.head()


# ### 1. Model

# In[3]:


# Create a causal model from the data and given common causes.
# The covariate names are passed as a list; building them by splitting a
# "+"-terminated string would append an empty name that is not a data column.
model = CausalModel(
    data=data,
    treatment='treatment',
    outcome='y_factual',
    common_causes=covariates
)


# ### 2. Identify

# In[4]:


# Identify the causal effect
identified_estimand = model.identify_effect()


# ### 3. Estimate (using different methods)

# #### 3.1 Using Linear Regression

# In[5]:


# Estimate the causal effect and compare it with Average Treatment Effect
estimate = model.estimate_effect(
    identified_estimand,
    method_name="backdoor.linear_regression",
    test_significance=True
)

print(estimate)

print("Causal Estimate is " + str(estimate.value))

# The value printed under the "ATE" label is the unadjusted difference between
# the mean factual outcome of treated rows and that of untreated rows. It does
# not depend on the estimator, so it is computed once and reused below.
data_1 = data[data["treatment"] == 1]
data_0 = data[data["treatment"] == 0]
mean_outcome_difference = np.mean(data_1["y_factual"]) - np.mean(data_0["y_factual"])

print("ATE", mean_outcome_difference)


# #### 3.2 Using Propensity Score Matching

# In[6]:


estimate = model.estimate_effect(
    identified_estimand,
    method_name="backdoor.propensity_score_matching"
)

print("Causal Estimate is " + str(estimate.value))

print("ATE", mean_outcome_difference)


# #### 3.3 Using Propensity Score Stratification

# In[7]:


estimate = model.estimate_effect(
    identified_estimand,
    method_name="backdoor.propensity_score_stratification",
    method_params={'num_strata': 50, 'clipping_threshold': 5}
)

print("Causal Estimate is " + str(estimate.value))
print("ATE", mean_outcome_difference)


# #### 3.4 Using Propensity Score Weighting

# In[8]:


estimate = model.estimate_effect(
    identified_estimand,
    method_name="backdoor.propensity_score_weighting"
)

print("Causal Estimate is " + str(estimate.value))

print("ATE", mean_outcome_difference)


# ### 4. Refute
# ##### Refute the obtained estimate using multiple robustness checks.
# Note: `estimate` at this point holds the result of the last estimator run
# above (propensity score weighting), so that is the estimate being refuted.

# #### 4.1 Adding a random common cause

# In[9]:


refute_results = model.refute_estimate(
    identified_estimand,
    estimate,
    method_name="random_common_cause"
)
print(refute_results)


# #### 4.2 Using a placebo treatment

# In[10]:


res_placebo = model.refute_estimate(
    identified_estimand,
    estimate,
    method_name="placebo_treatment_refuter",
    placebo_type="permute"
)
print(res_placebo)


# #### 4.3 Data Subset Refuter

# In[11]:


res_subset = model.refute_estimate(
    identified_estimand,
    estimate,
    method_name="data_subset_refuter",
    subset_fraction=0.9
)
print(res_subset)