#!/usr/bin/env python
# coding: utf-8

# # DoWhy example on the Lalonde dataset
#
# Thanks to [@mizuy](https://github.com/mizuy) for providing this example.
# The Lalonde dataset is loaded from R and DoWhy's IPW (propensity score
# weighting) estimator is applied to it; the result is then re-derived by hand.

# In[1]:

import os
import sys

# The path must be extended before `dowhy` is imported.
# NOTE(review): "../../" is presumably the repository root so that the local
# dowhy checkout is the one imported -- confirm against the notebook location.
sys.path.append(os.path.abspath("../../"))

import dowhy
from dowhy.do_why import CausalModel
from rpy2.robjects import r as R

# This script is a notebook export: `get_ipython` is only defined when it runs
# inside IPython/Jupyter. Fetch the shell once and reuse it for every magic.
_ipython = get_ipython()

# Load the rpy2 extension, then attach the R package "Matching".
_ipython.run_line_magic('load_ext', 'rpy2.ipython')
# If "Matching" is not installed in R yet, run this line first:
#%R install.packages("Matching")
_ipython.run_line_magic('R', 'library(Matching)')


# ## 1. Load the data

# In[2]:

# Load `lalonde` inside the R session, then export it (-o) so that it becomes
# the Python variable `lalonde` used below.
_ipython.run_line_magic('R', 'data(lalonde)')
_ipython.run_line_magic('R', '-o lalonde')


# ## Run DoWhy analysis: model, identify, estimate

# In[3]:

# Column names handed to CausalModel as common causes of treatment and outcome.
confounders = 'nodegr+black+hisp+age+educ+married'.split('+')

model = CausalModel(
    data=lalonde,
    treatment='treat',
    outcome='re78',
    common_causes=confounders,
)
identified_estimand = model.identify_effect()
estimate = model.estimate_effect(
    identified_estimand,
    method_name="backdoor.propensity_score_weighting",
)
# Uncomment to print the whole estimate object instead of just its value:
# print(estimate)
dowhy_message = "Causal Estimate is " + str(estimate.value)
print(dowhy_message)


# ## Sanity check: compare to manual IPW estimate

# In[4]:

# NOTE(review): this reads the model's private `_data` frame and assumes the
# estimator left the propensity scores in a column named 'ps' -- verify against
# the installed dowhy version.
df = model._data
ps = df['ps']
y = df['re78']
z = df['treat']

# Normalising constants: total inverse-probability weight of each group.
treated_weight_total = sum(z / ps)
control_weight_total = sum((1 - z) / (1 - ps))

# Per-row contributions to the weighted mean outcome of each group; the
# operand order matches the original expressions so results are bit-identical.
ey1 = z * y / ps / treated_weight_total
ey0 = (1 - z) * y / (1 - ps) / control_weight_total

# The effect estimate is the difference of the two weighted means.
ate = ey1.sum() - ey0.sum()
manual_message = "Causal Estimate is " + str(ate)
print(manual_message)
# Value recorded by the original author as the correct output:
# correct -> Causal Estimate is 1634.9868359746906