#!/usr/bin/env python
# coding: utf-8

# # DoWhy example on the Lalonde dataset
# 
# Thanks to [@mizuy](https://github.com/mizuy) for providing this example. Here we apply the inverse probability weighting (IPW) estimator to the Lalonde dataset.

# In[1]:


import os, sys
# Add the directory two levels above the current working directory to the
# import path. Presumably this makes `dowhy` resolve to the local repository
# checkout rather than an installed copy -- verify against the repo layout.
sys.path.append(os.path.abspath("../../"))

import dowhy
from dowhy.do_why import CausalModel
from rpy2.robjects import r as R
# Load rpy2's IPython extension so that the `R` line magic used below exists.
# NOTE: get_ipython() is only defined when running under IPython/Jupyter.
get_ipython().run_line_magic('load_ext', 'rpy2.ipython')

# One-time setup: uncomment the next line to install the R package first.
#%R install.packages("Matching")
# Attach the R package `Matching` in the embedded R session.
get_ipython().run_line_magic('R', 'library(Matching)')


# ## 1. Load the data

# In[2]:


# Load the `lalonde` data set inside the R session.
get_ipython().run_line_magic('R', 'data(lalonde)')
# `-o lalonde` asks the R magic to export the R object to the Python namespace
# under the same name; the `lalonde` variable used below comes from this line.
# NOTE(review): expected to arrive as a pandas DataFrame -- confirm for the
# installed rpy2 version.
get_ipython().run_line_magic('R', '-o lalonde')


# ## Run DoWhy analysis: model, identify, estimate

# In[3]:


# Build the causal model: `treat` is the treatment, `re78` the outcome, and the
# listed covariates are declared as common causes of both.
model = CausalModel(
    data=lalonde,
    treatment='treat',
    outcome='re78',
    common_causes=['nodegr', 'black', 'hisp', 'age', 'educ', 'married'],
)

# Identify the estimand, then estimate it with propensity score weighting.
identified_estimand = model.identify_effect()
estimate = model.estimate_effect(
    identified_estimand,
    method_name="backdoor.propensity_score_weighting",
)

# `print(estimate)` would show the full estimate object; only the point value
# is reported here.
print("Causal Estimate is " + str(estimate.value))


# ## Sanity check: compare to manual IPW estimate

# In[4]:


# Recompute the estimate by hand from the model's data frame.
# NOTE(review): the `ps` column is presumably the propensity score added by the
# propensity-score estimator run above -- confirm against the DoWhy version used.
df = model._data
ps, y, z = df['ps'], df['re78'], df['treat']

# Normalising constants: total inverse-probability weight in each arm.
treated_weight_total = sum(z / ps)
control_weight_total = sum((1 - z) / (1 - ps))

# Per-row contributions to the weighted mean outcome of each arm; dividing by
# the arm's total weight makes the weights sum to one within that arm.
ey1 = (z * y / ps) / treated_weight_total
ey0 = ((1 - z) * y / (1 - ps)) / control_weight_total

# Average treatment effect: weighted treated mean minus weighted control mean.
ate = ey1.sum() - ey0.sum()
print("Causal Estimate is " + str(ate))

# Expected output: Causal Estimate is 1634.9868359746906