#!/usr/bin/env python
# coding: utf-8

# # DoWhy: Different estimation methods for causal inference
# This is a quick introduction to the DoWhy causal inference library.
# We load a sample dataset and then apply several different methods for
# estimating the causal effect of a (pre-specified) treatment variable on a
# (pre-specified) outcome variable.
#
# First, add the path Python needs in order to find the DoWhy code, and load
# all required packages.

# In[1]:

import os
import sys

# Two directories up from the working directory is put on the import path so
# that the `dowhy` package can be imported below.
sys.path.append(os.path.abspath("../../"))

# In[2]:

import logging

import numpy as np
import pandas as pd

import dowhy
from dowhy.do_why import CausalModel
import dowhy.datasets


def _report_estimate(estimate):
    """Print an estimate object, then its ``value`` on a labelled line."""
    print(estimate)
    # `!s` forces str() conversion, matching plain string concatenation.
    print(f"Causal Estimate is {estimate.value!s}")


# Now, load a dataset. For simplicity, we simulate a dataset with linear
# relationships between common causes and treatment, and between common
# causes and outcome.
#
# Beta is the true causal effect.

# In[3]:

data = dowhy.datasets.linear_dataset(
    beta=10,
    num_common_causes=5,
    num_instruments=2,
    num_samples=10000,
    treatment_is_binary=True,
)
df = data["df"]

# Note that the data is held in a pandas dataframe.

# ## Identifying the causal estimand

# The causal graph is passed to the model as well; here it is taken from the
# dataset's `gml_graph` entry.

# In[4]:

# With graph
model = CausalModel(
    data=df,
    treatment=data["treatment_name"],
    outcome=data["outcome_name"],
    graph=data["gml_graph"],
    instruments=data["instrument_names"],
    logging_level=logging.INFO,
)

# In[5]:

model.view_model()

# In[6]:

from IPython.display import Image, display

display(Image(filename="causal_model.png"))

# We now have a causal graph. Identification and estimation come next.

# In[7]:

identified_estimand = model.identify_effect()
print(identified_estimand)

# ## Method 1: Regression
#
# Use linear regression.

# In[8]:

causal_estimate_reg = model.estimate_effect(
    identified_estimand,
    method_name="backdoor.linear_regression",
    test_significance=True,
)
_report_estimate(causal_estimate_reg)

# ## Method 2: Stratification
#
# Propensity scores are used to stratify units in the data.

# In[9]:

causal_estimate_strat = model.estimate_effect(
    identified_estimand,
    method_name="backdoor.propensity_score_stratification",
)
_report_estimate(causal_estimate_strat)

# ## Method 3: Matching
#
# Propensity scores are used to match units in the data.

# In[10]:

causal_estimate_match = model.estimate_effect(
    identified_estimand,
    method_name="backdoor.propensity_score_matching",
)
_report_estimate(causal_estimate_match)

# ## Method 4: Weighting
#
# (Inverse) propensity scores are used to assign weights to units in the data.

# In[11]:

causal_estimate_ipw = model.estimate_effect(
    identified_estimand,
    method_name="backdoor.propensity_score_weighting",
)
_report_estimate(causal_estimate_ipw)

# ## Method 5: Instrumental Variable
#
# The Wald estimator is used for the provided instrumental variable.

# In[12]:

causal_estimate_iv = model.estimate_effect(
    identified_estimand,
    method_name="iv.instrumental_variable",
    method_params={"iv_instrument_name": "Z1"},
)
_report_estimate(causal_estimate_iv)

# ## Method 6: Regression Discontinuity
#
# Internally this is converted to an equivalent instrumental variables
# problem.

# In[13]:

causal_estimate_regdist = model.estimate_effect(
    identified_estimand,
    method_name="iv.regression_discontinuity",
    method_params={
        "rd_variable_name": "Z1",
        "rd_threshold_value": 0.5,
        "rd_bandwidth": 0.1,
    },
)
_report_estimate(causal_estimate_regdist)