#!/usr/bin/env python
# coding: utf-8

# # Refutation Notebook
# The objective of this notebook is to compare the ability of refuters to
# detect the problems in a given set of estimators.
# Note:
# This notebook makes use of the optional dependencies:
# - pygraphviz
# - causalml

# ## Import Dependencies

# In[1]:


from dowhy.datasets import linear_dataset
from dowhy import CausalModel
# NOTE(review): `causalml` is not referenced by name below; it is only used
# indirectly through the "backdoor.causalml..." estimator string. Presumably
# it is imported here so a missing optional dependency is noticed up front.
import causalml


# ## Inspection Parameters
# These parameters give us the option of inspecting the intermediate steps to
# sanity check the steps performed

# In[2]:


inspect_datasets = True
inspect_models = True
inspect_identified_estimands = True
inspect_estimates = True
inspect_refutations = True


# ## Estimator List
# We pass a list of strings, corresponding to the estimators of interest

# In[3]:


estimator_list = [
    "backdoor.propensity_score_matching",
    "backdoor.propensity_score_weighting",
    "backdoor.causalml.inference.meta.LRSRegressor",
]
# `method_params[k]` is passed as `method_params` for `estimator_list[k]`,
# so the two lists must stay the same length and in the same order.
method_params = [
    None,
    None,
    {"init_params": {}},
]


# ## Refuter List
# A list of strings, corresponding to each refuter we wish to run

# In[4]:


refuter_list = ["bootstrap_refuter", "data_subset_refuter"]


# ## Create the Datasets

# In[5]:


# Parameters for creating the Dataset
TREATMENT_IS_BINARY = True
BETA = 10
NUM_SAMPLES = 1000
NUM_CONFOUNDERS = 5
NUM_INSTRUMENTS = 3
NUM_EFFECT_MODIFIERS = 2

# Creating a Linear Dataset with the given parameters
linear_data = linear_dataset(
    beta=BETA,
    num_common_causes=NUM_CONFOUNDERS,
    num_instruments=NUM_INSTRUMENTS,
    num_effect_modifiers=NUM_EFFECT_MODIFIERS,
    num_samples=NUM_SAMPLES,
    # Use the declared constant so changing it above actually takes effect.
    treatment_is_binary=TREATMENT_IS_BINARY,
)

# Other datasets come here

# Append them together in an array
datasets = [linear_data]


# ## Inspect Data

# In[6]:


if inspect_datasets:
    for dataset_num, data in enumerate(datasets, start=1):
        print("####### Dataset {}###########################################################################################".format(dataset_num))
        print(data['df'].head())
        print("#############################################################################################################")


# ## Create the CausalModels

# In[7]:


# One CausalModel per dataset; `models[i]` is built from `datasets[i]`.
models = []
for data in datasets:
    model = CausalModel(
        data=data['df'],
        treatment=data['treatment_name'],
        outcome=data['outcome_name'],
        graph=data['gml_graph'],
    )
    models.append(model)


# ## Inspect Models

# In[8]:


if inspect_models:
    for model_num, model in enumerate(models, start=1):
        print("####### Model {}#############################################################################################".format(model_num))
        print("Common Causes:", model._common_causes)
        print("Effect Modifiers:", model._effect_modifiers)
        print("Instruments:", model._instruments)
        print("Outcome:", model._outcome)
        print("Treatment:", model._treatment)
        print("#############################################################################################################")


# ## Identify Effect

# In[9]:


# `identified_estimands[i]` is the estimand identified by `models[i]`.
identified_estimands = []
for model in models:
    identified_estimand = model.identify_effect(proceed_when_unidentifiable=True)
    identified_estimands.append(identified_estimand)


# ## Identified Estimands

# In[10]:


# Honour the inspection flag declared above, like the other inspection cells.
if inspect_identified_estimands:
    for estimand_count, estimand in enumerate(identified_estimands, start=1):
        print("####### Identified Estimand {}#####################################################################################".format(estimand_count))
        print(estimand)
        print("###################################################################################################################")


# ## Estimate Effect

# In[11]:


if len(estimator_list) != len(method_params):
    raise ValueError(
        "estimator_list and method_params must have the same length "
        "(got {} and {})".format(len(estimator_list), len(method_params))
    )

estimate_list = []
# `estimate_sources[k]` is the (model, identified estimand) pair that produced
# `estimate_list[k]`; the refutation step needs it to call the matching model.
estimate_sources = []
# Each estimand is estimated with the model that identified it, not with
# whichever model an earlier loop happened to leave bound to `model`.
for model, identified_estimand in zip(models, identified_estimands):
    for method_name, params in zip(estimator_list, method_params):
        estimate = model.estimate_effect(
            identified_estimand,
            method_name=method_name,
            method_params=params,
        )
        estimate_list.append(estimate)
        estimate_sources.append((model, identified_estimand))


# ## Estimate Values

# In[12]:


if inspect_estimates:
    for estimate_count, estimate in enumerate(estimate_list, start=1):
        print("####### Estimand {}#######################################################################################".format(estimate_count))
        print("*** Class Name ***")
        print()
        print(estimate.params['estimator_class'])
        print()
        print(estimate)
        print("########################################################################################################")
        print()


# ## Refute Estimate

# In[13]:


refutation_list = []
# Refute every estimate against the model and estimand it was derived from.
# Order is: for each estimate (in `estimate_list` order), each refuter.
for (model, estimand), estimate in zip(estimate_sources, estimate_list):
    for refuter in refuter_list:
        ref = model.refute_estimate(estimand, estimate, method_name=refuter)
        refutation_list.append(ref)


# ## Refutation Values

# In[14]:


if inspect_refutations:
    for refuter_count, refutation in enumerate(refutation_list, start=1):
        print("####### Refutation {}#######################################################################################".format(refuter_count))
        print("*** Class Name ***")
        print()
        print(refutation.refutation_type)
        print()
        print(refutation)
        print("########################################################################################################")
        print()


# In[ ]:


# In[ ]: