#!/usr/bin/env python
# coding: utf-8

# Exploratory notebook (exported to a script) that exercises DoWhy's effect
# estimation API on a synthetic dataset:
#   1. generate data with dowhy.datasets.linear_dataset,
#   2. build a CausalModel and identify the estimand,
#   3. estimate the effect with backdoor linear regression and with an
#      EconML DML estimator,
#   4. query significance tests, standard errors and confidence intervals,
#   5. run a placebo-treatment refuter,
#   6. fit a plain statsmodels OLS on the same data for comparison.
# Bare expressions (e.g. `dowhy.__version__`, `res.summary()`) only display
# something when run as notebook cells; they are kept so the cells round-trip.

# In[1]:


from dowhy import CausalModel
import dowhy.datasets
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
import logging
#logging.getLogger("dowhy").setLevel(logging.INFO)
# Fixed seed so the numpy-based random draws are repeatable between runs.
np.random.seed(25)
dowhy.__version__


# In[2]:


# NOTE(review): the first positional argument (10) is presumably the true
# effect size `beta` -- confirm against the installed DoWhy version.
data = dowhy.datasets.linear_dataset(
    10,
    num_common_causes=2,
    num_samples=100000,
    num_instruments=0,
    num_effect_modifiers=2,
    num_treatments=1,
    treatment_is_binary=True,
    outcome_is_binary=False,
)
df = data['df']
print(df.head())
print(data)


# In[3]:


model = CausalModel(
    df,
    data["treatment_name"],
    data["outcome_name"],
    data["dot_graph"],
    missing_nodes_as_confounders=True,
    logging_level=logging.INFO,
)
print("CC", model._common_causes)
print("EM", model._effect_modifiers)
# Render the graph, then display the image file from disk.
model.view_model()
from IPython.display import Image, display
display(Image(filename="causal_model.png"))


# In[4]:


# NOTE(review): the returned logger is discarded and the name looks like a
# typo (perhaps "causal_estimator") -- left untouched; confirm the intent.
logging.getLogger("causal_estimar")


# In[5]:


identified_estimand = model.identify_effect(proceed_when_unidentifiable=True)
import time
# Time only the estimation call.
start = time.time()
causal_estimate = model.estimate_effect(
    identified_estimand,
    method_name="backdoor.linear_regression",
    confidence_intervals=False,
    test_significance=False,
    method_params={
        'num_null_simulations': 10,
        'num_simulations': 10,
        'num_quantiles_to_discretize_cont_cols': 10,
        'fit_method': "statsmodels",
        'need_conditional_estimates': False
    },
)
end = time.time()
print(end - start)
#print(causal_estimate.estimator._linear_model.summary())
#print(causal_estimate)


# In[6]:


causal_estimate.test_stat_significance(num_simulations=2)


# In[ ]:


print(causal_estimate.estimate_conditional_effects(effect_modifiers=["X1"]).head(20))


# In[ ]:


from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LassoCV
from sklearn.ensemble import GradientBoostingRegressor
dml_estimate = model.estimate_effect(
    identified_estimand,
    method_name="backdoor.econml.dml.DMLCateEstimator",
    control_value=0,
    treatment_value=1,
    target_units="ate",  # condition used for CATE
    confidence_intervals=False,
    method_params={
        "init_params": {
            'model_y': GradientBoostingRegressor(),
            'model_t': GradientBoostingRegressor(),
            "model_final": LassoCV(),
            'featurizer': PolynomialFeatures(degree=1, include_bias=True),
        },
        "fit_params": {},
    },
)
print(dml_estimate)


# In[ ]:


print(causal_estimate.test_stat_significance(method="bootstrap", num_null_simulations=2))
print(causal_estimate.test_stat_significance())
causal_estimate.test_stat_significance()


# In[ ]:


print(causal_estimate.get_standard_error(method="bootstrap", num_ci_simulations=10, sample_size_fraction=0.9))


# In[ ]:


print(causal_estimate.get_confidence_intervals(confidence_level=0.99))
print(causal_estimate.get_confidence_intervals(method="bootstrap", confidence_level=0.95, num_ci_simulations=10))
# NOTE(review): "psm" does not look like a confidence-interval method; this
# call may be probing the unsupported-method path -- confirm the intent.
print(causal_estimate.get_confidence_intervals(method="psm", confidence_level=0.95, num_ci_simulations=10))


# In[15]:


str(model.refute_estimate(identified_estimand, causal_estimate,
                          method_name="placebo_treatment_refuter",
                          num_simulations=1))


# In[ ]:


import statsmodels.formula.api as smf
# Build the OLS formula from the column names returned with the dataset rather
# than hard-coding them. The previous formula ("y~v0+W0+W1+W2+W3") named W2
# and W3, but the data above is generated with num_common_causes=2, so those
# columns are not expected to exist in `df`.
treatment_cols = data["treatment_name"]
if isinstance(treatment_cols, str):
    treatment_cols = [treatment_cols]
ols_terms = list(treatment_cols) + list(data["common_causes_names"])
ols_formula = data["outcome_name"] + "~" + "+".join(ols_terms)
res = smf.ols(formula=ols_formula, data=df).fit()
res.summary()


# In[ ]:


# Inspect the statsmodels result held by the linear-regression estimator
# (private attribute; fit_method was set to "statsmodels" above).
lm = causal_estimate.estimator._linear_model
print(lm.pvalues)
print(type(lm.conf_int()))
print(lm.bse)
lm.summary()


# In[ ]:


print(causal_estimate.get_confidence_intervals(confidence_level=0.9))


# In[ ]:


print(causal_estimate.get_standard_error())


# In[ ]:


print(causal_estimate.test_stat_significance())


# In[ ]: