Notebook

In [1]:

from dowhy import CausalModel
import dowhy.datasets
import numpy as np
import pandas as pd

import warnings
warnings.filterwarnings('ignore')

import logging
#logging.getLogger("dowhy").setLevel(logging.INFO)
np.random.seed(25)
dowhy.__version__

Out[1]:

'0.4'

In [2]:

data = dowhy.datasets.linear_dataset(10, num_common_causes=2, num_samples=100000,
                                     num_instruments=0, num_effect_modifiers=2,
                                     num_treatments=1,
                                     treatment_is_binary=True,
                                    outcome_is_binary=False)
df = data['df']
print(df.head())
print(data)

         X0        X1        W0        W1     v0          y
0 -0.690365 -1.110144 -0.099337 -0.426628  False  -0.491223
1  0.417015 -0.511456 -0.216640 -0.057772   True   8.814807
2  0.223920  0.426649  0.120333  2.002458   True  12.894260
3  0.854081 -0.545430 -1.312982  1.033137   True   6.092524
4  0.008900  0.277964 -0.180486 -0.067758   True  10.150629
{'df':              X0        X1        W0        W1     v0          y
0     -0.690365 -1.110144 -0.099337 -0.426628  False  -0.491223
1      0.417015 -0.511456 -0.216640 -0.057772   True   8.814807
2      0.223920  0.426649  0.120333  2.002458   True  12.894260
3      0.854081 -0.545430 -1.312982  1.033137   True   6.092524
4      0.008900  0.277964 -0.180486 -0.067758   True  10.150629
...         ...       ...       ...       ...    ...        ...
99995  1.496265  0.613054  0.713475 -0.788403  False   2.482054
99996  1.555262  0.013880  1.995508  0.566159   True  22.096263
99997 -0.814917  0.254763  0.919172 -0.694613  False   3.284822
99998  0.554770  1.331817  0.001694  0.521115   True  15.661477
99999  0.396213  0.100599  2.093892 -1.172198  False   7.594258

[100000 rows x 6 columns], 'treatment_name': ['v0'], 'outcome_name': 'y', 'common_causes_names': ['W0', 'W1'], 'instrument_names': [], 'effect_modifier_names': ['X0', 'X1'], 'dot_graph': 'digraph { U[label="Unobserved Confounders"]; U->y;v0->y; U->v0;W0-> v0; W1-> v0;W0-> y; W1-> y;X0-> y; X1-> y;}', 'gml_graph': 'graph[directed 1node[ id "y" label "y"]node[ id "Unobserved Confounders" label "Unobserved Confounders"]edge[source "Unobserved Confounders" target "y"]node[ id "W0" label "W0"] node[ id "W1" label "W1"]node[ id "v0" label "v0"]edge[source "v0" target "y"]edge[source "Unobserved Confounders" target "v0"]edge[ source "W0" target "v0"] edge[ source "W1" target "v0"]edge[ source "W0" target "y"] edge[ source "W1" target "y"]node[ id "X0" label "X0"] edge[ source "X0" target "y"] node[ id "X1" label "X1"] edge[ source "X1" target "y"]]', 'ate': 11.759597003791514}

In [3]:

model = CausalModel(df,
                    data["treatment_name"],data["outcome_name"],
                    data["dot_graph"],
                   missing_nodes_as_confounders=True,
                   logging_level=logging.INFO)

print("CC", model._common_causes)
print("EM", model._effect_modifiers)
model.view_model()
from IPython.display import Image, display
display(Image(filename="causal_model.png"))

INFO:dowhy.causal_model:Model to find the causal effect of treatment ['v0'] on outcome ['y']

CC ['U', 'W1', 'W0']
EM ['X1', 'X0']

In [4]:

logging.getLogger("causal_estimar")

Out[4]:

<Logger causal_estimar (INFO)>

In [5]:

identified_estimand = model.identify_effect(proceed_when_unidentifiable=True)
import time
start=time.time()
causal_estimate = model.estimate_effect(identified_estimand,
                                        method_name="backdoor.linear_regression",
                                       confidence_intervals=False,
                                       test_significance=False,
                                        method_params = {
                                            'num_null_simulations':10,
                                            'num_simulations':10,
                                            'num_quantiles_to_discretize_cont_cols':10,
                                            'fit_method': "statsmodels",
                                            'need_conditional_estimates':False
                                        },
                                       )
end=time.time()
print(end-start)
#print(causal_estimate.estimator._linear_model.summary())
#print(causal_estimate)

INFO:dowhy.causal_identifier:Common causes of treatment and outcome:['U', 'W1', 'W0']
WARNING:dowhy.causal_identifier:If this is observed data (not from a randomized experiment), there might always be missing confounders. Causal effect cannot be identified perfectly.
INFO:dowhy.causal_identifier:Continuing by ignoring these unobserved confounders because proceed_when_unidentifiable flag is True.
INFO:dowhy.causal_identifier:Instrumental variables for treatment and outcome:[]
INFO:dowhy.causal_estimator:INFO: Using Linear Regression Estimator
INFO:dowhy.causal_estimator:b: y~v0+W1+W0+v0*X1+v0*X0

5.589203357696533

In [6]:

causal_estimate.test_stat_significance(num_simulations=2)

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-6-a8010fa4491b> in <module>
----> 1 causal_estimate.test_stat_significance(num_simulations=2)

/mnt/c/Users/amit_/code/dowhy/dowhy/causal_estimator.py in test_stat_significance(self, method, **kwargs)
    693         signif_results = self.estimator.test_significance(self.value,
    694                 method=method,
--> 695                 **kwargs)
    696         return {'p_value': signif_results["p_value"]}
    697 

/mnt/c/Users/amit_/code/dowhy/dowhy/causal_estimator.py in test_significance(self, estimate_value, method, **kwargs)
    536         if method == "default" or method is True: # user has not provided any method
    537             try:
--> 538                 signif_dict = self._test_significance(estimate_value, method, **kwargs)
    539             except NotImplementedError:
    540                 signif_dict = self._test_significance_with_bootstrap(estimate_value, **kwargs)

TypeError: _test_significance() got an unexpected keyword argument 'num_simulations'

In [ ]:

print(causal_estimate.estimate_conditional_effects(effect_modifiers=["X1" ]).head(20))

In [ ]:

from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LassoCV
from sklearn.ensemble import GradientBoostingRegressor
dml_estimate = model.estimate_effect(identified_estimand, method_name="backdoor.econml.dml.DMLCateEstimator",
                                     control_value = 0,
                                     treatment_value = 1,
                                 target_units = "ate",  # condition used for CATE
                                 confidence_intervals=False,
                                method_params={"init_params":{'model_y':GradientBoostingRegressor(),
                                                              'model_t': GradientBoostingRegressor(),
                                                              "model_final":LassoCV(), 
                                                              'featurizer':PolynomialFeatures(degree=1, include_bias=True)},
                                               "fit_params":{}})
print(dml_estimate)

In [ ]:

print(causal_estimate.test_stat_significance(method="bootstrap", num_null_simulations=2))
print(causal_estimate.test_stat_significance())
causal_estimate.test_stat_significance()

In [ ]:

print(causal_estimate.get_standard_error(method="bootstrap", num_ci_simulations=10, sample_size_fraction=0.9))

In [ ]:

print(causal_estimate.get_confidence_intervals(confidence_level=0.99))
print(causal_estimate.get_confidence_intervals(method="bootstrap", confidence_level=0.95, num_ci_simulations=10))
print(causal_estimate.get_confidence_intervals(method="psm", confidence_level=0.95, num_ci_simulations=10))

In [15]:

str(model.refute_estimate(identified_estimand, causal_estimate, method_name="placebo_treatment_refuter", num_simulations=1))

INFO:dowhy.causal_refuters.placebo_treatment_refuter:Refutation over 1 simulated datasets of Random Data treatment
INFO:dowhy.causal_refuters.placebo_treatment_refuter:Using a Binomial Distribution with 1 trials and 0.5 probability of success
INFO:dowhy.causal_estimator:INFO: Using Linear Regression Estimator
INFO:dowhy.causal_estimator:b: y~placebo+W1+W0+placebo*X1+placebo*X0
WARNING:dowhy.causal_refuters.placebo_treatment_refuter:We assume a Normal Distribution as the sample has less than 100 examples.
                 Note: The underlying distribution may not be Normal. We assume that it approaches normal with the increase in sample size.

Out[15]:

'Refute: Use a Placebo Treatment\nEstimated effect:11.759597003788148\nNew effect:0.0185251357744729\np value:0.0\n'

In [ ]:

import statsmodels.formula.api as smf
res=smf.ols(formula="y~v0+W0+W1+W2+W3", data=df).fit()
res.summary()

In [ ]:

lm = causal_estimate.estimator._linear_model
print(lm.pvalues)
print(type(lm.conf_int()))
print(lm.bse)
lm.summary()

In [ ]:

print(causal_estimate.get_confidence_intervals(confidence_level =0.9))

In [ ]:

print(causal_estimate.get_standard_error())

In [ ]:

print(causal_estimate.test_stat_significance())

In [ ]: