import dowhy.datasets
from dowhy.do_samplers.kernel_density_sampler import KernelDensitySampler
from dowhy.do_why import CausalModel
from dowhy.api.causal_data_frame import CausalDataFrame
import numpy as np
import pandas as pd
from statsmodels.api import OLS
# Simulate a linear dataset: 1000 samples, one common cause, no instruments,
# and a binary treatment. beta=5 is presumably the true effect of the
# treatment on the outcome -- confirm against dowhy.datasets.linear_dataset.
data = dowhy.datasets.linear_dataset(
    beta=5,
    num_common_causes=1,
    num_instruments=0,
    num_samples=1000,
    treatment_is_binary=True,
)

# Store an explicit DOT graph: X0 points at both v and y, and v points at y.
data['dot_graph'] = 'digraph { v ->y;X0-> v;X0-> y;}'

df = data['df']
# Without noise, the variance in Y|X, Z is zero, and mcmc fails.
df['y'] = df['y'] + np.random.normal(size=len(df))

cdf = CausalDataFrame(df)

# Draw a do-sample with the MCMC sampler, adjusting for X0, then bar-plot the
# mean of y within each value of v.
# NOTE(review): 'b' / 'c' presumably mark binary / continuous variables, and
# keep_original_treatment=True presumably keeps the observed v values instead
# of forcing v=1 -- confirm both against the DoWhy pandas API docs.
(
    cdf.causal.do(
        x={'v': 1},
        variable_types={'v': 'b', 'y': 'c', 'X0': 'c'},
        outcome='y',
        method='mcmc',
        common_causes=['X0'],
        keep_original_treatment=True,
        proceed_when_unidentifiable=True,
    )
    .groupby('v')
    .mean()
    .plot(y='y', kind='bar')
)
WARNING:dowhy.do_why:Causal Graph not provided. DoWhy will construct a graph based on data inputs.
INFO:dowhy.causal_identifier:Common causes of treatment and outcome:{'U', 'X0'}
INFO:dowhy.causal_identifier:Instrumental variables for treatment and outcome:[]
['X0']
yes
{'observed': 'yes'}
Model to find the causal effect of treatment v on outcome y
{'label': 'Unobserved Confounders', 'observed': 'no'}
All common causes are observed. Causal effect can be identified.
McmcSampler
INFO:dowhy.do_sampler:Using McmcSampler for do sampling.
treatments ['v']
backdoor ['X0']
INFO:pymc3:Auto-assigning NUTS sampler...
INFO:pymc3:Initializing NUTS using jitter+adapt_diag...
INFO:pymc3:Multiprocess sampling (4 chains in 4 jobs)
INFO:pymc3:NUTS: [y_sd, beta_y, v_sd, beta_v]
Sampling 4 chains: 100%|██████████| 8000/8000 [00:05<00:00, 1550.20draws/s]
<matplotlib.axes._subplots.AxesSubplot at 0x7feaac626550>
# Start from a fresh causal data frame wrapping the same noisy data.
cdf = CausalDataFrame(df)

# Sample under do(v=1), describing the causal structure with the explicit DOT
# graph rather than a list of common causes.
cdf_1 = cdf.causal.do(
    x={'v': 1},
    variable_types={'v': 'b', 'y': 'c', 'X0': 'c'},
    outcome='y',
    method='mcmc',
    dot_graph=data['dot_graph'],
    proceed_when_unidentifiable=True,
)

# Same request for do(v=0).
# NOTE(review): use_previous_sampler=True presumably reuses the sampler fitted
# by the call above instead of refitting -- confirm against the DoWhy API.
cdf_0 = cdf.causal.do(
    x={'v': 0},
    variable_types={'v': 'b', 'y': 'c', 'X0': 'c'},
    outcome='y',
    method='mcmc',
    dot_graph=data['dot_graph'],
    proceed_when_unidentifiable=True,
    use_previous_sampler=True,
)
INFO:dowhy.causal_identifier:Common causes of treatment and outcome:{'U', 'X0'}
INFO:dowhy.causal_identifier:Instrumental variables for treatment and outcome:[]
INFO:dowhy.do_sampler:Using McmcSampler for do sampling.
Error: Pygraphviz cannot be loaded. No module named 'pygraphviz'
Trying pydot ...
['X0']
yes
{'observed': 'yes'}
Model to find the causal effect of treatment v on outcome y
{'label': 'Unobserved Confounders', 'observed': 'no'}
All common causes are observed. Causal effect can be identified.
McmcSampler
treatments ['v']
backdoor ['X0']
INFO:pymc3:Auto-assigning NUTS sampler...
INFO:pymc3:Initializing NUTS using jitter+adapt_diag...
INFO:pymc3:Multiprocess sampling (4 chains in 4 jobs)
INFO:pymc3:NUTS: [y_sd, beta_y, v_sd, beta_v]
Sampling 4 chains: 100%|██████████| 8000/8000 [00:05<00:00, 1479.09draws/s]
INFO:dowhy.causal_identifier:Common causes of treatment and outcome:{'U', 'X0'}
INFO:dowhy.causal_identifier:Instrumental variables for treatment and outcome:[]
{'label': 'Unobserved Confounders', 'observed': 'no'}
All common causes are observed. Causal effect can be identified.
McmcSampler
# Display the sample drawn with x={'v': 0}.
cdf_0
| | X0 | v | y |
|---|---|---|---|
| 0 | -0.679571 | 0 | -0.540989 |
| 1 | -0.243537 | 0 | -0.816734 |
| 2 | -0.195101 | 0 | -0.420884 |
| 3 | 0.923288 | 0 | 0.543097 |
| 4 | 0.389773 | 0 | 2.000232 |
| 5 | 0.345340 | 0 | -0.235523 |
| 6 | 0.539989 | 0 | -0.191927 |
| 7 | 1.294383 | 0 | 1.841164 |
| 8 | -0.557656 | 0 | -1.179258 |
| 9 | -0.581319 | 0 | -1.585532 |
| 10 | 0.089578 | 0 | 0.371095 |
| 11 | 0.826961 | 0 | 3.333531 |
| 12 | -0.671221 | 0 | -2.539698 |
| 13 | 0.986791 | 0 | 1.801363 |
| 14 | 1.594109 | 0 | 2.040565 |
| 15 | -0.245527 | 0 | -0.289327 |
| 16 | 0.178187 | 0 | 0.141852 |
| 17 | 2.024924 | 0 | 2.980681 |
| 18 | -1.760241 | 0 | -3.197609 |
| 19 | 1.911770 | 0 | 4.160535 |
| 20 | 0.411533 | 0 | 0.891029 |
| 21 | -1.601890 | 0 | -3.900897 |
| 22 | -0.779602 | 0 | -3.024336 |
| 23 | 2.117118 | 0 | 3.186386 |
| 24 | 0.515388 | 0 | 1.042302 |
| 25 | 0.195795 | 0 | -1.043282 |
| 26 | 0.112839 | 0 | 0.106886 |
| 27 | 0.508712 | 0 | -0.138736 |
| 28 | 1.449329 | 0 | 2.797328 |
| 29 | 0.948285 | 0 | 2.310356 |
| ... | ... | ... | ... |
| 970 | 0.733012 | 0 | 2.388490 |
| 971 | -0.970542 | 0 | -2.013554 |
| 972 | 1.451770 | 0 | 3.385053 |
| 973 | 0.086223 | 0 | -0.224399 |
| 974 | 0.062156 | 0 | -0.295354 |
| 975 | 1.178053 | 0 | 1.891484 |
| 976 | 0.038045 | 0 | 0.491061 |
| 977 | -0.420454 | 0 | -1.158550 |
| 978 | 0.498168 | 0 | 0.916308 |
| 979 | 0.179320 | 0 | 1.385915 |
| 980 | -1.081955 | 0 | -4.169600 |
| 981 | 0.792726 | 0 | 0.799942 |
| 982 | 0.817159 | 0 | 1.660682 |
| 983 | 0.277124 | 0 | 0.915327 |
| 984 | 1.139441 | 0 | 2.971099 |
| 985 | 2.636305 | 0 | 4.528907 |
| 986 | 0.849081 | 0 | 0.664671 |
| 987 | 2.231701 | 0 | 4.656510 |
| 988 | 1.065030 | 0 | 2.355032 |
| 989 | -0.134438 | 0 | 0.298530 |
| 990 | -2.540774 | 0 | -5.619055 |
| 991 | -0.368138 | 0 | -0.208480 |
| 992 | 1.050256 | 0 | 1.603709 |
| 993 | 0.669631 | 0 | 0.961663 |
| 994 | -0.508734 | 0 | -0.771802 |
| 995 | -0.103255 | 0 | -0.251265 |
| 996 | -0.906700 | 0 | -2.711775 |
| 997 | 0.156403 | 0 | 0.602863 |
| 998 | -0.276539 | 0 | -0.986462 |
| 999 | 0.608260 | 0 | 0.644999 |
1000 rows × 3 columns
# Display the sample drawn with x={'v': 1}.
cdf_1
| | X0 | v | y |
|---|---|---|---|
| 0 | -0.679571 | 1 | 3.545042 |
| 1 | -0.243537 | 1 | 4.665001 |
| 2 | -0.195101 | 1 | 4.499220 |
| 3 | 0.923288 | 1 | 6.673522 |
| 4 | 0.389773 | 1 | 5.976617 |
| 5 | 0.345340 | 1 | 5.553557 |
| 6 | 0.539989 | 1 | 6.375131 |
| 7 | 1.294383 | 1 | 7.499560 |
| 8 | -0.557656 | 1 | 3.655596 |
| 9 | -0.581319 | 1 | 4.275826 |
| 10 | 0.089578 | 1 | 5.298058 |
| 11 | 0.826961 | 1 | 6.531107 |
| 12 | -0.671221 | 1 | 3.541304 |
| 13 | 0.986791 | 1 | 7.004788 |
| 14 | 1.594109 | 1 | 7.981339 |
| 15 | -0.245527 | 1 | 4.159227 |
| 16 | 0.178187 | 1 | 5.359598 |
| 17 | 2.024924 | 1 | 9.260993 |
| 18 | -1.760241 | 1 | 1.503151 |
| 19 | 1.911770 | 1 | 8.763278 |
| 20 | 0.411533 | 1 | 6.014290 |
| 21 | -1.601890 | 1 | 1.895632 |
| 22 | -0.779602 | 1 | 3.509766 |
| 23 | 2.117118 | 1 | 9.529310 |
| 24 | 0.515388 | 1 | 5.812637 |
| 25 | 0.195795 | 1 | 5.080119 |
| 26 | 0.112839 | 1 | 5.088389 |
| 27 | 0.508712 | 1 | 5.831991 |
| 28 | 1.449329 | 1 | 8.066816 |
| 29 | 0.948285 | 1 | 6.386133 |
| ... | ... | ... | ... |
| 970 | 0.733012 | 1 | 6.507144 |
| 971 | -0.970542 | 1 | 3.359742 |
| 972 | 1.451770 | 1 | 7.869123 |
| 973 | 0.086223 | 1 | 4.587526 |
| 974 | 0.062156 | 1 | 5.341651 |
| 975 | 1.178053 | 1 | 7.150807 |
| 976 | 0.038045 | 1 | 4.709112 |
| 977 | -0.420454 | 1 | 3.717059 |
| 978 | 0.498168 | 1 | 5.969144 |
| 979 | 0.179320 | 1 | 5.106763 |
| 980 | -1.081955 | 1 | 3.072383 |
| 981 | 0.792726 | 1 | 6.142553 |
| 982 | 0.817159 | 1 | 6.614257 |
| 983 | 0.277124 | 1 | 5.478525 |
| 984 | 1.139441 | 1 | 7.124206 |
| 985 | 2.636305 | 1 | 9.761611 |
| 986 | 0.849081 | 1 | 6.316136 |
| 987 | 2.231701 | 1 | 9.592429 |
| 988 | 1.065030 | 1 | 6.767247 |
| 989 | -0.134438 | 1 | 4.681228 |
| 990 | -2.540774 | 1 | 0.112361 |
| 991 | -0.368138 | 1 | 4.395120 |
| 992 | 1.050256 | 1 | 7.209847 |
| 993 | 0.669631 | 1 | 6.251246 |
| 994 | -0.508734 | 1 | 4.251908 |
| 995 | -0.103255 | 1 | 5.060815 |
| 996 | -0.906700 | 1 | 3.055053 |
| 997 | 0.156403 | 1 | 5.534543 |
| 998 | -0.276539 | 1 | 4.520962 |
| 999 | 0.608260 | 1 | 6.146062 |
1000 rows × 3 columns
# Repeat the do(v=1) / do(v=0) pair, this time naming the common cause
# directly instead of passing a DOT graph.
# use_previous_sampler=False is passed explicitly on the first call.
# NOTE(review): presumably this forces a new sampler to be fitted even though
# `cdf` was already used for sampling -- confirm against the DoWhy API.
cdf_1 = cdf.causal.do(
    x={'v': 1},
    variable_types={'v': 'b', 'y': 'c', 'X0': 'c'},
    outcome='y',
    method='mcmc',
    common_causes=['X0'],
    proceed_when_unidentifiable=True,
    use_previous_sampler=False,
)

# Second call asks for the previous sampler to be used for do(v=0).
cdf_0 = cdf.causal.do(
    x={'v': 0},
    variable_types={'v': 'b', 'y': 'c', 'X0': 'c'},
    outcome='y',
    method='mcmc',
    common_causes=['X0'],
    proceed_when_unidentifiable=True,
    use_previous_sampler=True,
)
WARNING:dowhy.do_why:Causal Graph not provided. DoWhy will construct a graph based on data inputs.
INFO:dowhy.causal_identifier:Common causes of treatment and outcome:{'U', 'X0'}
INFO:dowhy.causal_identifier:Instrumental variables for treatment and outcome:[]
INFO:dowhy.do_sampler:Using McmcSampler for do sampling.
['X0']
yes
{'observed': 'yes'}
Model to find the causal effect of treatment v on outcome y
{'label': 'Unobserved Confounders', 'observed': 'no'}
All common causes are observed. Causal effect can be identified.
McmcSampler
treatments ['v']
backdoor ['X0']
INFO:pymc3:Auto-assigning NUTS sampler...
INFO:pymc3:Initializing NUTS using jitter+adapt_diag...
INFO:pymc3:Multiprocess sampling (4 chains in 4 jobs)
INFO:pymc3:NUTS: [y_sd, beta_y, v_sd, beta_v]
Sampling 4 chains: 100%|██████████| 8000/8000 [00:08<00:00, 919.62draws/s]
INFO:dowhy.causal_identifier:Common causes of treatment and outcome:{'U', 'X0'}
INFO:dowhy.causal_identifier:Instrumental variables for treatment and outcome:[]
{'label': 'Unobserved Confounders', 'observed': 'no'}
All common causes are observed. Causal effect can be identified.
McmcSampler
# Mean of the row-wise difference in y between the x={'v': 1} sample and the
# x={'v': 0} sample.
(cdf_1['y'] - cdf_0['y']).mean()
# 1.96 times the standard error of that difference (std / sqrt(n)); 1.96 is
# the usual normal-approximation multiplier for a 95% interval.
1.96*(cdf_1['y'] - cdf_0['y']).std() / np.sqrt(len(cdf))
# Ordinary least squares of y on X0 and v, presumably as a comparison for the
# do-sampler estimate. Only the X0 and v columns are passed as regressors; no
# constant column is added here.
model = OLS(endog=df['y'], exog=df[['X0', 'v']])
result = model.fit()
result.summary()
# Display the two samples again; at this point they hold the results of the
# most recent cdf_1 / cdf_0 assignments.
cdf_1
cdf_0
# Draw one more MCMC do-sample, this time with keep_original_treatment=True.
# NOTE(review): presumably this keeps each row's observed v rather than
# setting it to the value given in x -- confirm against the DoWhy API.
cdf_do = cdf.causal.do(
    x={'v': 0},
    variable_types={'v': 'b', 'y': 'c', 'X0': 'c'},
    outcome='y',
    method='mcmc',
    common_causes=['X0'],
    proceed_when_unidentifiable=True,
    keep_original_treatment=True,
)

# Display the resulting sample.
cdf_do