import os
import warnings
warnings.filterwarnings('ignore')
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from causalml.inference.meta import BaseSRegressor, BaseTRegressor, BaseXRegressor, BaseRRegressor
from causalml.dataset.regression import synthetic_data
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.tree import DecisionTreeRegressor
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
import shap
import matplotlib.pyplot as plt
import time
from sklearn.inspection import permutation_importance
from sklearn.model_selection import train_test_split
# Environment workaround requested by the original author for lightgbm.
# NOTE(review): presumably silences the duplicate-OpenMP-runtime check; it is set
# after lightgbm/xgboost were imported above — confirm that ordering is sufficient.
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True' # for lightgbm to work
# IPython magics (only valid inside a notebook/IPython session, not plain Python):
# enable automatic module reloading and inline rendering of matplotlib figures.
%reload_ext autoreload
%autoreload 2
%matplotlib inline
# Use the 'fivethirtyeight' matplotlib style for every figure drawn below.
plt.style.use('fivethirtyeight')
# Size of the simulated dataset: 10,000 rows with 25 covariates.
n_samples = 10000
n_features = 25

# Simulate the data. Presumably (per the causalml docs) the six arrays are the
# outcome, covariates, binary treatment flag, true effect, expected outcome and
# propensity score — confirm against the library documentation.
y, X, w, tau, b, e = synthetic_data(mode=1, n=n_samples, p=n_features, sigma=0.5)

# String treatment labels: rows with w == 1 become 'treatment_A', all others 'control'.
w_multi = np.where(w == 1, 'treatment_A', 'control')

# Propensity scores keyed by treatment-group name (passed as `p=` to the X- and R-learners).
e_multi = {'treatment_A': e}

# Human-readable names for the 25 covariate columns, in column order.
feature_names = [
    'stars', 'tiger', 'merciful', 'quixotic', 'fireman',
    'dependent', 'shelf', 'touch', 'barbarous', 'clammy',
    'playground', 'rain', 'offer', 'cute', 'future',
    'damp', 'nonchalant', 'change', 'rigid', 'sweltering',
    'eight', 'wrap', 'lethal', 'adhesive', 'lip',
]
# Regressor that is fit on the estimated treatment effects in the hand-written
# sklearn permutation-importance cells further down.
# NOTE(review): in the visible code model_tau is never passed to
# get_importance/plot_importance; those calls use whatever default the library picks.
model_tau = LGBMRegressor(importance_type='gain') # specify model for model_tau
# Base learner wrapped by the S-learner; the commented lines are alternative choices.
base_algo = LGBMRegressor()
# base_algo = XGBRegressor()
# base_algo = RandomForestRegressor()
# base_algo = LinearRegression()
# S-learner meta-model; rows labelled 'control' in w_multi form the control group.
slearner = BaseSRegressor(base_algo, control_name='control')
# Display the average treatment effect estimate (a one-element array in the output below).
slearner.estimate_ate(X, w_multi, y)
array([0.580055])
# Fit the S-learner and keep its treatment-effect predictions; they are the
# `tau` target for every interpretation call below.
slearner_tau = slearner.fit_predict(X, w_multi, y)

# Feature importance (method = auto)
# The displayed result is a dict keyed by treatment group ('treatment_A') whose
# value is a pandas Series of per-feature scores, largest first.
slearner.get_importance(
    X=X,
    tau=slearner_tau,
    normalize=True,
    method='auto',
    features=feature_names,
)
{'treatment_A': tiger 0.444429
stars 0.403077
quixotic 0.075380
merciful 0.035531
fireman 0.034485
touch 0.001058
nonchalant 0.000617
cute 0.000483
barbarous 0.000423
wrap 0.000414
lethal 0.000410
shelf 0.000403
rigid 0.000358
change 0.000352
sweltering 0.000299
damp 0.000288
lip 0.000279
rain 0.000278
clammy 0.000241
playground 0.000240
eight 0.000219
dependent 0.000198
future 0.000198
adhesive 0.000189
offer 0.000149
dtype: float64}
# Bar chart of the 'auto' importances computed with the same arguments as above.
slearner.plot_importance(
    X=X,
    tau=slearner_tau,
    normalize=True,
    method='auto',
    features=feature_names,
)

# Feature importance (method = permutation)
# random_state is fixed so the permutation scores are repeatable between runs.
slearner.get_importance(
    X=X,
    tau=slearner_tau,
    method='permutation',
    features=feature_names,
    random_state=42,
)
{'treatment_A': tiger 0.926826
stars 0.890637
quixotic 0.152119
fireman 0.067351
merciful 0.065483
touch 0.000955
cute 0.000355
wrap 0.000243
nonchalant 0.000172
shelf 0.000122
change 0.000102
playground 0.000094
offer 0.000068
sweltering 0.000068
lethal 0.000040
future 0.000020
adhesive 0.000014
lip 0.000012
rigid -0.000012
barbarous -0.000013
dependent -0.000021
damp -0.000025
rain -0.000047
eight -0.000060
clammy -0.000266
dtype: float64}
# Time the built-in permutation importance. perf_counter() is a monotonic,
# high-resolution clock meant for measuring intervals; time.time() can jump
# if the system clock is adjusted.
start_time = time.perf_counter()
slearner.get_importance(
    X=X,
    tau=slearner_tau,
    method='permutation',
    features=feature_names,
    random_state=42,
)
print("Elapsed time: %s seconds" % (time.perf_counter() - start_time))
Elapsed time: 0.7701990604400635 seconds
# Bar chart of the permutation importances (same arguments as get_importance above).
slearner.plot_importance(
    X=X,
    tau=slearner_tau,
    method='permutation',
    features=feature_names,
    random_state=42,
)

# Feature importance (sklearn.inspection.permutation_importance)
# Reproduce the permutation importance by hand: the regression target is the
# estimated treatment effect (slearner_tau), not the outcome y.
start_time = time.perf_counter()  # monotonic clock for interval timing

# 70/30 split of covariates and treatment-effect estimates.
X_train, X_test, y_train, y_test = train_test_split(
    X, slearner_tau, test_size=0.3, random_state=42
)
model_tau_fit = model_tau.fit(X_train, y_train)

# Mean score drop over the permutation repeats, evaluated on the held-out part.
perm_imp_test = permutation_importance(
    estimator=model_tau_fit,
    X=X_test,
    y=y_test,
    random_state=42,
).importances_mean

# Result is discarded here; presumably kept inside the timed region so the
# measured work is comparable to get_importance — confirm before removing.
pd.Series(perm_imp_test, feature_names).sort_values(ascending=False)
print("Elapsed time: %s seconds" % (time.perf_counter() - start_time))
Elapsed time: 0.6549179553985596 seconds
# Display the held-out permutation importances labelled by feature name, largest first.
pd.Series(perm_imp_test, feature_names).sort_values(ascending=False)
tiger 0.926826 stars 0.890637 quixotic 0.152119 fireman 0.067351 merciful 0.065483 touch 0.000955 cute 0.000355 wrap 0.000243 nonchalant 0.000172 shelf 0.000122 change 0.000102 playground 0.000094 offer 0.000068 sweltering 0.000068 lethal 0.000040 future 0.000020 adhesive 0.000014 lip 0.000012 rigid -0.000012 barbarous -0.000013 dependent -0.000021 damp -0.000025 rain -0.000047 eight -0.000060 clammy -0.000266 dtype: float64
# Horizontal bar chart of the held-out permutation importances, sorted ascending.
pd.Series(perm_imp_test, feature_names).sort_values().plot(kind='barh', figsize=(12, 8))
plt.title('Test Set Permutation Importances')
Text(0.5, 1.0, 'Test Set Permutation Importances')
# Same permutation importance, but evaluated on the rows model_tau was fit on,
# for comparison with the held-out scores above.
perm_imp_train = permutation_importance(
estimator=model_tau_fit,
X=X_train,
y=y_train,
random_state=42).importances_mean
# Display the training-set scores labelled by feature name, largest first.
pd.Series(perm_imp_train, feature_names).sort_values(ascending=False)
tiger 0.932121 stars 0.914533 quixotic 0.150311 fireman 0.070200 merciful 0.068532 change 0.002079 touch 0.002075 wrap 0.001549 barbarous 0.001534 nonchalant 0.001475 lethal 0.001265 shelf 0.001234 cute 0.001149 lip 0.001064 eight 0.001002 clammy 0.000985 damp 0.000899 sweltering 0.000822 playground 0.000816 rain 0.000799 adhesive 0.000706 offer 0.000696 rigid 0.000646 dependent 0.000599 future 0.000571 dtype: float64
# Horizontal bar chart of the training-set permutation importances, sorted ascending.
pd.Series(perm_imp_train, feature_names).sort_values().plot(kind='barh', figsize=(12, 8))
plt.title('Training Set Permutation Importances')
Text(0.5, 1.0, 'Training Set Permutation Importances')
# SHAP values for the S-learner's treatment-effect estimates. The displayed result
# is a dict keyed by treatment group ('treatment_A') holding a 2-D array.
shap_slearner = slearner.get_shap_values(X=X, tau=slearner_tau)
shap_slearner
{'treatment_A': array([[ 1.63713247e-01, -1.22875268e-01, 1.29921780e-02, ...,
4.77270166e-04, 8.78739522e-05, 4.47276414e-04],
[ 1.07458801e-01, 1.68045837e-01, -5.38344956e-02, ...,
1.48115054e-04, -4.79806857e-04, 3.88284913e-04],
[ 1.52838059e-01, 4.27213750e-02, -2.98603942e-02, ...,
-1.93728476e-03, 4.20645535e-04, 1.67141631e-04],
...,
[ 2.10088221e-01, 2.58206832e-01, 1.36663409e-02, ...,
-2.33767724e-05, -5.13345087e-05, -1.09709454e-04],
[ 1.43487246e-01, -2.19807727e-01, 1.57069559e-02, ...,
9.23352141e-05, 8.12460244e-05, 2.27814358e-04],
[ 1.39723722e-01, 9.35043735e-02, 2.40665358e-02, ...,
-1.16384901e-05, 1.65545219e-04, 1.08785894e-04]])}
# Mean absolute SHAP value per column (averaged over rows): one number per feature.
np.mean(np.abs(shap_slearner['treatment_A']),axis=0)
array([0.13124822, 0.13219818, 0.02059032, 0.03700333, 0.01741701,
0.00032722, 0.00081558, 0.00268428, 0.00085818, 0.00024359,
0.00090127, 0.00081984, 0.00034884, 0.00067194, 0.00082475,
0.00068499, 0.0007342 , 0.00130267, 0.00042108, 0.00045903,
0.00049161, 0.00090679, 0.00049228, 0.00043198, 0.00056552])
# SHAP summary plot computed from X and tau (no precomputed shap_dict supplied).
slearner.plot_shap_values(X=X, tau=slearner_tau, features=feature_names)
# SHAP summary plot that reuses the precomputed shap_slearner values.
# NOTE(review): no feature names are passed here, so the axis labels may be generic — confirm.
slearner.plot_shap_values(shap_dict=shap_slearner)
# Dependence plot for column index 1 ('tiger' in feature_names).
# interaction_idx=None: no color coding for interaction effects.
slearner.plot_shap_dependence(treatment_group='treatment_A',
feature_idx=1,
X=X,
tau=slearner_tau,
interaction_idx=None,
shap_dict=shap_slearner)
# Same feature selected by name; this requires passing features=feature_names.
# interaction_idx='auto' searches for the feature with the greatest approximate interaction.
slearner.plot_shap_dependence(treatment_group='treatment_A',
feature_idx='tiger',
X=X,
tau=slearner_tau,
interaction_idx='auto',
shap_dict=shap_slearner,
features=feature_names)
# interaction_idx fixed to column index 10 ('playground' in feature_names).
slearner.plot_shap_dependence(treatment_group='treatment_A',
feature_idx=1,
X=X,
tau=slearner_tau,
interaction_idx=10,
shap_dict=shap_slearner,
features=feature_names)
# T-learner meta-model built on a fresh LGBMRegressor; 'control' rows are the control group.
tlearner = BaseTRegressor(LGBMRegressor(), control_name='control')
# Displays a 3-tuple of one-element arrays; presumably (estimate, lower bound,
# upper bound) of the average treatment effect — confirm against the causalml docs.
tlearner.estimate_ate(X, w_multi, y)
(array([0.58065275]), array([0.56617032]), array([0.59513517]))
# Fit the T-learner and keep its treatment-effect predictions as the `tau` target.
tlearner_tau = tlearner.fit_predict(X, w_multi, y)

# Feature importance (method = auto)
tlearner.get_importance(
    X=X,
    tau=tlearner_tau,
    normalize=True,
    method='auto',
    features=feature_names,
)
{'treatment_A': tiger 0.340037
stars 0.294991
quixotic 0.053629
merciful 0.051621
fireman 0.035279
change 0.021800
touch 0.018210
eight 0.013965
barbarous 0.013783
nonchalant 0.013619
cute 0.013341
wrap 0.013082
dependent 0.012694
offer 0.010826
clammy 0.010642
adhesive 0.009580
rain 0.009102
lethal 0.009055
sweltering 0.008511
future 0.008493
playground 0.008492
rigid 0.008428
damp 0.007731
shelf 0.007184
lip 0.005906
dtype: float64}
# Bar chart of the 'auto' importances for the T-learner.
tlearner.plot_importance(
    X=X,
    tau=tlearner_tau,
    normalize=True,
    method='auto',
    features=feature_names,
)

# Feature importance (method = permutation)
# random_state is fixed so the permutation scores are repeatable between runs.
tlearner.get_importance(
    X=X,
    tau=tlearner_tau,
    method='permutation',
    features=feature_names,
    random_state=42,
)
{'treatment_A': tiger 0.542145
stars 0.467867
merciful 0.052725
quixotic 0.052160
fireman 0.028023
change 0.016190
touch 0.010718
nonchalant 0.008626
eight 0.007869
dependent 0.007420
wrap 0.006913
offer 0.005693
clammy 0.005449
playground 0.005034
lethal 0.004923
cute 0.004634
future 0.004429
sweltering 0.004285
barbarous 0.004236
shelf 0.003853
damp 0.003703
rain 0.003322
adhesive 0.002478
lip 0.001463
rigid -0.000175
dtype: float64}
# Bar chart of the permutation importances for the T-learner.
tlearner.plot_importance(
    X=X,
    tau=tlearner_tau,
    method='permutation',
    features=feature_names,
    random_state=42,
)

# Feature importance (sklearn.inspection.permutation_importance)
# Hand-written equivalent: the regression target is tlearner_tau, not the outcome y.
start_time = time.perf_counter()  # monotonic clock for interval timing

# 70/30 split of covariates and treatment-effect estimates.
X_train, X_test, y_train, y_test = train_test_split(
    X, tlearner_tau, test_size=0.3, random_state=42
)
model_tau_fit = model_tau.fit(X_train, y_train)

# Mean score drop over the permutation repeats, evaluated on the held-out part.
perm_imp_test = permutation_importance(
    estimator=model_tau_fit,
    X=X_test,
    y=y_test,
    random_state=42,
).importances_mean

# Result is discarded here; presumably kept inside the timed region so the
# measured work is comparable to get_importance — confirm before removing.
pd.Series(perm_imp_test, feature_names).sort_values(ascending=False)
print("Elapsed time: %s seconds" % (time.perf_counter() - start_time))
Elapsed time: 2.6981277465820312 seconds
# Display the T-learner's held-out permutation importances by feature name, largest first.
pd.Series(perm_imp_test, feature_names).sort_values(ascending=False)
tiger 0.542145 stars 0.467867 merciful 0.052725 quixotic 0.052160 fireman 0.028023 change 0.016190 touch 0.010718 nonchalant 0.008626 eight 0.007869 dependent 0.007420 wrap 0.006913 offer 0.005693 clammy 0.005449 playground 0.005034 lethal 0.004923 cute 0.004634 future 0.004429 sweltering 0.004285 barbarous 0.004236 shelf 0.003853 damp 0.003703 rain 0.003322 adhesive 0.002478 lip 0.001463 rigid -0.000175 dtype: float64
# Horizontal bar chart of the T-learner's held-out permutation importances, sorted ascending.
pd.Series(perm_imp_test, feature_names).sort_values().plot(kind='barh', figsize=(12, 8))
plt.title('Test Set Permutation Importances')
Text(0.5, 1.0, 'Test Set Permutation Importances')
# SHAP values for the T-learner's treatment-effect estimates; displayed as a dict
# keyed by treatment group ('treatment_A') holding a 2-D array.
shap_tlearner = tlearner.get_shap_values(X=X, tau=tlearner_tau)
shap_tlearner
{'treatment_A': array([[ 2.23895567e-01, -6.98016830e-02, 6.66056009e-02, ...,
3.42849501e-05, -4.89155636e-03, -1.94508501e-03],
[ 8.29988012e-02, 1.83765218e-01, -5.90301935e-02, ...,
1.09675786e-03, 2.33757682e-03, -1.79593092e-03],
[ 2.33977934e-01, 4.80451533e-02, 5.10472659e-03, ...,
-5.19230274e-03, 1.69935484e-02, -1.86346890e-03],
...,
[ 2.23726810e-01, 2.83999125e-01, 1.05957214e-02, ...,
-4.40564369e-03, 3.63104522e-03, -7.79595271e-04],
[ 1.82598989e-01, -1.86008617e-01, 2.21095145e-02, ...,
-2.08611140e-03, 9.31929067e-03, 4.51260084e-03],
[ 8.28411269e-02, 4.95341098e-02, 2.08908666e-02, ...,
-1.20010459e-04, -2.08475022e-03, -2.78406599e-03]])}
# SHAP summary plot computed from X and tau (no precomputed shap_dict supplied).
tlearner.plot_shap_values(X=X, tau=tlearner_tau, features=feature_names)
# SHAP summary plot that reuses the precomputed shap_tlearner values.
# NOTE(review): no feature names are passed here, so the axis labels may be generic — confirm.
tlearner.plot_shap_values(shap_dict=shap_tlearner)
# X-learner meta-model built on a fresh LGBMRegressor; 'control' rows are the control group.
xlearner = BaseXRegressor(LGBMRegressor(), control_name='control')
# Unlike the S/T-learner calls, the propensity dict e_multi is supplied via `p=`.
# Displays a 3-tuple of one-element arrays; presumably (estimate, lower bound,
# upper bound) of the average treatment effect — confirm against the causalml docs.
xlearner.estimate_ate(X, w_multi, y, p=e_multi)
(array([0.52635073]), array([0.51271831]), array([0.53998316]))
# Treatment-effect predictions from the X-learner.
# NOTE(review): this calls predict(), not fit_predict() like the other learners;
# presumably it relies on the fit done by the preceding estimate_ate call — confirm.
xlearner_tau = xlearner.predict(X, w_multi, y, p=e_multi)

# Feature importance (method = auto)
xlearner.get_importance(
    X=X,
    tau=xlearner_tau,
    normalize=True,
    method='auto',
    features=feature_names,
)
{'treatment_A': tiger 0.413157
stars 0.359466
merciful 0.029364
change 0.021216
quixotic 0.017347
touch 0.016293
fireman 0.013874
eight 0.013647
adhesive 0.013040
barbarous 0.011566
damp 0.009279
clammy 0.008334
cute 0.008301
wrap 0.008288
dependent 0.007396
playground 0.006204
nonchalant 0.006183
rigid 0.006077
shelf 0.005574
sweltering 0.005481
offer 0.005114
lethal 0.005065
rain 0.003906
lip 0.003356
future 0.002472
dtype: float64}
# Bar chart of the 'auto' importances for the X-learner.
xlearner.plot_importance(
    X=X,
    tau=xlearner_tau,
    normalize=True,
    method='auto',
    features=feature_names,
)

# Feature importance (method = permutation)
# random_state is fixed so the permutation scores are repeatable between runs.
xlearner.get_importance(
    X=X,
    tau=xlearner_tau,
    method='permutation',
    features=feature_names,
    random_state=42,
)
{'treatment_A': tiger 0.790118
stars 0.657454
merciful 0.031689
quixotic 0.026451
change 0.024444
touch 0.019540
adhesive 0.012644
fireman 0.011609
eight 0.010880
damp 0.008791
barbarous 0.008573
nonchalant 0.007776
wrap 0.006515
dependent 0.005716
playground 0.005185
cute 0.004942
clammy 0.004570
rigid 0.004291
sweltering 0.003877
lethal 0.003752
shelf 0.003285
offer 0.002548
lip 0.001531
rain 0.000856
future 0.000621
dtype: float64}
# Bar chart of the permutation importances for the X-learner.
xlearner.plot_importance(
    X=X,
    tau=xlearner_tau,
    method='permutation',
    features=feature_names,
    random_state=42,
)

# Feature importance (sklearn.inspection.permutation_importance)
# Hand-written equivalent: the regression target is xlearner_tau, not the outcome y.
start_time = time.perf_counter()  # monotonic clock for interval timing

# 70/30 split of covariates and treatment-effect estimates.
X_train, X_test, y_train, y_test = train_test_split(
    X, xlearner_tau, test_size=0.3, random_state=42
)
model_tau_fit = model_tau.fit(X_train, y_train)

# Mean score drop over the permutation repeats, evaluated on the held-out part.
perm_imp_test = permutation_importance(
    estimator=model_tau_fit,
    X=X_test,
    y=y_test,
    random_state=42,
).importances_mean

# Result is discarded here; presumably kept inside the timed region so the
# measured work is comparable to get_importance — confirm before removing.
pd.Series(perm_imp_test, feature_names).sort_values(ascending=False)
print("Elapsed time: %s seconds" % (time.perf_counter() - start_time))
Elapsed time: 1.9017090797424316 seconds
# Display the X-learner's held-out permutation importances by feature name, largest first.
pd.Series(perm_imp_test, feature_names).sort_values(ascending=False)
tiger 0.790118 stars 0.657454 merciful 0.031689 quixotic 0.026451 change 0.024444 touch 0.019540 adhesive 0.012644 fireman 0.011609 eight 0.010880 damp 0.008791 barbarous 0.008573 nonchalant 0.007776 wrap 0.006515 dependent 0.005716 playground 0.005185 cute 0.004942 clammy 0.004570 rigid 0.004291 sweltering 0.003877 lethal 0.003752 shelf 0.003285 offer 0.002548 lip 0.001531 rain 0.000856 future 0.000621 dtype: float64
# Horizontal bar chart of the X-learner's held-out permutation importances, sorted ascending.
pd.Series(perm_imp_test, feature_names).sort_values().plot(kind='barh', figsize=(12, 8))
plt.title('Test Set Permutation Importances')
Text(0.5, 1.0, 'Test Set Permutation Importances')
# SHAP values for the X-learner's treatment-effect estimates; displayed as a dict
# keyed by treatment group ('treatment_A') holding a 2-D array.
shap_xlearner = xlearner.get_shap_values(X=X, tau=xlearner_tau)
shap_xlearner
{'treatment_A': array([[ 0.12076316, -0.05792336, 0.02906702, ..., 0.00665948,
-0.01039702, -0.00043187],
[ 0.03947314, 0.13257951, -0.0352313 , ..., 0.00135135,
-0.00162266, -0.00127716],
[ 0.13415539, 0.00655198, 0.00531122, ..., -0.00137835,
0.0283614 , -0.00483927],
...,
[ 0.23418565, 0.26383921, 0.00296239, ..., 0.0003731 ,
-0.00863325, -0.00110156],
[ 0.19065257, -0.14832322, 0.0125272 , ..., 0.00320624,
0.02214994, 0.00257151],
[ 0.05736145, 0.04083792, 0.01270311, ..., 0.00256949,
-0.00833282, 0.00071824]])}
# SHAP summary plot computed from X and tau (no precomputed shap_dict supplied).
xlearner.plot_shap_values(X=X, tau=xlearner_tau, features=feature_names)
# SHAP summary plot that reuses the precomputed shap_xlearner values.
# NOTE(review): no feature names are passed here, so the axis labels may be generic — confirm.
xlearner.plot_shap_values(shap_dict=shap_xlearner)
# R-learner meta-model built on a fresh LGBMRegressor; 'control' rows are the control group.
rlearner = BaseRRegressor(LGBMRegressor(), control_name='control')

# Fit with the propensity dict supplied via `p=` and keep the treatment-effect
# predictions as the `tau` target for the interpretation calls below.
rlearner_tau = rlearner.fit_predict(X, w_multi, y, p=e_multi)

# Feature importance (method = auto)
rlearner.get_importance(
    X=X,
    tau=rlearner_tau,
    normalize=True,
    method='auto',
    features=feature_names,
)
{'treatment_A': tiger 0.236119
stars 0.187231
merciful 0.048726
change 0.043941
touch 0.042460
playground 0.039960
cute 0.037151
offer 0.028047
barbarous 0.027472
sweltering 0.026418
adhesive 0.024631
clammy 0.022468
wrap 0.022425
quixotic 0.021160
nonchalant 0.021017
dependent 0.020474
lethal 0.019601
fireman 0.019547
lip 0.018586
damp 0.017167
shelf 0.017109
eight 0.016680
rigid 0.016609
future 0.012659
rain 0.012342
dtype: float64}
# Bar chart of the 'auto' importances for the R-learner. normalize=True is passed
# explicitly so this call matches the get_importance call it visualises and the
# equivalent S/T/X-learner plots.
rlearner.plot_importance(
    X=X,
    tau=rlearner_tau,
    normalize=True,
    method='auto',
    features=feature_names,
)

# Feature importance (method = permutation)
# random_state is fixed so the permutation scores are repeatable between runs.
rlearner.get_importance(
    X=X,
    tau=rlearner_tau,
    method='permutation',
    features=feature_names,
    random_state=42,
)
{'treatment_A': tiger 0.334549
stars 0.246910
merciful 0.039518
change 0.037940
touch 0.031415
playground 0.029544
cute 0.024985
adhesive 0.016695
sweltering 0.014847
barbarous 0.014077
nonchalant 0.011086
wrap 0.010270
fireman 0.010028
dependent 0.009341
quixotic 0.008749
rain 0.008382
lethal 0.008209
offer 0.007923
clammy 0.006927
lip 0.006815
shelf 0.006428
rigid 0.005010
eight 0.004826
damp 0.003911
future 0.003565
dtype: float64}
# Bar chart of the permutation importances for the R-learner.
rlearner.plot_importance(
    X=X,
    tau=rlearner_tau,
    method='permutation',
    features=feature_names,
    random_state=42,
)

# Feature importance (sklearn.inspection.permutation_importance)
# Hand-written equivalent: the regression target is rlearner_tau, not the outcome y.
start_time = time.perf_counter()  # monotonic clock for interval timing

# 70/30 split of covariates and treatment-effect estimates.
X_train, X_test, y_train, y_test = train_test_split(
    X, rlearner_tau, test_size=0.3, random_state=42
)
model_tau_fit = model_tau.fit(X_train, y_train)

# Mean score drop over the permutation repeats, evaluated on the held-out part.
perm_imp_test = permutation_importance(
    estimator=model_tau_fit,
    X=X_test,
    y=y_test,
    random_state=42,
).importances_mean

# Result is discarded here; presumably kept inside the timed region so the
# measured work is comparable to get_importance — confirm before removing.
pd.Series(perm_imp_test, feature_names).sort_values(ascending=False)
print("Elapsed time: %s seconds" % (time.perf_counter() - start_time))
Elapsed time: 1.03749418258667 seconds
# Display the R-learner's held-out permutation importances by feature name, largest first.
pd.Series(perm_imp_test, feature_names).sort_values(ascending=False)
tiger 0.334549 stars 0.246910 merciful 0.039518 change 0.037940 touch 0.031415 playground 0.029544 cute 0.024985 adhesive 0.016695 sweltering 0.014847 barbarous 0.014077 nonchalant 0.011086 wrap 0.010270 fireman 0.010028 dependent 0.009341 quixotic 0.008749 rain 0.008382 lethal 0.008209 offer 0.007923 clammy 0.006927 lip 0.006815 shelf 0.006428 rigid 0.005010 eight 0.004826 damp 0.003911 future 0.003565 dtype: float64
# Horizontal bar chart of the R-learner's held-out permutation importances, sorted ascending.
pd.Series(perm_imp_test, feature_names).sort_values().plot(kind='barh', figsize=(12, 8))
plt.title('Test Set Permutation Importances')
Text(0.5, 1.0, 'Test Set Permutation Importances')
# SHAP values for the R-learner's treatment-effect estimates; displayed as a dict
# keyed by treatment group ('treatment_A') holding a 2-D array.
shap_rlearner = rlearner.get_shap_values(X=X, tau=rlearner_tau)
shap_rlearner
{'treatment_A': array([[ 1.02894775e-01, -8.05449817e-02, 3.04355138e-02, ...,
9.06141245e-03, -2.65595398e-02, -7.18443444e-03],
[ 9.37017074e-03, 1.42539004e-01, -7.26851156e-02, ...,
-1.61938371e-03, -2.10699106e-03, -3.47964030e-03],
[ 1.78765111e-01, -8.01589569e-02, 4.60271178e-02, ...,
6.87169631e-03, 3.64934038e-02, -1.65044199e-03],
...,
[ 2.12922006e-01, 3.25138109e-01, 1.60017459e-02, ...,
9.47591720e-03, 1.82383527e-03, -1.77858376e-03],
[ 1.02421810e-01, -1.18504799e-01, 4.53390392e-03, ...,
6.80744483e-03, 1.47400244e-02, 2.87801715e-03],
[ 4.23080488e-02, 3.25438480e-02, 1.86015561e-02, ...,
1.82458857e-03, -2.34528945e-03, 2.89855741e-04]])}
# SHAP summary plot computed from X and tau (no precomputed shap_dict supplied).
rlearner.plot_shap_values(X=X, tau=rlearner_tau, features=feature_names)
# SHAP summary plot that reuses the precomputed shap_rlearner values.
# NOTE(review): no feature names are passed here, so the axis labels may be generic — confirm.
rlearner.plot_shap_values(shap_dict=shap_rlearner)