import os
import warnings
# Switch the working directory to the parent folder; relative paths used
# after this point resolve against it.
base_path = os.path.abspath("../")
os.chdir(base_path)
# Suppress every warning for the rest of the session.
warnings.filterwarnings('ignore')
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from causalml.inference.meta import BaseSRegressor, BaseTRegressor, BaseXRegressor, BaseRRegressor
from causalml.dataset.regression import synthetic_data
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.tree import DecisionTreeRegressor
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
import shap
import matplotlib.pyplot as plt
import time
from sklearn.inspection import permutation_importance
from sklearn.model_selection import train_test_split
# NOTE(review): this is set after lightgbm/xgboost were already imported
# above -- confirm the flag still takes effect at that point.
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True' # for lightgbm to work
# IPython magics: these three lines are only valid inside an IPython/Jupyter
# session, not in a plain Python script.
%reload_ext autoreload
%autoreload 2
%matplotlib inline
# Use matplotlib's 'fivethirtyeight' style sheet for the figures below.
plt.style.use('fivethirtyeight')
# Size of the simulated dataset.
n_features = 25
n_samples = 10000
# synthetic_data returns six values, unpacked as y, X, w, tau, b, e.
# NOTE(review): presumably outcome, covariates, treatment flag, true effect,
# expected outcome and propensity -- confirm against the causalml docs.
y, X, w, tau, b, e = synthetic_data(mode=1, n=n_samples, p=n_features, sigma=0.5)
# Relabel the treatment flag with strings: 1 -> 'treatment_A', everything
# else -> 'control'. np.where does this in a single vectorized pass rather
# than a Python-level loop over every sample.
w_multi = np.where(np.asarray(w) == 1, 'treatment_A', 'control')
# Same values as e, keyed by the treatment label used in w_multi.
e_multi = {'treatment_A': e}
# Display labels for the features: 25 entries, one per column requested via
# n_features. They are passed as `features=` to the importance/SHAP calls.
feature_names = [
    'stars', 'tiger', 'merciful', 'quixotic', 'fireman',
    'dependent', 'shelf', 'touch', 'barbarous', 'clammy',
    'playground', 'rain', 'offer', 'cute', 'future',
    'damp', 'nonchalant', 'change', 'rigid', 'sweltering',
    'eight', 'wrap', 'lethal', 'adhesive', 'lip',
]
# Regressor reporting gain-based importances; it is the estimator fitted on
# the predicted effects in the sklearn permutation-importance cells below.
model_tau = LGBMRegressor(importance_type='gain') # specify model for model_tau
# Base learner handed to the S-learner. The commented lines are alternative
# choices that can be swapped in.
base_algo = LGBMRegressor()
# base_algo = XGBRegressor()
# base_algo = RandomForestRegressor()
# base_algo = LinearRegression()
# 'control' is the label in w_multi that marks untreated rows.
slearner = BaseSRegressor(base_algo, control_name='control')
# Last expression of the cell: its return value is what the notebook shows.
slearner.estimate_ate(X, w_multi, y)
array([0.57652638])
# Keep the fit_predict result; it is passed as `tau` to every
# importance/SHAP call for the S-learner below.
slearner_tau = slearner.fit_predict(X, w_multi, y)

# Feature importance (method='auto')
# Returns a dict keyed by treatment label; each value is a Series of
# importances indexed by feature name.
slearner.get_importance(X=X,
                        tau=slearner_tau,
                        normalize=True,
                        method='auto',
                        features=feature_names)
{'treatment_A': tiger 0.466394
stars 0.405692
quixotic 0.056295
merciful 0.044424
fireman 0.019247
touch 0.001384
clammy 0.000819
adhesive 0.000480
future 0.000400
damp 0.000373
rigid 0.000369
playground 0.000366
eight 0.000353
lethal 0.000352
barbarous 0.000350
cute 0.000339
sweltering 0.000317
dependent 0.000315
change 0.000315
nonchalant 0.000307
lip 0.000269
wrap 0.000257
shelf 0.000216
offer 0.000194
rain 0.000175
dtype: float64}
# Plot counterpart of the method='auto' importances shown above.
slearner.plot_importance(X=X,
                         tau=slearner_tau,
                         normalize=True,
                         method='auto',
                         features=feature_names)

# Feature importance (method='permutation')
# random_state is pinned so repeated runs use the same seed.
slearner.get_importance(X=X,
                        tau=slearner_tau,
                        method='permutation',
                        features=feature_names,
                        random_state=42)
{'treatment_A': tiger 0.938470
stars 0.838396
quixotic 0.121493
merciful 0.086907
fireman 0.038455
touch 0.001722
dependent 0.000387
clammy 0.000204
adhesive 0.000197
playground 0.000175
barbarous 0.000065
rigid 0.000058
eight 0.000047
damp 0.000037
offer 0.000030
shelf 0.000024
cute 0.000021
lip 0.000020
rain 0.000010
future -0.000009
wrap -0.000045
change -0.000050
nonchalant -0.000055
sweltering -0.000115
lethal -0.000131
dtype: float64}
# Time a single permutation-importance run of the S-learner.
# perf_counter is a monotonic, high-resolution clock intended for measuring
# intervals; time.time() can jump if the system clock is adjusted.
start_time = time.perf_counter()
slearner.get_importance(X=X,
                        tau=slearner_tau,
                        method='permutation',
                        features=feature_names,
                        random_state=42)
print("Elapsed time: %s seconds" % (time.perf_counter() - start_time))
Elapsed time: 0.6892201900482178 seconds
# Plot counterpart of the method='permutation' importances.
slearner.plot_importance(X=X,
                         tau=slearner_tau,
                         method='permutation',
                         features=feature_names,
                         random_state=42)

# Feature importance via sklearn.inspection.permutation_importance
# The timer covers the split, the fit and the permutation scoring.
start_time = time.perf_counter()
# Hold out 30% of the rows; the S-learner's predicted effects are the target.
X_train, X_test, y_train, y_test = train_test_split(X, slearner_tau, test_size=0.3, random_state=42)
# Kept under its own name because the train-set comparison below reuses it.
model_tau_fit = model_tau.fit(X_train, y_train)
# Score on the held-out rows and keep only the mean importance per feature.
perm_imp_test = permutation_importance(
    estimator=model_tau_fit,
    X=X_test,
    y=y_test,
    random_state=42).importances_mean
print("Elapsed time: %s seconds" % (time.perf_counter() - start_time))
Elapsed time: 0.6840920448303223 seconds
# Held-out permutation importances labelled by feature name, largest first.
pd.Series(perm_imp_test, feature_names).sort_values(ascending=False)
tiger 0.938470 stars 0.838396 quixotic 0.121493 merciful 0.086907 fireman 0.038455 touch 0.001722 dependent 0.000387 clammy 0.000204 adhesive 0.000197 playground 0.000175 barbarous 0.000065 rigid 0.000058 eight 0.000047 damp 0.000037 offer 0.000030 shelf 0.000024 cute 0.000021 lip 0.000020 rain 0.000010 future -0.000009 wrap -0.000045 change -0.000050 nonchalant -0.000055 sweltering -0.000115 lethal -0.000131 dtype: float64
# Horizontal bar chart of the same values; sorted ascending so the largest
# bar is drawn last.
pd.Series(perm_imp_test, feature_names).sort_values().plot(kind='barh', figsize=(12, 8))
plt.title('Test Set Permutation Importances')
Text(0.5, 1.0, 'Test Set Permutation Importances')
# Same permutation importance, scored on the rows the model was fitted on
# (X_train / y_train) instead of the held-out rows.
perm_imp_train = permutation_importance(
estimator=model_tau_fit,
X=X_train,
y=y_train,
random_state=42).importances_mean
# Labelled by feature name, largest first.
pd.Series(perm_imp_train, feature_names).sort_values(ascending=False)
tiger 0.977817 stars 0.867720 quixotic 0.125296 merciful 0.090889 fireman 0.041912 touch 0.003801 dependent 0.002164 clammy 0.002008 sweltering 0.001901 cute 0.001478 lethal 0.001260 wrap 0.001231 damp 0.001206 future 0.001201 rain 0.001031 shelf 0.000976 playground 0.000888 rigid 0.000821 adhesive 0.000813 nonchalant 0.000797 change 0.000791 barbarous 0.000769 offer 0.000719 eight 0.000719 lip 0.000619 dtype: float64
# Horizontal bar chart of the training-set permutation importances.
pd.Series(perm_imp_train, feature_names).sort_values().plot(kind='barh', figsize=(12, 8))
plt.title('Training Set Permutation Importances')
Text(0.5, 1.0, 'Training Set Permutation Importances')
# Compute the SHAP values once; later cells reuse them through `shap_dict`.
# The result is a dict keyed by treatment label holding a 2-D array.
shap_slearner = slearner.get_shap_values(X=X, tau=slearner_tau)
shap_slearner
{'treatment_A': array([[ 1.85965599e-02, 1.50524540e-01, -5.55596292e-03, ...,
4.48446501e-04, -1.95476029e-04, -3.42248564e-03],
[ 7.82080973e-02, -2.36391211e-01, 2.88678514e-02, ...,
-3.70192908e-04, -3.39569059e-04, 1.22111367e-05],
[ 9.04201505e-02, 1.59707950e-01, 1.73721268e-03, ...,
-1.37756957e-04, 1.77684213e-04, -4.46185989e-04],
...,
[-1.99286917e-01, -2.07402161e-01, -8.74781304e-02, ...,
2.49031036e-03, -1.01702522e-03, 2.86971644e-04],
[-1.07236468e-01, 1.09881401e-01, -3.54466649e-02, ...,
2.69856996e-05, -6.92309733e-04, -1.94350406e-04],
[-9.15393868e-02, 3.07062682e-01, 4.98609094e-02, ...,
-6.94895003e-05, 7.62465467e-04, -4.42911077e-05]])}
# Mean absolute SHAP value per feature: axis 0 (rows) is averaged away,
# leaving one number per column.
np.mean(np.abs(shap_slearner['treatment_A']),axis=0)
array([0.13324381, 0.13797313, 0.02693165, 0.02818332, 0.01622515,
0.00173129, 0.00043212, 0.00462018, 0.00072387, 0.00270952,
0.00052824, 0.00024711, 0.00063725, 0.00085535, 0.00139984,
0.00107961, 0.00083777, 0.00037895, 0.00063368, 0.00071085,
0.00044104, 0.00115953, 0.00061382, 0.00051472, 0.00046914])
# Plot SHAP values without a precomputed shap_dict (X and tau are passed instead).
slearner.plot_shap_values(X=X, tau=slearner_tau, features=feature_names)
# Plot SHAP values from the precomputed shap_dict.
# NOTE(review): no `features=` is passed here, so the labels on this plot
# may not be the names in feature_names -- confirm.
slearner.plot_shap_values(shap_dict=shap_slearner)
# Dependence plot with interaction_idx=None (no color coding for interaction effects).
# The feature is selected by position (feature_idx=1); no feature names are passed.
slearner.plot_shap_dependence(treatment_group='treatment_A',
feature_idx=1,
X=X,
tau=slearner_tau,
interaction_idx=None,
shap_dict=shap_slearner)
# interaction_idx='auto' (searches for the feature with the greatest approximate interaction).
# The feature is selected by name here, so feature_names must be supplied.
slearner.plot_shap_dependence(treatment_group='treatment_A',
feature_idx='tiger',
X=X,
tau=slearner_tau,
interaction_idx='auto',
shap_dict=shap_slearner,
features=feature_names)
# interaction_idx set to a specific column index.
# NOTE(review): feature_names[1] is 'tiger' and feature_names[10] is
# 'playground'; presumably these indices refer to those columns -- confirm.
slearner.plot_shap_dependence(treatment_group='treatment_A',
feature_idx=1,
X=X,
tau=slearner_tau,
interaction_idx=10,
shap_dict=shap_slearner,
features=feature_names)
# T-learner built on a fresh LGBMRegressor; 'control' marks untreated rows.
tlearner = BaseTRegressor(LGBMRegressor(), control_name='control')
# Displays a tuple of three arrays.
# NOTE(review): presumably (estimate, lower bound, upper bound) -- confirm.
tlearner.estimate_ate(X, w_multi, y)
(array([0.57531904]), array([0.56053703]), array([0.59010105]))
# Keep the fit_predict result; it is passed as `tau` to every
# importance/SHAP call for the T-learner below.
tlearner_tau = tlearner.fit_predict(X, w_multi, y)

# Feature importance (method='auto')
tlearner.get_importance(X=X,
                        tau=tlearner_tau,
                        normalize=True,
                        method='auto',
                        features=feature_names)
{'treatment_A': tiger 0.367077
stars 0.298487
quixotic 0.043473
merciful 0.042342
clammy 0.024813
fireman 0.019719
future 0.016705
sweltering 0.016578
lip 0.012670
offer 0.012268
barbarous 0.011683
cute 0.011126
nonchalant 0.011074
change 0.011046
eight 0.010876
adhesive 0.010435
lethal 0.010043
damp 0.009813
dependent 0.009771
rigid 0.009429
shelf 0.009045
rain 0.009022
playground 0.008745
touch 0.008734
wrap 0.005027
dtype: float64}
# Plot counterpart of the method='auto' importances shown above.
tlearner.plot_importance(X=X,
                         tau=tlearner_tau,
                         normalize=True,
                         method='auto',
                         features=feature_names)

# Feature importance (method='permutation')
# random_state is pinned so repeated runs use the same seed.
tlearner.get_importance(X=X,
                        tau=tlearner_tau,
                        method='permutation',
                        features=feature_names,
                        random_state=42)
{'treatment_A': tiger 0.573731
stars 0.458209
merciful 0.036041
quixotic 0.031509
clammy 0.016865
fireman 0.012998
sweltering 0.011995
future 0.008679
lip 0.007420
offer 0.006272
eight 0.005664
barbarous 0.005433
nonchalant 0.004996
cute 0.004840
rain 0.004593
change 0.004298
damp 0.004297
dependent 0.004202
shelf 0.004026
lethal 0.003419
touch 0.003273
playground 0.003240
adhesive 0.002401
wrap 0.001678
rigid 0.001169
dtype: float64}
# Plot counterpart of the method='permutation' importances.
tlearner.plot_importance(X=X,
                         tau=tlearner_tau,
                         method='permutation',
                         features=feature_names,
                         random_state=42)

# Feature importance via sklearn.inspection.permutation_importance
# The timer covers the split, the fit and the permutation scoring.
start_time = time.perf_counter()
# Hold out 30% of the rows; the T-learner's predicted effects are the target.
X_train, X_test, y_train, y_test = train_test_split(X, tlearner_tau, test_size=0.3, random_state=42)
# The shared model_tau estimator is fitted again, now on the T-learner target.
model_tau_fit = model_tau.fit(X_train, y_train)
# Score on the held-out rows and keep only the mean importance per feature.
perm_imp_test = permutation_importance(
    estimator=model_tau_fit,
    X=X_test,
    y=y_test,
    random_state=42).importances_mean
print("Elapsed time: %s seconds" % (time.perf_counter() - start_time))
Elapsed time: 0.6249639987945557 seconds
# Held-out permutation importances labelled by feature name, largest first.
pd.Series(perm_imp_test, feature_names).sort_values(ascending=False)
tiger 0.573731 stars 0.458209 merciful 0.036041 quixotic 0.031509 clammy 0.016865 fireman 0.012998 sweltering 0.011995 future 0.008679 lip 0.007420 offer 0.006272 eight 0.005664 barbarous 0.005433 nonchalant 0.004996 cute 0.004840 rain 0.004593 change 0.004298 damp 0.004297 dependent 0.004202 shelf 0.004026 lethal 0.003419 touch 0.003273 playground 0.003240 adhesive 0.002401 wrap 0.001678 rigid 0.001169 dtype: float64
# Horizontal bar chart of the same values, sorted ascending.
pd.Series(perm_imp_test, feature_names).sort_values().plot(kind='barh', figsize=(12, 8))
plt.title('Test Set Permutation Importances')
Text(0.5, 1.0, 'Test Set Permutation Importances')
# Compute the T-learner SHAP values once; the plotting cell below reuses
# them through `shap_dict`.
shap_tlearner = tlearner.get_shap_values(X=X, tau=tlearner_tau)
shap_tlearner
{'treatment_A': array([[-6.89269243e-03, 1.40591415e-01, 3.75455659e-03, ...,
2.64532860e-03, -3.88182312e-03, -7.73279543e-02],
[ 8.36307028e-02, -2.46364371e-01, 1.09908459e-02, ...,
-3.66398303e-03, -3.97734276e-03, 2.56495888e-03],
[ 9.97879884e-02, 1.95474118e-01, -1.57834193e-03, ...,
3.40626462e-03, -4.85671565e-04, -3.62959648e-03],
...,
[-2.15946344e-01, -1.71168984e-01, -4.55286764e-02, ...,
2.98283440e-04, 5.74751489e-03, 1.81083170e-02],
[-8.73306751e-02, 6.44263380e-02, -2.14271236e-02, ...,
-5.15141325e-03, -3.66708335e-03, 2.43046195e-03],
[-1.39019231e-01, 4.61741741e-01, 2.16542925e-02, ...,
-2.78400171e-03, 9.78260008e-03, -3.80018010e-03]])}
# Plot SHAP values without a precomputed shap_dict (X and tau are passed instead).
tlearner.plot_shap_values(X=X, tau=tlearner_tau, features=feature_names)
# Plot SHAP values from the precomputed shap_dict (no `features=` passed here).
tlearner.plot_shap_values(shap_dict=shap_tlearner)
# X-learner built on a fresh LGBMRegressor; 'control' marks untreated rows.
xlearner = BaseXRegressor(LGBMRegressor(), control_name='control')
# Unlike the S/T-learner calls, the propensity dict e_multi is passed
# positionally before the treatment labels.
# NOTE(review): this argument order depends on the installed causalml
# version -- confirm it matches the estimate_ate signature in use.
xlearner.estimate_ate(X, e_multi, w_multi, y)
(array([0.53204758]), array([0.51807698]), array([0.54601818]))
# Uses predict() rather than fit_predict().
# NOTE(review): presumably relies on the fit already done by the
# estimate_ate call above -- confirm.
xlearner_tau = xlearner.predict(X, e_multi, w_multi, y)

# Feature importance (method='auto')
xlearner.get_importance(X=X,
                        tau=xlearner_tau,
                        normalize=True,
                        method='auto',
                        features=feature_names)
{'treatment_A': tiger 0.445398
stars 0.350588
clammy 0.024404
merciful 0.016463
eight 0.013235
quixotic 0.012893
sweltering 0.012551
future 0.011300
touch 0.010797
nonchalant 0.010058
offer 0.009685
barbarous 0.009395
rain 0.007844
dependent 0.006987
fireman 0.006955
lip 0.006931
cute 0.006901
shelf 0.005902
adhesive 0.005882
playground 0.005878
rigid 0.005297
lethal 0.004445
damp 0.004319
change 0.003006
wrap 0.002885
dtype: float64}
# Plot counterpart of the method='auto' importances shown above.
xlearner.plot_importance(X=X,
                         tau=xlearner_tau,
                         normalize=True,
                         method='auto',
                         features=feature_names)

# Feature importance (method='permutation')
# random_state is pinned so repeated runs use the same seed.
xlearner.get_importance(X=X,
                        tau=xlearner_tau,
                        method='permutation',
                        features=feature_names,
                        random_state=42)
{'treatment_A': tiger 0.837614
stars 0.621791
clammy 0.035555
merciful 0.018321
quixotic 0.015277
eight 0.013299
sweltering 0.013252
touch 0.012349
nonchalant 0.011482
barbarous 0.011398
future 0.010558
offer 0.009084
lip 0.008267
cute 0.008004
rain 0.006863
dependent 0.006616
fireman 0.004964
rigid 0.004650
adhesive 0.004461
shelf 0.004408
playground 0.004342
damp 0.003833
lethal 0.003050
change 0.002094
wrap 0.001763
dtype: float64}
# Plot counterpart of the method='permutation' importances.
xlearner.plot_importance(X=X,
                         tau=xlearner_tau,
                         method='permutation',
                         features=feature_names,
                         random_state=42)

# Feature importance via sklearn.inspection.permutation_importance
# The timer covers the split, the fit and the permutation scoring.
start_time = time.perf_counter()
# Hold out 30% of the rows; the X-learner's predicted effects are the target.
X_train, X_test, y_train, y_test = train_test_split(X, xlearner_tau, test_size=0.3, random_state=42)
# The shared model_tau estimator is fitted again, now on the X-learner target.
model_tau_fit = model_tau.fit(X_train, y_train)
# Score on the held-out rows and keep only the mean importance per feature.
perm_imp_test = permutation_importance(
    estimator=model_tau_fit,
    X=X_test,
    y=y_test,
    random_state=42).importances_mean
print("Elapsed time: %s seconds" % (time.perf_counter() - start_time))
Elapsed time: 0.7079129219055176 seconds
# Held-out permutation importances labelled by feature name, largest first.
pd.Series(perm_imp_test, feature_names).sort_values(ascending=False)
tiger 0.837614 stars 0.621791 clammy 0.035555 merciful 0.018321 quixotic 0.015277 eight 0.013299 sweltering 0.013252 touch 0.012349 nonchalant 0.011482 barbarous 0.011398 future 0.010558 offer 0.009084 lip 0.008267 cute 0.008004 rain 0.006863 dependent 0.006616 fireman 0.004964 rigid 0.004650 adhesive 0.004461 shelf 0.004408 playground 0.004342 damp 0.003833 lethal 0.003050 change 0.002094 wrap 0.001763 dtype: float64
# Horizontal bar chart of the same values, sorted ascending.
pd.Series(perm_imp_test, feature_names).sort_values().plot(kind='barh', figsize=(12, 8))
plt.title('Test Set Permutation Importances')
Text(0.5, 1.0, 'Test Set Permutation Importances')
# Compute the X-learner SHAP values once; the plotting cell below reuses
# them through `shap_dict`.
shap_xlearner = xlearner.get_shap_values(X=X, tau=xlearner_tau)
shap_xlearner
{'treatment_A': array([[-0.03485554, 0.15415286, 0.0107788 , ..., 0.00560224,
-0.00262616, -0.03822558],
[ 0.08514976, -0.21604149, 0.00567841, ..., -0.00452231,
-0.00203289, 0.00115042],
[ 0.09064029, 0.1790326 , 0.00532089, ..., -0.00343718,
0.00138747, -0.0012849 ],
...,
[-0.1830646 , -0.13441459, -0.02101109, ..., 0.0104924 ,
0.0032898 , 0.01186523],
[-0.09311651, 0.01577386, -0.0243432 , ..., -0.00377513,
-0.00081169, -0.00084607],
[-0.12160697, 0.38320337, 0.01422979, ..., -0.00248256,
0.0035844 , -0.0038958 ]])}
# shap_dict not specified: X and tau are passed instead.
xlearner.plot_shap_values(X=X, tau=xlearner_tau, features=feature_names)
# shap_dict specified: the precomputed values are reused (no `features=` passed here).
xlearner.plot_shap_values(shap_dict=shap_xlearner)
# R-learner built on a fresh LGBMRegressor; 'control' marks untreated rows.
rlearner = BaseRRegressor(LGBMRegressor(), control_name='control')
# As with the X-learner, the propensity dict e_multi is passed positionally
# before the treatment labels.
# NOTE(review): confirm this order against the installed causalml version.
rlearner_tau = rlearner.fit_predict(X, e_multi, w_multi, y)

# Feature importance (method='auto')
rlearner.get_importance(X=X,
                        tau=rlearner_tau,
                        normalize=True,
                        method='auto',
                        features=feature_names)
{'treatment_A': tiger 0.265782
stars 0.183487
clammy 0.050748
sweltering 0.033953
lip 0.032178
merciful 0.030900
future 0.030895
adhesive 0.028763
quixotic 0.027381
offer 0.026122
rain 0.024693
cute 0.024653
eight 0.022138
touch 0.021480
rigid 0.020678
damp 0.020620
playground 0.018939
wrap 0.018196
barbarous 0.018088
dependent 0.017933
change 0.017835
shelf 0.016420
fireman 0.016152
nonchalant 0.016051
lethal 0.015914
dtype: float64}
# Plot counterpart of the method='auto' importances shown above.
# normalize=True is passed explicitly so the plot matches the paired
# get_importance call and the equivalent S/T/X-learner cells.
rlearner.plot_importance(X=X,
                         tau=rlearner_tau,
                         normalize=True,
                         method='auto',
                         features=feature_names)

# Feature importance (method='permutation')
# random_state is pinned so repeated runs use the same seed.
rlearner.get_importance(X=X,
                        tau=rlearner_tau,
                        method='permutation',
                        features=feature_names,
                        random_state=42)
{'treatment_A': tiger 0.380958
stars 0.243383
clammy 0.044792
sweltering 0.028370
lip 0.028354
future 0.025996
merciful 0.023391
offer 0.022393
adhesive 0.017629
quixotic 0.017390
eight 0.015068
cute 0.015019
touch 0.012206
fireman 0.010353
rain 0.009351
dependent 0.008820
playground 0.008726
nonchalant 0.008371
barbarous 0.007561
shelf 0.007027
lethal 0.006839
change 0.006731
wrap 0.005803
rigid 0.004959
damp 0.004754
dtype: float64}
# Plot counterpart of the method='permutation' importances.
rlearner.plot_importance(X=X,
                         tau=rlearner_tau,
                         method='permutation',
                         features=feature_names,
                         random_state=42)

# Feature importance via sklearn.inspection.permutation_importance
# The timer covers the split, the fit and the permutation scoring.
start_time = time.perf_counter()
# Hold out 30% of the rows; the R-learner's predicted effects are the target.
X_train, X_test, y_train, y_test = train_test_split(X, rlearner_tau, test_size=0.3, random_state=42)
# The shared model_tau estimator is fitted again, now on the R-learner target.
model_tau_fit = model_tau.fit(X_train, y_train)
# Score on the held-out rows and keep only the mean importance per feature.
perm_imp_test = permutation_importance(
    estimator=model_tau_fit,
    X=X_test,
    y=y_test,
    random_state=42).importances_mean
print("Elapsed time: %s seconds" % (time.perf_counter() - start_time))
Elapsed time: 0.747636079788208 seconds
# Held-out permutation importances labelled by feature name, largest first.
pd.Series(perm_imp_test, feature_names).sort_values(ascending=False)
tiger 0.380958 stars 0.243383 clammy 0.044792 sweltering 0.028370 lip 0.028354 future 0.025996 merciful 0.023391 offer 0.022393 adhesive 0.017629 quixotic 0.017390 eight 0.015068 cute 0.015019 touch 0.012206 fireman 0.010353 rain 0.009351 dependent 0.008820 playground 0.008726 nonchalant 0.008371 barbarous 0.007561 shelf 0.007027 lethal 0.006839 change 0.006731 wrap 0.005803 rigid 0.004959 damp 0.004754 dtype: float64
# Horizontal bar chart of the same values, sorted ascending.
pd.Series(perm_imp_test, feature_names).sort_values().plot(kind='barh', figsize=(12, 8))
plt.title('Test Set Permutation Importances')
Text(0.5, 1.0, 'Test Set Permutation Importances')
# Compute the R-learner SHAP values once; the plotting cell below reuses
# them through `shap_dict`.
shap_rlearner = rlearner.get_shap_values(X=X, tau=rlearner_tau)
shap_rlearner
{'treatment_A': array([[-9.48568815e-03, 1.65557909e-01, 9.65632978e-03, ...,
6.62268061e-05, -5.09259836e-03, -1.59912241e-01],
[ 6.55774283e-02, -2.04946202e-01, 1.18588613e-02, ...,
-7.01146719e-03, -8.74117130e-03, 2.30650966e-03],
[ 8.02179069e-02, 1.56186874e-01, 8.55430443e-03, ...,
-7.90951441e-03, 6.48777490e-04, -1.13225597e-02],
...,
[-1.58315449e-01, -1.40714473e-01, -6.53059181e-02, ...,
-2.63096323e-03, -1.70126307e-02, 4.59584091e-02],
[-7.90786868e-02, 1.03010179e-02, -3.98414379e-02, ...,
-7.10201485e-03, -3.18090224e-03, 2.14761644e-03],
[-1.20000199e-01, 4.60188357e-01, 2.63232980e-02, ...,
-1.89560835e-02, 1.38353213e-02, 9.37838351e-03]])}
# Without a precomputed shap_dict: X and tau are passed instead.
rlearner.plot_shap_values(X=X, tau=rlearner_tau, features=feature_names)
# With a precomputed shap_dict (no `features=` passed here).
rlearner.plot_shap_values(shap_dict=shap_rlearner)