#!/usr/bin/env python
# coding: utf-8

# Meta-learner (S/T/X/R) feature-interpretation demo on synthetic data,
# exported from a Jupyter notebook. IPython shell/magic calls are guarded
# so the file also runs (without them) as a plain Python script.

# In[1]:

import os
import warnings

# Work from the repository root so relative paths resolve consistently.
base_path = os.path.abspath("../")
os.chdir(base_path)
warnings.filterwarnings('ignore')  # demo notebook: silence noisy library warnings

# In[5]:

# get_ipython() only exists when running under IPython/Jupyter; under plain
# `python` it would raise NameError, so resolve it defensively and skip the
# shell/magic calls when absent.
_ipython = globals().get('get_ipython', lambda: None)()
if _ipython is not None:
    _ipython.system('pip install pydotplus')

# In[6]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from causalml.inference.meta import BaseSRegressor, BaseTRegressor, BaseXRegressor, BaseRRegressor
from causalml.dataset.regression import synthetic_data
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.tree import DecisionTreeRegressor
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor

import eli5
import shap

os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'  # for lightgbm to work

if _ipython is not None:
    _ipython.run_line_magic('reload_ext', 'autoreload')
    _ipython.run_line_magic('autoreload', '2')
    _ipython.run_line_magic('matplotlib', 'inline')

# In[7]:

plt.style.use('fivethirtyeight')

# In[8]:

# Synthetic causal dataset: outcome y, covariates X, binary treatment w,
# plus (per causalml's synthetic_data) true effect tau, baseline b, and
# propensity scores e.
n_features = 25
n_samples = 10000
y, X, w, tau, b, e = synthetic_data(mode=1, n=n_samples, p=n_features, sigma=0.5)

# In[9]:

# String treatment labels in the multi-treatment format the causalml
# meta-learner API expects; e_multi maps treatment name -> propensity.
w_multi = np.array(['treatment_A' if x == 1 else 'control' for x in w])
e_multi = {'treatment_A': e}

# ## S Learner

# In[10]:

base_algo = LGBMRegressor()
# base_algo = XGBRegressor()
# base_algo = RandomForestRegressor()
# base_algo = LinearRegression()

slearner = BaseSRegressor(base_algo, control_name='control')
slearner.estimate_ate(X, w_multi, y)

# In[11]:

slearner_tau = slearner.fit_predict(X, w_multi, y)

# ### Feature Importance (method = `auto`)

# In[29]:

# Human-readable names for the 25 synthetic covariates.
feature_names = ['stars', 'tiger', 'merciful', 'quixotic', 'fireman',
                 'dependent', 'shelf', 'touch', 'barbarous', 'clammy',
                 'playground', 'rain', 'offer', 'cute', 'future',
                 'damp', 'nonchalant', 'change', 'rigid', 'sweltering',
                 'eight', 'wrap', 'lethal', 'adhesive', 'lip']  # specify feature names

model_tau_feature = RandomForestRegressor()  # specify model for model_tau_feature
# S-learner interpretation: feature importances ('auto' and 'permutation')
# and SHAP values for the estimated treatment effects, followed by the
# T-learner fit.

slearner.get_importance(
    X=X,
    tau=slearner_tau,
    model_tau_feature=model_tau_feature,
    normalize=True,
    method='auto',
    features=feature_names,
)

# In[28]:

slearner.plot_importance(X=X, tau=slearner_tau, normalize=True, method='auto')

# ### Feature Importance (method = `permutation`)

# In[30]:

slearner.get_importance(X=X, tau=slearner_tau, method='permutation')

# In[31]:

slearner.plot_importance(X=X, tau=slearner_tau, method='permutation')

# ### Shapley Values

# In[32]:

shap_slearner = slearner.get_shap_values(X=X, tau=slearner_tau)
shap_slearner  # notebook display of the per-treatment SHAP arrays

# In[33]:

# Mean absolute SHAP value per feature for the single treatment arm.
np.abs(shap_slearner['treatment_A']).mean(axis=0)

# In[34]:

# Plot shap values without specifying shap_dict
slearner.plot_shap_values(X=X, tau=slearner_tau)

# In[35]:

# Plot shap values WITH specifying shap_dict
slearner.plot_shap_values(shap_dict=shap_slearner)

# In[36]:

# interaction_idx=None: no color coding for interaction effects
slearner.plot_shap_dependence(
    treatment_group='treatment_A',
    feature_idx=1,
    X=X,
    tau=slearner_tau,
    interaction_idx=None,
    shap_dict=shap_slearner,
)

# In[37]:

# interaction_idx='auto': searches for the feature with the greatest
# approximate interaction
slearner.plot_shap_dependence(
    treatment_group='treatment_A',
    feature_idx=1,
    X=X,
    tau=slearner_tau,
    interaction_idx='auto',
)

# In[38]:

# Same, but the feature is referenced by name instead of index,
# which requires passing `features`.
slearner.plot_shap_dependence(
    treatment_group='treatment_A',
    feature_idx='tiger',
    X=X,
    tau=slearner_tau,
    interaction_idx='auto',
    shap_dict=shap_slearner,
    features=feature_names,
)

# In[39]:

# interaction_idx set to a specific feature index
slearner.plot_shap_dependence(
    treatment_group='treatment_A',
    feature_idx=1,
    X=X,
    tau=slearner_tau,
    interaction_idx=10,
    shap_dict=shap_slearner,
)

# ## T Learner

# In[40]:

tlearner = BaseTRegressor(LGBMRegressor(), control_name='control')
tlearner.estimate_ate(X, w_multi, y)

# In[41]:

tlearner_tau = tlearner.fit_predict(X, w_multi, y)
# T-, X-, and R-learner interpretation: feature importances ('auto' and
# 'permutation') and SHAP values for each meta-learner's estimated
# treatment effects.

# ### Feature Importance (method = `auto`)

# In[43]:

tlearner.get_importance(X=X, tau=tlearner_tau, normalize=True, method='auto')

# In[44]:

tlearner.plot_importance(X=X, tau=tlearner_tau, normalize=True, method='auto')

# ### Feature Importance (method = `permutation`)

# In[46]:

tlearner.get_importance(X=X, tau=tlearner_tau, method='permutation')

# In[47]:

tlearner.plot_importance(X=X, tau=tlearner_tau, method='permutation')

# ### Shapley Values

# In[48]:

shap_tlearner = tlearner.get_shap_values(X=X, tau=tlearner_tau)
shap_tlearner  # notebook display

# In[49]:

# Plot shap values without specifying shap_dict
tlearner.plot_shap_values(X=X, tau=tlearner_tau)

# In[50]:

# Plot shap values WITH specifying shap_dict
tlearner.plot_shap_values(shap_dict=shap_tlearner)

# In[51]:

# interaction_idx set to None (no color coding for interaction effects)
tlearner.plot_shap_dependence(treatment_group='treatment_A',
                              feature_idx=3,
                              X=X,
                              tau=tlearner_tau,
                              interaction_idx=None,
                              shap_dict=shap_tlearner)

# In[52]:

# interaction_idx set to 'auto' (searches for feature with greatest
# approximate interaction)
tlearner.plot_shap_dependence(treatment_group='treatment_A',
                              feature_idx=3,
                              X=X,
                              tau=tlearner_tau,
                              interaction_idx='auto',
                              shap_dict=shap_tlearner)

# In[53]:

# interaction_idx set to specific index
tlearner.plot_shap_dependence(treatment_group='treatment_A',
                              feature_idx=3,
                              X=X,
                              tau=tlearner_tau,
                              interaction_idx=10,
                              shap_dict=shap_tlearner)

# ## X Learner

# In[54]:

# The X-learner additionally takes the propensity scores (e_multi).
xlearner = BaseXRegressor(LGBMRegressor(), control_name='control')
xlearner.estimate_ate(X, e_multi, w_multi, y)

# In[55]:

xlearner_tau = xlearner.predict(X, e_multi, w_multi, y)

# ### Feature Importance (method = `auto`)

# In[56]:

xlearner.get_importance(X=X, tau=xlearner_tau, normalize=True, method='auto')

# In[57]:

xlearner.plot_importance(X=X, tau=xlearner_tau, normalize=True, method='auto')

# ### Feature Importance (method = `permutation`)

# In[58]:

xlearner.get_importance(X=X, tau=xlearner_tau, method='permutation')

# In[59]:

xlearner.plot_importance(X=X, tau=xlearner_tau, method='permutation')

# ### Shapley Values

# In[60]:

shap_xlearner = xlearner.get_shap_values(X=X, tau=xlearner_tau)
shap_xlearner  # notebook display

# In[61]:

# shap_dict not specified
xlearner.plot_shap_values(X=X, tau=xlearner_tau)

# In[62]:

# shap_dict specified
xlearner.plot_shap_values(shap_dict=shap_xlearner)

# In[63]:

# interaction_idx set to None (no color coding for interaction effects)
xlearner.plot_shap_dependence(treatment_group='treatment_A',
                              feature_idx=0,
                              X=X,
                              tau=xlearner_tau,
                              interaction_idx=None,
                              shap_dict=shap_xlearner)

# In[64]:

# interaction_idx set to 'auto' (searches for feature with greatest
# approximate interaction)
xlearner.plot_shap_dependence(treatment_group='treatment_A',
                              feature_idx=0,
                              X=X,
                              tau=xlearner_tau,
                              interaction_idx='auto',
                              shap_dict=shap_xlearner)

# In[65]:

# interaction_idx set to specific index
xlearner.plot_shap_dependence(treatment_group='treatment_A',
                              feature_idx=0,
                              X=X,
                              tau=xlearner_tau,
                              interaction_idx=10,
                              shap_dict=shap_xlearner)

# ## R Learner

# In[102]:

# importance_type='gain' makes LightGBM report gain-based importances
# rather than split counts.
rlearner = BaseRRegressor(LGBMRegressor(importance_type='gain'), control_name='control')
rlearner_tau = rlearner.fit_predict(X, e_multi, w_multi, y)

# ### Feature Importance (method = `auto`)

# In[104]:

rlearner.get_importance(X=X, tau=rlearner_tau, normalize=True, method='auto')

# In[105]:

# normalize=True made explicit for consistency with the S/T/X 'auto'
# sections above (presumably the causalml default — verify; no behavior
# change intended).
rlearner.plot_importance(X=X, tau=rlearner_tau, normalize=True, method='auto')

# ### Feature Importance (method = `permutation`)

# In[106]:

rlearner.get_importance(X=X, tau=rlearner_tau, method='permutation')

# In[107]:

rlearner.plot_importance(X=X, tau=rlearner_tau, method='permutation')

# ### Shapley Values

# In[108]:

shap_rlearner = rlearner.get_shap_values(X=X, tau=rlearner_tau)
shap_rlearner  # notebook display

# In[109]:

# without providing shap_dict
rlearner.plot_shap_values(X=X, tau=rlearner_tau)

# In[110]:

# with providing shap_dict
rlearner.plot_shap_values(shap_dict=shap_rlearner)

# In[111]:

# interaction_idx set to None (no color coding for interaction effects)
rlearner.plot_shap_dependence(treatment_group='treatment_A',
                              feature_idx=0,
                              X=X,
                              tau=rlearner_tau,
                              interaction_idx=None,
                              shap_dict=shap_rlearner)

# In[112]:

# interaction_idx set to 'auto' (searches for feature with greatest
# approximate interaction)
rlearner.plot_shap_dependence(treatment_group='treatment_A',
                              feature_idx=0,
                              X=X,
                              tau=rlearner_tau,
                              interaction_idx='auto',
                              shap_dict=shap_rlearner)

# In[113]:

# interaction_idx set to specific index
rlearner.plot_shap_dependence(treatment_group='treatment_A',
                              feature_idx=1,
                              X=X,
                              tau=rlearner_tau,
                              interaction_idx=20,
                              shap_dict=shap_rlearner)

# In[ ]: