#!/usr/bin/env python
# coding: utf-8
#
#
# # Lifelines examples
# ##### Gehan Survival Dataset
#
# The data show the length of remission in weeks for two groups of leukemia patients, treated and control, and were analyzed by Cox in his original proportional hazards paper. The data are available in a file containing three columns:
#
# - Treatment: coded Treated (drug) or Control (placebo),
# - Time: weeks of remission,
# - Failure: coded 1 if a failure (relapse), 0 if censored
# In[1]:
from lifelines.estimation import KaplanMeierFitter, NelsonAalenFitter
import pandas as pd
import numpy as np
# Notebook setup: run the IPython ``%pylab inline`` magic. ``get_ipython`` is
# not imported anywhere in the visible source, so this line relies on being
# executed inside an IPython/Jupyter session.
# NOTE(review): the bare names used without an import in this script
# (``figsize`` here, ``ylim`` further down) are presumably injected into the
# namespace by this magic -- confirm before replacing it.
get_ipython().run_line_magic('pylab', 'inline')
# Set the default figure size to 12.5 x 6 (presumably inches -- confirm).
figsize(12.5,6)
# In[34]:
# Load the Gehan remission data. The file is read without a header row, so the
# three columns are named explicitly right after loading.
# The separator is a regular expression (one to three whitespace characters).
# It is written as a raw string so that ``\s`` is never interpreted as a string
# escape, and the Python parser engine is requested explicitly because regex
# separators are not supported by pandas' C engine (pandas would otherwise
# fall back to the Python engine with a warning).
data = pd.read_csv('../lifelines/datasets/gehan.dat', sep=r"\s{1,3}",
                   header=None, engine='python')
data.columns = ['treatment', 'time', 'failure']
# In[35]:
data.tail()
# In[36]:
# Discard rows that contain missing values before any model is fitted.
data = data.dropna()
# Parenthesised call form: valid under both Python 2 and Python 3, whereas the
# bare ``print data.head()`` statement is a syntax error on Python 3.
print(data.head())
# In[37]:
# Boolean mask over the rows: True where the 'treatment' column equals 2.
# These rows are the ones labelled 'With treatment' in the plots below.
# NOTE(review): the header describes the column as coded Treated/Control while
# the code compares against 2 -- confirm the coding used in the data file.
treatment = (data['treatment'] == 2)
# In[38]:
# T: the 'time' column (durations); E: the 'failure' column (event flags).
T, E = data['time'], data['failure']
# 150 evenly spaced points from 0 to 40, passed as ``timeline`` to the fitters.
t = np.linspace(0, 40, num=150)
# In[ ]:
# In[39]:
# Kaplan-Meier estimates for the two groups, drawn one after the other.
# The same fitter object is re-fitted for the second group, so each plot call
# must come right after its own fit.
kmf = KaplanMeierFitter()
kmf.fit(
    T[treatment],
    event_observed=E[treatment],
    timeline=t,
    label='With treatment',
)
# Keep what the first plot call returns so the second curve can be handed the
# same target via ``ax=``.
ax = kmf.plot()
kmf.fit(
    T[~treatment],
    event_observed=E[~treatment],
    timeline=t,
    label="Without treatment",
)
kmf.plot(ax=ax)
# y-axis limits from 0 to 1.05.
ylim(0, 1.05)
# ### Cumulative hazards (Nelson-Aalen), too
# In[40]:
# Nelson-Aalen cumulative hazard for the two groups, plotted from the fitter's
# ``cumulative_hazard_`` attribute after each fit.
naf = NelsonAalenFitter()
# In[41]:
naf.fit(
    T[treatment],
    event_observed=E[treatment],
    timeline=t,
    label="With treatment",
)
# The result of the first plot is reused as the ``ax`` of the second one.
ax = naf.cumulative_hazard_.plot()
naf.fit(
    T[~treatment],
    event_observed=E[~treatment],
    timeline=t,
    label="Without treatment",
)
ax = naf.cumulative_hazard_.plot(ax=ax)
# ### Try regression models
# In[42]:
from lifelines import AalenAdditiveFitter
# In[43]:
# Aalen additive regression on the whole frame: 'time' is the duration column
# and 'failure' the event column, evaluated on the timeline ``t``.
# NOTE(review): the remaining column ('treatment') is presumably picked up as
# the covariate -- confirm against the lifelines version in use.
aaf = AalenAdditiveFitter()
aaf.fit(
    data,
    event_col='failure',
    duration_col='time',
    timeline=t,
)
# In[49]:
aaf.plot()
# In[45]:
from lifelines import CoxPHFitter
# In[46]:
# Cox proportional hazards regression on the same frame, with 'time' as the
# duration column and 'failure' as the event column, then print the summary.
cp = CoxPHFitter()
cp.fit(
    data,
    event_col='failure',
    duration_col='time',
)
# In[47]:
cp.print_summary()
# In[ ]: