#!/usr/bin/env python
# coding: utf-8
#
#
# # Lifelines examples
# ##### Gehan Survival Dataset
#
# The data show the length of remission in weeks for two groups of leukemia patients, treated and control, and were analyzed by Cox in his original proportional hazards paper. The data are available in a file containing three columns:
#
# - Treatment: coded Treated (drug) or Control (placebo),
# - Time: weeks of remission,
# - Failure: coded 1 if a failure (relapse), 0 if censored
#
# Thus, the third and fourth observations, 6 and 6+, corresponding to a relapse and a censored observation at six weeks, are coded 6, 1 and 6, 0, respectively.
# In[3]:
# Import the fitters from the top-level ``lifelines`` package, the same place
# AalenAdditiveFitter is imported from later in this notebook, instead of the
# legacy ``lifelines.estimation`` submodule path.
from lifelines import KaplanMeierFitter, NelsonAalenFitter
import pandas as pd
import numpy as np

# ``%pylab inline`` enables inline figures and brings the pylab helpers used
# in this notebook (``figsize`` here, ``ylim`` further down) into scope.
get_ipython().run_line_magic('pylab', 'inline')
# Figure size applied to the plots that follow.
figsize(12.5, 6)
# In[1]:
# Load the Gehan remission data. The file has no header row and its columns
# are separated by one to three whitespace characters, so the separator is a
# regular expression. It is written as a raw string so that ``\s`` is not
# interpreted as a (invalid) string escape, and the Python parser engine is
# requested explicitly because regex separators are handled by that engine.
data = pd.read_csv(
    '../lifelines/datasets/gehan.dat',
    sep=r"\s{1,3}",
    header=None,
    engine="python",
)
# In[2]:
# Notebook cell output: shows the last rows (no visible effect in a script).
data.tail()
# In[3]:
# Discard every row that contains a missing value before the analysis.
data = data.dropna()
# Call form of print: valid on Python 3 and, with one argument, on Python 2.
print(data.head())
# In[15]:
# Boolean mask over the rows whose first column equals 2.
# NOTE(review): presumably 2 is the code of the treated group -- confirm
# against the data file.
treatment = data[0].eq(2)
# In[19]:
# Column 1 holds the remission time in weeks and column 2 the failure flag
# (1 = relapse, 0 = censored), per the dataset description at the top.
T, E = data[1], data[2]
# Common grid of 150 evenly spaced time points from 0 to 40, passed as the
# ``timeline`` of every fit below.
t = np.linspace(0, 40, num=150)
# In[ ]:
# In[20]:
# Kaplan-Meier curves for the two groups, drawn on one set of axes.
kmf = KaplanMeierFitter()

# Rows selected by the ``treatment`` mask; this plot creates the axes.
kmf.fit(
    T[treatment],
    event_observed=E[treatment],
    timeline=t,
    label='With treatment',
)
ax = kmf.plot()

# Remaining rows: refit the same fitter and draw in red on the same axes.
kmf.fit(
    T[~treatment],
    event_observed=E[~treatment],
    timeline=t,
    label="Without treatment",
)
kmf.plot(c="r", ax=ax)

# Set the y-axis range to 0..1.05.
ylim(0, 1.05)
# ### Hazards, too
# In[21]:
# Nelson-Aalen cumulative hazard estimates for the same two groups.
naf = NelsonAalenFitter()
# In[23]:
# Rows selected by the ``treatment`` mask; plotting the fitted
# ``cumulative_hazard_`` frame creates the axes.
naf.fit(
    T[treatment],
    event_observed=E[treatment],
    timeline=t,
    label="With treatment",
)
ax = naf.cumulative_hazard_.plot()

# Remaining rows: refit and add the curve to the existing axes.
naf.fit(
    T[~treatment],
    event_observed=E[~treatment],
    timeline=t,
    label="Without treatment",
)
ax = naf.cumulative_hazard_.plot(ax=ax)
# ### Try a more complicated model?
# In[26]:
from lifelines import AalenAdditiveFitter
# In[34]:
# Fit Aalen's additive model on the whole frame: column 1 is the duration,
# column 2 the event indicator, evaluated on the shared time grid ``t``.
# NOTE(review): presumably the remaining column (0, the treatment code) is
# used as the covariate -- confirm against the lifelines documentation.
aaf = AalenAdditiveFitter()
aaf.fit(
    data,
    timeline=t,
    event_col=2,
    duration_col=1,
)
# In[35]:
# Plot the fitted ``cumulative_hazards_`` frame.
aaf.cumulative_hazards_.plot()
# In[ ]: