The data show the length of remission in weeks for two groups of leukemia patients, treated and control, and were analyzed by Cox in his original proportional hazards paper. The data are available in a file containing three columns:
from lifelines.estimation import KaplanMeierFitter, NelsonAalenFitter
import pandas as pd
import numpy as np
%pylab inline
figsize(12.5,6)
Populating the interactive namespace from numpy and matplotlib
/Users/camerondavidson-pilon/.virtualenvs/data/lib/python2.7/site-packages/IPython/core/magics/pylab.py:161: UserWarning: pylab import has clobbered these variables: ['datetime'] `%matplotlib` prevents importing * from pylab and numpy "\n`%matplotlib` prevents importing * from pylab and numpy"
data = pd.read_csv('../lifelines/datasets/gehan.dat', sep="\s{1,3}",
header=None)
data.columns = ['treatment', 'time', 'failure']
/Users/camerondavidson-pilon/.virtualenvs/data/lib/python2.7/site-packages/ipykernel/__main__.py:2: ParserWarning: Falling back to the 'python' engine because the 'c' engine does not support regex separators (separators > 1 char and different from '\s+' are interpreted as regex); you can avoid this warning by specifying engine='python'. from ipykernel import kernelapp as app
data.tail()
| treatment | time | failure | |
|---|---|---|---|
| 37 | 1 | 12 | 1 |
| 38 | 1 | 15 | 1 |
| 39 | 1 | 17 | 1 |
| 40 | 1 | 22 | 1 |
| 41 | 1 | 23 | 1 |
data = data.dropna()
print data.head()
treatment time failure 0 2 6 1 1 2 6 1 2 2 6 1 3 2 6 0 4 2 7 1
treatment = data['treatment'] == 2
T = data['time']
E = data['failure']
t = np.linspace(0,40,150)
kmf = KaplanMeierFitter()
kmf.fit(T[treatment], timeline=t, event_observed=E[treatment], label='With treatment')
ax = kmf.plot()
kmf.fit(T[~treatment], timeline=t, event_observed=E[~treatment], label="Without treatment")
kmf.plot(ax=ax)
ylim(0,1.05)
(0, 1.05)
naf = NelsonAalenFitter()
naf.fit(T[treatment],timeline=t, event_observed=E[treatment], label="With treatment")
ax = naf.cumulative_hazard_.plot()
naf.fit(T[~treatment], timeline=t, event_observed=E[~treatment], label="Without treatment")
ax = naf.cumulative_hazard_.plot(ax=ax)
from lifelines import AalenAdditiveFitter
aaf = AalenAdditiveFitter()
aaf.fit(data, duration_col='time', event_col='failure', timeline=t)
[-----------------100%-----------------] 30 of 30 complete in 0.0 sec
<lifelines.AalenAdditiveFitter: fitted with 42 observations, 12 censored>
aaf.plot()
<matplotlib.axes._subplots.AxesSubplot at 0x111606790>
from lifelines import CoxPHFitter
cp = CoxPHFitter()
cp.fit(data, duration_col='time', event_col='failure')
<lifelines.CoxPHFitter: fitted with 42 observations, 12 censored>
cp.print_summary()
n=42, number of events=30
coef exp(coef) se(coef) z p lower 0.95 upper 0.95
treatment -1.5092 0.2211 0.4096 -3.6849 0.0002 -2.3121 -0.7063 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Concordance = 0.690