#!/usr/bin/env python
# coding: utf-8

# Section 4. from "Piecewise exponential models for survival data with
# covariates" by Michael Friedman
#
# Fits several piecewise exponential regression models (one per set of
# breakpoints) plus a Cox proportional hazards model to the lupus dataset and
# collects the fitted coefficients side by side in a single table.

# In[23]:

import pandas as pd

from lifelines import CoxPHFitter, PiecewiseExponentialRegressionFitter
from lifelines.datasets import load_lupus

df = load_lupus()

# preprocessing
T_col = 'time_between_diagnosis_and_last_observation_(years)'
E_col = 'dead'

# Binary indicator: onset-to-diagnosis delay of at most two years (24 months).
df['time_elapsed_between_estimated_onset_and_diagnosis_binary'] = (
    df['time_elapsed_between_estimated_onset_and_diagnosis_(months)'] <= 2 * 12
)
# Binary indicator: diagnosis date earlier than July 1951.
df['recent'] = df['year_month_of_diagnosis'] < '1951-07'

columns = [
    'is_male',
    'is_white',
    'age_at_diagnosis',
    'time_elapsed_between_estimated_onset_and_diagnosis_binary',
    'recent',
] + [T_col, E_col]

df = df[columns]
df = df.dropna()  # drop the individual with NaN

# these models cannot naturally handle 0 durations, so we fudge a bit by
# replacing exact zeros with a tiny positive value.
df.loc[df[T_col] == 0, T_col] = 0.000001

# add a constant column (only needed for non-Cox models.)
df['constant'] = 1.


# In[24]:

# Each entry is one set of interval breakpoints; one model is fitted per entry.
breakpoints = [
    [0.5, 0.8, 1.1, 1.7, 2.5, 3.1],
    [0.3, 0.8, 1.0, 2.0, 3.0],
    [0.4, 0.9, 1.5, 2.5],
    [0.3, 1.0, 2.0, 3.0],
    [0.4],
    [0.3],
]

# Maps a model label to its fitted covariate coefficients.
results = {}

for i, bp in enumerate(breakpoints, start=1):
    # by forcing the penalizer to be 1000, the coefs between periods are
    # constrained to be identical.
    pcf = PiecewiseExponentialRegressionFitter(penalizer=1000., breakpoints=bp)
    pcf.fit(df, T_col, E_col)

    # Note the negative sign. We use a different parameterization than the
    # paper. The 'constant' row is dropped so only covariate coefficients are
    # kept, which lines up with the Cox model's coefficients below.
    results["model %d" % i] = -pcf.params_['lambda_0_'].drop('constant')

# The Cox model is fitted without the constant column.
cph = CoxPHFitter().fit(df.drop('constant', axis=1), T_col, E_col)
results['Cox'] = cph.params_


# In[25]:

# One row per model, one column per covariate.
results = pd.DataFrame(results).T


# In[26]:

results


# In[27]:

pcf = PiecewiseExponentialRegressionFitter(penalizer=1000., breakpoints=breakpoints[0])
pcf.fit(df, T_col, E_col)

# NOTE(review): newer lifelines releases renamed `plot_covariate_groups` to
# `plot_partial_effects_on_outcome`; prefer the new name when it exists and
# fall back to the old one so the script runs on either version.
plot_partial_effects = getattr(pcf, 'plot_partial_effects_on_outcome', None)
if plot_partial_effects is None:
    plot_partial_effects = pcf.plot_covariate_groups

plot_partial_effects(
    ['is_male', 'is_white'],
    [[1, 1], [1, 0], [0, 1], [0, 0]],
    figsize=(10, 6),
)