#!/usr/bin/env python
# coding: utf-8

# Let's try generalizing our model now. For survival analysis, it is very
# useful to think about cumulative hazards - that is, modelling the
# cumulative hazard is typically easiest.

# NOTE(fix): in the original notebook the data cell ran *before* the cell
# importing numpy, which raises NameError on a clean top-to-bottom run.
# All imports are hoisted here so the script executes in order.
from autograd import elementwise_grad, value_and_grad
from autograd import numpy as np
from scipy.optimize import minimize

# Note: we are shifting the data 4 units to the right.
T = (np.random.exponential(size=1000) / 1.5) ** 2.3 + 4
# E is the event-observed indicator: 1 = death observed, 0 = censored.
E = np.random.binomial(1, 0.95, size=1000)


def cumulative_hazard(params, t):
    """Cumulative hazard H(t) of a shifted (three-parameter) Weibull model.

    params: (lambda_, rho_, mu) = scale, shape, location (shift).
    Only valid for t > mu: for t < mu the base is negative and a
    non-integer power yields nan, which the optimizer bounds below guard
    against.
    """
    lambda_, rho_, mu = params
    return ((t - mu) / lambda_) ** rho_


def log_hazard(params, t):
    """Illustrative placeholder: deriving log h(t) by hand
    could get arbitrarily complicated - so we won't."""
    lambda_, rho_, mu = params
    return ...


# Instead, let autograd differentiate the cumulative hazard for us:
# h(t) = dH/dt, so only H(t) ever needs to be specified.
hazard = elementwise_grad(cumulative_hazard, argnum=1)


def log_hazard(params, t):
    """Log of the autograd-derived hazard function."""
    return np.log(hazard(params, t))


# Smoke test of the autograd-derived hazard.
# NOTE(fix): times must be floats - autograd differentiates with respect
# to t, and an integer array breaks/poisons the differentiation.
log_hazard(np.array([2.0, 2.0, 0.0]), np.array([1.0, 2.0, 3.0]))


# same as previous
def log_likelihood(params, t, e):
    """Right-censored log-likelihood: sum(e * log h(t)) - sum(H(t))."""
    return np.sum(e * log_hazard(params, t)) - np.sum(cumulative_hazard(params, t))


def negative_log_likelihood(params, t, e):
    """Objective for scipy.optimize.minimize (which minimizes)."""
    return -log_likelihood(params, t, e)


results = minimize(
    value_and_grad(negative_log_likelihood),
    x0=np.array([1.0, 1.0, 0.0]),
    method=None,
    args=(T, E),
    jac=True,
    # lambda_ and rho_ must be positive; mu must stay strictly below the
    # smallest observed time or H(t) becomes nan.
    bounds=((0.00001, None), (0.00001, None), (None, np.min(T) - 0.001)),
)
print(results)

# So long as you are good at "parameter accounting", then you could create
# arbitrarily complicated survival models simply by specifying the hazard.
#
# Extending to including covariates is straightforward too, with some
# modifications to the code. Here's a simple model of the cumulative hazard
# (the equation below matches the code: lambda is log-linear in x):
#
# $$H(t; x) = \left(\frac{t}{\lambda(x)}\right)^\rho $$
# $$ \lambda(x) = \exp(\beta_1 x_1 + \beta_2 x_2) $$

from scipy.stats import weibull_min

# create some regression data.
N = 10000
X = 0.1 * np.random.normal(size=(N, 2))
# True model: lambda(x) = exp(2*x1 - 2*x2), times drawn from a unit Weibull
# scaled by lambda(x); E is the event-observed indicator (1% censoring).
T = np.exp(2 * X[:, 0] + -2 * X[:, 1]) * weibull_min.rvs(1, scale=1, loc=0, size=N)
E = np.random.binomial(1, 0.99, size=N)


def cumulative_hazard(params, t, X):
    """Cumulative hazard with a log-linear covariate scale.

    params layout: the first two entries are the regression coefficients
    on log(lambda); the last entry is rho_, the Weibull shape.
    lambda(x) = exp(X . beta) guarantees a positive scale, so the beta
    coefficients themselves need no bounds.
    """
    log_lambda_coefs_, rho_ = params[:2], params[-1]
    lambda_ = np.exp(np.dot(X, log_lambda_coefs_))
    return (t / lambda_) ** rho_


# These functions are almost identical to the ones above,
# except they take the additional covariate argument X.
hazard = elementwise_grad(cumulative_hazard, argnum=1)


def log_hazard(params, t, X):
    """Log of the autograd-derived hazard, given covariates X."""
    return np.log(hazard(params, t, X))


def log_likelihood(params, t, e, X):
    """Right-censored log-likelihood with covariates."""
    return np.sum(e * log_hazard(params, t, X)) - np.sum(cumulative_hazard(params, t, X))


def negative_log_likelihood(params, t, e, X):
    """Objective for scipy.optimize.minimize (which minimizes)."""
    return -log_likelihood(params, t, e, X)


results = minimize(
    value_and_grad(negative_log_likelihood),
    x0=np.array([0.0, 0.0, 1.0]),
    method=None,
    args=(T, E, X),
    jac=True,
    # The betas are unbounded (lambda is exp-transformed); only the
    # shape rho_ must stay positive.
    bounds=((None, None), (None, None), (0.00001, None)),
)
print(results)

# Great! Let's move onto part 5, how to estimate the _variances_ of the
# parameters.