#!/usr/bin/env python
# coding: utf-8

# ### Optimizing in an unbounded space
#
# Sometimes we have inclusion bounds, like $[0, \infty)$, or $[0, 1]$. An option
# is to lean on `optimize.minimize` to enforce these bounds, but alternatively we
# can express the problem in a domain that has no constraint, and then project
# back to the original domain. We'll use an example to make this concrete.
#
# 1. To make a parameter cover $[0, \infty)$:
#    1. Use `bounds = (0, None)`.
#    2. Use a parameter over $(-\infty, \infty)$, and put it into the exponential
#       function, which has range $(0, \infty)$.
# 2. To make a parameter cover $[0, 1]$:
#    1. Use `bounds = (0, 1)`.
#    2. Use a parameter over $(-\infty, \infty)$, and put it into the expit
#       function, which has range $(0, 1)$.
#
# $$ \text{expit}(x) = \frac{1}{1+\exp(-x)} $$
#
# The B. cases typically have better convergence, and can take advantage of a
# larger family of optimization algorithms.

# In[1]:


from autograd import numpy as np
from autograd import elementwise_grad, value_and_grad, hessian
import pandas as pd  # was missing: `pd.read_csv` below raised NameError
from scipy.optimize import minimize

df = pd.read_csv("../churn_data.csv")
T = df['T'].values
E = df['E'].values
breakpoints = np.array([28, 33, 58, 63, 88, 93, 117, 122, 148, 153])


# In[2]:


# Same model as last time; note the `bounds` argument is inactive because we
# optimize over log-parameters, which live in an unconstrained domain.

def cumulative_hazard(log_params, times):
    """Piecewise-constant cumulative hazard H(t) evaluated at `times`.

    `log_params` are the per-interval log-hazards; exponentiating them maps
    the unconstrained reals onto (0, inf), so `minimize` needs no bounds.
    """
    # NumPy trick to get piecewise exposure: clip every time against every
    # breakpoint, then take row-wise differences so each column holds the
    # time spent inside the corresponding interval.
    times = np.atleast_1d(times)
    n = times.shape[0]
    times = times.reshape((n, 1))
    M = np.minimum(np.tile(breakpoints, (n, 1)), times)
    M = np.hstack([M[:, tuple([0])], np.diff(M, axis=1)])
    # Diff here: this used to be np.dot(M, params).
    return np.dot(M, np.exp(log_params))


hazard = elementwise_grad(cumulative_hazard, argnum=1)


def survival_function(params, t):
    """S(t) = exp(-H(t))."""
    return np.exp(-cumulative_hazard(params, t))


def log_hazard(params, t):
    # Clip away zeros so np.log never sees h(t) == 0 (e.g. before the
    # first breakpoint).
    return np.log(np.clip(hazard(params, t), 1e-25, np.inf))


def log_likelihood(params, t, e):
    """Right-censoring-aware log-likelihood: sum_i e_i*log h(t_i) - H(t_i)."""
    return np.sum(e * log_hazard(params, t)) - np.sum(cumulative_hazard(params, t))


def negative_log_likelihood(params, t, e):
    return -log_likelihood(params, t, e)


results = minimize(
    value_and_grad(negative_log_likelihood),
    x0=np.zeros(len(breakpoints)),
    method=None,
    args=(T, E),
    jac=True,
    bounds=None,
)

# Project the unconstrained estimates back to the original (positive) domain.
log_estimates_ = results.x
estimates_ = np.exp(log_estimates_)
print(estimates_)
# See previous Part 7 to confirm these are "really close enough".


# Congratulations! You've successfully implemented a pretty good approximation
# to the Python package [lifelines](https://lifelines.readthedocs.io/)!
#
# Let's move onto Part 9 (slides), which is off this track: interpreting output.

# In[ ]: