We'll start by fitting data to a parametric model.
To do that, we need to create the log-likelihood, which we will optimize over.
Let's create some fake data first.
%matplotlib inline
import numpy as np
from matplotlib import pyplot as plt
# Fake data: power-transformed exponential draws give a Weibull-looking sample.
T = np.power(np.random.exponential(size=1000) / 1.5, 2.3)
plt.hist(T, bins=50);
Let's use the Weibull parametric model: https://en.wikipedia.org/wiki/Weibull_distribution
def pdf(params, t):
    """Weibull probability density evaluated at time(s) t.

    params is a pair (lambda_, rho_): scale and shape, both assumed > 0.
    Works elementwise when t is a numpy array.
    """
    scale, shape = params
    z = t / scale
    # f(t) = (rho/lambda) * (t/lambda)^(rho-1) * exp(-(t/lambda)^rho)
    return (shape / scale) * z ** (shape - 1) * np.exp(-(z ** shape))
# okay, but we actually need the _log_ of the pdf
def log_pdf(params, t):
    """Log of the Weibull pdf — written directly for numerical stability
    instead of taking np.log(pdf(params, t)).
    """
    scale, shape = params
    log_z = np.log(t) - np.log(scale)
    # log f(t) = log(rho) - log(lambda) + (rho-1)*log(t/lambda) - (t/lambda)^rho
    return np.log(shape) - np.log(scale) + (shape - 1) * log_z - (t / scale) ** shape
# now we can define the log likelihood
def log_likelihood(params, t):
    """Total log-likelihood of the sample t under Weibull parameters."""
    per_observation = log_pdf(params, t)
    return np.sum(per_observation)
scipy.optimize.minimize
from scipy.optimize import minimize
# First attempt. NOTE: minimize() *minimizes*, but we are handing it the raw
# (positive) log-likelihood — it will drive the likelihood DOWN, not up.
# This is fixed below by negating inside log_likelihood.
results = minimize(log_likelihood,
                   x0=np.array([1.0, 1.0]),  # initial guess for (lambda_, rho_)
                   method=None,              # let scipy choose a solver
                   args=(T, ))
print(results)
def log_likelihood(params, t):
    """NEGATIVE total log-likelihood.

    scipy.optimize.minimize minimizes its objective, so to *maximize* the
    likelihood we return the negated sum of log densities.
    """
    return -np.sum(log_pdf(params, t))
# Re-run the optimization with the (negated) objective defined above.
results = minimize(
    log_likelihood,
    x0=np.array([1.0, 1.0]),  # some initial guess of the parameters
    method=None,
    args=(T, ),
)
print(results)
# weibull parameters must be greater than 0!
# we can "nudge" the minimizer to understand this using the bounds argument
# Constrained fit: both Weibull parameters must be strictly positive, so we
# give the solver box constraints. (lower, upper) per parameter; None means
# unbounded above. A tiny positive lower bound keeps the solver away from 0,
# where the log-pdf is undefined.
results = minimize(
    log_likelihood,
    x0=np.array([1.0, 1.0]),
    method=None,
    args=(T, ),
    bounds=((1e-8, None), (1e-8, None)),
)
print(results)
minimize is a very flexible function for small-to-mid size parameter optimization.
Let's move to Part 2.