#!/usr/bin/env python # coding: utf-8 # # Part 1 # # - start with fitting data to a parametric model # # - so we need to create the log-likehood # - let's create some fake data first. # In[1]: get_ipython().run_line_magic('matplotlib', 'inline') import numpy as np from matplotlib import pyplot as plt T = (np.random.exponential(size=1000)/1.5) ** 2.3 plt.hist(T, bins=50); # Let's use the Weibull parametric model: https://en.wikipedia.org/wiki/Weibull_distribution # In[2]: def pdf(params, t): lambda_, rho_ = params # I'm not going to make you type this out - it's annoying and error prone. return rho_ / lambda_ * (t / lambda_) ** (rho_ - 1) * np.exp(-(t/lambda_) ** rho_) # okay, but we actually need the _log_ of the pdf def log_pdf(params, t): lambda_, rho_ = params # I'm not going to make you type this out - it's annoying and error prone. return np.log(rho_) - np.log(lambda_) + (rho_ - 1) * (np.log(t) - np.log(lambda_)) - (t/lambda_) ** rho_ # now we can define the log likehood def log_likelihood(params, t): return np.sum(log_pdf(params, t)) # ## `scipy.optimize.minimize` # In[3]: from scipy.optimize import minimize results = minimize(log_likelihood, x0 = np.array([1.0, 1.0]), # some initial guess of the parameters. method=None, args=(T, )) print(results) # In[4]: def log_likelihood(params, t): return -np.sum(log_pdf(params, t)) # In[5]: results = minimize(log_likelihood, x0 = np.array([1.0, 1.0]), # some initial guess of the parameters. method=None, args=(T, )) print(results) # In[6]: # weibull parameters must be greater than 0! # we can "nudge" the minimizer to understand this using the bounds argument results = minimize(log_likelihood, x0 = np.array([1.0, 1.0]), method=None, args=(T, ), bounds=((0, None), (0, None))) print(results) # ## Takeaway: `minimize` is a very flexible function for small-mid size parameter optimization # ### A few problems though: # # 1. You are stuck with only using known parametric models that are easy to implement. # 2. Not very fast minimization routines # # Let's move to Part 2.