Fitting ‘Old Faithful’ Data using mix’EM
import pandas as pd
import numpy as np
from bokeh.plotting import figure, show, vplot, hplot
from bokeh.io import output_notebook; output_notebook()
import mixem
For this example, we’ll fit the classic Old Faithful data set of geyser eruption durations and waiting times between eruptions using a mixture of two bivariate normal distributions. We have the data prepared in a CSV-formatted file:
# Load the Old Faithful measurements (eruption duration, waiting time) from disk.
data = pd.read_csv("faithful.csv")

# Peek at the first few rows to confirm the two columns arrived as expected.
print(data.head())
eruptions waiting
0 3.600 79
1 1.800 54
2 3.333 74
3 2.283 62
4 4.533 85
To see that there are two distinct clusters in the original data, we create a scatterplot of the data:
# Scatter the raw measurements; two clusters should be visible by eye.
fig = figure(
    title="Old Faithful Data",
    x_axis_label="Eruption duration (minutes)",
    y_axis_label="Waiting time (minutes)",
)
fig.scatter(x=data.eruptions, y=data.waiting)
show(fig);
Now it’s time to fit the data using
mixem.em:
We create a numpy array of shape \((N, 2)\) (where N is the number
of data points) from the pandas.DataFrame
specify a model of two
mixem.distribution.MultivariateNormalDistributions
using rough initial parameter estimates of
\(\mu_1 = (2, 50)^T, \sigma_1=I_2\),
\(\mu_2 = (4, 80)^T, \sigma_2=I_2\):
# Two bivariate normal components with rough initial guesses:
# mu_1 = (2, 50), mu_2 = (4, 80), each starting from an identity covariance.
initial_components = [
    mixem.distribution.MultivariateNormalDistribution(np.array((2, 50)), np.identity(2)),
    mixem.distribution.MultivariateNormalDistribution(np.array((4, 80)), np.identity(2)),
]

# Run EM on the (N, 2) data array; returns mixing weights, fitted
# distributions, and the final log-likelihood.
weights, distributions, ll = mixem.em(np.array(data), initial_components)
iteration 0 (log-likelihood=-5.97474e+03): p(x|Φ) = 0.354*MultiNorm[μ=[ 2.064 54.316], σ=[[ 0.121 0.667], [ 0.667 30.598]]] + 0.646*MultiNorm[μ=[ 4.268 79.987], σ=[[ 0.222 1.154], [ 1.154 34.977]]]
iteration 1 (log-likelihood=-9.64626e+02): p(x|Φ) = 0.357*MultiNorm[μ=[ 2.039 54.497], σ=[[ 0.071 0.458], [ 0.458 33.81 ]]] + 0.643*MultiNorm[μ=[ 4.291 79.985], σ=[[ 0.169 0.922], [ 0.922 35.802]]]
iteration 2 (log-likelihood=-9.53829e+02): p(x|Φ) = 0.356*MultiNorm[μ=[ 2.037 54.483], σ=[[ 0.07 0.439], [ 0.439 33.725]]] + 0.644*MultiNorm[μ=[ 4.29 79.973], σ=[[ 0.169 0.934], [ 0.934 35.975]]]
iteration 3 (log-likelihood=-9.53864e+02): p(x|Φ) = 0.356*MultiNorm[μ=[ 2.036 54.48 ], σ=[[ 0.069 0.436], [ 0.436 33.703]]] + 0.644*MultiNorm[μ=[ 4.29 79.969], σ=[[ 0.17 0.939], [ 0.939 36.029]]]
iteration 4 (log-likelihood=-9.53880e+02): p(x|Φ) = 0.356*MultiNorm[μ=[ 2.036 54.479], σ=[[ 0.069 0.435], [ 0.435 33.699]]] + 0.644*MultiNorm[μ=[ 4.29 79.968], σ=[[ 0.17 0.94 ], [ 0.94 36.042]]]
iteration 5 (log-likelihood=-9.53884e+02): p(x|Φ) = 0.356*MultiNorm[μ=[ 2.036 54.479], σ=[[ 0.069 0.435], [ 0.435 33.698]]] + 0.644*MultiNorm[μ=[ 4.29 79.968], σ=[[ 0.17 0.941], [ 0.941 36.045]]]
iteration 6 (log-likelihood=-9.53885e+02): p(x|Φ) = 0.356*MultiNorm[μ=[ 2.036 54.479], σ=[[ 0.069 0.435], [ 0.435 33.697]]] + 0.644*MultiNorm[μ=[ 4.29 79.968], σ=[[ 0.17 0.941], [ 0.941 36.046]]]
iteration 7 (log-likelihood=-9.53885e+02): p(x|Φ) = 0.356*MultiNorm[μ=[ 2.036 54.479], σ=[[ 0.069 0.435], [ 0.435 33.697]]] + 0.644*MultiNorm[μ=[ 4.29 79.968], σ=[[ 0.17 0.941], [ 0.941 36.046]]]
iteration 8 (log-likelihood=-9.53885e+02): p(x|Φ) = 0.356*MultiNorm[μ=[ 2.036 54.479], σ=[[ 0.069 0.435], [ 0.435 33.697]]] + 0.644*MultiNorm[μ=[ 4.29 79.968], σ=[[ 0.17 0.941], [ 0.941 36.046]]]
iteration 9 (log-likelihood=-9.53885e+02): p(x|Φ) = 0.356*MultiNorm[μ=[ 2.036 54.479], σ=[[ 0.069 0.435], [ 0.435 33.697]]] + 0.644*MultiNorm[μ=[ 4.29 79.968], σ=[[ 0.17 0.941], [ 0.941 36.046]]]
iteration 10 (log-likelihood=-9.53885e+02): p(x|Φ) = 0.356*MultiNorm[μ=[ 2.036 54.479], σ=[[ 0.069 0.435], [ 0.435 33.697]]] + 0.644*MultiNorm[μ=[ 4.29 79.968], σ=[[ 0.17 0.941], [ 0.941 36.046]]]
iteration 11 (log-likelihood=-9.53885e+02): p(x|Φ) = 0.356*MultiNorm[μ=[ 2.036 54.479], σ=[[ 0.069 0.435], [ 0.435 33.697]]] + 0.644*MultiNorm[μ=[ 4.29 79.968], σ=[[ 0.17 0.941], [ 0.941 36.046]]]
iteration 12 (log-likelihood=-9.53885e+02): p(x|Φ) = 0.356*MultiNorm[μ=[ 2.036 54.479], σ=[[ 0.069 0.435], [ 0.435 33.697]]] + 0.644*MultiNorm[μ=[ 4.29 79.968], σ=[[ 0.17 0.941], [ 0.941 36.046]]]
mix’EM will return a tuple (weights, distributions, log_likelihood)
with mixing weights, distributions with parameters and the total
log-likelihood of the model, respectively.
We can use this model to compute probabilities for new data points by
using
mixem.probability.
Here, we create a grid of N * N
values and compute probabilities for
all grid points. We then map the result onto a bokeh image:
N = 100

# Evenly spaced sample positions spanning the observed range on each axis.
grid_x = np.linspace(np.min(data.eruptions), np.max(data.eruptions), num=N)
grid_y = np.linspace(np.min(data.waiting), np.max(data.waiting), num=N)
mesh_x, mesh_y = np.meshgrid(grid_x, grid_y, indexing="ij")

# Stack the mesh into an ((N*N), 2) array of (eruption, waiting) coordinates.
coords = np.column_stack((mesh_x.ravel(), mesh_y.ravel()))

# Evaluate the fitted mixture density at every grid point and
# reshape back to the N x N grid for plotting.
p = mixem.probability(coords, weights, distributions).reshape((N, N))
# Overlay the fitted density image with the raw observations.
fig2 = figure(
    title="Fitted Old Faithful Data",
    x_axis_label="Eruption duration (minutes)",
    y_axis_label="Waiting time (minutes)",
)

# NOTE: bokeh's image glyph expects the matrix transposed relative to
# our (x, y)-indexed probability grid.
fig2.image(
    image=[p.T],
    x=np.min(data.eruptions),
    y=np.min(data.waiting),
    dw=np.ptp(data.eruptions),
    dh=np.ptp(data.waiting),
    palette="Spectral11",
)

# Draw the data points on top of the density image.
fig2.scatter(x=data.eruptions, y=data.waiting, color="#000000")
show(fig2);