# Source code for mixem.em

import numpy as np

import mixem


def em(data, distributions, initial_weights=None, max_iterations=100, tol=1e-15, tol_iters=10, progress_callback=mixem.simple_progress):
    """Fit a mixture of probability distributions using the Expectation-Maximization (EM) algorithm.

    :param data: The data to fit the distributions for. Can be an array-like or a :class:`numpy.ndarray`
    :type data: numpy.ndarray

    :param distributions: The list of distributions to fit to the data.
    :type distributions: list of :class:`mixem.distribution.Distribution`

    :param initial_weights: Initial weights for the distributions. Must be the same size as distributions.
        If None, will use uniform initial weights for all distributions.
    :type initial_weights: numpy.ndarray

    :param max_iterations: The maximum number of iterations to compute for.
    :type max_iterations: int

    :param tol: The minimum relative increase in log-likelihood after tol_iters iterations
    :type tol: float

    :param tol_iters: The number of iterations to go back in comparing log-likelihood change
    :type tol_iters: int

    :param progress_callback: A function to call to report progress after every iteration.
    :type progress_callback: function or None

    :rtype: tuple (weights, distributions, log_likelihood)
    """
    n_distr = len(distributions)
    n_data = data.shape[0]

    if initial_weights is not None:
        weight = np.array(initial_weights)
    else:
        # Uniform relative weights. Only the relative scale matters here,
        # because responsibilities are row-normalized in the E-step below.
        weight = np.ones((n_distr,))

    # History of the last `tol_iters` log-likelihood values, newest at index 0.
    last_ll = np.zeros((tol_iters,))

    resp = np.empty((n_data, n_distr))
    log_density = np.empty((n_data, n_distr))

    iteration = 0
    while True:
        # E-step #######

        # compute per-component log-densities for every data point
        for d in range(n_distr):
            log_density[:, d] = distributions[d].log_density(data)

        # Normalize responsibilities so they sum to one per data point.
        # Shift each row by its maximum before exponentiating (log-sum-exp
        # trick): without the shift, rows where every log-density is very
        # negative underflow exp() to all zeros, and the division below
        # yields NaN. The shift cancels exactly in the normalization, so the
        # responsibilities are mathematically unchanged.
        shift = np.max(log_density, axis=1, keepdims=True)
        resp = weight[np.newaxis, :] * np.exp(log_density - shift)
        resp /= np.sum(resp, axis=1)[:, np.newaxis]

        # NOTE(review): this is the responsibility-weighted sum of
        # log-densities, not the full mixture log-likelihood; it is kept
        # as-is because both the convergence test and the returned value
        # depend on this exact quantity.
        log_likelihood = np.sum(resp * log_density)

        # M-step #######

        # re-estimate each distribution's parameters from weighted data
        for d in range(n_distr):
            distributions[d].estimate_parameters(data, resp[:, d])

        weight = np.mean(resp, axis=0)

        if progress_callback:
            progress_callback(iteration, weight, distributions, log_likelihood)

        # Convergence check #######

        if np.isnan(log_likelihood):
            last_ll[0] = log_likelihood
            break

        # Compare against the value from `tol_iters` iterations ago
        # (the oldest entry in the history buffer).
        if iteration >= tol_iters and (last_ll[-1] - log_likelihood) / last_ll[-1] <= tol:
            last_ll[0] = log_likelihood
            break

        if iteration >= max_iterations:
            last_ll[0] = log_likelihood
            break

        # store value of current iteration in last_ll[0]
        # and shift older values to the right
        last_ll[1:] = last_ll[:-1]
        last_ll[0] = log_likelihood

        iteration += 1

    return weight, distributions, last_ll[0]