
@diogojc
Created January 1, 2012 22:51

multivariateGaussian.py
import numpy as np
import matplotlib.pyplot as plt


def params(X):
    """
    Calculates the mean vector and covariance matrix for the given data.

    Arguments
    ---------
    X: mxn matrix with m samples drawn from an unknown multivariate Gaussian
       distribution.

    Returns
    -------
    (mu, Sigma)
    mu: n-sized vector with the means of the n variables.
    Sigma: nxn covariance matrix of the n variables.
    """
    return np.mean(X, 0), np.cov(X.T)
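
# Illustrative usage, not part of the original gist: for an (m, n) data matrix,
# params returns a length-n mean vector and an (n, n) covariance matrix, e.g.
#   mu, Sigma = params(np.random.randn(500, 3))
#   # mu.shape == (3,), Sigma.shape == (3, 3)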


def density(X, mu, sigma):
    """
    Calculates the probability density of the data under an estimated
    multivariate Gaussian distribution.

    Arguments
    ---------
    X: mxn matrix with m samples drawn from an unknown multivariate Gaussian
       distribution.
    mu: n-sized vector with the means of the n variables.
    sigma: nxn covariance matrix of the n variables.

    Returns
    -------
    m-sized vector with the density of each sample.
    """
    assert X.shape[0] > X.shape[1], \
        "Degenerate matrix, m must be greater than n."
    n = len(mu)
    detsigma = np.linalg.det(sigma)
    invsigma = np.linalg.inv(sigma)
    # Normalising constant (2*pi)^(-n/2) * |sigma|^(-1/2)
    a = 1 / (np.power(2 * np.pi, n / 2.) * np.sqrt(detsigma))
    # Quadratic form -0.5 * (x - mu)^T sigma^-1 (x - mu), one value per sample
    b = np.array([np.exp(-0.5 * np.dot(np.dot(x - mu, invsigma), (x - mu).T))
                  for x in X])
    return a * b
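
# The function above evaluates the multivariate normal PDF
#   p(x) = (2*pi)^(-n/2) * |Sigma|^(-1/2) * exp(-0.5 * (x - mu)^T Sigma^-1 (x - mu)).
# Optional sanity-check sketch (assumes SciPy is installed; nothing else in
# this gist needs it):
#   from scipy.stats import multivariate_normal
#   assert np.allclose(density(X, mu, sigma),
#                      multivariate_normal(mu, sigma).pdf(X))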


class Classification(object):
    """
    Model the data as a mixture of multivariate Gaussian distributions,
    one fitted per class.
    """
    def fit(self, X, y):
        self.classes = np.unique(y)
        self.mus, self.sigmas = dict(), dict()
        for i in self.classes:
            self.mus[i], self.sigmas[i] = params(X[y == i])

    def predict_proba(self, X):
        # Class-conditional densities (one column per class), not
        # normalised posterior probabilities.
        return np.array([density(X, self.mus[i], self.sigmas[i])
                         for i in self.classes]).T

    def predict(self, X):
        Y = self.predict_proba(X)
        return self.classes[np.argmax(Y, 1)]
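
# Illustrative usage, as a sketch (Xtr, ytr and Xnew are placeholder names):
#   clf = Classification()
#   clf.fit(Xtr, ytr)
#   labels = clf.predict(Xnew)
# predict_proba returns one class-conditional density per class and sample;
# predict picks, for each sample, the class whose density is largest.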


class OutlierDetection(object):
    """
    Model the data with a single multivariate Gaussian distribution and
    score samples by their density under it.
    """
    def fit(self, X):
        self.mu, self.sigma = params(X)

    def predict_proba(self, X):
        return density(X, self.mu, self.sigma)
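
# Typical use, as a sketch (the threshold epsilon is the caller's choice and is
# not defined in this gist): flag samples whose density falls below epsilon.
#   od = OutlierDetection()
#   od.fit(Xtrain)
#   flagged = Xtest[od.predict_proba(Xtest) < epsilon]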


if __name__ == '__main__':
    # Create synthetic data: three 2D Gaussian clusters with 100 samples each
    mux = np.array([2, 2])
    sigmax = np.array([[3, 0], [0, 3]])
    X = np.random.multivariate_normal(mux, sigmax, 100)
    yx = 1 * np.ones(X.shape[0])

    muz = np.array([10, 10])
    sigmaz = np.array([[1, .8], [.8, 1]])
    Z = np.random.multivariate_normal(muz, sigmaz, 100)
    yz = 2 * np.ones(Z.shape[0])

    muw = np.array([15, 0])
    sigmaw = np.array([[2, 0], [0, 1]])
    W = np.random.multivariate_normal(muw, sigmaw, 100)
    yw = 3 * np.ones(W.shape[0])

    # Model the data (stacked under separate names so the original clusters
    # can still be plotted individually below)
    data = np.vstack((X, Z, W))
    labels = np.hstack((yx, yz, yw))
    c = Classification()
    c.fit(data, labels)

    # Visualize the data and the fitted model
    fig = plt.figure()
    ax1 = fig.add_subplot(211)
    ax1.plot(X[:, 0], X[:, 1], 'rx')
    ax1.plot(Z[:, 0], Z[:, 1], 'bx')
    ax1.plot(W[:, 0], W[:, 1], 'gx')
    ax2 = fig.add_subplot(212)
    x, y = np.meshgrid(np.linspace(-5, 20, 100), np.linspace(-5, 15, 100))
    p = c.predict_proba(np.vstack((x.ravel(), y.ravel())).T)
    z = np.reshape(np.max(p, 1), x.shape)
    ax2.imshow(z, extent=[np.min(x), np.max(x), np.min(y), np.max(y)],
               origin='lower')

    plt.show()