import numpy as np
import matplotlib.pyplot as plt


def params(X):
    """
    Calculates the mean vector and covariance matrix for the given data.

    Arguments
    ---------
    X: mxn matrix with m samples drawn from an unknown multivariate
       Gaussian distribution.

    Returns
    -------
    (mu, Sigma)
    mu: n sized vector with the means of the n independent variables.
    Sigma: nxn covariance matrix of the n independent variables.
    """
    return (np.mean(X, 0), np.cov(X.T))


def density(X, mu, sigma):
    """
    Calculates the probability density of the data according to an estimated
    multivariate Gaussian distribution.

    Arguments
    ---------
    X: mxn matrix with m samples drawn from an unknown multivariate
       Gaussian distribution.
    mu: n sized vector with the means of the n independent variables.
    sigma: nxn covariance matrix of the n independent variables.

    Returns
    -------
    m sized vector with the densities of the data.
    """
    assert X.shape[0] > X.shape[1], \
        "Degenerate matrix, m must be greater than n."
    detsigma = np.linalg.det(sigma)
    n = len(mu)
    # Normalization constant of the multivariate Gaussian.
    a = 1 / (np.power(2 * np.pi, n / 2.) * np.sqrt(detsigma))
    invsigma = np.linalg.inv(sigma)
    # Exponential of minus half the squared Mahalanobis distance, per sample.
    b = np.array([np.exp(-0.5 * np.dot(np.dot((x - mu), invsigma), (x - mu).T))
                  for x in X])
    return a * b


class Classification(object):
    """
    Model data as a mixture of multivariate Gaussian distributions,
    one per class.
    """
    def fit(self, X, y):
        self.classes = np.unique(y)
        self.mus, self.sigmas = dict(), dict()
        for i in self.classes:
            self.mus[i], self.sigmas[i] = params(X[y == i])

    def predict_proba(self, X):
        return np.array([density(X, self.mus[i], self.sigmas[i])
                         for i in self.classes]).T

    def predict(self, X):
        Y = self.predict_proba(X)
        return self.classes[np.argmax(Y, 1)]


class OutlierDetection(object):
    """
    Model data as a single multivariate Gaussian distribution and score
    samples by their density under it.
    """
    def fit(self, X):
        self.mu, self.sigma = params(X)

    def predict_proba(self, X):
        return density(X, self.mu, self.sigma)


if __name__ == '__main__':
    # Create synthetic data: three 2D Gaussian clusters, 100 samples each.
    mux = np.array([2, 2])
    sigmax = np.array([[3, 0], [0, 3]])
    X = np.random.multivariate_normal(mux, sigmax, 100)
    yx = 1 * np.ones(X.shape[0])

    muz = np.array([10, 10])
    sigmaz = np.array([[1, .8], [.8, 1]])
    Z = np.random.multivariate_normal(muz, sigmaz, 100)
    yz = 2 * np.ones(Z.shape[0])

    muw = np.array([15, 0])
    sigmaw = np.array([[2, 0], [0, 1]])
    W = np.random.multivariate_normal(muw, sigmaw, 100)
    yw = 3 * np.ones(W.shape[0])

    # Model data
    c = Classification()
    X = np.vstack((X, Z, W))
    y = np.hstack((yx, yz, yw))
    c.fit(X, y)

    # Visualize data and model
    fig = plt.figure()
    ax1 = fig.add_subplot(211)
    ax1.plot(X[y == 1, 0], X[y == 1, 1], 'rx')
    ax1.plot(Z[:, 0], Z[:, 1], 'bx')
    ax1.plot(W[:, 0], W[:, 1], 'gx')
    ax2 = fig.add_subplot(212)
    # Evaluate the per-class densities on a grid and show the largest one.
    xx, yy = np.meshgrid(np.linspace(-5, 20, 100), np.linspace(-5, 15, 100))
    p = c.predict_proba(np.vstack((xx.ravel(), yy.ravel())).T)
    z = np.reshape(np.max(p, 1), (xx.shape[0], xx.shape[1]))
    ax2.imshow(z, extent=[np.min(xx), np.max(xx), np.min(yy), np.max(yy)],
               origin='lower')
    plt.show()
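
    # --- Illustrative extra, not part of the original gist ---
    # A minimal sketch of how the OutlierDetection class above could be used:
    # fit it on the first cluster only and flag points whose density under
    # that single Gaussian falls below a threshold. `epsilon` is an assumed,
    # hand-picked value for this synthetic data, not a tuned one.
    od = OutlierDetection()
    od.fit(X[y == 1])
    epsilon = 1e-3
    densities = od.predict_proba(X)
    print("Samples flagged as outliers w.r.t. cluster 1:",
          int(np.sum(densities < epsilon)))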