import numpy as np
import matplotlib.pyplot as plt


def params(X):
    """
    Calculates the mean vector and covariance matrix for the given data.

    Arguments
    ---------
    X: mxn matrix with m samples drawn from an unknown multivariate
       Gaussian distribution.

    Returns
    -------
    (mu, Sigma)
    mu: n sized vector with the means of the n independent variables.
    Sigma: nxn covariance matrix of the n independent variables.
    """
    return (np.mean(X, 0), np.cov(X.T))


def density(X, mu, sigma):
    """
    Calculates the probability density of the data according to an estimated
    multivariate Gaussian distribution.

    Arguments
    ---------
    X: mxn matrix with m samples drawn from an unknown multivariate
       Gaussian distribution.
    mu: n sized vector with the means of the n independent variables.
    sigma: nxn covariance matrix of the n independent variables.

    Returns
    -------
    m sized vector with the densities of the data.
    """
    assert X.shape[0] > X.shape[1], \
        "Degenerate matrix, m must be greater than n."
    detsigma = np.linalg.det(sigma)
    n = len(mu)
    # Normalization constant of the multivariate Gaussian.
    a = 1 / (np.power(2 * np.pi, n / 2.) * np.sqrt(detsigma))
    invsigma = np.linalg.inv(sigma)
    # Exponential of minus half the squared Mahalanobis distance, per sample.
    b = np.array([np.exp(-0.5 * np.dot(np.dot((x - mu), invsigma), (x - mu).T))
                  for x in X])
    return a * b


class Classification(object):
    """
    Model data as a mixture of multivariate Gaussian distributions,
    one per class.
    """
    def fit(self, X, y):
        self.classes = np.unique(y)
        self.mus, self.sigmas = dict(), dict()
        for i in self.classes:
            self.mus[i], self.sigmas[i] = params(X[y == i])

    def predict_proba(self, X):
        return np.array([density(X, self.mus[i], self.sigmas[i])
                         for i in self.classes]).T

    def predict(self, X):
        Y = self.predict_proba(X)
        return self.classes[np.argmax(Y, 1)]


class OutlierDetection(object):
    """
    Model data as a single multivariate Gaussian distribution and score
    samples by their density under it.
    """
    def fit(self, X):
        self.mu, self.sigma = params(X)

    def predict_proba(self, X):
        return density(X, self.mu, self.sigma)


if __name__ == '__main__':
    # Create synthetic data: three 2D Gaussian clusters, 100 samples each.
    mux = np.array([2, 2])
    sigmax = np.array([[3, 0], [0, 3]])
    X = np.random.multivariate_normal(mux, sigmax, 100)
    yx = 1 * np.ones(X.shape[0])

    muz = np.array([10, 10])
    sigmaz = np.array([[1, .8], [.8, 1]])
    Z = np.random.multivariate_normal(muz, sigmaz, 100)
    yz = 2 * np.ones(Z.shape[0])

    muw = np.array([15, 0])
    sigmaw = np.array([[2, 0], [0, 1]])
    W = np.random.multivariate_normal(muw, sigmaw, 100)
    yw = 3 * np.ones(W.shape[0])

    # Model data
    c = Classification()
    X = np.vstack((X, Z, W))
    y = np.hstack((yx, yz, yw))
    c.fit(X, y)

    # Visualize data and model
    fig = plt.figure()
    ax1 = fig.add_subplot(211)
    ax1.plot(X[y == 1, 0], X[y == 1, 1], 'rx')
    ax1.plot(Z[:, 0], Z[:, 1], 'bx')
    ax1.plot(W[:, 0], W[:, 1], 'gx')
    ax2 = fig.add_subplot(212)
    # Evaluate the per-class densities on a grid and show the largest one.
    xx, yy = np.meshgrid(np.linspace(-5, 20, 100), np.linspace(-5, 15, 100))
    p = c.predict_proba(np.vstack((xx.ravel(), yy.ravel())).T)
    z = np.reshape(np.max(p, 1), (xx.shape[0], xx.shape[1]))
    ax2.imshow(z, extent=[np.min(xx), np.max(xx), np.min(yy), np.max(yy)],
               origin='lower')
    plt.show()
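
    # --- Illustrative extra, not part of the original gist ---
    # A minimal sketch of how the OutlierDetection class above could be used:
    # fit it on the first cluster only and flag points whose density under
    # that single Gaussian falls below a threshold. `epsilon` is an assumed,
    # hand-picked value for this synthetic data, not a tuned one.
    od = OutlierDetection()
    od.fit(X[y == 1])
    epsilon = 1e-3
    densities = od.predict_proba(X)
    print("Samples flagged as outliers w.r.t. cluster 1:",
          int(np.sum(densities < epsilon)))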