Last active
October 2, 2022 16:32
-
-
Save Bollegala/24c5f6d9a5c9770c86f24316e8b170fd to your computer and use it in GitHub Desktop.
This code shows how to perform multi-dimensional scaling (MDS). We will consider three data points (0,1), (0,0), and (1,0) in 2d space, and compute their pairwise Euclidean distance. We then compute the MDS projection, and again plot the projected points in the 2d space. Although the absolute positions of the projected points are different from …
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy | |
import matplotlib.pyplot as plt | |
import sys | |
def bval(D, r, s): | |
n = D.shape[0] | |
total_r = numpy.sum(D[:,s] ** 2) | |
total_s = numpy.sum(D[r,:] ** 2) | |
total = numpy.sum(D ** 2) | |
val = (D[r,s] ** 2) - (float(total_r) / float(n)) - (float(total_s) / float(n)) + (float(total) / float(n * n)) | |
return -0.5 * val | |
def main(): | |
n = 3 | |
Y = numpy.array([[0, 1], [0, 0], [1, 0]], dtype=float) | |
print Y | |
D = numpy.zeros((n, n), dtype=float) | |
for i in range(0, n): | |
for j in range(0, n): | |
D[i, j] = numpy.linalg.norm(Y[i,:] - Y[j,:]) | |
B = numpy.zeros((n, n), dtype=complex) | |
for i in range(0, n): | |
for j in range(0, n): | |
B[i,j] = bval(D, i, j) | |
print "Distance Matrix D" | |
print D | |
w, V = numpy.linalg.eig(D) | |
idx = w.argsort()[::-1] | |
w = w[idx] | |
V = V[:,idx] | |
print "\nEigenvalues of the distance matrix =", w | |
print "\nB matix" | |
print B | |
g, U = numpy.linalg.eig(B) | |
idx = g.argsort()[::-1] | |
g = g[idx] | |
U = U[:,idx] | |
print "Eigenvalues =", g | |
G = numpy.diag(numpy.sqrt(g)) | |
X = numpy.dot(U, G) | |
print "\nMatrix X" | |
print X | |
error = 0.0 | |
total = 0 | |
for i in range(0, n): | |
for j in range(i+1, n): | |
error += (numpy.linalg.norm(X[i] - X[j]) - D[i, j]) ** 2 | |
total += 1 | |
RMSE = numpy.sqrt(error / float(total)) | |
print "RMSE =", RMSE | |
if RMSE > 0: | |
plt.scatter(X[:,0], X[:,1], c='b', s=40) | |
plt.scatter(Y[:,0], Y[:,1], c='r', s=40) | |
plt.show() | |
sys.exit(1) | |
pass | |
if __name__ == '__main__': | |
main() | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment