Created
June 20, 2012 08:34
Revisions
-
Christoph Schiessl created this gist
Jun 20, 2012 .There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,118 @@ require "rubygems" gem "activesupport" require "active_support/all" def manhattenDistance objectA, objectB return (objectA.first - objectB.first).abs + (objectA.second - objectB.second).abs end def allInterClusterObjectDistances clusterA, clusterB distances = [] for a in clusterA for b in clusterB distances << manhattenDistance(a, b).to_f end end return distances end def singleLinkDistance clusterA, clusterB return allInterClusterObjectDistances(clusterA, clusterB).min end def averageLinkDistance clusterA, clusterB return allInterClusterObjectDistances(clusterA, clusterB).sum / clusterA.size / clusterB.size end def distanceMatrix clusters matrix = [] for a in clusters.values matrix << [] for b in clusters.values matrix.last << (yield a, b) end end return matrix end def singleLinkDistanceMatrix clusters return distanceMatrix clusters do |a, b| singleLinkDistance(a, b) end end def averageLinkDistanceMatrix clusters return distanceMatrix clusters do |a, b| averageLinkDistance(a, b) end end def findClustersWithSmallestDistance distMatrix group = [] minDist = 1.0 / 0 # aka Infintiy distMatrix.each_with_index do |row, i| row.each_with_index do |dist, j| if i != j if dist < minDist group = [i,j] minDist = dist elsif dist == minDist group += [i,j] if group.include?(i) || group.include?(j) end end end end return group.uniq end def mergeClusters(clusters, labels) newLabel = labels.join.chars.sort.join clusters[newLabel] = [] labels.each do |l| clusters[newLabel] += clusters[l] clusters.delete(l) end return clusters end # Input Data: allClusters = { "A" => [[1,1]], "B" => [[2,1]], "C" => [[1,2]], "D" => [[2,2]], "E" => [[3,5]], "F" => [[3,9]], "G" => [[3,10]], "H" => [[4,10]], "I" => [[4,11]], "J" => [[5,10]], "K" => [[7,10]], "L" => [[10,9]], "M" => [[10,6]], "N" => [[9,5]], "O" => [[10,5]], "P" => [[11,5]], "Q" => [[9,4]], "R" => [[10,4]], "S" => [[11,4]], "T" => [[10,3]] } # Core Algorithm (Agglomerative Clustering): until allClusters.size == 1 puts allClusters.keys.inspect + " => " # With Single Link Distance Matrix: # matrix = singleLinkDistanceMatrix allClusters # With Average Link Distance Matrix: matrix = averageLinkDistanceMatrix allClusters toMerge = findClustersWithSmallestDistance matrix toMergeLabels = toMerge.map { |i| allClusters.keys[i] } # aLabel, bLabel = allClusters.keys[aIndex], allClusters.keys[bIndex] puts "merging #{toMergeLabels.to_sentence} at dist = #{matrix[toMerge.first][toMerge.second]} => " allClusters = mergeClusters(allClusters, toMergeLabels) end puts allClusters.keys.inspect