tty · February 8, 2010 14:24 · Feb 8, 2010 · Feb 8, 2010 · Feb 8, 2010 · Feb 8, 2010
diff --git a/gistfile1.rb b/gistfile1.rb
@@ -1,4 +1,5 @@
-# basic text search with relevancy for MongoDB. See http://blog.tty.nl/2010/02/08/simple-ranked-text-search-for-mongodb/
+# Basic text search with relevancy for MongoDB. 
+# See http://blog.tty.nl/2010/02/08/simple-ranked-text-search-for-mongodb/
 # Copythingie 2010 - Ward Bekker - [email protected]
 
 #create (or empty) a docs collection

diff --git a/gistfile1.rb b/gistfile1.rb
@@ -1,3 +1,6 @@
+# basic text search with relevancy for MongoDB. See http://blog.tty.nl/2010/02/08/simple-ranked-text-search-for-mongodb/
+# Copythingie 2010 - Ward Bekker - [email protected]
+
 #create (or empty) a docs collection
 doc_col = MongoMapper.connection.db('example_db').collection('docs')
 doc_col.remove({})

diff --git a/gistfile1.rb b/gistfile1.rb
@@ -2,6 +2,7 @@
 doc_col = MongoMapper.connection.db('example_db').collection('docs')
 doc_col.remove({})
 
+#add some sample data
 doc_col.insert({ "txt" => "it is what it is"})
 doc_col.insert({ "txt" => "what is it"})
 doc_col.insert({ "txt" => "it is a banana"})

diff --git a/gistfile1.rb b/gistfile1.rb
@@ -1,10 +1,12 @@
+#create (or empty) a docs collection
 doc_col = MongoMapper.connection.db('example_db').collection('docs')
 doc_col.remove({})
 
 doc_col.insert({ "txt" => "it is what it is"})
 doc_col.insert({ "txt" => "what is it"})
 doc_col.insert({ "txt" => "it is a banana"})
 
+# The inverted index (invix) creation map function. Splits each text into individual words
 map_index =<<JS
   function() {
     var words = this.txt.split(' ');
@@ -15,6 +17,7 @@
   }
 JS
 
+# Groups the doc ids for every unique word
 reduce_index =<<JS
   function(key, values) {
       var docs = [];
@@ -25,6 +28,7 @@
   }
 JS
 
+# Every document counts as one
 map_relevance =<<JS
   function() {
    for ( var i=0; i< this.value.docs.length; i++ ) {
@@ -33,6 +37,7 @@
  }
 JS
 
+# And calculate the number of occurrences for every unique document id
 reduce_relevance=<<JS
   function(key, values) {
       var sum = 0;

diff --git a/gistfile1.rb b/gistfile1.rb
@@ -0,0 +1,55 @@
+doc_col = MongoMapper.connection.db('example_db').collection('docs')
+doc_col.remove({})
+
+doc_col.insert({ "txt" => "it is what it is"})
+doc_col.insert({ "txt" => "what is it"})
+doc_col.insert({ "txt" => "it is a banana"})
+
+map_index =<<JS
+  function() {
+    var words = this.txt.split(' ');
+    
+    for ( var i=0; i<words.length; i++ ) {
+      emit(words[i], { docs: [this._id] });
+    }
+  }
+JS
+
+reduce_index =<<JS
+  function(key, values) {
+      var docs = [];
+      
+      values.forEach ( function(val) { docs = docs.concat(val.docs); })
+      
+      return { docs: docs };
+  }
+JS
+
+map_relevance =<<JS
+  function() {
+   for ( var i=0; i< this.value.docs.length; i++ ) {
+     emit(this.value.docs[i], { count: 1 });
+   }
+ }
+JS
+
+reduce_relevance=<<JS
+  function(key, values) {
+      var sum = 0;
+      
+      values.forEach ( function(val) { sum += val.count; })
+      
+      return { count: sum };
+  }
+JS
+
+#calculate the inverted index
+invix_col = doc_col.map_reduce(map_index, reduce_index)
+#calculate the # occcurances of each searchterm
+query = ["what", "is", "it"]
+ranked_result = invix_col.map_reduce(map_relevance, reduce_relevance, { :query => { "_id" => { "$in" =>  query} }  } )
+
+#output the results, most relevant on top
+ranked_result.find().sort("count", :desc).each do |result|
+  puts "document with id #{result["_id"]} has rank #{result["value"]["count"]} : #{doc_col.find_one("_id" => result["_id"]).inspect}"
+end