Created
February 8, 2010 14:24
Revisions
-
tty revised this gist
Feb 8, 2010 . 1 changed file with 2 additions and 1 deletion.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,4 +1,5 @@ # Basic text search with relevancy for MongoDB. # See http://blog.tty.nl/2010/02/08/simple-ranked-text-search-for-mongodb/ # Copythingie 2010 - Ward Bekker - [email protected] #create (or empty) a docs collection -
tty revised this gist
Feb 8, 2010 . 1 changed file with 3 additions and 0 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,3 +1,6 @@ # basic text search with relevancy for MongoDB. See http://blog.tty.nl/2010/02/08/simple-ranked-text-search-for-mongodb/ # Copythingie 2010 - Ward Bekker - [email protected] #create (or empty) a docs collection doc_col = MongoMapper.connection.db('example_db').collection('docs') doc_col.remove({}) -
tty revised this gist
Feb 8, 2010 . 1 changed file with 1 addition and 0 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -2,6 +2,7 @@ doc_col = MongoMapper.connection.db('example_db').collection('docs') doc_col.remove({}) #add some sample data doc_col.insert({ "txt" => "it is what it is"}) doc_col.insert({ "txt" => "what is it"}) doc_col.insert({ "txt" => "it is a banana"}) -
tty revised this gist
Feb 8, 2010 . 1 changed file with 5 additions and 0 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,10 +1,12 @@ #create (or empty) a docs collection doc_col = MongoMapper.connection.db('example_db').collection('docs') doc_col.remove({}) doc_col.insert({ "txt" => "it is what it is"}) doc_col.insert({ "txt" => "what is it"}) doc_col.insert({ "txt" => "it is a banana"}) #The invix creation map function. Splits the texts in individual words map_index =<<JS function() { var words = this.txt.split(' '); @@ -15,6 +17,7 @@ } JS # Groups the doc id's for every unique word reduce_index =<<JS function(key, values) { var docs = []; @@ -25,6 +28,7 @@ } JS # Every document counts as one map_relevance =<<JS function() { for ( var i=0; i< this.value.docs.length; i++ ) { @@ -33,6 +37,7 @@ } JS # And calculate the amount of occurrences for every unique document id reduce_relevance=<<JS function(key, values) { var sum = 0; -
tty created this gist
Feb 8, 2010 .There are no files selected for viewing
# Ranked text search over a MongoDB collection using two map/reduce passes:
#   1. build an inverted index (word -> ids of the documents containing it)
#   2. for a set of query terms, count how often each document id is listed
# The documents are then printed with the highest count first.

# Create (or empty) the docs collection and load some sample data.
doc_col = MongoMapper.connection.db('example_db').collection('docs')
doc_col.remove({})
doc_col.insert({ "txt" => "it is what it is" })
doc_col.insert({ "txt" => "what is it" })
doc_col.insert({ "txt" => "it is a banana" })

# Inverted-index map step: split the text on single spaces and emit one
# { docs: [id] } entry per word position. A word that occurs twice in a
# document therefore emits that document's id twice.
map_index = <<JS
function() {
  var words = this.txt.split(' ');
  for ( var i=0; i<words.length; i++ ) {
    emit(words[i], { docs: [this._id] });
  }
}
JS

# Inverted-index reduce step: concatenate the doc id lists emitted for a word.
# The result has the same { docs: [...] } shape as the emitted values.
reduce_index = <<JS
function(key, values) {
  var docs = [];
  values.forEach ( function(val) {
    docs = docs.concat(val.docs);
  })
  return { docs: docs };
}
JS

# Relevance map step: runs over the inverted index entries and emits
# { count: 1 } for every document id listed under a word.
map_relevance = <<JS
function() {
  for ( var i=0; i< this.value.docs.length; i++ ) {
    emit(this.value.docs[i], { count: 1 });
  }
}
JS

# Relevance reduce step: sum the counts emitted for one document id.
reduce_relevance = <<JS
function(key, values) {
  var sum = 0;
  values.forEach ( function(val) {
    sum += val.count;
  })
  return { count: sum };
}
JS

# Calculate the inverted index.
invix_col = doc_col.map_reduce(map_index, reduce_index)

# Calculate the number of occurrences of the search terms per document.
# Only index entries whose _id (the word) is one of the query terms are mapped.
query = ["what", "is", "it"]
ranked_result = invix_col.map_reduce(map_relevance, reduce_relevance, { :query => { "_id" => { "$in" => query } } })

# Output the results, most relevant on top.
# The reduced value is stored under "value" in each result document (it is read
# below as result["value"]["count"]), so the sort key must be "value.count";
# sorting on a top-level "count" field would not order by relevance.
ranked_result.find().sort("value.count", :desc).each do |result|
  puts "document with id #{result["_id"]} has rank #{result["value"]["count"]} : #{doc_col.find_one("_id" => result["_id"]).inspect}"
end