Created
September 12, 2013 10:08
-
-
Save vthacker/6535332 to your computer and use it in GitHub Desktop.
Percolator
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package com.varun.perculator; | |
import java.util.ArrayList; | |
import java.util.List; | |
import org.apache.lucene.analysis.Analyzer; | |
import org.apache.lucene.analysis.core.SimpleAnalyzer; | |
import org.apache.lucene.index.Term; | |
import org.apache.lucene.index.memory.MemoryIndex; | |
import org.apache.lucene.queryparser.classic.ParseException; | |
import org.apache.lucene.queryparser.classic.QueryParser; | |
import org.apache.lucene.search.Query; | |
import org.apache.lucene.search.TermQuery; | |
import org.apache.lucene.util.Version; | |
public class Percolator { | |
public static final Version VERSION = Version.LUCENE_43; | |
public static final String CONTENT = "content"; | |
private List<Query> queries; | |
private MemoryIndex index; | |
public Percolator() { | |
queries = new ArrayList<Query>(); | |
index = new MemoryIndex(); | |
} | |
public void addQuery(String query) throws ParseException { | |
Analyzer analyzer = new SimpleAnalyzer(VERSION); | |
QueryParser parser = new QueryParser(VERSION, CONTENT, analyzer); | |
queries.add(parser.parse(query)); | |
} | |
private void addDirectQuery(String string) { | |
Query query = new TermQuery(new Term(CONTENT, string)); | |
queries.add(query); | |
} | |
/* | |
* TODO maybe use automation/ trie fields if list<queries> is large( like 1 million registered queries) | |
*/ | |
public synchronized List<Query> getMatchingQueries(String doc) { | |
index.reset(); | |
index.addField(CONTENT, doc, new SimpleAnalyzer(VERSION)); | |
List<Query> matching = new ArrayList<Query>(); | |
for (Query query : queries) { | |
if (index.search(query) > 0.0f) { | |
matching.add(query); | |
} | |
} | |
return matching; | |
} | |
public static void main(String[] args) throws ParseException { | |
long start = System.currentTimeMillis(); | |
Percolator percolator = new Percolator(); | |
percolator.addDirectQuery("one"); | |
percolator.addDirectQuery("two"); | |
percolator.addDirectQuery("three"); | |
String docs[] = { | |
"one two three", | |
"two", | |
"three", | |
"four" | |
}; | |
for (String doc : docs) { | |
System.out.println(doc + " -> " + percolator.getMatchingQueries(doc)); | |
} | |
long end = System.currentTimeMillis(); | |
System.out.println(end - start); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment