Skip to content

Instantly share code, notes, and snippets.

@vthacker
Created September 12, 2013 10:08

Revisions

  1. vthacker created this gist Sep 12, 2013.
    82 changes: 82 additions & 0 deletions gistfile1.java
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,82 @@
    package com.varun.perculator;

    import java.util.ArrayList;
    import java.util.List;

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.core.SimpleAnalyzer;
    import org.apache.lucene.index.Term;
    import org.apache.lucene.index.memory.MemoryIndex;
    import org.apache.lucene.queryparser.classic.ParseException;
    import org.apache.lucene.queryparser.classic.QueryParser;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.search.TermQuery;
    import org.apache.lucene.util.Version;

    /**
     * A minimal in-memory "percolator": queries are registered up front, and each
     * incoming document is then tested against every registered query to find the
     * ones it satisfies (the reverse of a normal search).
     *
     * <p>All access to the registered-query list and to the shared
     * {@link MemoryIndex} is guarded by this instance's monitor, so queries may be
     * registered while other threads are matching documents.
     */
    public class Percolator {

        public static final Version VERSION = Version.LUCENE_43;
        public static final String CONTENT = "content";

        /** Built once and reused for both query parsing and document indexing. */
        private final Analyzer analyzer = new SimpleAnalyzer(VERSION);

        /** Registered queries, in registration order. Guarded by {@code this}. */
        private final List<Query> queries = new ArrayList<Query>();

        /** Scratch single-document index, reset for every document. Guarded by {@code this}. */
        private final MemoryIndex index = new MemoryIndex();

        /** Creates a percolator with no registered queries. */
        public Percolator() {
        }

        /**
         * Parses {@code query} with the classic Lucene query syntax against the
         * {@link #CONTENT} field and registers the result.
         *
         * @param query query string in classic {@link QueryParser} syntax
         * @throws ParseException if the string cannot be parsed; nothing is
         *         registered in that case
         */
        public void addQuery(String query) throws ParseException {
            // A parser is created per call rather than shared between callers;
            // only the analyzer is reused.
            QueryParser parser = new QueryParser(VERSION, CONTENT, analyzer);
            Query parsed = parser.parse(query);
            // Parse outside the lock; only the list mutation needs to be guarded.
            synchronized (this) {
                queries.add(parsed);
            }
        }

        /**
         * Registers an exact-term query on the {@link #CONTENT} field.
         *
         * <p>The term is used verbatim and is NOT run through the analyzer, whereas
         * documents are analyzed before matching. NOTE(review): a term the analyzer
         * would have altered (for example by changing its case) may therefore never
         * match - confirm against the SimpleAnalyzer documentation.
         *
         * @param string the literal term text to match
         */
        private synchronized void addDirectQuery(String string) {
            queries.add(new TermQuery(new Term(CONTENT, string)));
        }

        /*
         * TODO maybe use automaton / trie fields if the list of queries is large
         * (like 1 million registered queries)
         */
        /**
         * Returns every registered query that matches the given document text.
         *
         * <p>The document is indexed into the shared in-memory index (which is
         * reset first), and each registered query is run against it; a query is
         * considered a match when its score is greater than zero.
         *
         * @param doc the document text to test
         * @return a new list holding the matching queries in registration order;
         *         empty if none match
         */
        public synchronized List<Query> getMatchingQueries(String doc) {
            // Drop the previous document before indexing the new one.
            index.reset();
            index.addField(CONTENT, doc, analyzer);

            List<Query> matching = new ArrayList<Query>();
            for (Query query : queries) {
                if (index.search(query) > 0.0f) {
                    matching.add(query);
                }
            }
            return matching;
        }

        /**
         * Small demo: registers three term queries, percolates four documents and
         * prints the matches for each, followed by the elapsed time in milliseconds.
         */
        public static void main(String[] args) throws ParseException {
            // nanoTime is monotonic, unlike currentTimeMillis, so it is the safer
            // choice for measuring an elapsed interval.
            long start = System.nanoTime();

            Percolator percolator = new Percolator();
            percolator.addDirectQuery("one");
            percolator.addDirectQuery("two");
            percolator.addDirectQuery("three");

            String[] docs = {
                "one two three",
                "two",
                "three",
                "four"
            };

            for (String doc : docs) {
                System.out.println(doc + " -> " + percolator.getMatchingQueries(doc));
            }

            long elapsedMillis = (System.nanoTime() - start) / 1000000L;
            System.out.println(elapsedMillis);
        }

    }