-
-
Save abler98/40192668fa88384f503c5e41d257a7a2 to your computer and use it in GitHub Desktop.
ElasticSearch fuzzy ngram powered search
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Blank out the proxy variables so the curl calls below are not routed
# through an HTTP(S) proxy when talking to the node on localhost.
export http_proxy=
export https_proxy=

# Remove the "test" index (e.g. one left over from an earlier run) before
# the mapping is created again below.
curl -XDELETE 'http://localhost:9200/test/'
# Create the "test" index with its analysis settings and the "member" mapping.
#
# Analyzers defined in "settings":
#   full_name             - standard tokenizer; standard, lowercase and
#                           asciifolding filters.
#   ngrams_for_cjk        - standard tokenizer; lowercase, then nGram filter
#                           (min_gram 2, max_gram 10).
#   prefix_ngrams_for_cjk - standard tokenizer; lowercase, then edgeNGram
#                           filter from the front (min_gram 2, max_gram 10).
#
# Both person_name and city_name are multi_field: one sub-field analyzed with
# full_name plus n-gram analyzed "partial_*" sub-fields (city_name also gets
# an edge-n-gram "partial_city_name_prefix" sub-field).
echo "Creating the mapping"
curl -XPUT 'http://localhost:9200/test/?pretty=1' -d '
{
  "mappings" : {
    "member" : {
      "properties" : {
        "person_name" : {
          "fields" : {
            "partial_person_name" : {
              "search_analyzer" : "ngrams_for_cjk",
              "index_analyzer" : "ngrams_for_cjk",
              "type" : "string"
            },
            "person_name" : {
              "type" : "string",
              "analyzer" : "full_name"
            }
          },
          "type" : "multi_field"
        },
        "city_name" : {
          "fields" : {
            "partial_city_name" : {
              "search_analyzer" : "ngrams_for_cjk",
              "index_analyzer" : "ngrams_for_cjk",
              "type" : "string"
            },
            "partial_city_name_prefix" : {
              "search_analyzer" : "prefix_ngrams_for_cjk",
              "index_analyzer" : "prefix_ngrams_for_cjk",
              "type" : "string"
            },
            "city_name" : {
              "type" : "string",
              "analyzer" : "full_name"
            }
          },
          "type" : "multi_field"
        }
      }
    }
  },
  "settings" : {
    "analysis" : {
      "filter" : {
        "ngrams_for_every_few_characters" : {
          "type" : "nGram",
          "max_gram": "10",
          "min_gram" : "2"
        },
        "ngrams_for_prefix" : {
          "type" : "edgeNGram",
          "max_gram": "10",
          "min_gram" : "2",
          "side" : "front"
        }
      },
      "analyzer" : {
        "full_name" : {
          "filter" : [
            "standard",
            "lowercase",
            "asciifolding"
          ],
          "type" : "custom",
          "tokenizer" : "standard"
        },
        "ngrams_for_cjk" : {
          "filter" : [
            "lowercase",
            "ngrams_for_every_few_characters"
          ],
          "type" : "custom",
          "tokenizer" : "standard"
        },
        "prefix_ngrams_for_cjk" : {
          "filter" : [
            "lowercase",
            "ngrams_for_prefix"
          ],
          "type" : "custom",
          "tokenizer" : "standard"
        }
      }
    }
  }
}
'; echo
# Index six sample "member" documents into "test" through the _bulk endpoint.
# The payload is newline-delimited: each action line is followed by its
# document on the next line, so every JSON object must stay on a single line
# and the payload must keep its final newline.
echo "Indexing"
curl -XPOST 'http://localhost:9200/_bulk?pretty=1' -d '
{"index" : {"_index" : "test", "_type" : "member"}}
{"person_name" : "John Smith", "city_name" : "Amsterdam"}
{"index" : {"_index" : "test", "_type" : "member"}}
{"person_name" : "Ævar Arnfjörð Bjarmason", "city_name" : "Amsterdam"}
{"index" : {"_index" : "test", "_type" : "member"}}
{"person_name" : "Lucy Lue", "city_name" : "London"}
{"index" : {"_index" : "test", "_type" : "member"}}
{"person_name" : "Ævar Arnfjörð Bjarmason", "city_name" : "كوالالمبور"}
{"index" : {"_index" : "test", "_type" : "member"}}
{"person_name" : "Ævar Johnson", "city_name" : "The city of كوالالمبور"}
{"index" : {"_index" : "test", "_type" : "member"}}
{"person_name" : "Juhen Smith", "city_name" : "Amsterdam"}
'; echo
# Wait for indexing: call the _refresh endpoint so the documents indexed
# above are searchable before the query is run.
echo "Refreshing"
curl -XPOST 'http://localhost:9200/_refresh?pretty=yes'; echo
# Run the fuzzy search against test/member.
#
# The outer bool query has two "should" clauses and
# minimum_number_should_match = 2, so a hit has to satisfy both the
# person-name clause and the city-name clause. Each clause is a dis_max over
#   - a boosted text query on the full_name-analyzed sub-field, and
#   - an unboosted text query on the n-gram analyzed "partial_*" sub-field.
# The search terms are deliberately misspelled ("Xvar", and the city name
# with "FUZZY"/"WOO" spliced in) to exercise the n-gram matching.
echo "Doing the search"
curl -XGET 'http://localhost:9200/test/member/_search?pretty=1' -d '
{
  "explain" : "false",
  "query" : {
    "bool" : {
      "minimum_number_should_match" : 2,
      "should" : [
        {
          "dis_max" : {
            "queries" : [
              {
                "bool" : {
                  "boost" : 10,
                  "must" : {
                    "text" : {
                      "person_name.person_name" : "Xvar"
                    }
                  }
                }
              },
              {
                "text" : {
                  "person_name.partial_person_name" : "Xvar"
                }
              }
            ]
          }
        },
        {
          "dis_max" : {
            "queries" : [
              {
                "bool" : {
                  "boost" : 5,
                  "must" : {
                    "text" : {
                      "city_name.city_name" : "كوالFUZZYالمبورWOO"
                    }
                  }
                }
              },
              {
                "text" : {
                  "city_name.partial_city_name" : "كوالFUZZYالمبورWOO"
                }
              }
            ]
          }
        }
      ]
    }
  }
}
'; echo
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment