Created
May 27, 2017 23:56
-
-
Save jgcoded/4ec81d0e2071af389451187c5d16e724 to your computer and use it in GitHub Desktop.
Autocomplete word suggestions using a trie
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*! | |
A simple command-line application that provides word completion suggestions. | |
The application assumes that a list of words is provided by the system. | |
On Ubuntu, the application attempts to read from the /usr/share/dict/words | |
file installled by the wamerican package. | |
Once the trie is built, word completion suggestions is obtained via a | |
recursve method that collects words as the trie is traversed. | |
*/ | |
#include <iostream> | |
#include <vector> | |
#include <memory> | |
#include <fstream> | |
#include <ctime> | |
#include <cstdlib> | |
using namespace std; | |
struct Node | |
{ | |
char letter; | |
vector<shared_ptr<Node>> children; | |
}; | |
const auto gWordEndNode = shared_ptr<Node>(new Node{ '$', {} }); | |
Node* TraverseTrieWithWord(string& word, Node* root) | |
{ | |
while(!word.empty() && root != nullptr) | |
{ | |
auto letter = word.front(); | |
bool foundLetter = false; | |
for(const auto& child : root->children) | |
{ | |
if(child->letter == letter) | |
{ | |
root = child.get(); | |
word = word.substr(1); | |
foundLetter = true; | |
break; | |
} | |
} | |
if(!foundLetter) | |
{ | |
break; | |
} | |
} | |
return root; | |
} | |
void AddWordToTrie(string word, Node* root) | |
{ | |
if(word.empty() || root == nullptr) | |
{ | |
return; | |
} | |
auto foundNode = TraverseTrieWithWord(word, root); | |
if(word.empty()) | |
{ | |
for(const auto& child : foundNode->children) | |
{ | |
if(child->letter == gWordEndNode->letter) | |
{ | |
return; | |
} | |
} | |
foundNode->children.push_back(gWordEndNode); | |
} else { | |
while(!word.empty()) | |
{ | |
auto n = shared_ptr<Node>(new Node); | |
n->letter = word[0]; | |
word = word.substr(1); | |
foundNode->children.push_back(n); | |
foundNode = n.get(); | |
} | |
foundNode->children.push_back(gWordEndNode); | |
} | |
} | |
std::shared_ptr<Node> BuildTrieFromStream(istream& wordStream) | |
{ | |
auto root = shared_ptr<Node>(new Node); | |
auto count = 0; | |
while(!wordStream.eof()) | |
{ | |
string word; | |
wordStream >> word; | |
AddWordToTrie(word, root.get()); | |
count++; | |
} | |
cout << "Added " << count << " words to trie" << endl; | |
return root; | |
} | |
void AutoCompleteRecurse(string currentText, const Node* root, vector<string>& results, int depth = 5) | |
{ | |
if(depth < 0) | |
{ | |
return; | |
} | |
for(const auto& child : root->children) | |
{ | |
if(child->letter == gWordEndNode->letter) | |
{ | |
results.push_back(currentText); | |
continue; | |
} | |
AutoCompleteRecurse(currentText + child->letter, child.get(), results, depth - 1); | |
} | |
} | |
vector<string> GetAutoCompleteWordList(string currentText, Node* root) | |
{ | |
auto text = currentText; | |
auto foundNode = TraverseTrieWithWord(text, root); | |
vector<string> results; | |
for(const auto& child : foundNode->children) | |
{ | |
if(child->letter == gWordEndNode->letter) | |
{ | |
results.push_back(currentText); | |
continue; | |
} | |
AutoCompleteRecurse(currentText + child->letter, child.get(), results); | |
} | |
return results; | |
} | |
int main(int argc, char** argv) | |
{ | |
#define WORD_LIST "/usr/share/dict/words" | |
ifstream wordList(WORD_LIST); | |
cout << "Attempting to read word list from " << WORD_LIST << endl; | |
if(!wordList) | |
{ | |
cout << "Could not find list of words" << endl | |
<< "You may need to install a word list package, e.g.:" << endl | |
<< "$ sudo apt-get install wamerican" << endl; | |
return 1; | |
} | |
cout << "Building word trie" << endl; | |
auto trie = BuildTrieFromStream(wordList); | |
cout << "Word trie built successfully" << endl; | |
cout << "Type a few characters and " | |
"then press ENTER to see a list " | |
"of possible word completions" << endl; | |
string input; | |
while(true) | |
{ | |
cout << "Enter text: "; | |
string text; | |
cin >> text; | |
if(text.empty()) | |
{ | |
continue; | |
} | |
auto autocomplete = GetAutoCompleteWordList(text, trie.get()); | |
for(const auto& word : autocomplete) | |
{ | |
cout << word << endl; | |
} | |
cout << endl; | |
} | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment