Skip to content

Instantly share code, notes, and snippets.

@MrNanosh
Last active September 4, 2019 18:52
Show Gist options
  • Save MrNanosh/5ca2d71f8e66381f8c824f376035f6af to your computer and use it in GitHub Desktop.
Save MrNanosh/5ca2d71f8e66381f8c824f376035f6af to your computer and use it in GitHub Desktop.
/**
 * Breaks a string into a sorted list of lower-cased tokens.
 *
 * The input is lower-cased, then split on runs of any of these characters:
 * space, comma, `!`, `.`, `"`, `;`, `:` and `-`. Empty pieces (produced when
 * the string starts or ends with a delimiter, or is empty) are discarded.
 * Other characters, including newlines and tabs, are NOT delimiters and stay
 * inside the tokens.
 *
 * @param {string} rawString - Text to tokenize; must provide `toLowerCase`.
 * @returns {string[]} The tokens, ordered by the default `Array.prototype.sort`
 *   comparison (UTF-16 code unit order). Duplicates are kept.
 */
function getTokens(rawString) {
  const delimiterRun = /[ ,!.";:-]+/;
  const lowered = rawString.toLowerCase();

  const tokens = [];
  for (const piece of lowered.split(delimiterRun)) {
    // Splitting leaves '' at the edges when the text begins/ends with a delimiter.
    if (piece !== '') {
      tokens.push(piece);
    }
  }

  tokens.sort();
  return tokens;
}
/**
 * Finds the token that occurs most often in `text`.
 *
 * The text is tokenized with `getTokens`, every token is tallied, and the
 * token with the highest tally is returned. When several tokens share the
 * highest tally, the one that comes first in the array returned by
 * `getTokens` wins.
 *
 * @param {string} text - Text to analyse; passed unchanged to `getTokens`.
 * @returns {string|undefined} The most frequent token, or `undefined` when
 *   `getTokens` yields no tokens (for example for an empty string).
 */
function mostFrequentWord(text) {
  const words = getTokens(text);

  // A Map is used instead of a plain object so that tokens such as
  // "constructor" or "__proto__" cannot collide with members inherited from
  // Object.prototype, and so that iteration order is plain insertion order.
  const wordFrequencies = new Map();
  for (const word of words) {
    // for...of visits exactly the existing elements; an index loop bounded by
    // `<= words.length` would also tally a phantom `undefined` entry.
    wordFrequencies.set(word, (wordFrequencies.get(word) || 0) + 1);
  }

  let currentMaxKey; // stays undefined when there are no tokens at all
  let currentMaxCount = 0;
  for (const [word, count] of wordFrequencies) {
    // Strict '>' keeps the earliest token when counts are tied.
    if (count > currentMaxCount) {
      currentMaxKey = word;
      currentMaxCount = count;
    }
  }
  return currentMaxKey;
}
// Manual smoke test: String({}) is "[object Object]", so this feeds the
// function a string containing bracket characters. The result is discarded.
mostFrequentWord(String({}));
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment