Last active
November 9, 2024 20:14
-
-
Save sgeos/fc7eb786ed9cbcad9c0d91b248a1f229 to your computer and use it in GitHub Desktop.
Shell script to get the N most frequent words, given text input and the desired number of words. Redirected input and output verbosity are supported.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env sh
#
# Print the N most frequent words of a text, most frequent first; words with
# the same frequency are listed in alphabetical order.
#
# Usage:
#   <script> [TEXT] [COUNT] [VERBOSE]        text passed as an argument
#   <command> | <script> [COUNT] [VERBOSE]   text read from redirected stdin
#
#   TEXT     text to analyse (default: DEFAULT_TEXT)
#   COUNT    number of words to print; anything that is not a non-negative
#            integer falls back to DEFAULT_COUNT
#   VERBOSE  "true" echoes the inputs before the result; defaults to true for
#            argument input and to false for redirected input

# script flags
VERBOSE=true

# default inputs
DEFAULT_TEXT="The quick brown fox jumps over the lazy dog. Yes, the quick brown fox."
DEFAULT_COUNT=10

# Print $1 as a normalized decimal number if it is a non-negative integer;
# print nothing otherwise so the caller can substitute a default.
parse_count() {
  case "${1:-}" in
    '' | *[!0-9]*) return 0 ;; # empty, negative or non-numeric: reject
  esac
  # expr rather than $(( )): "010" must be read as decimal 10, not as octal.
  # expr exits with status 1 when the result is 0, which is not an error here.
  expr "$1" + 0 2>/dev/null || :
}

# Print the $2 most frequent words of text $1 as "count word" lines.
# Words are compared case-insensitively; punctuation is removed (not treated
# as a separator) and any run of whitespace, including newlines, separates
# words.
top_words() {
  printf '%s\n' "$1" |
    tr '[:upper:]' '[:lower:]' |        # use lower case
    tr -cd '[:alnum:][:space:]' |       # strip punctuation, keep separators
    tr -cs '[:alnum:]' '[\n*]' |        # one word per line, runs squeezed
    sed '/^$/d' |                       # drop the empty line of leading blanks
    sort |                              # group identical words for uniq
    uniq -c |                           # count each distinct word
    sort -k1,1nr -k2 |                  # frequency order, alpha order on tie
    head -n "$2"                        # take only the requested number
}

# Resolve the inputs from arguments or redirected stdin, then print the result.
main() {
  if [ ! -t 0 ]; then
    # input is redirected: the text comes from stdin, so the positional
    # parameters shift one place to the left
    VERBOSE=${2:-false} # do not echo piped text unless requested
    TEXT=$(cat)
    COUNT=$(parse_count "${1:-}")
  else
    TEXT=${1:-$DEFAULT_TEXT}  # default text if first parameter is missing
    COUNT=$(parse_count "${2:-}")
    VERBOSE=${3:-$VERBOSE}    # update verbose flag given third parameter
  fi
  COUNT=${COUNT:-$DEFAULT_COUNT} # default value if count is unset or invalid

  # verbose output
  if [ "true" = "${VERBOSE}" ]; then
    printf 'TEXT = "%s"\n' "${TEXT}"
    printf 'COUNT = %s\n' "${COUNT}"
    printf '\n'
    printf 'RESULT\n'
  fi

  top_words "${TEXT}" "${COUNT}"
}

main "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment