Last active
January 18, 2022 18:26
-
-
Save lindeloev/e8358e16b84872a3c1f364ff69de2dd2 to your computer and use it in GitHub Desktop.
Optimal guesses for Wordle
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Use these functions to make smart guesses for Wordle (https://www.powerlanguage.co.uk/wordle/)
# find_words() returns the words that satisfy the Wordle feedback. Start with `possible_words`, i.e., all 5-letter English words.
# next_word() returns the words that are most likely to result in green letters.
#
# A pretty good strategy is to call next_word() with strategy "learn" for the first two words and "guess" thereafter, picking the first commonly-known word.
############# | |
# FUNCTIONS # | |
############# | |
#' Detect the letter sequence that is likely to contain most green letters
#'
#' Each word is scored, position by position, by the number of candidate
#' `words` that have the same letter in the same position (a proxy for how
#' likely that letter is to turn green). With `strategy = "learn"`, the score
#' is additionally increased by how often the letter occurs in the *other*
#' positions of the candidates (a proxy for yellow feedback), at a reduced
#' weight.
#'
#' @param words A character vector of words, all of the same length.
#' @param strategy
#'   * `"guess"`: only optimize for green letters.
#'   * `"learn"`: optimize for green AND yellow letters. Only words without
#'     repeated letters are returned, so every letter tests something new.
#' @param n How many top hits to show (from best to worse)
#' @return A character vector of at most `n` words (best first). Returns
#'   `character(0)` when `words` is empty. Errors if `strategy` is not one of
#'   the values above or if the words differ in length.
next_word = function(words, strategy = "guess", n = 3) {
  # Fail loudly on an unknown strategy instead of silently returning NULL
  strategy = match.arg(strategy, c("guess", "learn"))

  # Nothing to rank, e.g., when find_words() filtered every candidate away
  if (length(words) == 0) {
    return(character(0))
  }

  wordlength = unique(nchar(words))
  stopifnot("All words must have the same length" = length(wordlength) == 1)

  # How often each element of `letters` occurs in `pool`. Letters that are
  # absent from `pool` count as 0 (a plain table lookup would yield NA and
  # poison the whole score of that word).
  count_in = function(letters, pool) {
    counts = table(pool)
    hits = as.numeric(counts)[match(letters, names(counts))]
    hits[is.na(hits)] = 0
    hits
  }

  # Give identification of yellow letters half the info-weight of green letters.
  yellow_weight = 0.5 / wordlength

  # Score each word as the sum of words with letters in these positions
  score = numeric(length(words))
  for (i in seq_len(wordlength)) {
    letter_i = substr(words, i, i)

    # Green: frequency of this letter at this exact position
    score = score + count_in(letter_i, letter_i)

    # Also weight in yellow characters for "learn" strategy, i.e., correct
    # characters in the wrong position.
    if (strategy == "learn") {
      other_letters = paste0(substr(words, 1, i - 1), substr(words, i + 1, wordlength)) |>
        strsplit("") |>
        unlist()
      score = score + count_in(letter_i, other_letters) * yellow_weight
    }
  }

  # Best first; order() leaves ties in their original order
  ranked = words[order(-score)]

  if (strategy == "learn") {
    # Keep only words in which no letter is repeated
    only_unique_characters = vapply(
      strsplit(ranked, ""),
      \(x) anyDuplicated(x) == 0,
      logical(1)
    )
    ranked = ranked[only_unique_characters]
  }

  head(ranked, n)
}
#' Find words that fulfill Wordle criteria
#'
#' Filters `words` down to those consistent with the feedback collected so far.
#' All feedback arguments are matched case-insensitively (they are lower-cased
#' before use); `words` themselves are expected to be lower-case a-z.
#'
#' @param words Vector of possible words at this step, e.g., `c("goats", "horse")`.
#' @param green Green characters in their correct position, e.g., `"s???e"`.
#'   Write ? where there are no green characters.
#' @param grey Gray characters, e.g., `"car"`. Grey letters that also occur in
#'   `green` or `yellows` are ignored: Wordle marks a surplus copy of a letter
#'   grey even though the letter is in the word, so excluding it would discard
#'   the answer.
#' @param yellows Yellow characters in the position where they were guessed
#'   (i.e., a position they are NOT in), one pattern per guess, e.g.,
#'   `c("???es", "??i??")`. Write ? for non-yellow positions.
#' @return A character vector of the words that are still possible
#'   (`character(0)` if none are).
find_words = function(words, green = "?????", grey = "", yellows = c()) {
  green = tolower(green)
  yellows = tolower(yellows)

  # GREEN: Keep words matching green letters in their position
  regex_green = paste0("^", gsub("?", "[a-z]", green, fixed = TRUE), "$")
  words_remaining = words[grepl(regex_green, words)]

  # GREY: Remove words with grey letters, except letters known to be in the
  # word from green/yellow feedback (duplicate-letter case).
  grey_letters = unique(unlist(strsplit(tolower(paste(grey, collapse = "")), "")))
  known_letters = unlist(strsplit(c(green, yellows), ""))
  known_letters = known_letters[known_letters != "?"]
  for (letter in setdiff(grey_letters, known_letters)) {
    has_letter = grepl(letter, words_remaining, fixed = TRUE)
    words_remaining = words_remaining[!has_letter]
  }

  # YELLOW: handle one position at a time so a letter that is yellow in
  # several positions of the same pattern is checked at each of them.
  for (yellow in yellows) {
    pattern_letters = unlist(strsplit(yellow, ""))
    for (position in which(pattern_letters != "?")) {
      letter = pattern_letters[position]
      # Yellow letter must be present ...
      has_letter = grepl(letter, words_remaining, fixed = TRUE)
      # ... but must not be in the entered position
      in_entered_position = substr(words_remaining, position, position) == letter
      words_remaining = words_remaining[has_letter & !in_entered_position]
    }
  }

  words_remaining
}
############ | |
# APPLY IT # | |
############ | |
# Vector of all English words, downloaded from the dwyl/english-words list
all_words = read.csv(
  "https://raw.githubusercontent.com/dwyl/english-words/master/words_alpha.txt",
  header = FALSE,
  col.names = "word"
)$word
possible_words = all_words[nchar(all_words) == 5]

# Each section below replays one day's game: ask next_word() for a suggestion,
# enter it in Wordle, then narrow `words` with the feedback via find_words().

# 2022-01-01: Third guess
words = find_words(possible_words)
next_word(words, "learn")  # I guess CARES
words = find_words(words, green = "????s", grey = "ca", yellows = c("??re?"))
next_word(words, "learn")  # I guess TIERS
words = find_words(words, green = "????s", grey = "cati", yellows = c("??re?", "??er?"))
next_word(words, "guess", n = 100)  # I guess REBUS

# 2022-01-02: Third guess
words = find_words(possible_words)
next_word(words, "learn")  # I guess CARES
words = find_words(words, grey = "care", yellows = c("????s"))
next_word(words, "learn")  # I guess SOILY
words = find_words(words, green = "?o???", grey = "careily", yellows = c("????s", "s????"))
next_word(words, "guess", n = 100)  # I guess BOOST

# 2022-01-03: Fourth guess
words = find_words(possible_words)
next_word(words, "learn")  # I guess CARES
words = find_words(words, green = "????s", grey = "cae", yellows = c("??r??"))
next_word(words, "learn")  # I guess GROTS
words = find_words(words, green = "?r??s", grey = "caego", yellows = c("??r??", "???t?"))
next_word(words, "guess", n = 100)  # I guess TRIMS
words = find_words(words, green = "tr??s", grey = "caegoim", yellows = c("??r??", "???t?"))
next_word(words, "guess", n = 100)  # I guess TRUSS

# 2022-01-04: Third guess
words = find_words(possible_words)
next_word(words, "learn")  # I guess CARES
words = find_words(words, grey = "car", yellows = c("???es"))
next_word(words, "learn")  # I guess STILE
words = find_words(words, green = "s???e", grey = "cartl", yellows = c("???es", "??i??"))
next_word(words, "guess", n = 100)  # I guess SIEGE

# 2022-01-05: Third guess
# (argument spelled out as `yellows`; `yellow =` only worked through partial matching)
words = find_words(possible_words)
next_word(words, "learn")  # CARES
words = find_words(words, green = "???e?", grey = "cas", yellows = c("??r??"))
next_word(words, "learn")  # DOTER
words = find_words(words, green = "???er", grey = "casdo", yellows = c("??r??", "??t??"))
next_word(words, "guess", n = 100)  # TIGER

# 2022-01-06: Third guess
words = find_words(possible_words)
next_word(words, "learn")  # CARES
words = find_words(words, green = "?a???", grey = "cres")
next_word(words, "learn")  # MANLY
words = find_words(words, green = "?an??", grey = "cresmy", yellows = c("???l?"))
next_word(words, "guess", n = 100)  # BANAL
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment