Created
November 25, 2023 18:34
-
-
Save BenjaminRosell/2ca74152ce7298f844c59ee922a37b7d to your computer and use it in GitHub Desktop.
Final Project
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from english_words import get_english_words_set | |
import click | |
import matplotlib.pyplot as plt | |
from itertools import product | |
class Sorcerer: | |
def generate_corpus(self, count): | |
corpus = {} | |
for i in range(count): | |
corpus[i + 1] = [] | |
return corpus | |
def prepare_corpus(self): | |
words = get_english_words_set(['web2'], lower=True) | |
if self.debug: | |
print('There are ' + str(len(words)) + ' words') | |
corpus = self.generate_corpus(28) | |
longest = 0 | |
for word in words: | |
count = len(word) | |
corpus[count].append(word) | |
if count > longest: | |
longest = count | |
longest_word = word | |
if self.debug: | |
print('The longest word is ' + longest_word) | |
print('It\'s lenght is ' + str(longest)) | |
return corpus | |
def describe_corpus(self): | |
counts = [len(list) for i, list in self.corpus.items()] | |
container = plt.bar(range(1, len(counts) + 1), counts) | |
plt.bar_label(container, counts, label_type='edge') | |
plt.axis('on') | |
plt.show() | |
def get_keyboard(self): | |
return { | |
0: ['q', 'w', 'e', 'r', 't', 'y', 'u', 'i', 'o', 'p'], | |
1: ['a', 's', 'd', 'f', 'g', 'h', 'j', 'k', 'l'], | |
2: ['z', 'x', 'c', 'v', 'b', 'n', 'm'] | |
} | |
def find_element_in_keyboard(self, element): | |
for key, value_list in self.keyboard.items(): | |
if element in value_list: | |
return key, value_list.index(element) | |
return None, None | |
def word_to_keyboard(self, word): | |
return [self.find_element_in_keyboard(letter) for letter in word] | |
def keyboard_to_letter(self, letter): | |
row, column = letter | |
return self.keyboard.get(row)[column] | |
def keyboard_to_word(self, word): | |
return ''.join([self.keyboard_to_letter(letter) for letter in word]) | |
def get_permutations(self, position): | |
row, column = position | |
possible_rows = [row - 1, row, row + 1] | |
possible_columns = [column - 1, column, column + 1] | |
# removing impossible values | |
possible_rows = [item for item in possible_rows if 0 <= item <= 2] | |
possible_columns = [item for item in possible_columns if 0 <= item <= 9] | |
possible_permutations = list(product(possible_rows, possible_columns)) | |
if possible_columns[len(possible_columns) - 1] >= 6: | |
possible_permutations = [p for p in possible_permutations if self.validate_permutation(p) is not None] | |
return possible_permutations | |
def generate_misspells(self): | |
positions = self.word_to_keyboard(self.word) | |
permutations = [] | |
for position in positions: | |
permutations.append(self.get_permutations(position)) | |
all_combinations = [] | |
for i in range(len(positions)): | |
for variation in permutations[i]: | |
# Create a new list with the ith element replaced by its variation | |
new_combination = positions[:i] + [variation] + positions[i + 1:] | |
all_combinations.append(new_combination) | |
return all_combinations | |
def generate_swaps(self): | |
all_combinations = [] | |
positions = self.word_to_keyboard(self.word) | |
for i in range(len(positions) - 1): | |
# Swap adjacent elements | |
positions[i], positions[i + 1] = positions[i + 1], positions[i] | |
all_combinations.append(positions.copy()) | |
# Swap back to restore original list | |
positions[i], positions[i + 1] = positions[i + 1], positions[i] | |
return all_combinations | |
def generate_variants(self): | |
misspells = self.generate_misspells() | |
swaps = self.generate_swaps() | |
variants = misspells + swaps | |
# Use a set to keep track of unique lists (converted to tuples of tuples) | |
seen = set() | |
# Store the unique lists here | |
unique_lists = [] | |
for lst in variants: | |
# Convert the list of tuples to a tuple of tuples | |
tuple_version = tuple(lst) | |
# If this tuple of tuples is not in the set, it's unique | |
if tuple_version not in seen: | |
seen.add(tuple_version) | |
unique_lists.append(lst) | |
return unique_lists | |
def predict(self, word): | |
self.word = word | |
variants = self.generate_variants() | |
suggestions = [] | |
for variant in variants: | |
suggestions.append((self.keyboard_to_word(variant), self.get_suggestions(len(word), variant))) | |
if self.debug: | |
for variant, suggestion_list in suggestions: | |
print("The word you typed is: {}".format(word)) | |
print("Calculating suggestions for variant: {}".format(variant)) | |
print("The best 10 suggestions are: ") | |
for suggestion, distance in suggestion_list: | |
print("The word {}, with a total distance of {}".format(suggestion, distance)) | |
self.display_results(suggestions) | |
def __init__(self, debug): | |
self.word = None | |
self.debug = debug | |
self.keyboard = self.get_keyboard() | |
self.corpus = self.prepare_corpus() | |
if self.debug: | |
self.describe_corpus() | |
def validate_permutation(self, permutation): | |
row, column = permutation | |
try: | |
return self.keyboard.get(row)[column] | |
except IndexError: | |
return None | |
def get_suggestions(self, count, variant): | |
corpus = self.corpus[count] | |
distances = [] | |
for word in corpus: | |
distances.append(sum(self.calculate_distance(word, variant))) | |
sorted_with_index = sorted(enumerate(distances), key=lambda x: x[1]) | |
return [(corpus[index], element) for index, element in sorted_with_index[:5]] | |
def calculate_distance(self, word, variant): | |
encoded_word = self.word_to_keyboard(word) | |
return [sum((abs(a - c), abs(b - d))) for (a, b), (c, d) in zip(encoded_word, variant)] | |
def display_results(self, suggestions): | |
# Flatten the structure | |
flattened_data = [(word, variant, distance) for word, data in suggestions for variant, distance in data] | |
# Sort by distance | |
flattened_data.sort(key=lambda x: x[2]) | |
# Display the results | |
for word, variant, distance in flattened_data[:10]: | |
#print(f"Word: {word}, Variant: {variant}, Distance: {distance}") | |
if distance <= 1: | |
print(f"From the input: {word}, our guess is: {variant}, Distance: {distance}") | |
@click.command() | |
@click.option("--word", "-w", prompt="What is your word ? ") | |
# @click.option("--word", "-w", default='tesla') | |
@click.option("--debug", "-d", default=False) | |
def main(word, debug): | |
sorcerer = Sorcerer(debug) | |
sorcerer.predict(word) | |
if __name__ == '__main__': | |
main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment