Wheest · September 27, 2021 20:33
diff --git a/random_letters.py b/random_letters.py
 #!/usr/bin/env python
 import argparse
 import string
 from typing import List
 from collections import Counter
 import numpy as np


 def main(args) -> List[str]:
    """Pick random letters whose words cover a target share of a dictionary.

    Reads the word list at ``args.dictionary_path`` and builds the
    distribution of lower-case ASCII first letters.  Starting from the
    required letters in ``args.letters``, further letters are drawn at random
    (weighted by how many words start with them) until the chosen letters
    account for at least ``args.wordlist_volume`` of the counted words, or
    until every letter that starts at least one word has been chosen.

    The intermediate and the sorted selection are printed to stdout.

    :param args: namespace with ``dictionary_path`` (newline separated word
        file), ``letters`` (letters that must be included) and
        ``wordlist_volume`` (target fraction of the counted words)
    :returns: List[str] chosen letters, sorted; required letters are
        de-duplicated and ``args.letters`` itself is not modified
    :raises ValueError: if no word in the dictionary starts with a
        lower-case ASCII letter
    :raises KeyError: if a required letter is not a single lower-case
        ASCII letter

    """
    letters = string.ascii_lowercase

    # Stream the file rather than loading every line; line[:1] is the first
    # character of the line (a bare newline for blank lines).
    with open(args.dictionary_path) as f:
        counts_tmp = Counter(line[:1] for line in f)

    # we only care about lower case ASCII characters
    counts = {letter: counts_tmp[letter] for letter in letters}
    total = sum(counts.values())
    if total == 0:
        # Without this guard the division below fails with an opaque
        # ZeroDivisionError.
        raise ValueError(
            "no word in {!r} starts with a lower-case ASCII letter".format(
                args.dictionary_path
            )
        )

    # probability that a counted word starts with a given letter
    p_dist = {letter: count / total for letter, count in counts.items()}
    probs = [p_dist[letter] for letter in letters]

    # Work on a de-duplicated copy: a repeated required letter must not be
    # counted twice, and the caller's list should stay as it was passed in.
    my_letters = list(dict.fromkeys(args.letters))
    used_p = sum(p_dist[letter] for letter in my_letters)

    # Letters that can still be drawn.  Once this is empty no further
    # coverage is possible, so the loop must stop even if the target was
    # not reached (target above 1, or float rounding just below it).
    remaining = {
        letter
        for letter in letters
        if p_dist[letter] > 0 and letter not in my_letters
    }

    letters_np = np.array(list(letters))
    while used_p < args.wordlist_volume and remaining:
        # str() turns numpy's string scalar into a plain Python str
        letter = str(np.random.choice(letters_np, 1, replace=True, p=probs)[0])
        if letter not in remaining:
            continue  # already chosen; draw again
        remaining.discard(letter)
        my_letters.append(letter)
        used_p += p_dist[letter]

    print(my_letters, used_p)
    my_letters.sort()
    print("Sorted:", my_letters)
    return my_letters


 if __name__ == "__main__":
    # Command-line entry point: parse the required letters, the target
    # fraction and the dictionary path, then hand them to main().
    parser = argparse.ArgumentParser(
        description="""Given a dictionary, and a desired fraction, return a list of random letters
    which at least the desired fraction of words in the dictionary begins with.
    Optionally, set some letters that *must* appear in the list of letters"""
    )
    parser.add_argument("letters", nargs="+", help="Letters that should be included")
    parser.add_argument(
        "--wordlist_volume",
        # The value is a fraction (default 0.60); with type=int argparse
        # rejected every fractional value given on the command line.
        type=float,
        default=0.60,
        help="Minimum amount of the wordlist to include",
    )
    parser.add_argument(
        "--dictionary_path",
        type=str,
        default="/usr/share/dict/words",
        help="Path to dictionary file (must be newline separated text file)",
    )
    args = parser.parse_args()
    # The chosen letters can never cover more than the whole word list, so a
    # target outside [0, 1] (or NaN) is reported as a usage error up front.
    if not 0.0 <= args.wordlist_volume <= 1.0:
        parser.error("--wordlist_volume must be a fraction between 0 and 1")
    main(args)
	#!/usr/bin/env python
	import argparse
	import string
	from typing import List
	from collections import Counter
	import numpy as np


	def main(args) -> List[str]:
	    """Pick random letters whose words cover a target share of a dictionary.

	    Reads the word list at ``args.dictionary_path`` and builds the
	    distribution of lower-case ASCII first letters.  Starting from the
	    required letters in ``args.letters``, further letters are drawn at random
	    (weighted by how many words start with them) until the chosen letters
	    account for at least ``args.wordlist_volume`` of the counted words, or
	    until every letter that starts at least one word has been chosen.

	    The intermediate and the sorted selection are printed to stdout.

	    :param args: namespace with ``dictionary_path`` (newline separated word
	        file), ``letters`` (letters that must be included) and
	        ``wordlist_volume`` (target fraction of the counted words)
	    :returns: List[str] chosen letters, sorted; required letters are
	        de-duplicated and ``args.letters`` itself is not modified
	    :raises ValueError: if no word in the dictionary starts with a
	        lower-case ASCII letter
	    :raises KeyError: if a required letter is not a single lower-case
	        ASCII letter

	    """
	    letters = string.ascii_lowercase

	    # Stream the file rather than loading every line; line[:1] is the first
	    # character of the line (a bare newline for blank lines).
	    with open(args.dictionary_path) as f:
	        counts_tmp = Counter(line[:1] for line in f)

	    # we only care about lower case ASCII characters
	    counts = {letter: counts_tmp[letter] for letter in letters}
	    total = sum(counts.values())
	    if total == 0:
	        # Without this guard the division below fails with an opaque
	        # ZeroDivisionError.
	        raise ValueError(
	            "no word in {!r} starts with a lower-case ASCII letter".format(
	                args.dictionary_path
	            )
	        )

	    # probability that a counted word starts with a given letter
	    p_dist = {letter: count / total for letter, count in counts.items()}
	    probs = [p_dist[letter] for letter in letters]

	    # Work on a de-duplicated copy: a repeated required letter must not be
	    # counted twice, and the caller's list should stay as it was passed in.
	    my_letters = list(dict.fromkeys(args.letters))
	    used_p = sum(p_dist[letter] for letter in my_letters)

	    # Letters that can still be drawn.  Once this is empty no further
	    # coverage is possible, so the loop must stop even if the target was
	    # not reached (target above 1, or float rounding just below it).
	    remaining = {
	        letter
	        for letter in letters
	        if p_dist[letter] > 0 and letter not in my_letters
	    }

	    letters_np = np.array(list(letters))
	    while used_p < args.wordlist_volume and remaining:
	        # str() turns numpy's string scalar into a plain Python str
	        letter = str(np.random.choice(letters_np, 1, replace=True, p=probs)[0])
	        if letter not in remaining:
	            continue  # already chosen; draw again
	        remaining.discard(letter)
	        my_letters.append(letter)
	        used_p += p_dist[letter]

	    print(my_letters, used_p)
	    my_letters.sort()
	    print("Sorted:", my_letters)
	    return my_letters


	if __name__ == "__main__":
	    # Command-line entry point: parse the required letters, the target
	    # fraction and the dictionary path, then hand them to main().
	    parser = argparse.ArgumentParser(
	        description="""Given a dictionary, and a desired fraction, return a list of random letters
	which at least the desired fraction of words in the dictionary begins with.
	Optionally, set some letters that must appear in the list of letters"""
	    )
	    parser.add_argument("letters", nargs="+", help="Letters that should be included")
	    parser.add_argument(
	        "--wordlist_volume",
	        # The value is a fraction (default 0.60); with type=int argparse
	        # rejected every fractional value given on the command line.
	        type=float,
	        default=0.60,
	        help="Minimum amount of the wordlist to include",
	    )
	    parser.add_argument(
	        "--dictionary_path",
	        type=str,
	        default="/usr/share/dict/words",
	        help="Path to dictionary file (must be newline separated text file)",
	    )
	    args = parser.parse_args()
	    # The chosen letters can never cover more than the whole word list, so a
	    # target outside [0, 1] (or NaN) is reported as a usage error up front.
	    if not 0.0 <= args.wordlist_volume <= 1.0:
	        parser.error("--wordlist_volume must be a fraction between 0 and 1")
	    main(args)