Created
February 8, 2021 17:09
-
-
Save cacharle/eb4500820019b7085c5a395aee395dff to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/awk -f | |
# Markov chain exercice from chapter 3 (design) of the book The practice of programming. | |
BEGIN { | |
PREFIX_LEN = 2 | |
# need space to detect is the end since other words can't have spaces in them | |
SUFFIX_END = "< END >" | |
# maximum number of words in the output | |
MAX_WORDS = 1000 | |
# words of the input | |
words[0] = "" | |
words_len = 0 | |
} | |
{ | |
for (i = 1; i <= NF; i++) | |
words[words_len++] = $i | |
} | |
# join prefix words with spaces to create a key used in the association table | |
function make_prefix_key(array, _result, _i) | |
{ | |
_result = array[0] | |
for (_i = 1; _i < PREFIX_LEN; _i++) | |
_result = _result " " array[_i] | |
return _result | |
} | |
# shift words in a prefix 1 to the left | |
function shift_prefix(array, _i) | |
{ | |
for (_i = 1; _i < PREFIX_LEN; _i++) | |
array[_i - 1] = array[_i] | |
} | |
# fill prefix array with n elements of words | |
function fill_prefix(array, end, _i) | |
{ | |
for (_i = 0; _i < end; _i++) | |
array[_i] = words[_i] | |
} | |
END { | |
fill_prefix(prefix, PREFIX_LEN - 1) | |
fill_prefix(current_prefix, PREFIX_LEN) | |
for (i = PREFIX_LEN - 1; i < length(words); i++) | |
{ | |
prefix[PREFIX_LEN - 1] = words[i] | |
suffix = (i + 1 != length(words)) ? words[i + 1] : SUFFIX_END | |
key = make_prefix_key(prefix) | |
if (key in states) | |
states[key][length(states[key])] = suffix | |
else | |
states[key][0] = suffix | |
shift_prefix(prefix) | |
} | |
srand() | |
for (i in current_prefix) | |
{ | |
printf current_prefix[i] " " | |
MAX_WORDS-- | |
} | |
for (i = 0; i < MAX_WORDS; i++) | |
{ | |
key = make_prefix_key(current_prefix) | |
state_pick = int(rand() * length(states[key])) | |
suffix = states[key][state_pick] | |
shift_prefix(current_prefix) | |
current_prefix[PREFIX_LEN - 1] = suffix | |
if (suffix == SUFFIX_END) | |
break | |
printf suffix " " | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment