Created
June 20, 2016 18:09
-
-
Save bencbartlett/01d4c700d3edcd6c1497426a59a8667f to your computer and use it in GitHub Desktop.
Very simple second-order Markov text generator. Train on whatever sources you want.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import random

# --- Configuration ---------------------------------------------------------

normalizeLengths = True  # Set the length of the two input sources to be equal.

# Alphabet of characters kept from the training text (upper-case letters and
# the space that separates words); every other character is discarded.
A = "ABCDEFGHIJKLMNOPQRSTUVWXYZ "
charnums = len(A)  # Not used by the script itself; kept as a module-level name.

numWords = 1000  # Maximum number of words appended after the two seed words.


def read_words(path):
    """Return the whitespace-separated tokens of the text file at *path*.

    The file is closed as soon as it has been read.
    """
    with open(path, 'r') as source:
        return source.read().split()


def clean_words(words):
    """Return *words* upper-cased and stripped of characters not in ``A``.

    The words are joined with single spaces, upper-cased, filtered character
    by character against the alphabet ``A`` and split again, so a token that
    consists only of discarded characters (e.g. a number) disappears.
    """
    joined = " ".join(words).upper()
    kept = "".join(ch for ch in joined if ch in A)
    return kept.split()


def build_transitions(words):
    """Build the second-order transition table for *words*.

    Returns a dict mapping ``"WORD1 WORD2"`` to the list of distinct words
    seen immediately after that pair, in order of first appearance.  Each
    successor is stored once, so a later ``random.choice`` is uniform over
    distinct successors rather than weighted by frequency.
    """
    transitions = {}
    for first, second, following in zip(words, words[1:], words[2:]):
        successors = transitions.setdefault(first + " " + second, [])
        if following not in successors:
            successors.append(following)
    return transitions


def generate_text(transitions, seed, num_words):
    """Extend *seed* by up to *num_words* words drawn from *transitions*.

    The last two words of the text so far form the lookup key for the next
    word.  Generation stops early when that key has no recorded successor
    (for example the final word pair of the training text) instead of
    raising ``KeyError``.  *seed* is copied, not modified; the result is a
    new list of words.
    """
    words = list(seed)
    for _ in range(num_words):
        successors = transitions.get(' '.join(words[-2:]))
        if not successors:
            break  # Dead end: nothing ever followed this pair.
        words.append(random.choice(successors))
    return words


if __name__ == "__main__":
    # Read both sources as lists of raw tokens.
    trainingText = read_words("KingJamesBible.txt")
    trainingText2 = read_words("GriffithsQuantumMechanics.txt")

    if normalizeLengths:
        # Truncate the longer source so both contribute the same word count.
        shorterSize = min(len(trainingText), len(trainingText2))
        trainingText = trainingText[:shorterSize]
        trainingText2 = trainingText2[:shorterSize]

    # List of all-caps words containing only characters from the alphabet.
    trainingText = clean_words(trainingText + trainingText2)

    # Building transition dictionary
    d = build_transitions(trainingText)
    if not d:
        raise SystemExit("Training text needs at least three words "
                         "to build a second-order Markov chain.")

    # Starting words: a random word pair that has at least one successor.
    # To start from a fixed phrase instead, replace this with e.g.
    #   text = "ACCORDING TO ALL KNOWN LAWS OF AVIATION".split()
    text = random.choice(list(d)).split()  # This is an array of words

    text = generate_text(d, text, numWords)
    print(' '.join(text).lower())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment