ssophwang · March 25, 2017 15:24
diff --git a/Poet.py b/Poet.py
 import re, random
 import numpy as np
 import json
 # Corpus text files the model is built from, read in this order.
 poem_files = [
    'pg17192.txt',
    'Where the Sidewalk Ends by Shel Silverstein_djvu.txt',
 ]
 # One entry per kept corpus line; each entry is that line's list of words.
 poem_lines = list()
 # First-word tallies and the probabilities derived from them (filled later).
 firstword_count_dict = dict()
 firstword_prob_dict = dict()

 # Load every corpus file and keep each line that holds at least two word
 # tokens, stored in poem_lines as the list of those tokens.  Blank and
 # single-token lines are skipped by the same length test.
 for poem_file in poem_files:
    # The with-block closes the file even if reading or tokenizing raises.
    with open(poem_file, 'r') as f:
        for line in f:
            # Raw-string pattern: '\w' is not a valid plain-string escape.
            words = re.findall(r'\w+', line)
            if len(words) > 1:
                poem_lines.append(words)

 #print poem_lines

 # Corpus statistics gathered in one pass over poem_lines.
 lines = len(poem_lines)   # number of kept lines
 last = 0                  # lines whose final word is 'weary'
 words = []                # every word, in corpus order
 for line in poem_lines:
    if line and line[-1] == 'weary':
        last += 1
    words.extend(line)
 unicorns = set(words)     # distinct words

 # firstword_counts = {}
 #
 # for line in poem_lines:
 #     if len(line) > 2:
 #         first_word = line[0]
 #
 #         if first_word in firstword_counts:
 #             firstword_counts[first_word] += 1
 #         else:
 #             firstword_counts[first_word] = 1
 #
 # print firstword_counts
 # words = words.replace(',', '').replace('.', ' ')
 #
 # word_states = re.findall('\w+', words)
 #
 # print word_states
 # print len(set(word_states))
 #
 # counts_dict = {}
 #
 # for i in range(len(word_states)-1):
 #     first_word = word_states[i]
 #     next_word = word_states[i+1]
 #
 #     if (first_word, next_word) in counts_dict:
 #         counts_dict[(first_word,next_word)] += 1
 #     else:
 #         counts_dict[(first_word,next_word)] = 1
 #
 # transition_probabilities = {}
 # s = sum(counts_dict.values())
 #
 #
 # for key in counts_dict:
 #     transition_probabilities[key] = float(counts_dict[key])/s
 # print transition_probabilities
 # #    float(counts_dict.keys())/s
 # #for x in range(10):
 # #    sentence = ''
 # #    for i in range(10):
 # #        word = results[random.randint(0, len(results) - 1)]
 # #        sentence += ' ' + word
 # #    print sentence

 # next_word_counts_dict = {}
 # for word_list in poem_lines:
 #     word_list.append('\n')
 #     for i in range(len(word_list) - 1):
 #         thisword = word_list[i]
 #         nextword = word_list[i+1]
 #
 #         if thisword not in next_word_counts_dict:
 #             next_word_counts_dict[thisword] = {nextword:1}
 #         else:
 #             if nextword not in next_word_counts_dict[thisword]:
 #                 next_word_counts_dict[thisword][nextword] = 1
 #             else:
 #                 next_word_counts_dict[thisword][nextword] += 1

 # Tally how often each word opens a line.
 for line in poem_lines:
    if not line:
        continue
    first_word = line[0]
    firstword_count_dict[first_word] = firstword_count_dict.get(first_word, 0) + 1

 # Normalise the tallies into a probability for each opening word.
 count_sum = sum(firstword_count_dict.values())
 for first_word, tally in firstword_count_dict.items():
    firstword_prob_dict[first_word] = tally / float(count_sum)

 print(firstword_prob_dict)

 # Build the transition table: for each word, how often every successor
 # follows it.  The last word of a line is followed by the marker '\n'.
 next_word_counts_dict = {}
 for word_list in poem_lines:
    # Successor of each word: the rest of the line, then the end marker.
    followers = word_list[1:] + ['\n']
    for thisword, nextword in zip(word_list, followers):
        successor_counts = next_word_counts_dict.setdefault(thisword, {})
        successor_counts[nextword] = successor_counts.get(nextword, 0) + 1

 # Convert each word's successor counts into probabilities in place, so the
 # values of every inner dict sum to 1.
 for thisword in next_word_counts_dict:
    successor_counts = next_word_counts_dict[thisword]
    count_sum = sum(successor_counts.values())
    for nextword in successor_counts:
        successor_counts[nextword] = float(successor_counts[nextword]) / count_sum

 # Split the first-word distribution into two parallel lists: the candidate
 # words and, at the same index, each word's probability.
 first_states = list(firstword_prob_dict)
 pp = [firstword_prob_dict[key] for key in first_states]
 print(first_states)
 print(pp)

 #for i in range(len(next_word_counts_dict.items)):
 #print next_word_counts_dict.items()

 # Generate and print 1000 lines: draw an opening word, then follow the
 # transition table for at most nine further steps.
 for x in range(1000):
    state = np.random.choice(first_states, p=pp)
    sentence = [state]
    for i in range(9):
        # A state with no table entry (such as the '\n' end marker) can
        # never advance, so the remaining steps would all be no-ops.
        if state not in next_word_counts_dict:
            break
        successors = next_word_counts_dict[state]
        next_states = list(successors.keys())
        p = list(successors.values())
        state = np.random.choice(next_states, p=p)
        sentence.append(state)
    # The end marker is dropped before the words are joined.
    print(' '.join([w for w in sentence if w != '\n']))



 #for word in next_word_counts_dict:
 #    print next_word_counts_dict[word].items()
 #   np.random.choice(word, p=)
 # word, ':', next_word_counts_dict[word]
 # Report how many words have an entry in the transition table.
 print(len(next_word_counts_dict))

 # Save both tables as JSON.  The with-blocks close each file even when
 # serialising or writing raises.
 with open('poem_model.json', "w") as output_file:
    output_file.write(json.dumps(next_word_counts_dict))

 with open('poem_model_firstword.json', "w") as output_file2:
    output_file2.write(json.dumps(firstword_prob_dict))
	import re, random
	import numpy as np
	import json
	# Corpus text files the model is built from, read in this order.
	poem_files = [
	    'pg17192.txt',
	    'Where the Sidewalk Ends by Shel Silverstein_djvu.txt',
	]
	# One entry per kept corpus line; each entry is that line's list of words.
	poem_lines = list()
	# First-word tallies and the probabilities derived from them (filled later).
	firstword_count_dict = dict()
	firstword_prob_dict = dict()

	# Load every corpus file and keep each line that holds at least two word
	# tokens, stored in poem_lines as the list of those tokens.  Blank and
	# single-token lines are skipped by the same length test.
	for poem_file in poem_files:
	    # The with-block closes the file even if reading or tokenizing raises.
	    with open(poem_file, 'r') as f:
	        for line in f:
	            # Raw-string pattern: '\w' is not a valid plain-string escape.
	            words = re.findall(r'\w+', line)
	            if len(words) > 1:
	                poem_lines.append(words)

	#print poem_lines

	# Corpus statistics gathered in one pass over poem_lines.
	lines = len(poem_lines)   # number of kept lines
	last = 0                  # lines whose final word is 'weary'
	words = []                # every word, in corpus order
	for line in poem_lines:
	    if line and line[-1] == 'weary':
	        last += 1
	    words.extend(line)
	unicorns = set(words)     # distinct words

	# firstword_counts = {}
	#
	# for line in poem_lines:
	# if len(line) > 2:
	# first_word = line[0]
	#
	# if first_word in firstword_counts:
	# firstword_counts[first_word] += 1
	# else:
	# firstword_counts[first_word] = 1
	#
	# print firstword_counts
	# words = words.replace(',', '').replace('.', ' ')
	#
	# word_states = re.findall('\w+', words)
	#
	# print word_states
	# print len(set(word_states))
	#
	# counts_dict = {}
	#
	# for i in range(len(word_states)-1):
	# first_word = word_states[i]
	# next_word = word_states[i+1]
	#
	# if (first_word, next_word) in counts_dict:
	# counts_dict[(first_word,next_word)] += 1
	# else:
	# counts_dict[(first_word,next_word)] = 1
	#
	# transition_probabilities = {}
	# s = sum(counts_dict.values())
	#
	#
	# for key in counts_dict:
	# transition_probabilities[key] = float(counts_dict[key])/s
	# print transition_probabilities
	# # float(counts_dict.keys())/s
	# #for x in range(10):
	# # sentence = ''
	# # for i in range(10):
	# # word = results[random.randint(0, len(results) - 1)]
	# # sentence += ' ' + word
	# # print sentence

	# next_word_counts_dict = {}
	# for word_list in poem_lines:
	# word_list.append('\n')
	# for i in range(len(word_list) - 1):
	# thisword = word_list[i]
	# nextword = word_list[i+1]
	#
	# if thisword not in next_word_counts_dict:
	# next_word_counts_dict[thisword] = {nextword:1}
	# else:
	# if nextword not in next_word_counts_dict[thisword]:
	# next_word_counts_dict[thisword][nextword] = 1
	# else:
	# next_word_counts_dict[thisword][nextword] += 1

	# Tally how often each word opens a line.
	for line in poem_lines:
	    if not line:
	        continue
	    first_word = line[0]
	    firstword_count_dict[first_word] = firstword_count_dict.get(first_word, 0) + 1

	# Normalise the tallies into a probability for each opening word.
	count_sum = sum(firstword_count_dict.values())
	for first_word, tally in firstword_count_dict.items():
	    firstword_prob_dict[first_word] = tally / float(count_sum)

	print(firstword_prob_dict)

	# Build the transition table: for each word, how often every successor
	# follows it.  The last word of a line is followed by the marker '\n'.
	next_word_counts_dict = {}
	for word_list in poem_lines:
	    # Successor of each word: the rest of the line, then the end marker.
	    followers = word_list[1:] + ['\n']
	    for thisword, nextword in zip(word_list, followers):
	        successor_counts = next_word_counts_dict.setdefault(thisword, {})
	        successor_counts[nextword] = successor_counts.get(nextword, 0) + 1

	# Convert each word's successor counts into probabilities in place, so the
	# values of every inner dict sum to 1.
	for thisword in next_word_counts_dict:
	    successor_counts = next_word_counts_dict[thisword]
	    count_sum = sum(successor_counts.values())
	    for nextword in successor_counts:
	        successor_counts[nextword] = float(successor_counts[nextword]) / count_sum

	# Split the first-word distribution into two parallel lists: the candidate
	# words and, at the same index, each word's probability.
	first_states = list(firstword_prob_dict)
	pp = [firstword_prob_dict[key] for key in first_states]
	print(first_states)
	print(pp)

	#for i in range(len(next_word_counts_dict.items)):
	#print next_word_counts_dict.items()

	# Generate and print 1000 lines: draw an opening word, then follow the
	# transition table for at most nine further steps.
	for x in range(1000):
	    state = np.random.choice(first_states, p=pp)
	    sentence = [state]
	    for i in range(9):
	        # A state with no table entry (such as the '\n' end marker) can
	        # never advance, so the remaining steps would all be no-ops.
	        if state not in next_word_counts_dict:
	            break
	        successors = next_word_counts_dict[state]
	        next_states = list(successors.keys())
	        p = list(successors.values())
	        state = np.random.choice(next_states, p=p)
	        sentence.append(state)
	    # The end marker is dropped before the words are joined.
	    print(' '.join([w for w in sentence if w != '\n']))



	#for word in next_word_counts_dict:
	# print next_word_counts_dict[word].items()
	# np.random.choice(word, p=)
	# word, ':', next_word_counts_dict[word]
	# Report how many words have an entry in the transition table.
	print(len(next_word_counts_dict))

	# Save both tables as JSON.  The with-blocks close each file even when
	# serialising or writing raises.
	with open('poem_model.json', "w") as output_file:
	    output_file.write(json.dumps(next_word_counts_dict))

	with open('poem_model_firstword.json', "w") as output_file2:
	    output_file2.write(json.dumps(firstword_prob_dict))