Based on Kyle McDonald's jupyter notebook code for sampling GPT-2 models
#!/usr/bin/env python
# coding: utf-8

# In[1]:

import json
import os
import numpy as np
import tensorflow as tf
import model, sample, encoder
from sys import argv, stdin
# In[2]:

# !ln -s ../models models  # hack to make models "appear" in two places

# In[2]:

# argv[0] is the script path; the single positional argument is the
# sampling temperature. The text seed is read from stdin.
EXP_NAME, TEMP_STR = argv
SEED = stdin.read()
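# Example invocation (the script name here is hypothetical; pass the
# temperature as the only argument and pipe the text seed on stdin):
#   echo "The sea was calm" | python gpt2_sample.py 0.8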
model_name = 'poet3'
seed = None  # RNG seed (distinct from the SEED text above); None seeds from entropy
nsamples = 32
batch_size = 32
length = None
temperature = float(TEMP_STR)  # 0 is deterministic
top_k = 0  # 0 means no restrictions
assert nsamples % batch_size == 0
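# Samples are drawn batch_size at a time, so nsamples must divide evenly;
# with 32/32 the sampling loop in generate() below runs exactly once.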
enc = encoder.get_encoder(model_name)
hparams = model.default_hparams()
with open(os.path.join('models', model_name, 'hparams.json')) as f:
    hparams.override_from_dict(json.load(f))

if length is None:
    length = hparams.n_ctx // 2
elif length > hparams.n_ctx:
    raise ValueError("Can't get samples longer than window size: %s" % hparams.n_ctx)
# In[3]:

sess = tf.InteractiveSession()
# replace with this in script:
# with tf.Session(graph=tf.Graph()) as sess:

# In[4]:

context = tf.placeholder(tf.int32, [batch_size, None])
np.random.seed(seed)
tf.set_random_seed(seed)
output = sample.sample_sequence(
    hparams=hparams, length=length,
    context=context,
    batch_size=batch_size,
    temperature=temperature, top_k=top_k
)

saver = tf.train.Saver()
ckpt = tf.train.latest_checkpoint(os.path.join('models', model_name))
saver.restore(sess, ckpt)
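# Loads the most recent fine-tuned checkpoint from models/poet3 into the
# live session; all sampling below uses these weights.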
# In[9]:

from utils.list_all_files import *
import unicodedata
import os, re, random
mapping = {
    '\xa0': ' ',
    'Æ': 'AE',
    'æ': 'ae',
    'è': 'e',
    'é': 'e',
    'ë': 'e',
    'ö': 'o',
    '–': '-',
    '—': '-',
    '‘': "'",
    '’': "'",
    '“': '"',
    '”': '"'
}
def remove_special(text):
    return ''.join([mapping[e] if e in mapping else e for e in text])

def strip_word(word):
    word = re.sub(r'^\W*|\W*$', '', word).lower()
    return word
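# e.g. strip_word('"Hello,"') -> 'hello'; remove_special('“cat—dog”') -> '"cat-dog"'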
# basenames = []
# all_poems = {}
# total_lines = 0
# words = set()
# for fn in list_all_files('../../scraping/poetry/output'):
#     with open(fn) as f:
#         original = f.read()
#     text = remove_special(original).split('\n')
#     poem = text[3:]
#     basename = os.path.basename(fn)
#     basename = os.path.splitext(basename)[0]
#     basenames.append(basename)
#     all_poems[basename] = {
#         'url': text[0],
#         'title': text[1],
#         'author': text[2],
#         'poem': poem
#     }
#     total_lines += len(poem)
#     poem = '\n'.join(poem)
#     words.update([strip_word(e) for e in poem.split()])
# words.remove('')
# words = list(words)
# print(total_lines)
# In[10]:

def titlecase_word(word):
    return word[0].upper() + word[1:]

# titlecase_word("carpenter's"), "carpenter's".title()
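# str.title() capitalizes the letter after an apostrophe
# ("carpenter's".title() == "Carpenter'S"), hence the manual version above.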
# In[11]:

def random_chunk(array, length):
    start = random.randint(0, max(0, len(array) - length - 1))
    return array[start:start+length]

def random_item(array):
    return array[random.randint(0, len(array) - 1)]

# random_chunk(all_poems[basenames[0]]['poem'], 2), titlecase_word(random_item(words))
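# random_item(xs) is effectively equivalent to random.choice(xs).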
# In[12]: | |
# seeds = ''' | |
# blue | |
# epoch | |
# ethereal | |
# ineffable | |
# iridescent | |
# nefarious | |
# oblivion | |
# quiver | |
# solitude | |
# sonorous | |
# '''.split() | |
# len(seeds) | |
# In[13]:

# from utils.progress import progress

# In[26]:
def clean(text):
    return text.split('<|endoftext|>')[0]
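# '<|endoftext|>' is GPT-2's document-boundary token; clean() keeps only the
# text before the first one. (Defined here but never called in this script.)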
def generate(raw_text):
    # inspiration = remove_special(inspiration).strip()
    # seed = titlecase_word(seed).strip()
    # raw_text = inspiration + '\n' + seed
    context_tokens = enc.encode(raw_text)
    n_context = len(context_tokens)
    results = []
    for _ in range(nsamples // batch_size):
        out = sess.run(output, feed_dict={
            context: [context_tokens for _ in range(batch_size)]
        })
        for tokens in out:  # `tokens`, not `sample`, to avoid shadowing the imported module
            text = enc.decode(tokens[n_context:])
            result = raw_text + text
            results.append(result)
            print('\n' * 3 + '=' * 64 + '\n' * 3)
            print(result)
    return results
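# Note: each row of `out` holds the context tokens followed by `length` newly
# sampled ids, which is why generate() slices at n_context before decoding.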
if __name__ == '__main__':
    res_list = generate(SEED)