## Unrolled GAN
# A Brock, 2016
# This code implements the toy experiment for unrolled GANs.
# TODO: Make shared variables and reduce the memory transfer overhead
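#
# Unrolling in brief: rather than training G against the current D, G's loss
# is evaluated against the D reached after K optimizer steps on D's own
# objective (K=10 Adam steps below), and G's gradient is backpropagated
# through those K steps. Schematically:
#   theta_D^(0)   = theta_D
#   theta_D^(k+1) = theta_D^(k) + Adam step on L_D(theta_G, theta_D^(k))
#   L_G_unrolled  = L_G(theta_G, theta_D^(K))
# D itself only ever commits a single step; the K-step lookahead exists purely
# to shape G's gradient.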
# Imports
import numpy as np
from numpy import pi
import theano
import theano.tensor as T
from theano.sandbox.rng_mrg import MRG_RandomStreams
import lasagne
import lasagne.layers as ll
from pypr.clustering.gmm import sample_gaussian_mixture as GMM
import matplotlib.pyplot as plt
batch_size = 150
batch_index = T.iscalar('batch_index')
batch_slice = slice(batch_index*batch_size, (batch_index+1)*batch_size)
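# Minibatches are taken as slices of a GPU-resident shared array, selected by
# the symbolic batch_index via `givens` at function-call time; this avoids a
# host-to-device transfer per batch (see the TODO above).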
# Set up generator and fixed random seeds
# rng_data = np.random.RandomState(args.seed_data)
rng = np.random.RandomState(42)
theano_rng = MRG_RandomStreams(rng.randint(2 ** 15))
lasagne.random.set_rng(np.random.RandomState(rng.randint(2 ** 15)))
# Specify the generative model
noise_dim = (batch_size, 256)
noise = theano_rng.normal(size=noise_dim)
G = [ll.InputLayer(shape=noise_dim, input_var=noise)]
G += [ll.DenseLayer(incoming=G[-1],
                    num_units=128,
                    W=lasagne.init.Orthogonal(0.8),
                    b=lasagne.init.Constant(0.),
                    nonlinearity=lasagne.nonlinearities.rectify,
                    name='G1')]
G += [ll.DenseLayer(incoming=G[-1],
                    num_units=128,
                    W=lasagne.init.Orthogonal(0.8),
                    b=lasagne.init.Constant(0.),
                    nonlinearity=lasagne.nonlinearities.rectify,
                    name='G2')]
G += [ll.DenseLayer(incoming=G[-1],
                    num_units=2,
                    W=lasagne.init.Orthogonal(0.8),
                    b=lasagne.init.Constant(0.),
                    nonlinearity=None,
                    name='G_out')]
D = [ll.InputLayer(shape=(None, 2))]
D += [ll.DenseLayer(incoming=D[-1],
                    num_units=128,
                    W=lasagne.init.Orthogonal(0.8),
                    b=lasagne.init.Constant(0.),
                    nonlinearity=lasagne.nonlinearities.rectify,
                    name='D1')]
D += [ll.DenseLayer(incoming=D[-1],
                    num_units=1,
                    W=lasagne.init.Orthogonal(0.8),
                    b=lasagne.init.Constant(0.),
                    nonlinearity=lasagne.nonlinearities.sigmoid,
                    name='DO')]
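# Network shapes, for reference: G maps 256-d Gaussian noise through two
# 128-unit ReLU layers to 2-D points; D is a single 128-unit ReLU layer
# followed by a 1-unit sigmoid that scores points as real vs. generated.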
# Variables
X = T.TensorType('float32', [False]*2)('X')
X_shared = lasagne.utils.shared_empty(2, dtype='float32')
# Outputs
Xh = ll.get_output(G[-1])         # G output
p_X = ll.get_output(D[-1], X/4)   # D(X)
p_Xh = ll.get_output(D[-1], Xh/4) # D(G(Z))
# Params
learning_rate = 1e-4
beta1 = 0.5
L_Dg = T.nnet.binary_crossentropy(T.clip(p_Xh, 1e-7, 1.0 - 1e-7), T.zeros(p_Xh.shape))
L_Dd = T.nnet.binary_crossentropy(T.clip(p_X, 1e-7, 1.0 - 1e-7), T.ones(p_X.shape))
L_G1 = T.nnet.binary_crossentropy(T.clip(p_Xh, 1e-7, 1.0 - 1e-7), T.ones(p_Xh.shape))
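# Loss terms (standard GAN with the non-saturating generator loss):
#   L_Dg: D should output 0 on generated samples -> BCE(D(G(z)), 0)
#   L_Dd: D should output 1 on real samples      -> BCE(D(x), 1)
#   L_G1: G wants D to output 1 on its samples   -> BCE(D(G(z)), 1)
# Outputs are clipped away from exactly 0 and 1 to keep the logs finite.
# Note both real and generated inputs to D are scaled down by 4, presumably
# as a cheap input normalization for this ring of radius-2 modes.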
# Get D updates for use in unrolling
D_updates = lasagne.updates.adam(T.mean(L_Dg+L_Dd), ll.get_all_params(D[-1], trainable=True), learning_rate, beta1=beta1)
# Get L_G for second step
# L_G = theano.clone(L_G1, replace=D_updates)
def fprop(X, Xh):
    # p_X = ll.get_output(D[-1], X)   # D(X)
    # p_Xh = ll.get_output(D[-1], Xh) # D(G(Z))
    # L_Dg = T.nnet.binary_crossentropy(T.clip(p_Xh, 1e-7, 1.0 - 1e-7), T.zeros(p_Xh.shape))
    # L_Dd = T.nnet.binary_crossentropy(T.clip(p_X, 1e-7, 1.0 - 1e-7), T.ones(p_X.shape))
    # lasagne.updates.adam(T.mean(L_Dg+L_Dd), ll.get_all_params(D[-1], trainable=True), learning_rate, beta1=beta1)
    # return replace = D_updates
    return [L_G1, D_updates]
# Maybe...
print('Building G graph...')
values, updates = theano.scan(fprop, n_steps=10, non_sequences=[X, Xh])
# Scan and go through all 10 D updates
# losses = scan...
# l_G = values[-1]
L_G = theano.clone(L_G1, replace=updates)
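# How the unrolling is wired up: scan chains D's Adam update 10 times, so
# `updates` maps each of D's shared variables to the value it would hold after
# 10 lookahead steps. theano.clone then substitutes those expressions into
# L_G1 wherever D's parameters appear; since the substituted expressions
# depend on Xh (and hence on G's parameters), Adam on G differentiates through
# the entire 10-step unrolling. The lookahead steps themselves are never
# committed to D's shared variables.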
G_updates = lasagne.updates.adam(T.mean(L_G), ll.get_all_params(G[-1], trainable=True), learning_rate, beta1=beta1)
print('Compiling Discriminator Function...')
Dfn = theano.function([batch_index], T.grad(T.mean(L_G1), Xh), updates=D_updates, givens={X: X_shared[batch_slice]})
Dgd = theano.function([X], L_Dd)
print('Compiling Generator Function...')
Gfn = theano.function([batch_index], Xh, updates=G_updates, givens={X: X_shared[batch_slice]})
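# Compiled functions:
#   Dfn: applies one real Adam step to D (D_updates) on the given minibatch;
#        its return value, dL_G1/dXh, is just kept around for inspection.
#   Dgd: evaluates the real-data loss L_Dd at arbitrary 2-D points; used below
#        to draw contours of the discriminator's response over the plane.
#   Gfn: applies one Adam step to G through the unrolled loss and returns the
#        generated samples for plotting.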
# Define the gaussian mixture
r = 2
thetas = np.linspace(0, 2*pi - pi/4, 8)
means = np.asarray([r*np.cos(thetas), r*np.sin(thetas)]).transpose()
variances = [np.diag(0.02*np.ones(2))]*8
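# Target density: a ring of 8 isotropic Gaussians (radius 2, variance 0.02,
# i.e. std ~0.14), the usual mixture-of-Gaussians toy setup for studying
# mode collapse, in the spirit of the unrolled-GAN paper's toy experiment.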
# Contour Plot Limits and Resolution
delt = 0.1
axlim = 4.5
[Xx, Yy] = np.meshgrid(np.arange(-axlim, axlim, delt), np.arange(-axlim, axlim, delt))
print('running...')
idx = 0  # Python-side minibatch counter (distinct from the symbolic batch_index)
num_batches = 50
X_shared.set_value(np.float32(GMM(centroids=means, ccov=variances, samples=batch_size*num_batches)))
for i in range(25000):
    # Resample the data pool once every minibatch in it has been used
    if not idx % num_batches:
        idx = 0
        X_shared.set_value(np.float32(GMM(centroids=means, ccov=variances, samples=batch_size*num_batches)))
    # Alternate one D step and one G step
    if i % 2:
        grads = Dfn(idx)
        idx += 1
    else:
        Xs = Gfn(idx)
    # Periodically plot generated samples over the discriminator's loss surface
    if not i % 20:
        plt.scatter(Xs[:, 0], Xs[:, 1])
        gds = Dgd(np.float32(np.asarray([Xx.flatten(), Yy.flatten()])).transpose())
        plt.contour(Xx, Yy, np.reshape(gds, np.shape(Xx)))
        plt.axis([-axlim, axlim, -axlim, axlim])
        plt.title('Step # ' + str(i))
        plt.pause(0.000001)
        plt.clf()
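# Expected behavior, if the unrolling is doing its job: the generated scatter
# should spread over all 8 modes as training proceeds, rather than collapsing
# onto (or cycling among) a single mode the way a 0-step GAN tends to here.
# A quick (hypothetical) mode-coverage check one could append, counting how
# many means have a generated sample within ~3 standard deviations:
# covered = sum(np.min(np.linalg.norm(Xs - m, axis=1)) < 3*np.sqrt(0.02)
#               for m in means)
# print('modes covered: %d / 8' % covered)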