## Unrolled GAN
# A Brock, 2016
# This code implements the toy experiment for unrolled GANs.
# TODO: Make shared variables and reduce the memory transfer overhead
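#
# Unrolling in brief: rather than training G against the current D, G's loss
# is evaluated against the D reached after K optimizer steps on D's own
# objective (K=10 Adam steps below), and G's gradient is backpropagated
# through those K steps. Schematically:
#   theta_D^(0)   = theta_D
#   theta_D^(k+1) = theta_D^(k) + Adam step on L_D(theta_G, theta_D^(k))
#   L_G_unrolled  = L_G(theta_G, theta_D^(K))
# D itself only ever commits a single step; the K-step lookahead exists purely
# to shape G's gradient.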
# Imports
import numpy as np
from numpy import pi
import theano
import theano.tensor as T
from theano.sandbox.rng_mrg import MRG_RandomStreams
import lasagne
import lasagne.layers as ll
from pypr.clustering.gmm import sample_gaussian_mixture as GMM
import matplotlib.pyplot as plt
batch_size = 150
batch_index = T.iscalar('batch_index')
batch_slice = slice(batch_index*batch_size, (batch_index+1)*batch_size)
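# Minibatches are taken as slices of a GPU-resident shared array, selected by
# the symbolic batch_index via `givens` at function-call time; this avoids a
# host-to-device transfer per batch (see the TODO above).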
# Set up generator and fixed random seeds
# rng_data = np.random.RandomState(args.seed_data)
rng = np.random.RandomState(42)
theano_rng = MRG_RandomStreams(rng.randint(2 ** 15))
lasagne.random.set_rng(np.random.RandomState(rng.randint(2 ** 15)))
# Specify the generative model
noise_dim = (batch_size, 256)
noise = theano_rng.normal(size=noise_dim)
G = [ll.InputLayer(shape=noise_dim, input_var=noise)]
G += [ll.DenseLayer(incoming=G[-1],
                    num_units=128,
                    W=lasagne.init.Orthogonal(0.8),
                    b=lasagne.init.Constant(0.),
                    nonlinearity=lasagne.nonlinearities.rectify,
                    name='G1')]
G += [ll.DenseLayer(incoming=G[-1],
                    num_units=128,
                    W=lasagne.init.Orthogonal(0.8),
                    b=lasagne.init.Constant(0.),
                    nonlinearity=lasagne.nonlinearities.rectify,
                    name='G2')]
G += [ll.DenseLayer(incoming=G[-1],
                    num_units=2,
                    W=lasagne.init.Orthogonal(0.8),
                    b=lasagne.init.Constant(0.),
                    nonlinearity=None,
                    name='G_out')]
D = [ll.InputLayer(shape=(None, 2))]
D += [ll.DenseLayer(incoming=D[-1],
                    num_units=128,
                    W=lasagne.init.Orthogonal(0.8),
                    b=lasagne.init.Constant(0.),
                    nonlinearity=lasagne.nonlinearities.rectify,
                    name='D1')]
D += [ll.DenseLayer(incoming=D[-1],
                    num_units=1,
                    W=lasagne.init.Orthogonal(0.8),
                    b=lasagne.init.Constant(0.),
                    nonlinearity=lasagne.nonlinearities.sigmoid,
                    name='DO')]
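# Network shapes, for reference: G maps 256-d Gaussian noise through two
# 128-unit ReLU layers to 2-D points; D is a single 128-unit ReLU layer
# followed by a 1-unit sigmoid that scores points as real vs. generated.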
# Variables
X = T.TensorType('float32', [False]*2)('X')
X_shared = lasagne.utils.shared_empty(2, dtype='float32')
# Outputs
Xh = ll.get_output(G[-1])         # G output
p_X = ll.get_output(D[-1], X/4)   # D(X)
p_Xh = ll.get_output(D[-1], Xh/4) # D(G(Z))
# Params
learning_rate = 1e-4
beta1 = 0.5
L_Dg = T.nnet.binary_crossentropy(T.clip(p_Xh, 1e-7, 1.0 - 1e-7), T.zeros(p_Xh.shape))
L_Dd = T.nnet.binary_crossentropy(T.clip(p_X, 1e-7, 1.0 - 1e-7), T.ones(p_X.shape))
L_G1 = T.nnet.binary_crossentropy(T.clip(p_Xh, 1e-7, 1.0 - 1e-7), T.ones(p_Xh.shape))
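# Loss terms (standard GAN with the non-saturating generator loss):
#   L_Dg: D should output 0 on generated samples -> BCE(D(G(z)), 0)
#   L_Dd: D should output 1 on real samples      -> BCE(D(x), 1)
#   L_G1: G wants D to output 1 on its samples   -> BCE(D(G(z)), 1)
# Outputs are clipped away from exactly 0 and 1 to keep the logs finite.
# Note both real and generated inputs to D are scaled down by 4, presumably
# as a cheap input normalization for this ring of radius-2 modes.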
# Get D updates for use in unrolling
D_updates = lasagne.updates.adam(T.mean(L_Dg+L_Dd), ll.get_all_params(D[-1], trainable=True), learning_rate, beta1=beta1)
# Get L_G for second step
# L_G = theano.clone(L_G1, replace=D_updates)
def fprop(X, Xh):
    # p_X = ll.get_output(D[-1], X)   # D(X)
    # p_Xh = ll.get_output(D[-1], Xh) # D(G(Z))
    # L_Dg = T.nnet.binary_crossentropy(T.clip(p_Xh, 1e-7, 1.0 - 1e-7), T.zeros(p_Xh.shape))
    # L_Dd = T.nnet.binary_crossentropy(T.clip(p_X, 1e-7, 1.0 - 1e-7), T.ones(p_X.shape))
    # lasagne.updates.adam(T.mean(L_Dg+L_Dd), ll.get_all_params(D[-1], trainable=True), learning_rate, beta1=beta1)
    # return replace = D_updates
    return [L_G1, D_updates]
# Maybe...
print('Building G graph...')
values, updates = theano.scan(fprop, n_steps=10, non_sequences=[X, Xh])
# Scan and go through all 10 D updates
# losses = scan...
# l_G = values[-1]
L_G = theano.clone(L_G1, replace=updates)
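# How the unrolling is wired up: scan chains D's Adam update 10 times, so
# `updates` maps each of D's shared variables to the value it would hold after
# 10 lookahead steps. theano.clone then substitutes those expressions into
# L_G1 wherever D's parameters appear; since the substituted expressions
# depend on Xh (and hence on G's parameters), Adam on G differentiates through
# the entire 10-step unrolling. The lookahead steps themselves are never
# committed to D's shared variables.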
G_updates = lasagne.updates.adam(T.mean(L_G), ll.get_all_params(G[-1], trainable=True), learning_rate, beta1=beta1)
print('Compiling Discriminator Function...')
Dfn = theano.function([batch_index], T.grad(T.mean(L_G1), Xh), updates=D_updates, givens={X: X_shared[batch_slice]})
Dgd = theano.function([X], L_Dd)
print('Compiling Generator Function...')
Gfn = theano.function([batch_index], Xh, updates=G_updates, givens={X: X_shared[batch_slice]})
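# Compiled functions:
#   Dfn: applies one real Adam step to D (D_updates) on the given minibatch;
#        its return value, dL_G1/dXh, is just kept around for inspection.
#   Dgd: evaluates the real-data loss L_Dd at arbitrary 2-D points; used below
#        to draw contours of the discriminator's response over the plane.
#   Gfn: applies one Adam step to G through the unrolled loss and returns the
#        generated samples for plotting.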
# Define the gaussian mixture
r = 2
thetas = np.linspace(0, 2*pi - pi/4, 8)
means = np.asarray([r*np.cos(thetas), r*np.sin(thetas)]).transpose()
variances = [np.diag(0.02*np.ones(2))]*8
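# Target density: a ring of 8 isotropic Gaussians (radius 2, variance 0.02,
# i.e. std ~0.14), the usual mixture-of-Gaussians toy setup for studying
# mode collapse, in the spirit of the unrolled-GAN paper's toy experiment.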
# Contour Plot Limits and Resolution
delt = 0.1
axlim = 4.5
[Xx, Yy] = np.meshgrid(np.arange(-axlim, axlim, delt), np.arange(-axlim, axlim, delt))
print('running...')
idx = 0  # Python-side minibatch counter (distinct from the symbolic batch_index)
num_batches = 50
X_shared.set_value(np.float32(GMM(centroids=means, ccov=variances, samples=batch_size*num_batches)))
for i in range(25000):
    # Resample the data pool once every minibatch in it has been used
    if not idx % num_batches:
        idx = 0
        X_shared.set_value(np.float32(GMM(centroids=means, ccov=variances, samples=batch_size*num_batches)))
    # Alternate one D step and one G step
    if i % 2:
        grads = Dfn(idx)
        idx += 1
    else:
        Xs = Gfn(idx)
    # Periodically plot generated samples over the discriminator's loss surface
    if not i % 20:
        plt.scatter(Xs[:, 0], Xs[:, 1])
        gds = Dgd(np.float32(np.asarray([Xx.flatten(), Yy.flatten()])).transpose())
        plt.contour(Xx, Yy, np.reshape(gds, np.shape(Xx)))
        plt.axis([-axlim, axlim, -axlim, axlim])
        plt.title('Step # ' + str(i))
        plt.pause(0.000001)
        plt.clf()
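# Expected behavior, if the unrolling is doing its job: the generated scatter
# should spread over all 8 modes as training proceeds, rather than collapsing
# onto (or cycling among) a single mode the way a 0-step GAN tends to here.
# A quick (hypothetical) mode-coverage check one could append, counting how
# many means have a generated sample within ~3 standard deviations:
# covered = sum(np.min(np.linalg.norm(Xs - m, axis=1)) < 3*np.sqrt(0.02)
#               for m in means)
# print('modes covered: %d / 8' % covered)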