andrewliao11 · March 25, 2019 02:38 · andrewliao11 · Mar 22, 2019
diff --git a/natural_gradient.py b/natural_gradient.py
 import imageio
 import numpy as np
 from utils import *

 # Train the weights returned by get_model() on the data from get_data(),
 # using either a plain gradient step or a gradient preconditioned by a
 # full / diagonal Fisher estimate, then write diagnostic images.
 mode = 'sgd'    # sgd, fisher, or dig_fisher

 # Fail early (before any work is done) on an unsupported mode.
 if mode not in ('sgd', 'fisher', 'dig_fisher'):
     raise ValueError(f'unknown mode: {mode!r}')


 X_train, X_test, t_train, t_test = get_data()
 W = get_model()


 alpha = 1   # step size
 ys = []     # predictions recorded before each update
 ws = []     # weights recorded before each update
 ls = []     # loss recorded before each update

 # Training
 for it in range(15):
     # Forward
     z = X_train @ W
     y = sigm(z)
     ys.append(y)
     ws.append(W)

     loss = NLL(y, t_train)
     ls.append(loss)

     # Loss
     print(f'Loss: {loss:.3f}')

     m = y.shape[0]

     # dloss/dz: the chain-rule product
     #   (y - t) / (m * y * (1 - y))  *  y * (1 - y)
     # simplifies to (y - t) / m. Using the simplified form avoids the
     # 0/0 = nan that the unsimplified product yields once y saturates.
     dW = X_train.T @ ((y - t_train) / m)

     if mode == 'sgd':
         step = dW
     else:
         # Per-sample gradient of the log-likelihood w.r.t. W (one row per
         # sample); dloglik/dz simplifies to (t - y) for the same reason.
         dloglik_dw = (t_train - y) * X_train

         if mode == 'fisher':
             # np.cov treats each row of its argument as one variable, so
             # the transpose gives a (n_weights, n_weights) matrix.
             F = np.cov(dloglik_dw.T)
             # Solve F @ step = dW instead of forming the explicit inverse.
             step = np.linalg.solve(F, dW)
         else:
             # Diagonal approx.: mean squared per-sample gradient, shaped
             # as a column so it divides dW element-wise.
             F = np.mean(dloglik_dw * dloglik_dw, axis=0)[:, None]
             step = dW / F

     # Step
     W = W - alpha * step

 y = sigm(X_test @ W)

 acc = get_acc(y, t_test)
 print(f'Accuracy: {acc:.3f}')

 dist_imgs = [viz_y(y, '[{}] Iteration {}'.format(mode, it)) for it, y in enumerate(ys)]
 imageio.mimsave('{}_dist_change.gif'.format(mode), dist_imgs)

 ylim = [np.stack(ws).min() - 0.5, np.stack(ws).max() + 0.5]
 w_imgs = [viz_w(w, '[{}] Iteration {}'.format(mode, it), ylim) for it, w in enumerate(ws)]
 imageio.mimsave('{}_weight_change.gif'.format(mode), w_imgs)

 # Label and name the loss curve after the active mode, like the GIFs above,
 # so runs with different modes do not overwrite each other's output.
 viz_loss(ls, '[{}] loss curve'.format(mode), '{}_loss.png'.format(mode))
diff --git a/utils.py b/utils.py
 import imageio
 import numpy as np
 import matplotlib.pyplot as plt
 from sklearn.utils import shuffle


 # Seed NumPy's global RNG so every np.random call below is reproducible.
 np.random.seed(9999)
 # Apply matplotlib's "ggplot" style sheet to all figures created later.
 plt.style.use('ggplot')


 def get_data():
     """Create a shuffled two-class toy dataset and split it into train/test.

     Class 0 is 100 standard-normal 2-D points shifted by -1, class 1 is 100
     such points shifted by +1. Samples and labels are shuffled together.

     Returns:
         Tuple ``(X_train, X_test, t_train, t_test)``: the first 150 shuffled
         rows form the training split, the remaining 50 the test split.
         Labels are column vectors of 0.0 / 1.0.
     """
     n_per_class = 100
     n_train = 150

     # Draw order (class 0 first, then class 1) is kept so that a seeded
     # global RNG produces the same samples.
     neg = np.random.randn(n_per_class, 2) - 1
     pos = np.random.randn(n_per_class, 2) + 1

     features = np.concatenate((neg, pos), axis=0)
     labels = np.concatenate(
         (np.zeros((n_per_class, 1)), np.ones((n_per_class, 1))), axis=0
     )

     features, labels = shuffle(features, labels)

     return (
         features[:n_train],
         features[n_train:],
         labels[:n_train],
         labels[n_train:],
     )


 def get_model():
     """Return a freshly initialised (2, 1) weight matrix.

     The weights are standard-normal draws from NumPy's global RNG, scaled
     by 0.01.
     """
     scale = 0.01
     return np.random.randn(2, 1) * scale


 def get_acc(y, t):
     """Return the fraction of predictions that match the targets.

     Args:
         y: Array of predicted scores; a score >= 0.5 counts as a positive
             prediction.
         t: Array of targets with the same number of elements as ``y``.

     Returns:
         Mean of the element-wise agreement between thresholded ``y`` and
         ``t`` (both flattened first).
     """
     predicted_positive = y.ravel() >= 0.5
     targets = t.ravel()
     return np.mean(np.equal(predicted_positive, targets))


 def sigm(x):
     """Return the logistic sigmoid ``1 / (1 + exp(-x))``, element-wise."""
     denominator = 1 + np.exp(-x)
     return 1 / denominator


 def NLL(y, t, eps=1e-12):
     """Return the mean Bernoulli negative log-likelihood of ``t`` under ``y``.

     Args:
         y: Array of predicted probabilities.
         t: Array of targets, broadcastable against ``y``.
         eps: Predictions are clipped to ``[eps, 1 - eps]`` before taking
             logs. Without this, a prediction of exactly 0 or 1 evaluates
             ``0 * log(0)`` and the whole mean becomes nan.

     Returns:
         ``-mean(t * log(y) + (1 - t) * log(1 - y))`` on the clipped ``y``.
     """
     y = np.clip(y, eps, 1 - eps)
     return -np.mean(t * np.log(y) + (1 - t) * np.log(1 - y))


 def viz_y(y, title):
     """Render ``y`` as a bar chart and return the rendered image.

     Args:
         y: Array whose flattened values become the bar heights (one bar per
             element); the y-axis is fixed to [0, 1].
         title: Title placed on the axes.

     Returns:
         NumPy array copied from the canvas RGBA pixel buffer.
     """
     fig, ax = plt.subplots()
     heights = y.ravel()
     ax.set_ylim(0, 1)
     ax.bar(np.arange(len(heights)), heights, align='center', alpha=0.5)
     ax.set_title(title)

     # Rasterise, then copy the pixels out through the public canvas API
     # rather than the private ``renderer._renderer`` attribute.
     # NOTE(review): buffer_rgba() needs an Agg-based canvas, as did the
     # attribute it replaces.
     fig.canvas.draw()
     img = np.array(fig.canvas.buffer_rgba())
     # Close this specific figure, not whichever one happens to be current.
     plt.close(fig)

     return img


 def viz_w(w, title, ylim):
     """Render the weights ``w`` as a bar chart and return the rendered image.

     Args:
         w: Array whose flattened values become the bar heights (one bar per
             element).
         title: Title placed on the axes.
         ylim: Sequence unpacked into ``ax.set_ylim`` (lower, upper), so that
             successive frames can share one y-range.

     Returns:
         NumPy array copied from the canvas RGBA pixel buffer.
     """
     fig, ax = plt.subplots()
     heights = w.ravel()
     ax.set_ylim(*ylim)
     ax.bar(np.arange(len(heights)), heights, align='center', alpha=0.5)
     ax.set_title(title)

     # Rasterise, then copy the pixels out through the public canvas API
     # rather than the private ``renderer._renderer`` attribute.
     # NOTE(review): buffer_rgba() needs an Agg-based canvas, as did the
     # attribute it replaces.
     fig.canvas.draw()
     img = np.array(fig.canvas.buffer_rgba())
     # Close this specific figure, not whichever one happens to be current.
     plt.close(fig)

     return img


 def viz_loss(ls, title, p):
     """Plot the loss values in ``ls`` as a line and save the image to ``p``.

     Args:
         ls: Sequence of loss values, plotted against their index.
         title: Title placed on the axes.
         p: Output path handed to ``imageio.imsave``.
     """
     fig, ax = plt.subplots()
     ax.plot(ls, alpha=0.5)
     ax.set_title(title)

     # Rasterise, then copy the pixels out through the public canvas API
     # rather than the private ``renderer._renderer`` attribute.
     # NOTE(review): buffer_rgba() needs an Agg-based canvas, as did the
     # attribute it replaces.
     fig.canvas.draw()
     img = np.array(fig.canvas.buffer_rgba())
     # Close this specific figure, not whichever one happens to be current.
     plt.close(fig)

     imageio.imsave(p, img)
	import imageio
	import numpy as np
	from utils import *

	# Train the weights returned by get_model() on the data from get_data(),
	# using either a plain gradient step or a gradient preconditioned by a
	# full / diagonal Fisher estimate, then write diagnostic images.
	mode = 'sgd' # sgd, fisher, or dig_fisher

	# Fail early (before any work is done) on an unsupported mode.
	if mode not in ('sgd', 'fisher', 'dig_fisher'):
	    raise ValueError(f'unknown mode: {mode!r}')


	X_train, X_test, t_train, t_test = get_data()
	W = get_model()


	alpha = 1   # step size
	ys = []     # predictions recorded before each update
	ws = []     # weights recorded before each update
	ls = []     # loss recorded before each update

	# Training
	for it in range(15):
	    # Forward
	    z = X_train @ W
	    y = sigm(z)
	    ys.append(y)
	    ws.append(W)

	    loss = NLL(y, t_train)
	    ls.append(loss)

	    # Loss
	    print(f'Loss: {loss:.3f}')

	    m = y.shape[0]

	    # dloss/dz: the chain-rule product
	    #   (y - t) / (m * y * (1 - y))  *  y * (1 - y)
	    # simplifies to (y - t) / m. Using the simplified form avoids the
	    # 0/0 = nan that the unsimplified product yields once y saturates.
	    dW = X_train.T @ ((y - t_train) / m)

	    if mode == 'sgd':
	        step = dW
	    else:
	        # Per-sample gradient of the log-likelihood w.r.t. W (one row per
	        # sample); dloglik/dz simplifies to (t - y) for the same reason.
	        dloglik_dw = (t_train - y) * X_train

	        if mode == 'fisher':
	            # np.cov treats each row of its argument as one variable, so
	            # the transpose gives a (n_weights, n_weights) matrix.
	            F = np.cov(dloglik_dw.T)
	            # Solve F @ step = dW instead of forming the explicit inverse.
	            step = np.linalg.solve(F, dW)
	        else:
	            # Diagonal approx.: mean squared per-sample gradient, shaped
	            # as a column so it divides dW element-wise.
	            F = np.mean(dloglik_dw * dloglik_dw, axis=0)[:, None]
	            step = dW / F

	    # Step
	    W = W - alpha * step

	y = sigm(X_test @ W)

	acc = get_acc(y, t_test)
	print(f'Accuracy: {acc:.3f}')

	dist_imgs = [viz_y(y, '[{}] Iteration {}'.format(mode, it)) for it, y in enumerate(ys)]
	imageio.mimsave('{}_dist_change.gif'.format(mode), dist_imgs)

	ylim = [np.stack(ws).min() - 0.5, np.stack(ws).max() + 0.5]
	w_imgs = [viz_w(w, '[{}] Iteration {}'.format(mode, it), ylim) for it, w in enumerate(ws)]
	imageio.mimsave('{}_weight_change.gif'.format(mode), w_imgs)

	# Label and name the loss curve after the active mode, like the GIFs above,
	# so runs with different modes do not overwrite each other's output.
	viz_loss(ls, '[{}] loss curve'.format(mode), '{}_loss.png'.format(mode))
	import imageio
	import numpy as np
	import matplotlib.pyplot as plt
	from sklearn.utils import shuffle


	# Seed NumPy's global RNG so every np.random call below is reproducible.
	np.random.seed(9999)
	# Apply matplotlib's "ggplot" style sheet to all figures created later.
	plt.style.use('ggplot')


	def get_data():
	    """Create a shuffled two-class toy dataset and split it into train/test.

	    Class 0 is 100 standard-normal 2-D points shifted by -1, class 1 is 100
	    such points shifted by +1. Samples and labels are shuffled together.

	    Returns:
	        Tuple ``(X_train, X_test, t_train, t_test)``: the first 150 shuffled
	        rows form the training split, the remaining 50 the test split.
	        Labels are column vectors of 0.0 / 1.0.
	    """
	    X0 = np.random.randn(100, 2) - 1
	    X1 = np.random.randn(100, 2) + 1
	    X = np.vstack([X0, X1])
	    t = np.vstack([np.zeros([100, 1]), np.ones([100, 1])])

	    # Shuffle samples and labels with the same permutation.
	    X, t = shuffle(X, t)

	    X_train, X_test = X[:150], X[150:]
	    t_train, t_test = t[:150], t[150:]

	    return X_train, X_test, t_train, t_test


	def get_model():
	    """Return a freshly initialised (2, 1) weight matrix.

	    The weights are standard-normal draws from NumPy's global RNG, scaled
	    by 0.01.
	    """
	    # Model
	    W = np.random.randn(2, 1) * 0.01
	    return W


	def get_acc(y, t):
	    """Return the fraction of predictions that match the targets.

	    Args:
	        y: Array of predicted scores; a score >= 0.5 counts as a positive
	            prediction.
	        t: Array of targets with the same number of elements as ``y``.

	    Returns:
	        Mean of the element-wise agreement between thresholded ``y`` and
	        ``t`` (both flattened first).
	    """
	    acc = np.mean((y.ravel() >= 0.5) == t.ravel())
	    return acc


	def sigm(x):
	    """Return the logistic sigmoid ``1 / (1 + exp(-x))``, element-wise."""
	    return 1/(1+np.exp(-x))


	def NLL(y, t, eps=1e-12):
	    """Return the mean Bernoulli negative log-likelihood of ``t`` under ``y``.

	    Args:
	        y: Array of predicted probabilities.
	        t: Array of targets, broadcastable against ``y``.
	        eps: Predictions are clipped to ``[eps, 1 - eps]`` before taking
	            logs. Without this, a prediction of exactly 0 or 1 evaluates
	            ``0 * log(0)`` and the whole mean becomes nan.

	    Returns:
	        ``-mean(t * log(y) + (1 - t) * log(1 - y))`` on the clipped ``y``.
	    """
	    y = np.clip(y, eps, 1 - eps)
	    # The multiplication operators between each weight and its log term
	    # are required; without them the expression is not valid Python.
	    return -np.mean(t * np.log(y) + (1 - t) * np.log(1 - y))


	def viz_y(y, title):
	    """Render ``y`` as a bar chart and return the rendered image.

	    Args:
	        y: Array whose flattened values become the bar heights (one bar per
	            element); the y-axis is fixed to [0, 1].
	        title: Title placed on the axes.

	    Returns:
	        NumPy array copied from the canvas RGBA pixel buffer.
	    """
	    fig, ax = plt.subplots()
	    y = y.ravel()
	    ax.set_ylim(0, 1)
	    ax.bar(np.arange(len(y)), y, align='center', alpha=0.5)
	    ax.set_title(title)

	    # Rasterise, then copy the pixels out through the public canvas API
	    # rather than the private ``renderer._renderer`` attribute.
	    # NOTE(review): buffer_rgba() needs an Agg-based canvas, as did the
	    # attribute it replaces.
	    fig.canvas.draw()
	    img = np.array(fig.canvas.buffer_rgba())
	    # Close this specific figure, not whichever one happens to be current.
	    plt.close(fig)

	    return img


	def viz_w(w, title, ylim):
	    """Render the weights ``w`` as a bar chart and return the rendered image.

	    Args:
	        w: Array whose flattened values become the bar heights (one bar per
	            element).
	        title: Title placed on the axes.
	        ylim: Sequence unpacked into ``ax.set_ylim`` (lower, upper), so that
	            successive frames can share one y-range.

	    Returns:
	        NumPy array copied from the canvas RGBA pixel buffer.
	    """
	    fig, ax = plt.subplots()
	    w = w.ravel()
	    ax.set_ylim(*ylim)
	    ax.bar(np.arange(len(w)), w, align='center', alpha=0.5)
	    ax.set_title(title)

	    # Rasterise, then copy the pixels out through the public canvas API
	    # rather than the private ``renderer._renderer`` attribute.
	    # NOTE(review): buffer_rgba() needs an Agg-based canvas, as did the
	    # attribute it replaces.
	    fig.canvas.draw()
	    img = np.array(fig.canvas.buffer_rgba())
	    # Close this specific figure, not whichever one happens to be current.
	    plt.close(fig)

	    return img


	def viz_loss(ls, title, p):
	    """Plot the loss values in ``ls`` as a line and save the image to ``p``.

	    Args:
	        ls: Sequence of loss values, plotted against their index.
	        title: Title placed on the axes.
	        p: Output path handed to ``imageio.imsave``.
	    """
	    fig, ax = plt.subplots()
	    ax.plot(ls, alpha=0.5)
	    ax.set_title(title)

	    # Rasterise, then copy the pixels out through the public canvas API
	    # rather than the private ``renderer._renderer`` attribute.
	    # NOTE(review): buffer_rgba() needs an Agg-based canvas, as did the
	    # attribute it replaces.
	    fig.canvas.draw()
	    img = np.array(fig.canvas.buffer_rgba())
	    # Close this specific figure, not whichever one happens to be current.
	    plt.close(fig)

	    imageio.imsave(p, img)