Skip to content

Instantly share code, notes, and snippets.

@andrewliao11
Last active March 25, 2019 02:38
Show Gist options
  • Save andrewliao11/fca1627d1529892f4a8396817c8e16fb to your computer and use it in GitHub Desktop.
Save andrewliao11/fca1627d1529892f4a8396817c8e16fb to your computer and use it in GitHub Desktop.
import imageio
import numpy as np
from utils import *
# Demo script: fit a bias-free logistic-regression model with one of three
# update rules and save GIFs/plots showing how predictions, weights and loss
# evolve over the iterations.
mode = 'sgd' # sgd, fisher, or dig_fisher
if mode not in ('sgd', 'fisher', 'dig_fisher'):
    # Fail before doing any work instead of after the first forward pass.
    raise ValueError('unknown mode: {!r}'.format(mode))

X_train, X_test, t_train, t_test = get_data()
W = get_model()
alpha = 1  # step size

ys = []  # training-set predictions at every iteration
ws = []  # weights at every iteration (W is rebound below, never mutated)
ls = []  # training loss at every iteration

# Training
for it in range(15):
    # Forward
    z = X_train @ W
    y = sigm(z)
    ys.append(y)
    ws.append(W)

    # Loss
    loss = NLL(y, t_train)
    ls.append(loss)
    print(f'Loss: {loss:.3f}')

    # Backward: chain rule for the mean loss, shared by every mode.
    m = y.shape[0]
    dy = (y - t_train) / (m * (y - y * y))  # dloss/dy
    dz = y * (1 - y)  # dy/dz; y already holds sigm(z)
    dW = X_train.T @ (dz * dy)

    if mode == 'sgd':
        # Step
        W = W - alpha * dW
    else:
        # Per-sample gradient of the log-likelihood w.r.t. the weights,
        # one row per training sample.
        dloglik_dy = (t_train - y) / (y - y * y)
        dloglik_dz = dloglik_dy * dz
        dloglik_dw = dloglik_dz * X_train

        if mode == 'fisher':
            # Fisher matrix estimated as the covariance of those gradients.
            F = np.cov(dloglik_dw.T)
            # Step
            W = W - alpha * np.linalg.inv(F) @ dW
        else:  # 'dig_fisher'
            # Diagonal approx.: mean squared gradient per weight.
            F = np.mean(dloglik_dw * dloglik_dw, axis=0)[:, None]
            # Step
            W = W - alpha * (1 / F * dW)

# Evaluate the final weights on the held-out split.
y = sigm(X_test @ W)
acc = get_acc(y, t_test)
print(f'Accuracy: {acc:.3f}')

# Animate how the predicted distribution changes across iterations.
dist_imgs = [viz_y(y, '[{}] Iteration {}'.format(mode, it)) for it, y in enumerate(ys)]
imageio.mimsave('{}_dist_change.gif'.format(mode), dist_imgs)

# Animate the weights using one shared y-range so frames are comparable.
w_stack = np.stack(ws)
ylim = [w_stack.min() - 0.5, w_stack.max() + 0.5]
w_imgs = [viz_w(w, '[{}] Iteration {}'.format(mode, it), ylim) for it, w in enumerate(ws)]
imageio.mimsave('{}_weight_change.gif'.format(mode), w_imgs)

# Label and name the loss curve after the mode that actually ran.
viz_loss(ls, '[{}] loss curve'.format(mode), '{}_loss.png'.format(mode))
import imageio
import numpy as np
import matplotlib.pyplot as plt
from sklearn.utils import shuffle
# Apply the 'ggplot' matplotlib style to every figure created after this point.
plt.style.use('ggplot')
# Seed NumPy's global RNG so the np.random draws below are reproducible.
np.random.seed(9999)
def get_data():
    """Create a shuffled two-class toy dataset and split it into train/test.

    Class 0 is 100 standard-normal 2-D points shifted by -1, class 1 is 100
    such points shifted by +1. Labels are a column vector of 0.0 / 1.0.

    Returns:
        Tuple ``(X_train, X_test, t_train, t_test)`` where the first 150
        shuffled rows form the training split and the remaining 50 the test
        split.
    """
    negatives = np.random.randn(100, 2) - 1
    positives = np.random.randn(100, 2) + 1

    features = np.concatenate((negatives, positives), axis=0)
    labels = np.concatenate((np.zeros((100, 1)), np.ones((100, 1))), axis=0)

    # Shuffle rows of features and labels together so pairs stay aligned.
    features, labels = shuffle(features, labels)

    n_train = 150
    return (
        features[:n_train],
        features[n_train:],
        labels[:n_train],
        labels[n_train:],
    )
def get_model():
    """Return a freshly initialised (2, 1) weight matrix.

    The entries are standard-normal draws from NumPy's global RNG scaled by
    0.01, i.e. small random values around zero.
    """
    init_scale = 0.01
    return np.random.randn(2, 1) * init_scale
def get_acc(y, t):
    """Return the fraction of predictions that match the targets.

    Args:
        y: Array of predicted probabilities; values >= 0.5 count as class 1.
        t: Array of targets with the same number of elements as ``y``.

    Returns:
        Mean of the element-wise agreement between thresholded ``y`` and ``t``.
    """
    predicted = y.ravel() >= 0.5
    matches = predicted == t.ravel()
    return np.mean(matches)
def sigm(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` element-wise.

    Computed as ``exp(-log(1 + exp(-x)))`` via ``np.logaddexp`` so that large
    negative inputs do not overflow in ``exp`` (the naive form does and emits
    a RuntimeWarning); the result still saturates cleanly to 0 or 1.

    Args:
        x: Scalar or array of logits.

    Returns:
        Sigmoid of ``x`` with the same shape as the input.
    """
    # logaddexp(0, -x) == log(exp(0) + exp(-x)) == log(1 + exp(-x)).
    return np.exp(-np.logaddexp(0, -x))
def NLL(y, t, eps=1e-12):
    """Return the mean binary negative log-likelihood (cross-entropy).

    Args:
        y: Predicted probabilities.
        t: Targets (0 or 1), broadcastable against ``y``.
        eps: Probabilities are clipped to ``[eps, 1 - eps]`` before taking
            logs. Without this, a saturated prediction of exactly 0 or 1
            yields ``0 * log(0)`` and the whole loss becomes NaN.

    Returns:
        Scalar mean of ``-(t*log(y) + (1-t)*log(1-y))``.
    """
    y = np.clip(np.asarray(y, dtype=float), eps, 1 - eps)
    return -np.mean(t * np.log(y) + (1 - t) * np.log(1 - y))
def viz_y(y, title):
    """Draw the values of ``y`` as a bar chart and return it as an image.

    Args:
        y: Array of values to plot; flattened before plotting. The y-axis is
            fixed to [0, 1].
        title: Title placed above the plot.

    Returns:
        NumPy array copied from the canvas' RGBA pixel buffer.
    """
    fig, ax = plt.subplots()
    y = y.ravel()
    ax.set_ylim(0, 1)
    ax.bar(np.arange(len(y)), y, align='center', alpha=0.5)
    ax.set_title(title)
    # Render, then copy the pixels out through the public buffer_rgba() API
    # rather than the private renderer._renderer attribute.
    fig.canvas.draw()
    img = np.array(fig.canvas.buffer_rgba())
    # Close this specific figure, not whichever one happens to be current.
    plt.close(fig)
    return img
def viz_w(w, title, ylim):
    """Draw the values of ``w`` as a bar chart and return it as an image.

    Args:
        w: Array of values to plot; flattened before plotting.
        title: Title placed above the plot.
        ylim: Two-element sequence ``(low, high)`` unpacked into
            ``ax.set_ylim``.

    Returns:
        NumPy array copied from the canvas' RGBA pixel buffer.
    """
    fig, ax = plt.subplots()
    w = w.ravel()
    ax.set_ylim(*ylim)
    ax.bar(np.arange(len(w)), w, align='center', alpha=0.5)
    ax.set_title(title)
    # Render, then copy the pixels out through the public buffer_rgba() API
    # rather than the private renderer._renderer attribute.
    fig.canvas.draw()
    img = np.array(fig.canvas.buffer_rgba())
    # Close this specific figure, not whichever one happens to be current.
    plt.close(fig)
    return img
def viz_loss(ls, title, p):
    """Plot a sequence of values as a line chart and save it to ``p``.

    Args:
        ls: Sequence of values to plot, in order.
        title: Title placed above the plot.
        p: Output path handed to ``imageio.imsave``.
    """
    fig, ax = plt.subplots()
    ax.plot(ls, alpha=0.5)
    ax.set_title(title)
    # Render, then copy the pixels out through the public buffer_rgba() API
    # rather than the private renderer._renderer attribute.
    fig.canvas.draw()
    img = np.array(fig.canvas.buffer_rgba())
    # Close this specific figure, not whichever one happens to be current.
    plt.close(fig)
    imageio.imsave(p, img)
@andrewliao11
Copy link
Author

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment