Last active
September 26, 2016 03:36
-
-
Save standarderror/c76af8dccbbaa919b64d to your computer and use it in GitHub Desktop.
20150715 Autoencoder
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
### Autoencoder | |
################################### | |
## [1] Parameters & hyperparameters | |
################################### | |
# shape of data | |
N = shape(X)[1] # num observation | |
D = shape(X)[0] # num features (dimensionality) | |
## architecture and learning parameters | |
h = 200 # size of hidden layer | |
# weight vectors are randomly initialised from a uniform distribution | |
r = sqrt(6) / sqrt(D + h + 1) | |
rand = np.random.RandomState() | |
W1 = np.asarray(rand.uniform(low = -r, high = r, size = (h, D))) | |
W2 = np.asarray(rand.uniform(low = -r, high = r, size = (D, h))) | |
# bias vectors are 0s | |
b1 = np.zeros((h, 1)) | |
b2 = np.zeros((D, 1)) | |
# hyperparameters | |
_lambda = 0.0003 # regularization strength (1 is max) | |
rho = 0.1 # sparsity parameters | |
beta = 0.3 | |
############################### | |
## [2] Define helper functions | |
############################### | |
# sigmoid activation function | |
def sigmoid(x): | |
return 1.0 / (1 + np.exp(-x)) | |
# unravel theta vector | |
def unravel(theta): | |
W1 = theta[0 : D*h].reshape(h, D) | |
W2 = theta[D*h : 2*D*h].reshape(D, h) | |
b1 = theta[2*D*h : 2*D*h+h].reshape(h, 1) | |
b2 = theta[2*D*h+h : 2*D*h+h+D].reshape(D, 1) | |
return W1, W2, b1, b2 | |
# ravel theta vector | |
def _ravel(W1, W2, b1, b2): | |
return numpy.concatenate((W1.flatten(), W2.flatten(), b1.flatten(), b2.flatten())) | |
############################################################ | |
## [3] Define backpropagation algorithm to train autoencoder | |
############################################################ | |
def backpropagate(theta): | |
W1, W2, b1, b2 = unravel(theta) | |
# Forward propagate to calculate output of hidden layer then output layer | |
hidden_layer = sigmoid(np.dot(W1, X) + b1) | |
output_layer = sigmoid(np.dot(W2, hidden_layer) + b2) | |
# calculate error of outputs | |
error = output_layer - X | |
## calculate sparsity penalty term and cost component | |
rho_hat = np.sum(hidden_layer, axis = 1) / N # average activation value of hidden layer | |
cost_sparsity = beta * np.sum(rho * np.log(rho / rho_hat) + (1 - rho) * np.log((1 - rho) / (1 - rho_hat))) | |
delta_sparsity = beta * (-(rho / rho_hat) + ((1 - rho) / (1 - rho_hat))) | |
## calculate cost | |
cost_sq_error = 0.5 * np.sum(np.multiply(error, error)) / N | |
cost_regularisation = 0.5 * _lambda * (np.sum(np.multiply(W1, W1)) + np.sum(np.multiply(W2, W2))) | |
cost = cost_sq_error + cost_regularisation + cost_sparsity | |
## backpropagate error | |
delta_output = np.multiply(error, np.multiply(output_layer, 1 - output_layer)) | |
delta_hidden = np.multiply(np.dot(numpy.transpose(W2), delta_output) + np.transpose(np.matrix(delta_sparsity)), | |
np.multiply(hidden_layer, 1 - hidden_layer)) | |
## calculate gradient values | |
W1_grad = np.dot(delta_hidden, np.transpose(X)) | |
W2_grad = np.dot(delta_output, np.transpose(hidden_layer)) | |
b1_grad = np.sum(delta_hidden, axis = 1) | |
b2_grad = np.sum(delta_output, axis = 1) | |
## average and apply regularisation | |
W1_grad = W1_grad / N + _lambda * W1 | |
W2_grad = W2_grad / N + _lambda * W2 | |
b1_grad = b1_grad / N | |
b2_grad = b2_grad / N | |
# Ravel weight and bias vectors into a single vector to return to minimise function | |
theta_grad = _ravel(np.array(W1_grad), np.array(W2_grad), np.array(b1_grad), np.array(b2_grad)) | |
return [cost, theta_grad] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# plot weights | |
opt_theta = opt_solution.x | |
opt_W1,_,_,_ = unravel(opt_theta) | |
weights = [(opt_W1[i,:]/np.sum(opt_W1[i,:]**2)).reshape(sqrt(D), sqrt(D)) for i in xrange(opt_W1.shape[0])] | |
for i, w in enumerate(weights): | |
plt.subplot(20, 15, i + 1) | |
plt.set_cmap('gray') | |
plt.axis('off') | |
plt.imshow(w) | |
plt.subplots_adjust(hspace=0.05, wspace=0.05) | |
plt.gcf().set_size_inches(15, 20) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Prepare Python & IPython notebook | |
%pylab inline | |
import numpy as np | |
import matplotlib.pyplot as plt | |
import scipy.optimize | |
import cPickle, gzip | |
## Prepare dataset | |
# Import data | |
# Load the dataset | |
with gzip.open('mnist.pkl.gz', 'rb') as f: | |
train_set, valid_set, test_set = cPickle.load(f) | |
# grab the training data | |
X = train_set[0] | |
## pre-processing | |
## standardise & normalise | |
X -= np.mean(X) # subtract mean | |
xstd = np.std(X) | |
# standardise - divide by SD = scales to [-1,1] | |
X /= xstd | |
# rescale from [-1,1] to [0.1, 0.9] which is optimal for sigmoid function | |
rng = X.max(axis=1, keepdims=True) - X.min(axis=1, keepdims=True) | |
X = 0.9 - (((0.9 - 0.1) * (X.max(axis=1, keepdims=True) - X)) / rng) | |
# transpose to align with our design | |
X = X.T |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# combine weights and betas into a single vector for optimisation | |
theta = _ravel(W1, W2, b1, b2) | |
# Train! | |
opt_solution = scipy.optimize.minimize(backpropagate, theta, method = 'L-BFGS-B', jac = True, options = {'maxiter': 1000}) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment