Dropout Neural Networks (with ReLU)
# -*- coding: utf-8 -*-

import sys
import numpy

numpy.seterr(all='ignore')


'''
activation functions
'''
def sigmoid(x):
    return 1. / (1 + numpy.exp(-x))

def dsigmoid(x):
    # derivative w.r.t. the sigmoid *output* (x is assumed to already be sigmoid(z))
    return x * (1. - x)

def tanh(x):
    return numpy.tanh(x)

def dtanh(x):
    # derivative w.r.t. the tanh *output*
    return 1. - x * x

def softmax(x):
    e = numpy.exp(x - numpy.max(x))  # prevent overflow
    if e.ndim == 1:
        return e / numpy.sum(e, axis=0)
    else:
        return e / numpy.array([numpy.sum(e, axis=1)]).T  # ndim = 2

def ReLU(x):
    return x * (x > 0)

def dReLU(x):
    return 1. * (x > 0)
'''
Dropout
'''
class Dropout(object):
    def __init__(self, input, label,
                 n_in, hidden_layer_sizes, n_out,
                 rng=None, activation=ReLU):

        self.x = input
        self.y = label

        self.hidden_layers = []
        self.n_layers = len(hidden_layer_sizes)

        if rng is None:
            rng = numpy.random.RandomState(1234)

        assert self.n_layers > 0

        # construct multi-layer
        for i in xrange(self.n_layers):
            # layer_size
            if i == 0:
                input_size = n_in
            else:
                input_size = hidden_layer_sizes[i-1]

            # layer_input
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.hidden_layers[-1].output()

            # construct hidden_layer
            hidden_layer = HiddenLayer(input=layer_input,
                                       n_in=input_size,
                                       n_out=hidden_layer_sizes[i],
                                       rng=rng,
                                       activation=activation)

            self.hidden_layers.append(hidden_layer)

        # layer for output using Logistic Regression (softmax)
        self.log_layer = LogisticRegression(input=self.hidden_layers[-1].output(),
                                            label=self.y,
                                            n_in=hidden_layer_sizes[-1],
                                            n_out=n_out)
    def train(self, epochs=5000, dropout=True, p_dropout=0.5, rng=None):

        for epoch in xrange(epochs):
            dropout_masks = []  # create different masks in each training epoch

            # forward hidden_layers
            for i in xrange(self.n_layers):
                if i == 0:
                    layer_input = self.x

                layer_input = self.hidden_layers[i].forward(input=layer_input)

                if dropout == True:
                    mask = self.hidden_layers[i].dropout(input=layer_input, p=p_dropout, rng=rng)
                    layer_input *= mask

                    dropout_masks.append(mask)

            # forward & backward log_layer
            self.log_layer.train(input=layer_input)

            # backward hidden_layers
            for i in reversed(xrange(0, self.n_layers)):
                if i == self.n_layers-1:
                    prev_layer = self.log_layer
                else:
                    prev_layer = self.hidden_layers[i+1]

                self.hidden_layers[i].backward(prev_layer=prev_layer)

                if dropout == True:
                    self.hidden_layers[i].d_y *= dropout_masks[i]  # also mask here
    def predict(self, x, dropout=True, p_dropout=0.5):
        layer_input = x

        for i in xrange(self.n_layers):
            if dropout == True:
                # at test time, scale the trained weights by the keep probability (1 - p_dropout)
                # note: this rescales the stored weights in place, so predict should be
                # called only once after training
                self.hidden_layers[i].W = (1 - p_dropout) * self.hidden_layers[i].W
                self.hidden_layers[i].b = (1 - p_dropout) * self.hidden_layers[i].b

            layer_input = self.hidden_layers[i].output(input=layer_input)

        return self.log_layer.predict(layer_input)
'''
Hidden Layer
'''
class HiddenLayer(object):
    def __init__(self, input, n_in, n_out,
                 W=None, b=None, rng=None, activation=tanh):

        if rng is None:
            rng = numpy.random.RandomState(1234)

        if W is None:
            a = 1. / n_in
            W = numpy.array(rng.uniform(  # initialize W uniformly
                low=-a,
                high=a,
                size=(n_in, n_out)))

        if b is None:
            b = numpy.zeros(n_out)  # initialize bias 0

        self.rng = rng
        self.x = input

        self.W = W
        self.b = b

        if activation == tanh:
            self.dactivation = dtanh
        elif activation == sigmoid:
            self.dactivation = dsigmoid
        elif activation == ReLU:
            self.dactivation = dReLU
        else:
            raise ValueError('activation function not supported.')

        self.activation = activation

    def output(self, input=None):
        if input is not None:
            self.x = input

        linear_output = numpy.dot(self.x, self.W) + self.b
        return (linear_output if self.activation is None
                else self.activation(linear_output))

    def sample_h_given_v(self, input=None):
        if input is not None:
            self.x = input

        v_mean = self.output()
        h_sample = self.rng.binomial(size=v_mean.shape,
                                     n=1,
                                     p=v_mean)
        return h_sample

    def forward(self, input=None):
        return self.output(input=input)

    def backward(self, prev_layer, lr=0.1, input=None):
        if input is not None:
            self.x = input

        # propagate the error from the layer above through its weights,
        # then apply the derivative of this layer's activation (evaluated at its output)
        d_y = self.dactivation(prev_layer.x) * numpy.dot(prev_layer.d_y, prev_layer.W.T)

        self.W += lr * numpy.dot(self.x.T, d_y)
        self.b += lr * numpy.mean(d_y, axis=0)

        self.d_y = d_y

    def dropout(self, input, p, rng=None):
        if rng is None:
            rng = numpy.random.RandomState(123)

        mask = rng.binomial(size=input.shape,
                            n=1,
                            p=1-p)  # p is the prob of dropping

        return mask
'''
Logistic Regression
'''
class LogisticRegression(object):
    def __init__(self, input, label, n_in, n_out):
        self.x = input
        self.y = label
        self.W = numpy.zeros((n_in, n_out))  # initialize W 0
        self.b = numpy.zeros(n_out)          # initialize bias 0

    def train(self, lr=0.1, input=None, L2_reg=0.00):
        if input is not None:
            self.x = input

        p_y_given_x = softmax(numpy.dot(self.x, self.W) + self.b)
        # the gradient of the cross-entropy cost w.r.t. the softmax input is (p - y);
        # d_y holds its negative, so the += updates below perform gradient descent
        d_y = self.y - p_y_given_x

        self.W += lr * numpy.dot(self.x.T, d_y) - lr * L2_reg * self.W
        self.b += lr * numpy.mean(d_y, axis=0)

        self.d_y = d_y

        # cost = self.negative_log_likelihood()
        # return cost

    def negative_log_likelihood(self):
        softmax_activation = softmax(numpy.dot(self.x, self.W) + self.b)

        cross_entropy = - numpy.mean(
            numpy.sum(self.y * numpy.log(softmax_activation) +
                      (1 - self.y) * numpy.log(1 - softmax_activation),
                      axis=1))

        return cross_entropy

    def predict(self, x):
        return softmax(numpy.dot(x, self.W) + self.b)

    def output(self, x):
        return self.predict(x)
def test_dropout(n_epochs=5000, dropout=True, p_dropout=0.5):
    # XOR
    x = numpy.array([[0, 0],
                     [0, 1],
                     [1, 0],
                     [1, 1]])
    y = numpy.array([[0, 1],
                     [1, 0],
                     [1, 0],
                     [0, 1]])

    rng = numpy.random.RandomState(123)

    # construct Dropout MLP
    classifier = Dropout(input=x, label=y,
                         n_in=2, hidden_layer_sizes=[10, 10], n_out=2,
                         rng=rng, activation=ReLU)

    # train
    classifier.train(epochs=n_epochs, dropout=dropout,
                     p_dropout=p_dropout, rng=rng)

    # test
    print classifier.predict(x)


if __name__ == "__main__":
    test_dropout()
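Running the script as-is trains the two-hidden-layer ReLU network on the XOR data above and prints the 4x2 matrix of class probabilities. A quick way to compare training with and without dropout, based on the test_dropout signature above (a usage sketch, not part of the gist):

# same toy data, with and without dropout masking during training
test_dropout(n_epochs=5000, dropout=True, p_dropout=0.5)
test_dropout(n_epochs=5000, dropout=False)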
Shouldn't it be def dsigmoid(x): return sigmoid(x) * (1. - sigmoid(x))?

Yeah, it's def sigmoid_p(x): return sigmoid(x) * (1 - sigmoid(x)).

You're right in general, but look at how dsigmoid is used in the code: the values passed to it are the hidden layers' outputs, which have already been passed through the sigmoid, so there is no need to apply the sigmoid again inside the derivative.
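A quick way to see that the two forms agree when the argument is already a sigmoid output (a standalone check, not part of the gist):

import numpy

def sigmoid(x):
    return 1. / (1 + numpy.exp(-x))

x = numpy.linspace(-5., 5., 11)
y = sigmoid(x)                      # what the hidden layers actually pass to dsigmoid

print(numpy.allclose(y * (1. - y),                        # dsigmoid(y) as defined in the gist
                     sigmoid(x) * (1. - sigmoid(x))))      # textbook sigmoid'(x); prints True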
I don't know why, but if I change the data I get nan as the prediction.
Which function should be used to take the derivative of softmax?
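For what it's worth, this code never needs a separate derivative of softmax: the softmax output layer is trained with the cross-entropy cost, and the gradient of that combination with respect to the pre-softmax input simplifies to (prediction - target). That is exactly what d_y = self.y - p_y_given_x in LogisticRegression.train computes (its negative, so the += updates descend the cost). A small numerical check of the identity, with made-up numbers, not taken from the gist:

import numpy

def softmax(z):
    e = numpy.exp(z - numpy.max(z))
    return e / numpy.sum(e)

y = numpy.array([0., 1., 0.])        # one-hot target (made-up example)
z = numpy.array([0.2, -0.5, 1.3])    # pre-softmax input (logits)

def cross_entropy(z):
    return -numpy.sum(y * numpy.log(softmax(z)))

analytic = softmax(z) - y            # claimed gradient: p - y

# central-difference check of the gradient
eps = 1e-6
numeric = numpy.array([(cross_entropy(z + eps * numpy.eye(3)[k]) -
                        cross_entropy(z - eps * numpy.eye(3)[k])) / (2 * eps)
                       for k in range(3)])

print(numpy.allclose(analytic, numeric))   # prints True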
Kaiyoto, it is probably because of overflow.
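One way to track that down (a debugging sketch, not part of the gist): the script silences floating-point warnings with numpy.seterr(all='ignore') near the top, so overflows turn into nan silently. Temporarily switching that to 'raise' makes the first overflow raise a FloatingPointError at the offending line; rescaling the inputs or lowering the lr arguments of the train/backward methods are the usual remedies.

import numpy

# make the first overflow/invalid value raise instead of silently becoming nan
numpy.seterr(all='raise')

# then rerun training, e.g.
# test_dropout(n_epochs=5000, dropout=True, p_dropout=0.5)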