@martinferianc
Created February 27, 2022 16:38
Neural network from scratch using only NumPy!
# Adapted from https://github.com/SkalskiP/ILearnDeepLearning.py/blob/master/01_mysteries_of_neural_networks/03_numpy_neural_net/Numpy%20deep%20neural%20network.ipynb
import numpy as np
import matplotlib.pyplot as plt
np.random.seed(42)
# Sample a dataset of 2-dimensional points
def get_data(N=300):
    # Generate a dataset of 2-dimensional points
    X = np.random.randn(N, 2)
    # Assign labels based on the sum of the coordinates
    T = np.where(X[:, 0] + X[:, 1] < 1.0, 0, 1)
    # Turn the labels into a column vector of 0s and 1s
    Y = np.zeros((N, 1))
    Y[T == 1] = 1
    return X, Y
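# Added remark: the labels are separable by the line x0 + x1 = 1, so this
# dataset is an easy sanity check for the implementation rather than a
# stress test of the network's capacity.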
class NN():
    def __init__(self, architecture):
        self.activations = []
        self.params_values = {}
        self.layers = len(architecture)
        self.grads_momentum = {}
        for i, layer in enumerate(architecture):
            input_size, output_size, activation = layer["input_dim"], layer["output_dim"], layer["activation"]
            self.activations.append(activation)
            # Scale the initial weights by 1/sqrt(fan-in)
            self.params_values[f"W{i}"] = np.random.randn(
                output_size, input_size
            ) / np.sqrt(input_size)
            self.params_values[f"b{i}"] = np.zeros((1, output_size))
            self.grads_momentum[f"W{i}"] = np.zeros_like(self.params_values[f"W{i}"])
            self.grads_momentum[f"b{i}"] = np.zeros_like(self.params_values[f"b{i}"])
        self.reset()

    # Reset the gradients and the cache for activations
    def reset(self):
        self.cache = {}
        self.grads = {}
    # Define the ReLU function
    def relu(self, x):
        return np.maximum(0, x)

    # Define the derivative of the ReLU function with respect to its input,
    # scaled by the incoming gradient
    def drelu(self, dA, z):
        dA_ = np.copy(dA)
        dA_[z <= 0] = 0
        return dA_

    # Define the sigmoid function
    def sigmoid(self, x):
        return 1. / (1. + np.exp(-x))

    # Define the derivative of the sigmoid function with respect to its input,
    # scaled by the incoming gradient
    def dsigmoid(self, dA, z):
        s = self.sigmoid(z)
        return s * (1. - s) * dA
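    # Added remark: the identity behind dsigmoid is
    # d/dz sigmoid(z) = sigmoid(z) * (1 - sigmoid(z)); multiplying by dA
    # applies the chain rule with the gradient flowing in from the layer above.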
    # Define the binary cross-entropy function (averaged over the batch)
    def bce(self, yhat, y):
        yhat, y = yhat.flatten(), y.flatten()
        cost = -np.mean(y * np.log(yhat + 1e-8) + (1 - y) * np.log(1 - yhat + 1e-8))
        return np.squeeze(cost)

    # Define the binary cross-entropy function derivative
    def dbce(self, yhat, y):
        return -(y / (yhat + 1e-8) - (1 - y) / (1 - yhat + 1e-8))
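    # Added remark: together bce and dbce implement
    # L = -(1/N) * sum_i [y_i * log(yhat_i) + (1 - y_i) * log(1 - yhat_i)]
    # and its elementwise derivative
    # dL_i/dyhat_i = -(y_i / yhat_i - (1 - y_i) / (1 - yhat_i));
    # the 1e-8 terms only guard against log(0) and division by zero.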
    # Define the forward function of a linear layer
    def single_forward(self, x, W, b, activation):
        Z = x @ W.T + b
        A = getattr(self, activation)(Z)
        # Return both the pre-activation and activation values so that
        # the caller can cache them for the backward pass
        return A, Z

    # Compute the full forward step by going through each layer in the NN
    def forward(self, x):
        A_prev = None
        A_curr = x
        for i in range(self.layers):
            W, b = self.params_values[f"W{i}"], self.params_values[f"b{i}"]
            activation = self.activations[i]
            A_prev = A_curr
            A_curr, Z_curr = self.single_forward(A_prev, W, b, activation)
            self.cache[str(i)] = (Z_curr, A_prev)
        return A_curr
    def single_backward(self, dA_curr, W, Z_curr, A_prev, activation):
        # Number of samples in the batch
        m = A_prev.shape[0]
        # Propagate the incoming gradient through the activation to get dL/dZ
        dactivation = getattr(self, f"d{activation}")
        dZ_curr = dactivation(dA_curr, Z_curr)
        # Compute the gradient with respect to the weights
        dW = np.dot(dZ_curr.T, A_prev) / m
        # Compute the gradient with respect to the bias
        db = np.sum(dZ_curr, axis=0, keepdims=True) / m
        # Compute the gradient for the previous layer
        dA_prev = np.dot(dZ_curr, W)
        return dA_prev, dW, db
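    # Added remark: for a batch of m samples, dZ_curr has shape (m, out) and
    # A_prev has shape (m, in), so dW = dZ_curr.T @ A_prev / m has shape
    # (out, in), matching W; dA_prev = dZ_curr @ W has shape (m, in).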
    # Do the full backward step by going through each layer in the NN
    def backward(self, yhat, y):
        # First compute the cost derivative
        dA_curr = self.dbce(yhat, y)
        for i in range(self.layers - 1, -1, -1):
            W = self.params_values[f"W{i}"]
            # Reuse the cached values for the current layer
            Z_curr, A_prev = self.cache[str(i)]
            dA_curr, dW, db = self.single_backward(dA_curr, W, Z_curr, A_prev, self.activations[i])
            self.grads[f"W{i}"] = dW
            self.grads[f"b{i}"] = db
    # Compute the accuracy by thresholding the predicted probabilities at 0.5
    def accuracy(self, yhat, y):
        prediction = np.where(yhat > 0.5, 1, 0)
        return np.mean(prediction == y)
    # The full training loop: first the forward step,
    # then the backward step,
    # followed by the parameter update step
    def train(self, x, y, learning_rate, epochs, momentum=0.9, weight_decay=0.0001):
        losses = []
        accuracies = []
        for _ in range(epochs):
            yhat = self.forward(x)
            loss = self.bce(yhat, y)
            losses.append(loss)
            accuracy = self.accuracy(yhat, y)
            accuracies.append(accuracy)
            self.backward(yhat, y)
            self.update_params(weight_decay, momentum, learning_rate)
        return losses, accuracies
    # Update the parameters of the NN
    def update_params(self, weight_decay, momentum, learning_rate):
        for i in range(self.layers):
            # Notice that weight decay enters simply through scaling and addition
            dW = self.grads[f"W{i}"] + weight_decay * self.params_values[f"W{i}"]
            db = self.grads[f"b{i}"] + weight_decay * self.params_values[f"b{i}"]
            # Update the momentum buffers with the new gradients
            self.grads_momentum[f"W{i}"] = momentum * self.grads_momentum[f"W{i}"] + (1 - momentum) * dW
            self.grads_momentum[f"b{i}"] = momentum * self.grads_momentum[f"b{i}"] + (1 - momentum) * db
            # Finally, update the parameters with the momentum buffer and the learning rate
            self.params_values[f"W{i}"] -= learning_rate * self.grads_momentum[f"W{i}"]
            self.params_values[f"b{i}"] -= learning_rate * self.grads_momentum[f"b{i}"]
        self.reset()
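
# Added sanity check (not part of the original gist): a minimal finite-difference
# gradient check, assuming the conventions above (mean BCE loss, gradients stored
# in self.grads after backward). It compares the analytic gradient of the first
# weight matrix against central differences; the two should agree closely.
# The RNG state is saved and restored so the experiment below is unaffected.
_state = np.random.get_state()
check_nn = NN([
    {"input_dim": 2, "output_dim": 3, "activation": "relu"},
    {"input_dim": 3, "output_dim": 1, "activation": "sigmoid"},
])
x_check = np.random.randn(5, 2)
y_check = (np.random.rand(5, 1) > 0.5).astype(float)
# Analytic gradient from backpropagation
check_nn.backward(check_nn.forward(x_check), y_check)
analytic = check_nn.grads["W0"].copy()
# Numerical gradient from central finite differences over every entry of W0
numeric = np.zeros_like(analytic)
eps = 1e-6
for j in range(numeric.shape[0]):
    for k in range(numeric.shape[1]):
        check_nn.params_values["W0"][j, k] += eps
        loss_plus = check_nn.bce(check_nn.forward(x_check), y_check)
        check_nn.params_values["W0"][j, k] -= 2 * eps
        loss_minus = check_nn.bce(check_nn.forward(x_check), y_check)
        check_nn.params_values["W0"][j, k] += eps
        numeric[j, k] = (loss_plus - loss_minus) / (2 * eps)
assert np.allclose(analytic, numeric, atol=1e-6), "Backward pass disagrees with finite differences"
np.random.set_state(_state)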
X, Y = get_data()
# Plot the dataset with the labels
plt.scatter(X[:, 0], X[:, 1], c=Y.flatten(), s=50, cmap='RdBu')
plt.savefig('data.png')
plt.close()
nn_architecture = [
    {"input_dim": 2, "output_dim": 4, "activation": "relu"},
    {"input_dim": 4, "output_dim": 6, "activation": "relu"},
    {"input_dim": 6, "output_dim": 6, "activation": "relu"},
    {"input_dim": 6, "output_dim": 4, "activation": "relu"},
    {"input_dim": 4, "output_dim": 1, "activation": "sigmoid"},
]
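# Added remark: each layer's "input_dim" must match the previous layer's
# "output_dim"; the network maps a 2-D point to a single sigmoid probability.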
# Initialize a NN
nn = NN(nn_architecture)
# Train the NN
losses, accuracies = nn.train(X, Y, learning_rate=0.005, epochs=1000, momentum=0.9, weight_decay=0.1)
# Plot the loss for the number of epochs
plt.plot(losses)
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.savefig('loss.png')
plt.close()
# Plot the accuracy for the number of epochs
plt.plot(accuracies)
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.savefig('accuracy.png')
plt.close()
# Plot the predicted class for each training point
yhat = nn.forward(X)
predictions = np.where(yhat > 0.5, 1, 0)
plt.scatter(X[:, 0], X[:, 1], c=predictions.flatten(), s=50, cmap='RdBu')
plt.savefig('decision_boundary.png')
plt.close()
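
# A possible extension (not in the original gist): visualise the learnt decision
# boundary itself by evaluating the network on a dense grid and colouring each
# grid cell by its predicted class; the grid range and file name are arbitrary choices.
xx, yy = np.meshgrid(np.linspace(-3, 3, 200), np.linspace(-3, 3, 200))
grid = np.column_stack([xx.ravel(), yy.ravel()])
zz = np.where(nn.forward(grid) > 0.5, 1, 0).reshape(xx.shape)
plt.contourf(xx, yy, zz, alpha=0.3, cmap='RdBu')
plt.scatter(X[:, 0], X[:, 1], c=Y.flatten(), s=50, cmap='RdBu')
plt.savefig('decision_boundary_grid.png')
plt.close()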