Code for a ridiculously over-explained Piazza post.
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import load_iris
def test(net, data, target):
    '''
    Returns the fraction of correct predictions.
    '''
    pred = (torch.sigmoid(net(data)) > 0.5).float()
    # Fraction correct over the whole dataset.
    return (target == pred).sum().item() / target.numel()
def train(init_model, x, y, learning_rates):
    '''
    Trains a fresh model from init_model for each learning rate.
    Returns the number of iterations until the model classifies the
    training set perfectly (a value of 200 means it never did within
    the iteration budget).
    '''
    results = list()
    loss_func = nn.BCEWithLogitsLoss()
    # Train over all learning rates
    for lr in learning_rates:
        model = init_model()
        optim = torch.optim.SGD(model.parameters(), lr)
        for trials in range(1, 201):
            out = model(x)
            loss = loss_func(out, y)
            optim.zero_grad()
            loss.backward()
            optim.step()
            # Stop as soon as the training set is classified perfectly.
            if test(model, x, y) == 1.:
                break
        results.append(trials)
    return results
def plot(learning_rates, results):
    plt.ylim((1, 201))
    plt.xlabel('Learning Rate')
    plt.ylabel('Iterations until Convergence')
    plt.plot(learning_rates, results)
    plt.show()
# Data processing to make the set linearly separable: keep the first two
# classes (setosa, versicolor) and the first two features, then drop the
# two overlapping points at indices 41 and 61.
data = load_iris()
x, y = data['data'][0:100][:, [0, 1]], data['target'][0:100].reshape(-1, 1)
x, y = np.delete(x, [41, 61], axis=0), np.delete(y, [41, 61], axis=0)
x, y = torch.from_numpy(x).float(), torch.from_numpy(y).float()
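# Sanity check (an addition, not part of the original gist): a perceptron
# provably reaches 100% training accuracy on a linearly separable set, so
# fitting one confirms that dropping the two points worked.
from sklearn.linear_model import Perceptron
clf = Perceptron(max_iter=1000, tol=None).fit(x.numpy(), y.numpy().ravel())
print('Linearly separable:', clf.score(x.numpy(), y.numpy().ravel()) == 1.0)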
# Showing category 0
# plt.scatter(x[:49][:, 0], x[:49][:, 1])
# Showing category 1
# plt.scatter(x[49:][:, 0], x[49:][:, 1])
# plt.show()
# Fine grid of small rates (0.0001 to roughly 0.1) plus a coarser grid of
# large rates (0.15 to 1.0).
learning_rates = [i / 10000 for i in range(1, 1001, 20)]
learning_rates.extend([i / 20 for i in range(3, 21)])
print('Learning Rates Tested\n', learning_rates)
# Deep Tests
def deep_init():
    '''
    Initializes a deep network: six hidden layers of width 5 with ReLU.
    '''
    # Seed before building the network so every call returns an
    # identically initialized model (weights and biases alike).
    torch.manual_seed(2)
    param = [nn.Linear(2, 5), nn.ReLU()]
    for _ in range(5):
        param.append(nn.Linear(5, 5))
        param.append(nn.ReLU())
    param.append(nn.Linear(5, 1))
    deep = nn.Sequential(*param)
    for layer in deep:
        if isinstance(layer, nn.Linear):
            torch.nn.init.xavier_normal_(layer.weight)
    return deep

deep_results = train(deep_init, x, y, learning_rates)
print(f'Fastest converging deep network: {min(deep_results)} iterations')
plot(learning_rates, deep_results)
# Hidden Tests
def hidden_init():
    '''
    Initializes a network with a single hidden layer of width 3.
    '''
    torch.manual_seed(2)
    hidden = nn.Sequential(nn.Linear(2, 3), nn.ReLU(), nn.Linear(3, 1))
    for layer in hidden:
        if isinstance(layer, nn.Linear):
            torch.nn.init.xavier_normal_(layer.weight)
    return hidden

hidden_results = train(hidden_init, x, y, learning_rates)
print(f'Fastest converging hidden layer network: {min(hidden_results)} iterations')
plot(learning_rates, hidden_results)
# Linear Tests
def linear_init():
    '''
    Initializes a single linear layer (logistic regression).
    '''
    torch.manual_seed(2)
    linear = nn.Linear(2, 1)
    torch.nn.init.normal_(linear.weight)
    return linear

linear_results = train(linear_init, x, y, learning_rates)
print(f'Fastest converging linear network: {min(linear_results)} iterations')
plot(learning_rates, linear_results)
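# Combined view (an addition to the original script, reusing the three
# result lists computed above): overlaying the curves makes the
# architectures directly comparable.
plt.ylim((1, 201))
plt.xlabel('Learning Rate')
plt.ylabel('Iterations until Convergence')
plt.plot(learning_rates, deep_results, label='deep')
plt.plot(learning_rates, hidden_results, label='hidden')
plt.plot(learning_rates, linear_results, label='linear')
plt.legend()
plt.show()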