import numpy as np

def lin(params, x):
    # Linear model: y = a*x + b, with params = [a, b]
    return params[0]*x + params[1]

def linear_gradients(params_to_learn, x, y):
    # Gradients of the squared error (a*x + b - y)**2 w.r.t. a and b
    a = params_to_learn[0]
    b = params_to_learn[1]
    de_da = 2*x*(a*x + b - y)
    de_db = 2*(a*x + b - y)
    return [de_da, de_db]
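
# mse() is called by gradient_descent() below but is not defined in this gist.
# A minimal sketch, assuming the original computed the mean squared error of
# model f with the given params over the whole dataset:
def mse(X, Y, f, params):
    return sum((f(params, x) - y)**2 for x, y in zip(X, Y)) / len(X)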
def gradient_descent(X, Y, init_params, f, gradients_func, optim, optim_params, epochs=1, anneal_epoch_freq=5):
    params_to_learn = init_params.copy()
    wavg_grads = [0.0] * len(params_to_learn)          # used if optim = Momentum, Adam or AdamAnn
    wavg_squared_grads = [0.0] * len(params_to_learn)  # used if optim = Adam or AdamAnn
    lr_basis = optim_params[0]                         # used if optim = AdamAnn
    for e in range(epochs):
        if optim == "AdamAnn" and (e + 1) % anneal_epoch_freq == 0:
            lr_basis = lr_basis / 4.0  # anneal the learning rate every anneal_epoch_freq epochs
        for x_, y_ in zip(X, Y):
            loss = mse(X, Y, f, params_to_learn)
            gradients = gradients_func(params_to_learn, x_, y_)
            if optim == "SGD":
                alpha = optim_params[0]
                for i in range(len(params_to_learn)):
                    params_to_learn[i] = params_to_learn[i] - alpha*gradients[i]
            elif optim == "Momentum":
                alpha = optim_params[0]
                beta = optim_params[1]
                for i in range(len(params_to_learn)):
                    wavg_grads[i] = wavg_grads[i]*beta + (1.0 - beta)*gradients[i]
                    params_to_learn[i] = params_to_learn[i] - alpha*wavg_grads[i]
            elif optim == "Adam":
                lr_basis = optim_params[0]
                beta1 = optim_params[1]
                beta2 = optim_params[2]
                for i in range(len(params_to_learn)):
                    wavg_grads[i] = wavg_grads[i]*beta1 + (1.0 - beta1)*gradients[i]
                    wavg_squared_grads[i] = wavg_squared_grads[i]*beta2 + (1.0 - beta2)*(gradients[i]*gradients[i])
                    lr = lr_basis / (np.sqrt(wavg_squared_grads[i]) + 1e-8)  # epsilon added here to avoid division by zero
                    params_to_learn[i] = params_to_learn[i] - lr*wavg_grads[i]
            elif optim == "AdamAnn":
                beta1 = optim_params[1]
                beta2 = optim_params[2]
                for i in range(len(params_to_learn)):
                    wavg_grads[i] = wavg_grads[i]*beta1 + (1.0 - beta1)*gradients[i]
                    wavg_squared_grads[i] = wavg_squared_grads[i]*beta2 + (1.0 - beta2)*(gradients[i]*gradients[i])
                    lr = lr_basis / (np.sqrt(wavg_squared_grads[i]) + 1e-8)  # epsilon added here to avoid division by zero
                    params_to_learn[i] = params_to_learn[i] - lr*wavg_grads[i]
    print("Learned params with {}: {} (last loss: {})".format(optim, params_to_learn, loss))
    return params_to_learn
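
# x_gen and y_real are not defined in this gist; presumably they come from an
# earlier cell of the original notebook. A hypothetical stand-in so the calls
# below can run: noisy samples drawn around the line y = 3x + 8.
np.random.seed(0)
x_gen = np.random.uniform(-10, 10, 200)
y_real = 3*x_gen + 8 + np.random.normal(0, 1, size=x_gen.shape)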
epochs = 20
init_params = [0, 0]
gradient_descent(x_gen, y_real, init_params, lin, linear_gradients, "SGD", [0.001], epochs=epochs)
gradient_descent(x_gen, y_real, init_params, lin, linear_gradients, "Momentum", [0.001, 0.9], epochs=epochs)
gradient_descent(x_gen, y_real, init_params, lin, linear_gradients, "Adam", [1, 0.7, 0.9], epochs=epochs)
gradient_descent(x_gen, y_real, init_params, lin, linear_gradients, "AdamAnn", [1, 0.7, 0.9], epochs=epochs)