import numpy as np
from random import shuffle
from past.builtins import xrange


def svm_loss_naive(W, X, y, reg):
  """
  Structured SVM loss function, naive implementation (with loops).

  Inputs have dimension D, there are C classes, and we operate on minibatches
  of N examples.

  Inputs:
  - W: A numpy array of shape (D, C) containing weights.
  - X: A numpy array of shape (N, D) containing a minibatch of data.
  - y: A numpy array of shape (N,) containing training labels; y[i] = c means
    that X[i] has label c, where 0 <= c < C.
  - reg: (float) regularization strength

  Returns a tuple of:
  - loss as a single float
  - gradient with respect to weights W; an array of the same shape as W
  """
  dW = np.zeros(W.shape)  # initialize the gradient as zero

  # compute the loss and the gradient
  num_classes = W.shape[1]
  num_train = X.shape[0]
  loss = 0.0
  margins = np.zeros((num_train, num_classes))
  for i in xrange(num_train):
    scores = X[i].dot(W)
    correct_class_score = scores[y[i]]
    for j in xrange(num_classes):
      if j == y[i]:
        continue
      margin = scores[j] - correct_class_score + 1  # note delta = 1
      margins[i, j] = margin
      if margin > 0:
        loss += margin

  # Right now the loss is a sum over all training examples, but we want it
  # to be an average instead, so we divide by num_train.
  loss /= num_train

  # Add regularization to the loss.
  loss += reg * np.sum(W * W)

  #############################################################################
  # TODO:                                                                     #
  # Compute the gradient of the loss function and store it in dW.            #
  # Rather than first computing the loss and then computing the derivative,  #
  # it may be simpler to compute the derivative at the same time that the    #
  # loss is being computed. As a result you may need to modify some of the   #
  # code above to compute the gradient.                                      #
  #############################################################################
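  # Gradient of the per-example hinge loss with respect to the scores:
  #   dL_i/ds_j     = 1                                    for j != y_i with a positive margin
  #   dL_i/ds_{y_i} = -(number of classes j with a positive margin)
  # Since s = X[i].dot(W), each column of dW accumulates X[i] scaled by the
  # corresponding score gradient, which is exactly what the loop below does.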
  for i in xrange(num_train):
    dScore_i = np.zeros(num_classes)
    for j in xrange(num_classes):
      if j == y[i]:
        continue
      if margins[i, j] > 0:
        dScore_i[j] += 1
        dScore_i[y[i]] -= 1
    for j in xrange(num_classes):
      dW[:, j] += X[i] * dScore_i[j]

  dW /= num_train
  dW += 2 * reg * W

  return loss, dW
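
def _check_svm_gradient_naive(num_checks=5, h=1e-5, seed=0):
  """Minimal gradient-check sketch. This helper is not part of the original
  gist; its name, the toy problem shapes, and the reg value below are
  illustrative assumptions. It compares the analytic gradient returned by
  svm_loss_naive against a centered finite difference at a few random
  entries of W."""
  rng = np.random.RandomState(seed)
  D, C, N = 10, 4, 6
  W = rng.randn(D, C) * 0.01
  X = rng.randn(N, D)
  y = rng.randint(C, size=N)
  _, dW = svm_loss_naive(W, X, y, reg=0.1)
  for _ in range(num_checks):
    i, j = rng.randint(D), rng.randint(C)
    old = W[i, j]
    W[i, j] = old + h
    loss_hi, _ = svm_loss_naive(W, X, y, reg=0.1)
    W[i, j] = old - h
    loss_lo, _ = svm_loss_naive(W, X, y, reg=0.1)
    W[i, j] = old
    numeric = (loss_hi - loss_lo) / (2 * h)
    print('dW[%d, %d] analytic: %f, numerical: %f' % (i, j, dW[i, j], numeric))
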
def svm_loss_vectorized(W, X, y, reg):
  """
  Structured SVM loss function, vectorized implementation.

  Inputs and outputs are the same as svm_loss_naive.
  """
  loss = 0.0
  dW = np.zeros(W.shape)  # initialize the gradient as zero

  #############################################################################
  # TODO:                                                                     #
  # Implement a vectorized version of the structured SVM loss, storing the   #
  # result in loss.                                                           #
  #############################################################################
  num_classes = W.shape[1]
  num_train = X.shape[0]

  scores = X.dot(W)
  correct_class_scores = scores[np.arange(num_train), y]
  margins = scores - correct_class_scores[:, None] + 1  # note delta = 1

  # Mask of (example, class) pairs with a positive margin; the correct class
  # never contributes to the loss, so zero out its entries.
  large_zero = (margins > 0).astype(float)  # np.float was removed from NumPy
  large_zero[np.arange(num_train), y] = 0

  loss = np.sum(margins * large_zero, axis=1)
  loss = np.mean(loss)
  # Add regularization to the loss, matching svm_loss_naive.
  loss += reg * np.sum(W * W)
  #############################################################################
  #                             END OF YOUR CODE                              #
  #############################################################################

  #############################################################################
  # TODO:                                                                     #
  # Implement a vectorized version of the gradient for the structured SVM    #
  # loss, storing the result in dW.                                          #
  #                                                                           #
  # Hint: Instead of computing the gradient from scratch, it may be easier   #
  # to reuse some of the intermediate values that you used to compute the    #
  # loss.                                                                     #
  #############################################################################
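  # The mask built for the loss already encodes the score gradients:
  #   dL_i/ds_j     = 1  for each wrong class j with a positive margin,
  #   dL_i/ds_{y_i} = -(number of positive margins in row i),
  # so dW follows from a single matrix product with X.T, then averaging over
  # the minibatch and adding the regularization gradient 2 * reg * W.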
  dScore = large_zero.copy()
  dScore[np.arange(num_train), y] = -np.sum(large_zero, axis=1)

  dW = X.T.dot(dScore)
  dW /= num_train
  dW += 2 * reg * W
  #############################################################################
  #                             END OF YOUR CODE                              #
  #############################################################################

  return loss, dW
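
if __name__ == '__main__':
  # Minimal usage sketch, not part of the original gist: build a small random
  # problem and check that the naive and vectorized implementations agree on
  # both the loss and the gradient. The shapes and reg value are arbitrary.
  rng = np.random.RandomState(1)
  D, C, N = 20, 10, 50
  W = rng.randn(D, C) * 0.001
  X = rng.randn(N, D)
  y = rng.randint(C, size=N)

  loss_naive, grad_naive = svm_loss_naive(W, X, y, reg=0.5)
  loss_vec, grad_vec = svm_loss_vectorized(W, X, y, reg=0.5)

  print('naive loss:      %f' % loss_naive)
  print('vectorized loss: %f' % loss_vec)
  print('loss difference: %e' % abs(loss_naive - loss_vec))
  print('grad difference: %e' % np.linalg.norm(grad_naive - grad_vec, ord='fro'))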