Example of implementing a super basic attention layer in numpy
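In notation, the code below computes basic dot-product attention: given encoder vectors h_1, ..., h_N and a decoder vector s, it forms weights a_i = softmax(h_i · s) and returns the context vector c = sum_i a_i * h_i, i.e. a weighted average of the encoder vectors.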
import numpy as np


def softmax(x, axis=0):
    """ Calculate softmax function for an array x

        axis=0 calculates softmax across rows which means each column sums to 1
        axis=1 calculates softmax across columns which means each row sums to 1
    """
    return np.exp(x) / np.expand_dims(np.sum(np.exp(x), axis=axis), axis)


def attention(encoder_vectors, decoder_vector):
    """ Example function that calculates attention, returns the context vector

        Arguments:
        encoder_vectors: NxM numpy array, where N is the number of vectors and M is the vector length
        decoder_vector: 1xM numpy array, M is the vector length, must be the same M as encoder_vectors
    """
    # First, calculate the dot product of each encoder vector with the decoder vector
    dot_prod = np.matmul(encoder_vectors, decoder_vector.T)

    # Then take the softmax of those dot products to get a weight distribution
    scores = softmax(dot_prod)

    # Use those weights to scale encoder_vectors to get the alignment vectors
    alignment = encoder_vectors * scores

    # Sum up alignment vectors to get the context vector and return it
    context = alignment.sum(axis=0)
    return context


# Fake vectors used as an example
encoder_vectors = np.random.randn(10, 128)
decoder_vector = np.random.randn(1, 128)

context_vector = attention(encoder_vectors, decoder_vector)
print(context_vector)
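As a quick sanity check (a sketch, assuming the functions and the fake vectors above are in scope), the softmax scores should form a probability distribution over the 10 encoder vectors, and the context vector should be their weighted average with the same length as each encoder vector:

# Sanity checks for the example above; reuses encoder_vectors,
# decoder_vector, softmax, and attention from earlier in this file
scores = softmax(np.matmul(encoder_vectors, decoder_vector.T))
print(scores.shape)          # (10, 1): one weight per encoder vector
print(scores.sum())          # ~1.0: the weights form a probability distribution
print(context_vector.shape)  # (128,): same length as each encoder vector

# The context vector is the weighted average of the encoder vectors
manual_context = (encoder_vectors * scores).sum(axis=0)
print(np.allclose(manual_context, context_vector))  # True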