Example of implementing a super basic attention layer in numpy
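In notation, the code below computes basic dot-product attention: given encoder vectors h_1, ..., h_N and a decoder vector s, it forms weights a_i = softmax(h_i · s) and returns the context vector c = sum_i a_i * h_i, i.e. a weighted average of the encoder vectors.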
import numpy as np


def softmax(x, axis=0):
    """ Calculate softmax function for an array x

        axis=0 calculates softmax across rows which means each column sums to 1
        axis=1 calculates softmax across columns which means each row sums to 1
    """
    return np.exp(x) / np.expand_dims(np.sum(np.exp(x), axis=axis), axis)


def attention(encoder_vectors, decoder_vector):
    """ Example function that calculates attention, returns the context vector

        Arguments:
        encoder_vectors: NxM numpy array, where N is the number of vectors and M is the vector length
        decoder_vector: 1xM numpy array, M is the vector length, must be the same M as encoder_vectors
    """
    # First, calculate the dot product of each encoder vector with the decoder vector
    dot_prod = np.matmul(encoder_vectors, decoder_vector.T)

    # Then take the softmax of those dot products to get a weight distribution
    scores = softmax(dot_prod)

    # Use those weights to scale encoder_vectors to get the alignment vectors
    alignment = encoder_vectors * scores

    # Sum up alignment vectors to get the context vector and return it
    context = alignment.sum(axis=0)
    return context


# Fake vectors used as an example
encoder_vectors = np.random.randn(10, 128)
decoder_vector = np.random.randn(1, 128)

context_vector = attention(encoder_vectors, decoder_vector)
print(context_vector)
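As a quick sanity check (a sketch, assuming the functions and the fake vectors above are in scope), the softmax scores should form a probability distribution over the 10 encoder vectors, and the context vector should be their weighted average with the same length as each encoder vector:

# Sanity checks for the example above; reuses encoder_vectors,
# decoder_vector, softmax, and attention from earlier in this file
scores = softmax(np.matmul(encoder_vectors, decoder_vector.T))
print(scores.shape)          # (10, 1): one weight per encoder vector
print(scores.sum())          # ~1.0: the weights form a probability distribution
print(context_vector.shape)  # (128,): same length as each encoder vector

# The context vector is the weighted average of the encoder vectors
manual_context = (encoder_vectors * scores).sum(axis=0)
print(np.allclose(manual_context, context_vector))  # True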