Created August 10, 2023 04:18
This defines a minimal Transformer skeleton using only NumPy and the Python standard library.
import numpy as np
import math

# Define the Transformer model architecture
class Transformer:
    def __init__(self, input_vocab_size, output_vocab_size, max_seq_length, d_model, num_heads, num_layers):
        self.input_vocab_size = input_vocab_size
        self.output_vocab_size = output_vocab_size
        self.max_seq_length = max_seq_length
        self.d_model = d_model
        self.num_heads = num_heads
        self.num_layers = num_layers
        # Separate token-embedding lookup tables for the source and target vocabularies
        self.input_embedding = self._create_embedding(input_vocab_size)
        self.output_embedding = self._create_embedding(output_vocab_size)
        self.encoder = self._create_encoder()
        self.decoder = self._create_decoder()
        self.final_layer = self._create_final_layer()
    def _create_embedding(self, vocab_size):
        # One d_model-dimensional embedding vector per vocabulary entry
        return np.random.randn(vocab_size, self.d_model)
    def _create_encoder(self):
        return [self._create_encoder_layer() for _ in range(self.num_layers)]
    def _create_encoder_layer(self):
        return {
            'self_attention': self._create_multi_head_attention(),
            'feed_forward': self._create_feed_forward()
        }
    def _create_decoder(self):
        return [self._create_decoder_layer() for _ in range(self.num_layers)]
    def _create_decoder_layer(self):
        return {
            'self_attention': self._create_multi_head_attention(),
            'encoder_attention': self._create_multi_head_attention(),
            'feed_forward': self._create_feed_forward()
        }
    def _create_multi_head_attention(self):
        # A single set of query/key/value projection matrices; heads are not actually split in this skeleton
        return {
            'query_weights': np.random.randn(self.d_model, self.d_model),
            'key_weights': np.random.randn(self.d_model, self.d_model),
            'value_weights': np.random.randn(self.d_model, self.d_model)
        }
    def _create_feed_forward(self):
        return {
            'weights1': np.random.randn(self.d_model, 2048),
            'bias1': np.random.randn(2048),
            'weights2': np.random.randn(2048, self.d_model),
            'bias2': np.random.randn(self.d_model)
        }
    def _create_final_layer(self):
        return {
            'weights': np.random.randn(self.d_model, self.output_vocab_size),
            'bias': np.random.randn(self.output_vocab_size)
        }
    def _softmax(self, x, axis=-1):
        # Numerically stable softmax; NumPy has no built-in np.softmax
        exps = np.exp(x - np.max(x, axis=axis, keepdims=True))
        return exps / np.sum(exps, axis=axis, keepdims=True)
    def _dot_product_attention(self, query, key, value):
        # Scaled dot-product attention: softmax(Q K^T / sqrt(d_model)) V
        scores = np.dot(query, key.T) / math.sqrt(self.d_model)
        attention_weights = self._softmax(scores, axis=-1)
        output = np.dot(attention_weights, value)
        return output, attention_weights
    def encode(self, inputs):
        # Embedding lookup for source token IDs: (batch, seq_len) -> (batch, seq_len, d_model)
        embeddings = self.input_embedding[inputs]
        return embeddings
    def decode(self, targets):
        # Embedding lookup for target token IDs: (batch, seq_len) -> (batch, seq_len, d_model)
        embeddings = self.output_embedding[targets]
        return embeddings
    def forward(self, source_inputs, target_inputs):
        # Encoding (embedding lookup only; the encoder layers are not applied in this skeleton)
        encoder_output = self.encode(source_inputs)
        # Decoding (embedding lookup only; the decoder layers are not applied in this skeleton)
        decoder_output = self.decode(target_inputs)
        return decoder_output
# Initialize the Transformer model
input_vocab_size = 10000
output_vocab_size = 8000
max_seq_length = 50
d_model = 512
num_heads = 8
num_layers = 6
batch_size = 32
transformer = Transformer(input_vocab_size, output_vocab_size, max_seq_length, d_model, num_heads, num_layers)

# Example inputs
source_inputs = np.random.randint(0, input_vocab_size, size=(batch_size, max_seq_length))
target_inputs = np.random.randint(0, output_vocab_size, size=(batch_size, max_seq_length))

# Forward pass
decoder_output = transformer.forward(source_inputs, target_inputs)
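
# Sanity check: with the lookup-table embeddings above, the forward pass returns the
# target-side embeddings, i.e. an array of shape (batch_size, max_seq_length, d_model).
assert decoder_output.shape == (batch_size, max_seq_length, d_model)
print("decoder_output shape:", decoder_output.shape)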