@shrimo
Created March 2, 2025 00:05
SimpleTransformer
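
A self-contained toy chatbot in pure Python: scaled dot-product attention over random word embeddings, a heuristic training loop that nudges embeddings toward target words, and a nearest-word decoder based on cosine similarity. Everything below runs with only the standard library.
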
import math
import random

class SimpleTransformer:
    def __init__(self, d_model):
        """Initializes the transformer with a given embedding size (d_model)."""
        self.d_model = d_model
        # Random projection matrices for queries, keys, and values.
        # Note: these stay fixed; only the word embeddings are updated in train().
        self.W_q = [[random.uniform(-0.5, 0.5) for _ in range(d_model)] for _ in range(d_model)]
        self.W_k = [[random.uniform(-0.5, 0.5) for _ in range(d_model)] for _ in range(d_model)]
        self.W_v = [[random.uniform(-0.5, 0.5) for _ in range(d_model)] for _ in range(d_model)]

    def matmul(self, A, B):
        """Performs matrix multiplication of A and B."""
        return [[sum(A[i][k] * B[k][j] for k in range(len(B)))
                 for j in range(len(B[0]))]
                for i in range(len(A))]
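    # Shapes: A is (m x n), B is (n x p), result is (m x p).
    # Worked example: matmul([[1, 2]], [[3], [4]]) -> [[1*3 + 2*4]] = [[11]]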

    def softmax(self, x):
        """Computes a numerically stable softmax for a list of values."""
        m = max(x)  # subtract the max so exp() cannot overflow
        exp_x = [math.exp(i - m) for i in x]
        sum_exp = sum(exp_x)
        return [i / sum_exp for i in exp_x]
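    # Example: softmax([0.0, 1.0]) ≈ [0.2689, 0.7311]; the outputs are positive
    # and sum to 1, so each attention row becomes a probability distribution.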

    def attention(self, Q, K, V):
        """Computes the scaled dot-product attention."""
        K_T = list(map(list, zip(*K)))  # transpose K
        scores = self.matmul(Q, K_T)
        scores = [[score / math.sqrt(self.d_model) for score in row] for row in scores]
        weights = [self.softmax(row) for row in scores]
        return self.matmul(weights, V)
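    # Worked micro-example (hypothetical values, d_model = 2):
    #   Q = [[1, 0]], K = [[1, 0], [0, 1]], V = [[1, 2], [3, 4]]
    #   scores  = Q . K^T / sqrt(2) = [[0.7071, 0.0]]
    #   weights = softmax per row  ≈ [[0.6698, 0.3302]]
    #   output  = weights . V      ≈ [[1.6604, 2.6604]]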

    def forward(self, X):
        """Performs a forward pass through the transformer."""
        Q = self.matmul(X, self.W_q)
        K = self.matmul(X, self.W_k)
        V = self.matmul(X, self.W_v)
        return self.attention(Q, K, V)


# Tokenization & Embedding
def tokenize(sentence):
    """Splits a sentence into lowercase words."""
    return sentence.lower().split()

def generate_embeddings(tokens, d_model):
    """Creates random embeddings for tokens."""
    embedding_dict = {token: [random.uniform(-1, 1) for _ in range(d_model)] for token in set(tokens)}
    return [embedding_dict[token] for token in tokens], embedding_dict
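# Example: tokenize("How are you") -> ["how", "are", "you"]; generate_embeddings
# then maps each distinct token to one random d_model-dimensional vector and
# returns both the per-token embedding list and the token -> vector dictionary.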


# Training Data (Input → Expected Response)
training_data = [
    ("hello", "hi"),
    ("how are you", "i am fine"),
    ("what is your name", "i am a chatbot"),
    ("bye", "goodbye"),
]


# Mean Squared Error Loss
def mse_loss(y_pred, y_true):
    """Computes Mean Squared Error (MSE) loss."""
    return sum((yp - yt) ** 2 for yp, yt in zip(y_pred, y_true)) / len(y_pred)
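# Example: mse_loss([1.0, 2.0], [0.0, 0.0]) = (1 + 4) / 2 = 2.5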


# Training Function
def train(transformer, embedding_dict, learning_rate=0.01, epochs=100):
    """Trains the transformer model using a simple gradient update on embeddings."""
    d_model = transformer.d_model
    for epoch in range(epochs):
        total_loss = 0
        for input_text, target_text in training_data:
            # Convert words to embeddings (only the first token of each side is used)
            input_tokens = tokenize(input_text)
            target_tokens = tokenize(target_text)
            if input_tokens[0] not in embedding_dict or target_tokens[0] not in embedding_dict:
                continue  # Skip unknown words
            input_embedding = [embedding_dict[input_tokens[0]]]
            target_embedding = embedding_dict[target_tokens[0]]
            # Forward pass through transformer
            output_embedding = transformer.forward(input_embedding)[0]
            # Compute loss
            loss = mse_loss(output_embedding, target_embedding)
            total_loss += loss
            # Simple gradient-style update: nudge the input embedding toward the
            # target; the projection matrices W_q/W_k/W_v are never updated.
            for i in range(d_model):
                embedding_dict[input_tokens[0]][i] -= learning_rate * (output_embedding[i] - target_embedding[i])
        if epoch % 10 == 0:
            print(f"Epoch {epoch}, Loss: {total_loss:.4f}")


# Find Closest Word
def closest_word(vector, embedding_dict):
    """Finds the closest word in the dictionary to the given vector (cosine similarity)."""
    def cosine_similarity(v1, v2):
        dot = sum(a * b for a, b in zip(v1, v2))
        norm1 = math.sqrt(sum(a * a for a in v1))
        norm2 = math.sqrt(sum(b * b for b in v2))
        return dot / (norm1 * norm2 + 1e-9)  # Avoid division by zero

    best_match = None
    best_score = -float('inf')
    for word, embed in embedding_dict.items():
        score = cosine_similarity(vector, embed)
        if score > best_score:
            best_match = word
            best_score = score
    return best_match
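# Example: with embeddings e1 = [1, 0] and e2 = [0, 1], the vector [0.9, 0.1]
# matches e1 (cosine ≈ 0.994) rather than e2 (cosine ≈ 0.110).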


# Chatbot Interaction
def chatbot():
    """Runs a simple chatbot loop."""
    d_model = 4
    transformer = SimpleTransformer(d_model)
    # Collect all words from training data
    all_words = set(word for pair in training_data for sentence in pair for word in tokenize(sentence))
    _, embedding_dict = generate_embeddings(list(all_words), d_model)
    # Train the model
    train(transformer, embedding_dict)
    print("Chatbot: Hello! Type something to chat. Type 'exit' to stop.")
    while True:
        user_input = input("You: ").strip().lower()
        if user_input == "exit":
            print("Chatbot: Goodbye!")
            break
        tokens = tokenize(user_input)
        if not tokens or tokens[0] not in embedding_dict:
            print("Chatbot: I don't understand.")
            continue
        input_embedding = [embedding_dict[tokens[0]]]
        transformer_output = transformer.forward(input_embedding)
        # Find the best response based on trained embeddings
        response_word = closest_word(transformer_output[0], embedding_dict)
        print(f"Chatbot: {response_word}")


# Start chatbot (only when run as a script, so the module stays importable)
if __name__ == "__main__":
    chatbot()
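
To exercise the model without the interactive loop, a minimal smoke test might look like the sketch below (hypothetical snippet, not part of the gist; it assumes the file is saved as simple_transformer.py and relies on the __main__ guard above so that importing does not start the chat loop):

# sanity_check.py: hypothetical smoke test for the gist's public functions
import random
from simple_transformer import SimpleTransformer, generate_embeddings, tokenize, closest_word

random.seed(0)                                # reproducible weights and embeddings
model = SimpleTransformer(4)
_, table = generate_embeddings(tokenize("hello hi"), 4)
out = model.forward([table["hello"]])         # 1 x d_model output row
print(closest_word(out[0], table))            # nearest token by cosine similarity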