SimpleTransformer
import math
import random


class SimpleTransformer:
    def __init__(self, d_model):
        """Initializes the transformer with a given embedding size (d_model)."""
        self.d_model = d_model
        # Random projection matrices for queries, keys, and values (d_model x d_model)
        self.W_q = [[random.uniform(-0.5, 0.5) for _ in range(d_model)] for _ in range(d_model)]
        self.W_k = [[random.uniform(-0.5, 0.5) for _ in range(d_model)] for _ in range(d_model)]
        self.W_v = [[random.uniform(-0.5, 0.5) for _ in range(d_model)] for _ in range(d_model)]

    def matmul(self, A, B):
        """Performs matrix multiplication of A and B."""
        return [[sum(A[i][k] * B[k][j] for k in range(len(B))) for j in range(len(B[0]))] for i in range(len(A))]

    def softmax(self, x):
        """Computes the softmax of a list of values (max-shifted for numerical stability)."""
        max_x = max(x)
        exp_x = [math.exp(i - max_x) for i in x]
        sum_exp = sum(exp_x)
        return [i / sum_exp for i in exp_x]

    def attention(self, Q, K, V):
        """Computes scaled dot-product attention: softmax(Q K^T / sqrt(d_model)) V."""
        scores = self.matmul(Q, list(map(list, zip(*K))))  # Q @ K^T
        scores = [[score / math.sqrt(self.d_model) for score in row] for row in scores]
        weights = [self.softmax(row) for row in scores]
        return self.matmul(weights, V)

    def forward(self, X):
        """Projects the input embeddings to Q, K, V and applies attention."""
        Q = self.matmul(X, self.W_q)
        K = self.matmul(X, self.W_k)
        V = self.matmul(X, self.W_v)
        return self.attention(Q, K, V)
# Tokenization & Embedding | |
def tokenize(sentence): | |
"""Splits a sentence into words.""" | |
return sentence.lower().split() | |
def generate_embeddings(tokens, d_model): | |
"""Creates random embeddings for tokens.""" | |
embedding_dict = {token: [random.uniform(-1, 1) for _ in range(d_model)] for token in set(tokens)} | |
return [embedding_dict[token] for token in tokens], embedding_dict | |
# Training Data (Input → Expected Response)
training_data = [
    ("hello", "hi"),
    ("how are you", "i am fine"),
    ("what is your name", "i am a chatbot"),
    ("bye", "goodbye"),
]


# Mean Squared Error Loss
def mse_loss(y_pred, y_true):
    """Computes Mean Squared Error (MSE) loss between two vectors."""
    return sum((yp - yt) ** 2 for yp, yt in zip(y_pred, y_true)) / len(y_pred)
# Training Function
def train(transformer, embedding_dict, learning_rate=0.01, epochs=100):
    """Trains the model by nudging input-word embeddings toward their target-word embeddings."""
    d_model = transformer.d_model
    for epoch in range(epochs):
        total_loss = 0
        for input_text, target_text in training_data:
            # Convert the first word of each sentence to its embedding
            input_tokens = tokenize(input_text)
            target_tokens = tokenize(target_text)
            if input_tokens[0] not in embedding_dict or target_tokens[0] not in embedding_dict:
                continue  # Skip unknown words
            input_embedding = [embedding_dict[input_tokens[0]]]
            target_embedding = embedding_dict[target_tokens[0]]
            # Forward pass through the transformer
            output_embedding = transformer.forward(input_embedding)[0]
            # Compute loss
            loss = mse_loss(output_embedding, target_embedding)
            total_loss += loss
            # Simple update: move the input embedding toward the target
            # (a heuristic nudge, not a true backpropagated gradient)
            for i in range(d_model):
                embedding_dict[input_tokens[0]][i] -= learning_rate * (output_embedding[i] - target_embedding[i])
        if epoch % 10 == 0:
            print(f"Epoch {epoch}, Loss: {total_loss:.4f}")
# Find Closest Word
def closest_word(vector, embedding_dict):
    """Finds the word whose embedding is most similar to the given vector (cosine similarity)."""
    def cosine_similarity(v1, v2):
        dot = sum(a * b for a, b in zip(v1, v2))
        norm1 = math.sqrt(sum(a * a for a in v1))
        norm2 = math.sqrt(sum(b * b for b in v2))
        return dot / (norm1 * norm2 + 1e-9)  # Avoid division by zero

    best_match = None
    best_score = -float('inf')
    for word, embed in embedding_dict.items():
        score = cosine_similarity(vector, embed)
        if score > best_score:
            best_match = word
            best_score = score
    return best_match
# Chatbot Interaction
def chatbot():
    """Runs a simple chatbot loop."""
    d_model = 4
    transformer = SimpleTransformer(d_model)
    # Collect all words from the training data and build their embeddings
    all_words = set(word for pair in training_data for sentence in pair for word in tokenize(sentence))
    _, embedding_dict = generate_embeddings(list(all_words), d_model)
    # Train the model
    train(transformer, embedding_dict)
    print("Chatbot: Hello! Type something to chat. Type 'exit' to stop.")
    while True:
        user_input = input("You: ").strip().lower()
        if user_input == "exit":
            print("Chatbot: Goodbye!")
            break
        tokens = tokenize(user_input)
        if not tokens or tokens[0] not in embedding_dict:
            print("Chatbot: I don't understand.")
            continue
        input_embedding = [embedding_dict[tokens[0]]]
        transformer_output = transformer.forward(input_embedding)
        # Pick the response word whose embedding is closest to the transformer output
        response_word = closest_word(transformer_output[0], embedding_dict)
        print(f"Chatbot: {response_word}")


# Start the chatbot when run as a script
if __name__ == "__main__":
    chatbot()
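
A minimal, non-interactive sanity check can be handy before launching the chat loop. The snippet below is an illustrative sketch rather than part of the original script; the sentence "hello bye" and the dimension 4 are arbitrary choices. It embeds two tokens, runs one forward pass, and checks that attention returns one d_model-sized row per token.

# Illustrative sanity check (sketch only, reuses the functions defined above)
demo_model = SimpleTransformer(4)                 # arbitrary d_model for the demo
demo_tokens = tokenize("hello bye")               # arbitrary two-word input
demo_embeds, demo_dict = generate_embeddings(demo_tokens, 4)
demo_out = demo_model.forward(demo_embeds)
assert len(demo_out) == len(demo_tokens) and all(len(row) == 4 for row in demo_out)
print("Closest word to the first output row:", closest_word(demo_out[0], demo_dict))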