@dtrce
Created June 26, 2025 19:31
llm_benchmark.py
import torch
import time
# Model Settings
input_dim = 4096
hidden_dim = 4096
output_dim = 4096
batch_size = 8
num_layers = 4 # Transformer-style depth
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Running benchmark on {device} ...")
# Dummy LLM-like Model
layers = []
for i in range(num_layers):
    in_dim = input_dim if i == 0 else hidden_dim
    layers.append(torch.nn.Linear(in_dim, hidden_dim))
    layers.append(torch.nn.ReLU())
    layers.append(torch.nn.Linear(hidden_dim, hidden_dim))
    layers.append(torch.nn.ReLU())
layers.append(torch.nn.Linear(hidden_dim, output_dim))  # final output projection
model = torch.nn.Sequential(*layers).to(device)
# Dummy Data
input_data = torch.randn(batch_size, input_dim).to(device)
target = torch.randn(batch_size, output_dim).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
loss_fn = torch.nn.MSELoss()
# Benchmark Settings
num_iterations = 500
warmup_iterations = 50
# Warmup
for _ in range(warmup_iterations):
    optimizer.zero_grad()
    output = model(input_data)
    loss = loss_fn(output, target)
    loss.backward()
    optimizer.step()
if device.type == "cuda":
    torch.cuda.synchronize()  # finish pending GPU work before starting the timer
# Timed Benchmark
start_time = time.time()
for _ in range(num_iterations):
    optimizer.zero_grad()
    output = model(input_data)
    loss = loss_fn(output, target)
    loss.backward()
    optimizer.step()
if device.type == "cuda":
    torch.cuda.synchronize()  # make sure all GPU work has finished before stopping the clock
end_time = time.time()
# Results
total_time = end_time - start_time
it_per_sec = num_iterations / total_time
tokens_per_iter = batch_size * output_dim  # treats each output feature as one "token" (a rough proxy)
tokens_per_sec = it_per_sec * tokens_per_iter
print("=== LLM GPU Benchmark Results ===")
print(f"Iterations per second : {it_per_sec:.2f} it/s")
print(f"Tokens per second : {tokens_per_sec:.2f} tokens/s")
print(f"Elapsed time : {total_time:.2f} seconds")
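A possible refinement, not part of the original gist: on CUDA, the same loop can be timed with CUDA events instead of time.time(), which measures elapsed GPU time directly. The sketch below assumes the model, input_data, target, optimizer, loss_fn, device, and num_iterations defined above.

# Sketch: time the same training loop with CUDA events (assumes a CUDA device).
if device.type == "cuda":
    start_evt = torch.cuda.Event(enable_timing=True)
    end_evt = torch.cuda.Event(enable_timing=True)
    start_evt.record()
    for _ in range(num_iterations):
        optimizer.zero_grad()
        output = model(input_data)
        loss = loss_fn(output, target)
        loss.backward()
        optimizer.step()
    end_evt.record()
    torch.cuda.synchronize()  # wait until both recorded events have completed
    elapsed_s = start_evt.elapsed_time(end_evt) / 1000.0  # elapsed_time() returns milliseconds
    print(f"CUDA-event elapsed time : {elapsed_s:.2f} seconds")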