Skip to content

Instantly share code, notes, and snippets.

@biranchi2018
Last active May 21, 2026 01:09
Show Gist options
  • Select an option

  • Save biranchi2018/f7aa554d258f8990e7297fd77c03920e to your computer and use it in GitHub Desktop.

Select an option

Save biranchi2018/f7aa554d258f8990e7297fd77c03920e to your computer and use it in GitHub Desktop.
GPT2 Model
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
# Load the tokenizer and the causal language model from one shared checkpoint
# name so the two always refer to the same pretrained weights/vocabulary.
MODEL_NAME = "distilgpt2"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, output_hidden_states=True)

# Inspect the architecture: the whole module tree first, then the
# language-modeling head on its own.
print(model)
print(model.lm_head)

# Token embedding table. Dimensions are:
# (number of tokens in vocabulary, dimension of model).
token_embeddings = model.transformer.wte
print(token_embeddings)

# Look up the embedding vector stored for a single token id (464).
print(token_embeddings(torch.tensor(464)))
# Prompt that the model is asked to continue.
text = "The chicken didn't cross the road because it was"

# Tokenize the prompt into PyTorch tensors. The result is unpacked as keyword
# arguments into ``generate`` below. It is named ``inputs`` (not ``input``) so
# the Python builtin ``input()`` is not shadowed at module scope.
inputs = tokenizer(text, return_tensors="pt")

# Generate a continuation with deterministic beam search.
output = model.generate(
    **inputs,
    # Upper bound on the total sequence length (prompt tokens included).
    max_length=50,
    # Keep the 5 best partial hypotheses at every step for more coherent text.
    num_beams=5,
    # Beam search here is meant to be deterministic, so sampling is disabled.
    do_sample=False,
    # Stop a hypothesis as soon as it emits the end-of-sequence token.
    eos_token_id=tokenizer.eos_token_id,
    # Reuse EOS as the padding id for generation, since no dedicated pad token
    # is configured here.
    pad_token_id=tokenizer.eos_token_id,
)

# Decode the first returned sequence back to text, skipping special tokens.
print('\n', tokenizer.decode(output[0], skip_special_tokens=True))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment