biranchi2018 · May 21, 2026 01:09
diff --git a/learn_gpt2.py b/learn_gpt2.py
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch

 # Load the pretrained DistilGPT-2 tokenizer and causal language model.
 tokenizer = AutoTokenizer.from_pretrained("distilgpt2")
 model = AutoModelForCausalLM.from_pretrained("distilgpt2", output_hidden_states=True)

 # Inspect the full architecture, the output head and the token-embedding table.
 print(model)
 print(model.lm_head)
 print(model.transformer.wte)  # Dimensions are: (number of tokens in vocabulary, dimension of model)
 # Embedding vector looked up for the single token id 464
 # (presumably the id of "The" -- verify with tokenizer.decode([464])).
 print(model.transformer.wte(torch.tensor(464)))


 text = "The chicken didn't cross the road because it was"

 # Tokenize the prompt into PyTorch tensors. The result is named `inputs`
 # rather than `input` so the builtin input() is not shadowed.
 inputs = tokenizer(text, return_tensors="pt")

 # Generate a continuation of the prompt using 5 beams and no sampling.
 output = model.generate(
     **inputs,
     # NOTE(review): in transformers, max_length is documented as covering
     # prompt tokens plus generated tokens -- confirm this is the intended cap.
     max_length=50,
     num_beams=5,
     do_sample=False,
     eos_token_id=tokenizer.eos_token_id,
     # The EOS id is reused as the padding id; presumably because this
     # tokenizer defines no dedicated pad token -- confirm.
     pad_token_id=tokenizer.eos_token_id,
 )

 # Decode the first returned sequence to text, dropping special tokens.
 print('\n', tokenizer.decode(output[0], skip_special_tokens=True))
	from transformers import AutoTokenizer, AutoModelForCausalLM
	import torch

	# Load the pretrained DistilGPT-2 tokenizer and causal language model.
	tokenizer = AutoTokenizer.from_pretrained("distilgpt2")
	model = AutoModelForCausalLM.from_pretrained("distilgpt2", output_hidden_states=True)

	# Inspect the full architecture, the output head and the token-embedding table.
	print(model)
	print(model.lm_head)
	print(model.transformer.wte)  # Dimensions are: (number of tokens in vocabulary, dimension of model)
	# Embedding vector looked up for the single token id 464
	# (presumably the id of "The" -- verify with tokenizer.decode([464])).
	print(model.transformer.wte(torch.tensor(464)))


	text = "The chicken didn't cross the road because it was"

	# Tokenize the prompt into PyTorch tensors. The result is named `inputs`
	# rather than `input` so the builtin input() is not shadowed.
	inputs = tokenizer(text, return_tensors="pt")

	# Generate a continuation of the prompt using 5 beams and no sampling.
	output = model.generate(
	    **inputs,
	    # NOTE(review): in transformers, max_length is documented as covering
	    # prompt tokens plus generated tokens -- confirm this is the intended cap.
	    max_length=50,
	    num_beams=5,
	    do_sample=False,
	    eos_token_id=tokenizer.eos_token_id,
	    # The EOS id is reused as the padding id; presumably because this
	    # tokenizer defines no dedicated pad token -- confirm.
	    pad_token_id=tokenizer.eos_token_id,
	)

	# Decode the first returned sequence to text, dropping special tokens.
	print('\n', tokenizer.decode(output[0], skip_special_tokens=True))
No results found