Created
April 18, 2025 21:33
-
-
Save ryanpadilha/d7335ce58ed7c429a5e1e006e9e2f224 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Side-by-side comparison of how two pretrained tokenizers split the same
# sentences: each sentence is printed followed by the BERT and GPT-2 token lists.
from transformers import AutoTokenizer

# Tokenizers are looked up by checkpoint name. These calls run at module load.
bert_tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
gpt2_tokenizer = AutoTokenizer.from_pretrained("gpt2")

# NOTE(review): "beatiful" in the second sentence looks like a deliberate
# misspelling, to contrast with the correctly spelled first sentence -- confirm
# before "fixing" it.
sentences = [
    "Today is a beautiful day.",
    "Today is a beatiful day.",
    "That beat is so dope!",
]

for s in sentences:
    print(f"Frase: {s}")
    print("BERT:", bert_tokenizer.tokenize(s))
    print("GPT-2:", gpt2_tokenizer.tokenize(s))
    # Visual separator between sentences.
    print("-" * 40)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment