Skip to content

Instantly share code, notes, and snippets.

@taq
Created January 26, 2026 21:16
Show Gist options
  • Select an option

  • Save taq/e9aaac2cec12031e94e11b354ceafdc9 to your computer and use it in GitHub Desktop.

Select an option

Save taq/e9aaac2cec12031e94e11b354ceafdc9 to your computer and use it in GitHub Desktop.
Counting tokens with the Anthropic tokenizer
#!/usr/bin/env python3
"""
Script para contar tokens em arquivos de código usando o tokenizador da Anthropic.
Uso:
python count_tokens.py User.java user.rb
"""
import sys
from anthropic import Anthropic
def count_tokens(text):
    """Return the number of input tokens the Anthropic API reports for *text*.

    The text is wrapped as a single user message so the count matches what
    an actual Messages API request with this content would consume.

    Args:
        text: The string to tokenize.

    Returns:
        int: The ``input_tokens`` figure from the token-counting endpoint.
    """
    # Uses the Messages token-counting method; a fresh client per call keeps
    # this function self-contained (credentials come from the environment).
    result = Anthropic().messages.count_tokens(
        model="claude-sonnet-4-20250514",
        messages=[{"role": "user", "content": text}],
    )
    return result.input_tokens
def analyze_file(filepath):
    """Read a file and gather token/line/character statistics for it.

    Args:
        filepath: Path of the file to analyze (read as UTF-8 text).

    Returns:
        dict | None: A mapping with keys ``filepath``, ``tokens``, ``lines``
        and ``chars``, or ``None`` when reading or tokenizing fails (the
        error is printed, not raised — best-effort per file).
    """
    try:
        with open(filepath, 'r', encoding='utf-8') as handle:
            text = handle.read()
        # Broad catch is intentional: both file I/O and the API call can
        # fail in many ways, and one bad file should not abort the run.
        return {
            'filepath': filepath,
            'tokens': count_tokens(text),
            'lines': len(text.splitlines()),
            'chars': len(text),
        }
    except Exception as e:
        print(f"Erro ao processar {filepath}: {e}")
        return None
def main():
    """Command-line entry point.

    Usage: ``python count_tokens.py <file1> [file2] ...``

    Prints a per-file token/line/character report; when exactly two files
    are given, also prints a size comparison between them.

    Exits with status 1 when no arguments are given or no file could be
    processed.
    """
    if len(sys.argv) < 2:
        print("Uso: python count_tokens.py <arquivo1> [arquivo2] ...")
        sys.exit(1)
    results = []
    for filepath in sys.argv[1:]:
        result = analyze_file(filepath)
        if result:
            results.append(result)
    # If every file failed, don't print an empty report — signal failure.
    if not results:
        sys.exit(1)
    # Display results
    print("\n" + "="*70)
    print("ANÁLISE DE TOKENS COM TOKENIZADOR DA ANTHROPIC")
    print("="*70)
    for r in results:
        print(f"\n📄 {r['filepath']}")
        print(f" Tokens: {r['tokens']:4d}")
        print(f" Linhas: {r['lines']:4d}")
        print(f" Caracteres: {r['chars']:4d}")
        # Guard: an empty file can tokenize to 0 tokens; dividing by it
        # would raise ZeroDivisionError.
        if r['tokens'] > 0:
            print(f" Chars/Token: {r['chars']/r['tokens']:.2f}")
    # Comparison when exactly two files were analyzed
    if len(results) == 2:
        print("\n" + "="*70)
        print("COMPARAÇÃO")
        print("="*70)
        # Ensure r_bigger has at least as many tokens as r_smaller
        if results[0]['tokens'] > results[1]['tokens']:
            r_bigger, r_smaller = results[0], results[1]
        else:
            r_bigger, r_smaller = results[1], results[0]
        token_diff = r_bigger['tokens'] - r_smaller['tokens']
        print(f"\n{r_smaller['filepath']} vs {r_bigger['filepath']}:")
        # Same zero guards as the ratio below: each denominator can be 0
        # (e.g. two empty files), which previously crashed the report.
        if r_bigger['tokens'] > 0:
            token_reduction = (1 - r_smaller['tokens'] / r_bigger['tokens']) * 100
            print(f" Redução em tokens: {token_reduction:5.1f}% (economiza {token_diff} tokens)")
        if r_bigger['lines'] > 0:
            lines_reduction = (1 - r_smaller['lines'] / r_bigger['lines']) * 100
            print(f" Redução em linhas: {lines_reduction:5.1f}%")
        if r_bigger['chars'] > 0:
            chars_reduction = (1 - r_smaller['chars'] / r_bigger['chars']) * 100
            print(f" Redução em caracteres: {chars_reduction:5.1f}%")
        # Ratio (already guarded in the original)
        if r_smaller['tokens'] > 0:
            ratio = r_bigger['tokens'] / r_smaller['tokens']
            print(f"\n 📊 {r_bigger['filepath']} usa {ratio:.2f}x mais tokens")
            print(f" que {r_smaller['filepath']}")
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment