Created
January 26, 2026 21:16
-
-
Save taq/e9aaac2cec12031e94e11b354ceafdc9 to your computer and use it in GitHub Desktop.
Counting tokens with the Anthropic API
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| """ | |
| Script para contar tokens em arquivos de código usando o tokenizador da Anthropic. | |
| Uso: | |
| python count_tokens.py User.java user.rb | |
| """ | |
| import sys | |
| from anthropic import Anthropic | |
def count_tokens(text):
    """Count tokens in a text using Anthropic's token-counting endpoint.

    Args:
        text: The string whose token count is requested.

    Returns:
        int: The number of input tokens the API reports for *text* when
        sent as a single user message.
    """
    # Reuse one client across calls instead of constructing a new
    # Anthropic() (and re-reading credentials) for every file analyzed.
    client = getattr(count_tokens, "_client", None)
    if client is None:
        client = count_tokens._client = Anthropic()
    # Beta token-counting endpoint: returns the token usage the message
    # would consume, without generating a completion.
    response = client.messages.count_tokens(
        model="claude-sonnet-4-20250514",
        messages=[{"role": "user", "content": text}],
    )
    return response.input_tokens
def analyze_file(filepath):
    """Read a file and return its token/line/character statistics.

    Args:
        filepath: Path of the file to analyze.

    Returns:
        dict with keys ``filepath``, ``tokens``, ``lines`` and ``chars``,
        or ``None`` if reading or token counting fails (the error is
        printed and the file is skipped).
    """
    try:
        with open(filepath, 'r', encoding='utf-8') as source:
            text = source.read()
        # Best-effort: any read/decoding/API failure skips just this file.
        return {
            'filepath': filepath,
            'tokens': count_tokens(text),
            'lines': len(text.splitlines()),
            'chars': len(text),
        }
    except Exception as e:
        print(f"Erro ao processar {filepath}: {e}")
        return None
| def main(): | |
| if len(sys.argv) < 2: | |
| print("Uso: python count_tokens.py <arquivo1> [arquivo2] ...") | |
| sys.exit(1) | |
| results = [] | |
| for filepath in sys.argv[1:]: | |
| result = analyze_file(filepath) | |
| if result: | |
| results.append(result) | |
| # Exibe resultados | |
| print("\n" + "="*70) | |
| print("ANÁLISE DE TOKENS COM TOKENIZADOR DA ANTHROPIC") | |
| print("="*70) | |
| for r in results: | |
| print(f"\n📄 {r['filepath']}") | |
| print(f" Tokens: {r['tokens']:4d}") | |
| print(f" Linhas: {r['lines']:4d}") | |
| print(f" Caracteres: {r['chars']:4d}") | |
| print(f" Chars/Token: {r['chars']/r['tokens']:.2f}") | |
| # Comparação se houver 2 arquivos | |
| if len(results) == 2: | |
| print("\n" + "="*70) | |
| print("COMPARAÇÃO") | |
| print("="*70) | |
| # Garante que r_bigger tem mais tokens que r_smaller | |
| if results[0]['tokens'] > results[1]['tokens']: | |
| r_bigger, r_smaller = results[0], results[1] | |
| else: | |
| r_bigger, r_smaller = results[1], results[0] | |
| token_diff = r_bigger['tokens'] - r_smaller['tokens'] | |
| token_reduction = (1 - r_smaller['tokens'] / r_bigger['tokens']) * 100 | |
| lines_reduction = (1 - r_smaller['lines'] / r_bigger['lines']) * 100 | |
| chars_reduction = (1 - r_smaller['chars'] / r_bigger['chars']) * 100 | |
| print(f"\n{r_smaller['filepath']} vs {r_bigger['filepath']}:") | |
| print(f" Redução em tokens: {token_reduction:5.1f}% (economiza {token_diff} tokens)") | |
| print(f" Redução em linhas: {lines_reduction:5.1f}%") | |
| print(f" Redução em caracteres: {chars_reduction:5.1f}%") | |
| # Proporção | |
| if r_smaller['tokens'] > 0: | |
| ratio = r_bigger['tokens'] / r_smaller['tokens'] | |
| print(f"\n 📊 {r_bigger['filepath']} usa {ratio:.2f}x mais tokens") | |
| print(f" que {r_smaller['filepath']}") | |
| if __name__ == '__main__': | |
| main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment