Skip to content

Instantly share code, notes, and snippets.

@minikomi
Created October 23, 2025 01:15
Show Gist options
  • Save minikomi/8a591b3371489053f7f850c60be486ca to your computer and use it in GitHub Desktop.
Save minikomi/8a591b3371489053f7f850c60be486ca to your computer and use it in GitHub Desktop.
#!/usr/bin/env -S uv run -q -s
# /// script
# requires-python = ">=3.11"
# dependencies = ["tiktoken>=0.12"]
# ///
import sys
import argparse
import tiktoken
from tiktoken.model import (
encoding_name_for_model,
) # uses the official model→encoding map
# Encoding used when no model/encoding name is given, or when the given name
# cannot be resolved to a known model or encoding.
DEFAULT_ENCODING = "o200k_base"
def resolve_encoding(name: str | None) -> "tiktoken.Encoding":
    """Return the tiktoken encoding for a model name or an encoding name.

    Resolution order:
      1. Empty/None ``name``: the ``DEFAULT_ENCODING`` encoding.
      2. ``name`` is a known model: the encoding mapped to that model.
      3. ``name`` is itself an encoding name: that encoding.
      4. Otherwise: ``DEFAULT_ENCODING``, with a warning written to stderr so
         the caller knows the count is not for the encoding they asked for.

    Args:
        name: Model name (e.g. ``gpt-4o``) or encoding name
            (e.g. ``cl100k_base``); may be empty or None.

    Returns:
        The resolved ``tiktoken.Encoding``.
    """
    if not name:
        return tiktoken.get_encoding(DEFAULT_ENCODING)

    try:
        enc_name = encoding_name_for_model(name)
    except KeyError:
        # Not a recognised model name; treat it as a raw encoding name.
        enc_name = name

    try:
        return tiktoken.get_encoding(enc_name)
    except ValueError:
        # Unknown encoding name. Keep the best-effort fallback, but do not
        # fall back silently: a count from a different encoding is misleading.
        print(
            f"warning: unknown model or encoding {name!r}; "
            f"falling back to {DEFAULT_ENCODING}",
            file=sys.stderr,
        )
        return tiktoken.get_encoding(DEFAULT_ENCODING)
def main():
    """Read text from stdin and print its token count to stdout.

    Command-line options:
      -m/--model  Model or encoding name, passed to ``resolve_encoding``.
      --no-strip  Count the input verbatim; by default leading and trailing
                  whitespace is stripped before counting.
    """
    ap = argparse.ArgumentParser(
        description="Count tokens from stdin for a given model/encoding."
    )
    ap.add_argument(
        "-m",
        "--model",
        help="Model or encoding (e.g., gpt-4o, gpt-3.5-turbo, cl100k_base, o200k_base).",
    )
    ap.add_argument(
        "--no-strip",
        action="store_true",
        help="Do not strip leading/trailing whitespace before counting.",
    )
    args = ap.parse_args()

    text = sys.stdin.read()
    if not args.no_strip:
        text = text.strip()

    enc = resolve_encoding(args.model)
    # stdin is arbitrary text and may contain literal special-token strings
    # such as "<|endoftext|>". With the default disallowed_special="all",
    # encode() raises ValueError on those; passing an empty tuple makes them
    # count as ordinary text instead of crashing the tool.
    print(len(enc.encode(text, disallowed_special=())))
# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment