Skip to content

Instantly share code, notes, and snippets.

@aurotripathy
Last active June 2, 2025 17:16
Show Gist options
  • Save aurotripathy/d7b1a742067c43ca73233638a5699eea to your computer and use it in GitHub Desktop.
Save aurotripathy/d7b1a742067c43ca73233638a5699eea to your computer and use it in GitHub Desktop.
# from https://community.openai.com/t/easy-way-to-get-a-context-window-for-a-model/552099/4
# the hard way...
# Makes a gigantic meaningless OpenAI chat-completion prompt call into the vLLM server
# Parses the (error) return and determines the actual context window supported
# Usage: python context-window-discovery.py --model gpt-4o-mini --base-url http://localhost:8080/v1
from openai import OpenAI
import re
import argparse
# Reference sample of the error string this script parses (useful for testing
# get_context_error by hand).
example_error_msg = (
    "This model's maximum context length is 128000 tokens. However, "
    "your messages resulted in 262151 tokens. Please reduce the length of the messages."
)
def get_context_error(err_msg):
    """Extract the maximum context length (in tokens) from a server error message.

    Matches both phrasings seen in the wild:
    "maximum context length is N" (OpenAI-style, see example_error_msg) and
    "maximum input context length is N" (some vLLM builds).

    Args:
        err_msg: the error message text returned by the server.

    Returns:
        The context length as an int.

    Raises:
        ValueError: if no context-length figure is found, or the extracted
            value falls outside the sanity range 1000-200000.
    """
    # The original pattern required the word "input", which the documented
    # example message does not contain; make it optional so both forms match.
    match = re.search(r'maximum (?:input )?context length is (\d+)', err_msg)
    if not match:
        raise ValueError("No value found matching context length.")
    max_context_length = int(match.group(1))
    # Sanity-check: reject implausible values (e.g. a stray number matched).
    if 1000 <= max_context_length <= 200000:
        return max_context_length
    raise ValueError("extracted context length beyond (1000-200000).")
def get_context_len(modelparam="gpt-3.5-turbo", base_url=None):
    """Probe a chat-completion endpoint for its context-length limit.

    Sends a request whose prompt (and max_tokens) exceed any known model's
    window, then parses the resulting 400 error for the advertised limit.

    Args:
        modelparam: model name to probe.
        base_url: API endpoint; defaults to the Furiosa eval server.

    Returns:
        The context length in tokens.

    Raises:
        ValueError: if the oversized request unexpectedly succeeds, or the
            failure is not an HTTP 400 error.
    """
    if base_url is None:
        base_url = "http://eval.furiosa.ai:32553/v1/"
    api_key = "EMPTY"  # local vLLM servers accept any key
    client = OpenAI(api_key=api_key, base_url=base_url)
    bigdata = "!@" * 2**17  # 256k chars — far beyond any model's window
    try:
        response = client.chat.completions.create(
            model=modelparam, max_tokens=265000, top_p=0.01,
            messages=[{"role": "system", "content": bigdata}]
        )
    except Exception as err:
        print(f'Error: {err}')
        # openai v1 status errors expose the HTTP status as `status_code`;
        # some versions also carry a `code` field — accept either for 400.
        if getattr(err, 'code', None) == 400 or getattr(err, 'status_code', None) == 400:
            return get_context_error(err.body['message'])
        raise ValueError(err)
    # BUG FIX: the original raised this *inside* the try block, where its own
    # `except Exception` swallowed it and then crashed with AttributeError on
    # `err.code`. Raising after the try reports the anomaly correctly.
    raise ValueError(f"Context len: $$NO ERROR!$$:\n"
                     f"{response.choices[0].message.content}")
if __name__ == "__main__":
    # Command-line entry point: probe the given endpoint for the model's
    # maximum context window and report it.
    cli = argparse.ArgumentParser(
        description='Discover context window size for a model')
    cli.add_argument('--base-url', type=str,
                     default="http://eval.furiosa.ai:32553/v1/",
                     help='Base URL for the API endpoint')
    cli.add_argument('--model', type=str,
                     default="EMPTY",
                     help='Model name to test')
    opts = cli.parse_args()
    window = get_context_len(opts.model, opts.base_url)
    print(f"Max context length: {window} tokens.")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment