Last active
June 2, 2025 17:16
-
-
Save aurotripathy/d7b1a742067c43ca73233638a5699eea to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# from https://community.openai.com/t/easy-way-to-get-a-context-window-for-a-model/552099/4 | |
# the hard way... | |
# Makes a gigantic meaningless OpenAI chat-completion prompt call into the vLLM server
# Parses the (error) return and determines the actual context window supported | |
# Usage: python context-window-discovery.py --model gpt-4o-mini --base-url http://localhost:8080/v1 | |
from openai import OpenAI | |
import re | |
import argparse | |
# Reference sample of the error text the probe parses (standard OpenAI wording).
example_error_msg = ("This model's maximum context length is 128000 tokens. However, "
                     "your messages resulted in 262151 tokens. Please reduce the length of the messages.")
def get_context_error(err_msg):
    """Extract the model's maximum context length from an API error message.

    Accepts both phrasings seen in the wild:
    "maximum context length is N tokens" (standard OpenAI wording, as in
    example_error_msg) and "maximum input context length is N" (some
    vLLM builds).

    Args:
        err_msg: the error-message string returned by the server.

    Returns:
        The integer token limit parsed from the message.

    Raises:
        ValueError: if no context-length figure is found, or the parsed
            value falls outside the plausible range 1000-200000.
    """
    # The original pattern required the word "input", which never matched
    # the standard wording shown in example_error_msg; "(?:input )?" makes
    # that word optional so both variants parse.
    match = re.search(r'maximum (?:input )?context length is (\d+)', err_msg)
    if not match:
        raise ValueError("No value found matching context length.")
    max_context_length = int(match.group(1))
    # Sanity-check: reject absurd values so a garbled message isn't trusted.
    if not 1000 <= max_context_length <= 200000:
        raise ValueError("extracted context length beyond (1000-200000).")
    return max_context_length
def get_context_len(modelparam="gpt-3.5-turbo", base_url=None):
    """Probe an OpenAI-compatible endpoint for a model's context-length limit.

    Sends a deliberately oversized chat-completion request (input and
    max_tokens larger than any current model supports) and parses the
    resulting 400 error for the reported maximum context length.

    Args:
        modelparam: model name to probe.
        base_url: API base URL; defaults to the eval server when None.

    Returns:
        The context length in tokens, parsed from the server's error.

    Raises:
        ValueError: if the request unexpectedly succeeds, or the failure
            is not a 400 error carrying a context-length figure.
    """
    if base_url is None:
        base_url = "http://eval.furiosa.ai:32553/v1/"
    api_key = "EMPTY"
    client = OpenAI(api_key=api_key, base_url=base_url)
    bigdata = "!@" * 2**17  # ~256k characters — beyond any current window
    try:
        response = client.chat.completions.create(
            model=modelparam, max_tokens=265000, top_p=0.01,
            messages=[{"role": "system", "content": bigdata}]
        )
    except Exception as err:
        print(f'Error: {err}')
        # openai>=1.0 API errors expose the HTTP status as .status_code
        # (.code is the body's error-code string); older releases used
        # .code.  Non-API failures (connection errors, etc.) have neither,
        # so use getattr rather than a bare attribute access.
        status = getattr(err, 'status_code', None) or getattr(err, 'code', None)
        if status == 400:
            return get_context_error(err.body['message'])
        raise ValueError(err)
    # NOTE: the original raised this inside the try block, so it was
    # swallowed by the except clause and resurfaced as AttributeError
    # (ValueError has no .code).  Raising here reports the real condition.
    raise ValueError(f"Context len: $$NO ERROR!$$:\n"
                     f"{response.choices[0].message.content}")
if __name__ == "__main__":
    # CLI entry point: probe the given endpoint and report the discovered limit.
    cli = argparse.ArgumentParser(description='Discover context window size for a model')
    cli.add_argument('--base-url', type=str,
                     default="http://eval.furiosa.ai:32553/v1/",
                     help='Base URL for the API endpoint')
    cli.add_argument('--model', type=str,
                     default="EMPTY",
                     help='Model name to test')
    opts = cli.parse_args()
    discovered = get_context_len(opts.model, opts.base_url)
    print(f"Max context length: {discovered} tokens.")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment