Last active
February 4, 2025 00:09
-
-
Save andrewginns/1fa5f67bd670823813b802b90f26e295 to your computer and use it in GitHub Desktop.
Filter the latest OpenAPI (YAML) specification to extract only the parts relevant to specific API endpoints.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# /// script | |
# dependencies = [ | |
# "requests", | |
# "pyyaml", | |
# "argparse" | |
# ] | |
# /// | |
""" | |
This script downloads an OpenAPI specification from a given URL, | |
filters it to extract information related to specific API endpoints, | |
and saves the filtered specifications to individual YAML files. | |
The header above is inline script metadata, so dependencies are installed automatically when the script is run with uv (recommended). | |
It uses the following libraries: | |
- requests: To download the OpenAPI specification from a URL. | |
- pyyaml: To parse and dump YAML files. | |
- argparse: To handle command-line arguments for specifying endpoints and output directory. | |
The script takes the following command-line arguments: | |
--endpoints: Comma-separated list of endpoint paths to extract (e.g., '/chat/completions,/embeddings'). | |
Defaults to '/chat/completions' if not provided. | |
--output_dir: Directory where the filtered YAML specs will be saved. Defaults to 'output_specs'. | |
Example usage with uv (recommended): | |
uv run fetch_OAI_spec.py --endpoints "/chat/completions" | |
Example usage: | |
pip install requests pyyaml  # argparse ships with the Python standard library | |
python fetch_OAI_spec.py --endpoints "/chat/completions,/v1/models" --output_dir custom_specs | |
""" | |
import requests | |
import yaml | |
import argparse | |
import os | |
def extract_endpoint_spec(
    openapi_url: str,
    endpoint_path: str,
    output_dir: str,
    valid_endpoints: "list[str] | None" = None,
) -> None:
    """Download an OpenAPI spec, keep one endpoint and its schemas, save as YAML.

    The path item for ``endpoint_path`` is copied in full (every HTTP method it
    declares), together with each entry of ``components.schemas`` that is
    reachable from it through ``$ref`` links, directly or transitively.
    The result is written to ``<output_dir>/<endpoint path with '/' -> '_'>.yaml``.

    Failures are reported with ``print`` rather than raised: download errors,
    YAML parse errors and any other exception are caught and printed.

    Args:
        openapi_url: URL of the OpenAPI YAML file.
        endpoint_path: The endpoint path to filter for (e.g., '/chat/completions').
            A leading '/' is added when missing.
        output_dir: Directory to save the filtered spec YAML file; created if
            it does not exist.
        valid_endpoints: Optional list of known endpoint paths, printed (sorted)
            as a hint when ``endpoint_path`` is not found in the spec.
    """
    try:
        # A timeout keeps an unresponsive server from hanging the script forever.
        response = requests.get(openapi_url, timeout=30)
        response.raise_for_status()
        # An empty document parses to None; treat it as a spec without paths.
        openapi_spec = yaml.safe_load(response.text) or {}

        # Normalize endpoint path: ensure it starts with '/'
        if not endpoint_path.startswith("/"):
            endpoint_path = "/" + endpoint_path

        paths = openapi_spec.get("paths") or {}
        if endpoint_path not in paths:
            print(f"Endpoint path '{endpoint_path}' not found.")
            if valid_endpoints:
                print("Valid endpoint paths are:")
                for valid_ep in sorted(valid_endpoints):  # Sort for readability
                    print(f"- {valid_ep}")
            return

        endpoint_info = paths[endpoint_path]
        components_schemas = (openapi_spec.get("components") or {}).get(
            "schemas"
        ) or {}

        # Walk the whole path item (all methods, parameters, bodies, responses)
        # because the whole path item is emitted below; scanning only part of
        # it would leave dangling $refs in the output. An explicit stack is
        # used instead of recursion so deep nesting cannot hit the recursion
        # limit.
        relevant_schemas = set()
        pending = [endpoint_info]
        while pending:
            node = pending.pop()
            if isinstance(node, list):
                pending.extend(node)
                continue
            if not isinstance(node, dict):
                continue
            ref = node.get("$ref")
            if not isinstance(ref, str):
                pending.extend(node.values())
                continue
            # The schema name is the last segment of the reference,
            # e.g. '#/components/schemas/Foo' -> 'Foo'.
            schema_name = ref.split("/")[-1]
            if schema_name in relevant_schemas:
                continue  # already visited; also breaks reference cycles
            relevant_schemas.add(schema_name)
            if schema_name in components_schemas:
                pending.append(components_schemas[schema_name])

        # Keep the order of the source spec so the output is identical from
        # run to run (iterating the set directly would not be).
        filtered_components_schemas = {
            name: schema
            for name, schema in components_schemas.items()
            if name in relevant_schemas
        }

        filtered_spec = {
            "openapi": openapi_spec.get("openapi", "3.0.0"),
            "info": openapi_spec.get("info"),
            "servers": openapi_spec.get("servers"),
            "paths": {endpoint_path: endpoint_info},
            "components": {"schemas": filtered_components_schemas},
        }

        # Ensure output directory exists
        os.makedirs(output_dir, exist_ok=True)
        filename = os.path.join(
            output_dir, endpoint_path.strip("/").replace("/", "_") + ".yaml"
        )
        with open(filename, "w", encoding="utf-8") as f:
            yaml.dump(filtered_spec, f, indent=2, sort_keys=False)
        print(f"Specification for '{endpoint_path}' saved to {filename}")
    except requests.exceptions.RequestException as e:
        print(f"Download error: {e}")
    except yaml.YAMLError as e:
        print(f"YAML parse error: {e}")
    except Exception as e:
        # Top-level boundary for a CLI helper: report and carry on with the
        # next endpoint instead of aborting the whole run.
        print(f"An unexpected error occurred: {e}")
if __name__ == "__main__":
    # Command-line entry point: parse arguments, fetch the list of valid paths
    # once for error reporting, then extract each requested endpoint.
    openapi_url = (
        "https://raw.githubusercontent.com/openai/openai-openapi/master/openapi.yaml"
    )
    parser = argparse.ArgumentParser(
        description="Extract OpenAPI spec for specific endpoints."
    )
    parser.add_argument(
        "--endpoints",
        type=str,
        default="/chat/completions",
        help="Comma-separated list of endpoint paths (e.g., '/chat/completions,/embeddings')",
    )
    parser.add_argument(
        "--output_dir",
        type=str,
        default="output_specs",
        help="Directory to save the filtered specs (default: output_specs)",
    )
    args = parser.parse_args()

    # Drop blank entries so a stray comma does not turn into a lookup for "/".
    target_endpoints = [
        ep.strip() for ep in args.endpoints.split(",") if ep.strip()
    ]
    if not target_endpoints:
        parser.error("--endpoints must name at least one endpoint path")
    output_directory = args.output_dir

    # Extract valid endpoint paths for error reporting
    try:
        # A timeout keeps an unresponsive server from hanging the script.
        response = requests.get(openapi_url, timeout=30)
        response.raise_for_status()
        # An empty document parses to None; treat it as a spec without paths.
        openapi_spec = yaml.safe_load(response.text) or {}
    except requests.exceptions.RequestException as e:
        raise SystemExit(f"Download error: {e}") from e
    except yaml.YAMLError as e:
        raise SystemExit(f"YAML parse error: {e}") from e
    valid_endpoints = list(openapi_spec.get("paths") or {})

    for endpoint in target_endpoints:
        print(f"Extracting specification for endpoint: {endpoint}")
        extract_endpoint_spec(openapi_url, endpoint, output_directory, valid_endpoints)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
OpenAI's spec is ridiculously large. I wanted a way to reliably extract only the most relevant information from the spec to provide as context to an LLM.
I think this approach works well to filter the most up to date spec down into what is needed for each endpoint.
e.g. as of 04/02/2024 the full spec is 905973 characters long, but the parts relevant to
/chat/completions
are only 82481 characters long.