Skip to content

Instantly share code, notes, and snippets.

@andrewginns
Last active February 4, 2025 00:09
Show Gist options
  • Save andrewginns/1fa5f67bd670823813b802b90f26e295 to your computer and use it in GitHub Desktop.
Save andrewginns/1fa5f67bd670823813b802b90f26e295 to your computer and use it in GitHub Desktop.
Filter the latest OpenAPI (YAML) specification to extract only the parts relevant to specific API endpoints.
# /// script
# dependencies = [
# "requests",
# "pyyaml",
# "argparse"
# ]
# ///
"""
This script downloads an OpenAPI specification from a given URL,
filters it to extract information related to specific API endpoints,
and saves the filtered specifications to individual YAML files.
Inline script for automatic dependency installation when running with uv (recommended).
It uses the following libraries:
- requests: To download the OpenAPI specification from a URL.
- pyyaml: To parse and dump YAML files.
- argparse: To handle command-line arguments for specifying endpoints and output directory.
The script takes the following command-line arguments:
--endpoints: Comma-separated list of endpoint paths to extract (e.g., '/chat/completions,/embeddings').
Defaults to '/chat/completions' if not provided.
--output_dir: Directory where the filtered YAML specs will be saved. Defaults to 'output_specs'.
Example usage with uv (recommended):
uv run fetch_OAI_spec.py --endpoints "/chat/completions"
Example usage:
pip install requests pyyaml argparse
python script_name.py --endpoints "/chat/completions,/v1/models" --output_dir custom_specs
"""
import requests
import yaml
import argparse
import os
def extract_endpoint_spec(
    openapi_url: str,
    endpoint_path: str,
    output_dir: str,
    valid_endpoints: list[str] | None = None,
) -> None:
    """Download an OpenAPI spec, filter it to one endpoint, and save it as YAML.

    The filtered spec keeps the top-level ``openapi``/``info``/``servers``
    sections, the single requested path entry, and only those
    ``components.schemas`` entries that are (transitively) referenced via
    ``$ref`` from the endpoint's POST requestBody/responses.

    Args:
        openapi_url: URL of the OpenAPI YAML file.
        endpoint_path: Endpoint path to filter for (e.g. '/chat/completions').
            A missing leading '/' is added automatically.
        output_dir: Directory to save the filtered spec YAML file into
            (created if necessary).
        valid_endpoints: Optional list of valid endpoint paths, printed as a
            hint when ``endpoint_path`` is not found in the spec.

    Returns:
        None. Errors are reported on stdout rather than raised.
    """
    try:
        # timeout prevents the script from hanging forever on a stalled server.
        response = requests.get(openapi_url, timeout=30)
        response.raise_for_status()
        # safe_load: never execute arbitrary YAML tags from a remote document.
        openapi_spec = yaml.safe_load(response.text)

        # Normalize endpoint path: ensure it starts with '/'
        endpoint_path = (
            endpoint_path if endpoint_path.startswith("/") else "/" + endpoint_path
        )

        if endpoint_path in openapi_spec.get("paths", {}):
            endpoint_info = openapi_spec["paths"][endpoint_path]
            components_schemas = openapi_spec.get("components", {}).get("schemas", {})
            relevant_schemas = set()

            def find_schema_refs(data, path="root"):
                # Recursively walk the structure collecting every schema name
                # reachable through "$ref"; the visited-set guard in
                # relevant_schemas prevents infinite loops on cyclic schemas.
                if isinstance(data, dict):
                    if "$ref" in data:
                        schema_name = data["$ref"].split("/")[-1]
                        if schema_name not in relevant_schemas:
                            relevant_schemas.add(schema_name)
                            if schema_name in components_schemas:
                                find_schema_refs(
                                    components_schemas[schema_name],
                                    path=path + "->" + schema_name,
                                )
                    else:
                        for key, value in data.items():
                            find_schema_refs(value, path=path + "->" + key)
                elif isinstance(data, list):
                    for index, item in enumerate(data):
                        find_schema_refs(item, path=path + f"->[{index}]")

            # Only the POST operation is examined; other verbs (GET, DELETE,
            # ...) are kept in the path entry but their refs are not chased.
            post_operation = endpoint_info.get("post")
            if post_operation:
                if "requestBody" in post_operation:
                    find_schema_refs(post_operation["requestBody"], path="requestBody")
                if "responses" in post_operation:
                    find_schema_refs(post_operation["responses"], path="responses")

            filtered_components_schemas = {
                schema_name: components_schemas[schema_name]
                for schema_name in relevant_schemas
                if schema_name in components_schemas
            }

            filtered_spec = {
                "openapi": openapi_spec.get("openapi", "3.0.0"),
                "info": openapi_spec.get("info"),
                "servers": openapi_spec.get("servers"),
                "paths": {endpoint_path: endpoint_info},
                "components": {"schemas": filtered_components_schemas},
            }

            # Ensure output directory exists
            os.makedirs(output_dir, exist_ok=True)
            # '/chat/completions' -> 'chat_completions.yaml'
            filename = os.path.join(
                output_dir, endpoint_path.strip("/").replace("/", "_") + ".yaml"
            )
            with open(filename, "w") as f:
                yaml.dump(filtered_spec, f, indent=2, sort_keys=False)
            # Bug fix: previously printed a literal placeholder instead of
            # the actual output path.
            print(f"Specification for '{endpoint_path}' saved to {filename}")
        else:
            print(f"Endpoint path '{endpoint_path}' not found.")
            if valid_endpoints:
                print("Valid endpoint paths are:")
                for valid_ep in sorted(valid_endpoints):  # Sort for better readability
                    print(f"- {valid_ep}")
    except requests.exceptions.RequestException as e:
        print(f"Download error: {e}")
    except yaml.YAMLError as e:
        print(f"YAML parse error: {e}")
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
if __name__ == "__main__":
    # Canonical location of OpenAI's published OpenAPI specification.
    openapi_url = (
        "https://raw.githubusercontent.com/openai/openai-openapi/master/openapi.yaml"
    )
    parser = argparse.ArgumentParser(
        description="Extract OpenAPI spec for specific endpoints."
    )
    parser.add_argument(
        "--endpoints",
        type=str,
        default="/chat/completions",
        help="Comma-separated list of endpoint paths (e.g., '/chat/completions,/embeddings')",
    )
    parser.add_argument(
        "--output_dir",
        type=str,
        default="output_specs",
        help="Directory to save the filtered specs (default: output_specs)",
    )
    args = parser.parse_args()

    # Drop empty entries so a trailing or doubled comma doesn't produce
    # a bogus '' endpoint.
    target_endpoints = [ep.strip() for ep in args.endpoints.split(",") if ep.strip()]
    output_directory = args.output_dir

    # Fetch the spec once up front purely to collect the list of valid
    # endpoint paths used for error reporting. Fail fast with a readable
    # message (instead of a traceback) if the spec can't be retrieved.
    try:
        response = requests.get(openapi_url, timeout=30)
        response.raise_for_status()
        openapi_spec = yaml.safe_load(response.text)
    except (requests.exceptions.RequestException, yaml.YAMLError) as e:
        raise SystemExit(f"Failed to fetch OpenAPI spec from {openapi_url}: {e}")
    valid_endpoints = list(openapi_spec.get("paths", {}).keys())

    for endpoint in target_endpoints:
        print(f"Extracting specification for endpoint: {endpoint}")
        extract_endpoint_spec(openapi_url, endpoint, output_directory, valid_endpoints)
@andrewginns
Copy link
Author

andrewginns commented Feb 4, 2025

OpenAI's spec is ridiculously large. I wanted a way to reliably extract only the most relevant information from spec to provide as context to an LLM.

I think this approach works well to filter the most up to date spec down into what is needed for each endpoint.

e.g. as of 04/02/2025 the full spec is 905,973 characters long, while the parts relevant to /chat/completions come to 82,481 characters.

  • That is a >10x reduction in characters (and therefore tokens)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment