Skip to content

Instantly share code, notes, and snippets.

@andrewginns
Last active February 4, 2025 00:09
Show Gist options
  • Save andrewginns/1fa5f67bd670823813b802b90f26e295 to your computer and use it in GitHub Desktop.
Save andrewginns/1fa5f67bd670823813b802b90f26e295 to your computer and use it in GitHub Desktop.
Filter the latest OpenAPI (YAML) specification to extract only the parts relevant to specific API endpoints.
# /// script
# dependencies = [
# "requests",
# "pyyaml",
# "argparse"
# ]
# ///
"""
This script downloads an OpenAPI specification from a given URL,
filters it to extract information related to specific API endpoints,
and saves the filtered specifications to individual YAML files.
Inline script for automatic dependency installation when running with uv (recommended).
It uses the following libraries:
- requests: To download the OpenAPI specification from a URL.
- pyyaml: To parse and dump YAML files.
- argparse: To handle command-line arguments for specifying endpoints and output directory.
The script takes the following command-line arguments:
--endpoints: Comma-separated list of endpoint paths to extract (e.g., '/chat/completions,/embeddings').
Defaults to '/chat/completions' if not provided.
--output_dir: Directory where the filtered YAML specs will be saved. Defaults to 'output_specs'.
Example usage with uv (recommended):
uv run fetch_OAI_spec.py --endpoints "/chat/completions"
Example usage:
pip install requests pyyaml argparse
python script_name.py --endpoints "/chat/completions,/v1/models" --output_dir custom_specs
"""
import requests
import yaml
import argparse
import os
def extract_endpoint_spec(
    openapi_url: str,
    endpoint_path: str,
    output_dir: str,
    valid_endpoints: list[str] | None = None,
) -> None:
    """Download an OpenAPI spec, filter it to one endpoint, and save it as YAML.

    The filtered spec keeps the top-level ``openapi``/``info``/``servers``
    sections, the single requested path entry, and only those
    ``components.schemas`` entries that are (transitively) referenced via
    ``$ref`` from the endpoint's POST requestBody/responses.

    Args:
        openapi_url: URL of the OpenAPI YAML file.
        endpoint_path: Endpoint path to filter for (e.g. '/chat/completions').
            A missing leading '/' is added automatically.
        output_dir: Directory to save the filtered spec YAML file into
            (created if necessary).
        valid_endpoints: Optional list of valid endpoint paths, printed as a
            hint when ``endpoint_path`` is not found in the spec.

    Returns:
        None. Errors are reported on stdout rather than raised.
    """
    try:
        # timeout prevents the script from hanging forever on a stalled server.
        response = requests.get(openapi_url, timeout=30)
        response.raise_for_status()
        # safe_load: never execute arbitrary YAML tags from a remote document.
        openapi_spec = yaml.safe_load(response.text)

        # Normalize endpoint path: ensure it starts with '/'
        endpoint_path = (
            endpoint_path if endpoint_path.startswith("/") else "/" + endpoint_path
        )

        if endpoint_path in openapi_spec.get("paths", {}):
            endpoint_info = openapi_spec["paths"][endpoint_path]
            components_schemas = openapi_spec.get("components", {}).get("schemas", {})
            relevant_schemas = set()

            def find_schema_refs(data, path="root"):
                # Recursively walk the structure collecting every schema name
                # reachable through "$ref"; the visited-set guard in
                # relevant_schemas prevents infinite loops on cyclic schemas.
                if isinstance(data, dict):
                    if "$ref" in data:
                        schema_name = data["$ref"].split("/")[-1]
                        if schema_name not in relevant_schemas:
                            relevant_schemas.add(schema_name)
                            if schema_name in components_schemas:
                                find_schema_refs(
                                    components_schemas[schema_name],
                                    path=path + "->" + schema_name,
                                )
                    else:
                        for key, value in data.items():
                            find_schema_refs(value, path=path + "->" + key)
                elif isinstance(data, list):
                    for index, item in enumerate(data):
                        find_schema_refs(item, path=path + f"->[{index}]")

            # Only the POST operation is examined; other verbs (GET, DELETE,
            # ...) are kept in the path entry but their refs are not chased.
            post_operation = endpoint_info.get("post")
            if post_operation:
                if "requestBody" in post_operation:
                    find_schema_refs(post_operation["requestBody"], path="requestBody")
                if "responses" in post_operation:
                    find_schema_refs(post_operation["responses"], path="responses")

            filtered_components_schemas = {
                schema_name: components_schemas[schema_name]
                for schema_name in relevant_schemas
                if schema_name in components_schemas
            }

            filtered_spec = {
                "openapi": openapi_spec.get("openapi", "3.0.0"),
                "info": openapi_spec.get("info"),
                "servers": openapi_spec.get("servers"),
                "paths": {endpoint_path: endpoint_info},
                "components": {"schemas": filtered_components_schemas},
            }

            # Ensure output directory exists
            os.makedirs(output_dir, exist_ok=True)
            # '/chat/completions' -> 'chat_completions.yaml'
            filename = os.path.join(
                output_dir, endpoint_path.strip("/").replace("/", "_") + ".yaml"
            )
            with open(filename, "w") as f:
                yaml.dump(filtered_spec, f, indent=2, sort_keys=False)
            # Bug fix: previously printed a literal placeholder instead of
            # the actual output path.
            print(f"Specification for '{endpoint_path}' saved to {filename}")
        else:
            print(f"Endpoint path '{endpoint_path}' not found.")
            if valid_endpoints:
                print("Valid endpoint paths are:")
                for valid_ep in sorted(valid_endpoints):  # Sort for better readability
                    print(f"- {valid_ep}")
    except requests.exceptions.RequestException as e:
        print(f"Download error: {e}")
    except yaml.YAMLError as e:
        print(f"YAML parse error: {e}")
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
if __name__ == "__main__":
    # Canonical location of OpenAI's published OpenAPI specification.
    openapi_url = (
        "https://raw.githubusercontent.com/openai/openai-openapi/master/openapi.yaml"
    )
    parser = argparse.ArgumentParser(
        description="Extract OpenAPI spec for specific endpoints."
    )
    parser.add_argument(
        "--endpoints",
        type=str,
        default="/chat/completions",
        help="Comma-separated list of endpoint paths (e.g., '/chat/completions,/embeddings')",
    )
    parser.add_argument(
        "--output_dir",
        type=str,
        default="output_specs",
        help="Directory to save the filtered specs (default: output_specs)",
    )
    args = parser.parse_args()

    # Drop empty entries so a trailing or doubled comma doesn't produce
    # a bogus '' endpoint.
    target_endpoints = [ep.strip() for ep in args.endpoints.split(",") if ep.strip()]
    output_directory = args.output_dir

    # Fetch the spec once up front purely to collect the list of valid
    # endpoint paths used for error reporting. Fail fast with a readable
    # message (instead of a traceback) if the spec can't be retrieved.
    try:
        response = requests.get(openapi_url, timeout=30)
        response.raise_for_status()
        openapi_spec = yaml.safe_load(response.text)
    except (requests.exceptions.RequestException, yaml.YAMLError) as e:
        raise SystemExit(f"Failed to fetch OpenAPI spec from {openapi_url}: {e}")
    valid_endpoints = list(openapi_spec.get("paths", {}).keys())

    for endpoint in target_endpoints:
        print(f"Extracting specification for endpoint: {endpoint}")
        extract_endpoint_spec(openapi_url, endpoint, output_directory, valid_endpoints)
@andrewginns
Copy link
Author

andrewginns commented Feb 4, 2025

OpenAI's spec is ridiculously large. I wanted a way to reliably extract only the most relevant information from spec to provide as context to an LLM.

I think this approach works well to filter the most up to date spec down into what is needed for each endpoint.

e.g. as of 04/02/2025 the full spec is 905,973 characters long, while the parts relevant to /chat/completions come to 82,481 characters.

  • That is a >10x reduction in characters (and therefore tokens)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment