Created
April 18, 2026 04:11
-
-
Save pokutuna/839ceca4a98c9be0fbf9eea9b23aed53 to your computer and use it in GitHub Desktop.
cuda-img: find container images by CUDA env vars (PEP 723 + crane)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env -S uv run --script | |
| # /// script | |
| # requires-python = ">=3.12" | |
| # dependencies = [ | |
| # "click", | |
| # "httpx", | |
| # ] | |
| # /// | |
| """Find container images whose CUDA (or arbitrary env var) matches constraints. | |
| Backend: `crane` CLI for registry access (must be in PATH). | |
| Usage | |
| ----- | |
| cuda-img env IMAGE[:TAG] [--var VAR] | |
| cuda-img scan REPO [options] | |
| Examples | |
| -------- | |
| # Single image: CUDA_VERSION of a specific tag | |
| cuda-img env ghcr.io/ggml-org/llama.cpp:server-cuda-b5343 | |
| cuda-img env docker.io/vllm/vllm-openai:v0.13.0 --var NVIDIA_REQUIRE_CUDA | |
| # 10 newest vllm release tags whose base accepts driver 535 | |
| cuda-img scan docker.io/vllm/vllm-openai \\ | |
| --tags '^v[0-9]+\\.[0-9]+\\.[0-9]+$' \\ | |
| --min-driver 535 --limit 10 | |
| # Newest llama.cpp server-cuda tags with CUDA 12.x | |
| cuda-img scan ghcr.io/ggml-org/llama.cpp \\ | |
| --tags '^server-cuda-b[0-9]+$' --max-cuda 12 --limit 10 | |
| Output (scan): TSV tag <TAB> created <TAB> min-driver <TAB> VAR | |
| Caveats | |
| ------- | |
| Static env vars cannot prove that an image runs on a given driver. | |
| `NVIDIA_REQUIRE_CUDA` is the *base image*'s gate (permissive). | |
| `CUDA_VERSION` is the toolkit build version. | |
| Whether the application binary actually runs depends on which CUDA | |
| features it uses at runtime (minor-version compatibility). | |
| Always verify by booting the image on the target GPU. | |
| """ | |
| from __future__ import annotations | |
| import json | |
| import re | |
| import subprocess | |
| import sys | |
| from concurrent.futures import ThreadPoolExecutor, as_completed | |
| from datetime import datetime, timedelta, timezone | |
| import click | |
# Help text for the root command group (passed as `help=` to `click.group`).
# NOTE(review): per click's help-formatting rules, a line holding only \b
# should mark the following paragraph as pre-formatted so it is not
# re-wrapped -- confirm against the click version in use.
CLI_HELP = """\
\b
Find container images whose CUDA (or arbitrary env var) matches constraints.
Backend: `crane` CLI for registry access (must be in PATH).
\b
Examples:
cuda-img env ghcr.io/ggml-org/llama.cpp:server-cuda-b5343
cuda-img env docker.io/vllm/vllm-openai:v0.13.0 --var NVIDIA_REQUIRE_CUDA
\b
cuda-img scan docker.io/vllm/vllm-openai \\
--tags '^v[0-9]+\\.[0-9]+\\.[0-9]+$' --min-driver 535 --limit 10
\b
cuda-img scan ghcr.io/ggml-org/llama.cpp \\
--tags '^server-cuda-b[0-9]+$' --max-cuda 12 --limit 10
scan output (TSV): tag <TAB> created <TAB> min-driver <TAB> VAR
\b
Caveats:
Static env vars cannot prove an image runs on a given driver.
NVIDIA_REQUIRE_CUDA is the *base image*'s gate (permissive).
CUDA_VERSION is the toolkit build version.
Whether the binary actually runs depends on which CUDA features it
uses at runtime (minor-version compatibility).
Always verify by booting the image on the target GPU.
"""
def run_crane(*args: str) -> str:
    """Run the `crane` CLI with *args* and return its standard output.

    Args:
        *args: Command-line arguments passed to `crane` verbatim.

    Returns:
        The captured stdout of the process, decoded as text.

    Raises:
        RuntimeError: If `crane` cannot be started (for example, it is not
            in PATH) or exits with a non-zero status. For a failed run the
            message is crane's stderr when it is non-empty.
    """
    try:
        result = subprocess.run(
            ["crane", *args],
            capture_output=True,
            text=True,
        )
    except OSError as exc:
        # A missing or non-executable binary fails before any exit status
        # exists. Report it with the same exception type as a failed run so
        # callers only have to handle RuntimeError.
        raise RuntimeError(f"cannot run crane: {exc}") from exc
    if result.returncode != 0:
        raise RuntimeError(result.stderr.strip() or f"crane {' '.join(args)} failed")
    return result.stdout
def fetch_config(image: str) -> dict:
    """Return the JSON document printed by `crane config IMAGE`, parsed."""
    return json.loads(run_crane("config", image))
def env_value(config: dict, var: str) -> str | None:
    """Look up environment variable *var* in a parsed image config.

    Args:
        config: Parsed image config. Entries are read from
            ``config["config"]["Env"]`` and split at the first ``=``.
        var: Exact variable name to look for.

    Returns:
        The value of the first entry whose name equals *var*, or None when
        no entry matches.
    """
    # Either level may be missing or JSON null; treat both the same way so a
    # config without an Env list yields "not set" rather than an
    # AttributeError.
    entries = (config.get("config") or {}).get("Env") or []
    for entry in entries:
        name, _, value = entry.partition("=")
        if name == var:
            return value
    return None
def created_at(config: dict) -> datetime | None:
    """Return the image creation time as a timezone-aware datetime.

    Args:
        config: Parsed image config; the timestamp is read from its
            ``created`` key.

    Returns:
        The parsed timestamp, or None when the key is missing, empty, not a
        string, or not a parsable ISO 8601 timestamp. A timestamp without a
        UTC offset is taken to be UTC.
    """
    ts = config.get("created")
    if not ts or not isinstance(ts, str):
        return None
    try:
        # Spell a "Z" suffix as an explicit offset so it parses as UTC
        # whether or not fromisoformat understands "Z" itself.
        parsed = datetime.fromisoformat(ts.replace("Z", "+00:00"))
    except ValueError:
        # One image with a malformed timestamp should not abort a whole
        # scan; report the date as unknown instead.
        return None
    if parsed.tzinfo is None:
        # Callers compare and sort against aware datetimes; mixing in a
        # naive value would raise TypeError.
        parsed = parsed.replace(tzinfo=timezone.utc)
    return parsed
DURATION_RE = re.compile(r"^(\d+)([dwmy])$")

# Length of each duration unit in days. Months and years are fixed
# approximations (30 and 365 days), not calendar-aware.
_DURATION_UNIT_DAYS = {"d": 1, "w": 7, "m": 30, "y": 365}


def parse_duration(value: str) -> timedelta:
    """Parse a duration such as ``90d``, ``2w``, ``3m`` or ``1y``.

    Args:
        value: A non-negative integer followed by one unit letter: ``d``
            (days), ``w`` (weeks), ``m`` (30 days) or ``y`` (365 days).

    Returns:
        The duration as a timedelta.

    Raises:
        click.BadParameter: If *value* is not of that form or is too large
            for a timedelta.
    """
    # fullmatch, because "$" alone would also accept a trailing newline.
    m = DURATION_RE.fullmatch(value)
    if not m:
        raise click.BadParameter(f"expected <N>[d|w|m|y], got {value!r}")
    n, unit = int(m.group(1)), m.group(2)
    try:
        # Build only the timedelta for the requested unit. Building one per
        # unit up front would overflow on the larger units even when the
        # requested one is in range.
        return timedelta(days=n * _DURATION_UNIT_DAYS[unit])
    except OverflowError:
        raise click.BadParameter(f"duration too large: {value!r}") from None
MAX_CUDA_RE = re.compile(r"^(\d+)(?:\.(\d+))?$")


def parse_max_cuda(value: str) -> tuple[int, int | None]:
    """Split a ``X`` or ``X.Y`` version limit into ``(major, minor)``.

    The minor element is None when *value* has no ``.Y`` part. Raises
    click.BadParameter when *value* has neither form.
    """
    parsed = MAX_CUDA_RE.match(value)
    if parsed is None:
        raise click.BadParameter(f"expected X or X.Y, got {value!r}")
    major_text, minor_text = parsed.groups()
    if minor_text is None:
        return int(major_text), None
    return int(major_text), int(minor_text)
CUDA_VERSION_RE = re.compile(r"^(\d+)\.(\d+)")
DRIVER_GE_RE = re.compile(r"driver>=(\d+)")


def min_driver_allowed(config: dict) -> int | None:
    """Return the lowest N among the `driver>=N` clauses of NVIDIA_REQUIRE_CUDA.

    Returns None when the variable is unset or empty, or when it contains no
    `driver>=N` clause.
    """
    requirement = env_value(config, "NVIDIA_REQUIRE_CUDA")
    if not requirement:
        return None
    floors = (int(n) for n in DRIVER_GE_RE.findall(requirement))
    return min(floors, default=None)
def cuda_within(value: str, limit: tuple[int, int | None]) -> bool:
    """Tell whether version string *value* satisfies *limit*.

    *value* must start with ``MAJOR.MINOR``. It satisfies the limit when its
    major equals the limit's major and, if the limit has a minor, its minor
    is not greater than that. A value that does not start with
    ``MAJOR.MINOR`` never satisfies the limit.
    """
    parsed = CUDA_VERSION_RE.match(value)
    if parsed is None:
        return False
    limit_major, limit_minor = limit
    if int(parsed.group(1)) != limit_major:
        return False
    return limit_minor is None or int(parsed.group(2)) <= limit_minor
# Root command group. The help text comes from CLI_HELP (explicit `help=`),
# so the function body carries no docstring of its own.
@click.group(help=CLI_HELP)
def cli() -> None:
    return None
# `env` subcommand: fetch the config of one image and print one environment
# variable from it. Exits with status 1 when the config cannot be read or
# the variable is not set. The docstring below is the help text click shows
# to users, so maintainer notes are kept in comments.
@cli.command()
@click.argument("image")
@click.option("--var", "var_name", default="CUDA_VERSION", show_default=True)
def env(image: str, var_name: str) -> None:
    """Print a single env var from IMAGE (e.g. repo:tag)."""
    try:
        config = fetch_config(image)
    except (RuntimeError, OSError, ValueError) as exc:
        # RuntimeError: crane exited non-zero; OSError: crane could not be
        # started; ValueError: its output was not valid JSON. Report a
        # one-line error instead of a traceback.
        raise click.ClickException(f"cannot read config of {image}: {exc}") from exc
    value = env_value(config, var_name)
    if value is None:
        click.echo(f"{var_name} not set", err=True)
        sys.exit(1)
    click.echo(value)
# `scan` subcommand: list the tags of REPO, fetch every matching tag's config
# in parallel, filter by the CUDA / driver / date constraints, sort by
# creation date and print one TSV row per surviving tag.
# The docstring below is the help text click shows to users, so maintainer
# notes are kept in comments.
@cli.command()
@click.argument("repo")
@click.option("--var", "var_name", default="CUDA_VERSION", show_default=True)
@click.option("--tags", "tag_regex", default=None, help="ERE filter on tag names")
@click.option("--max-cuda", "max_cuda", default=None, help="X or X.Y: major=X (& minor<=Y)")
@click.option("--min-driver", "min_driver", default=None, type=int,
              help="keep tags whose NVIDIA_REQUIRE_CUDA permits this driver (e.g. 535)")
@click.option("--since", "since", default=None, help="created >= now - DUR (e.g. 90d, 1y)")
@click.option("--until", "until", default=None, help="created <= now - DUR")
@click.option("--parallel", "parallel", default=8, show_default=True, type=int)
@click.option("--limit", "limit", default=None, type=int, help="show only N newest rows")
@click.option("--oldest-first", is_flag=True, help="sort ascending (default: newest first)")
def scan(
    repo: str,
    var_name: str,
    tag_regex: str | None,
    max_cuda: str | None,
    min_driver: int | None,
    since: str | None,
    until: str | None,
    parallel: int,
    limit: int | None,
    oldest_first: bool,
) -> None:
    """List REPO tags with env var + created date, filtered.
    Output TSV: tag<TAB>created<TAB>min-driver<TAB>value
    """
    # Validate every option before contacting the registry so bad input
    # fails fast with a usage error rather than a traceback.
    if parallel < 1:
        raise click.BadParameter("must be >= 1", param_hint="--parallel")
    if limit is not None and limit < 0:
        # A negative slice bound would drop rows from the end instead.
        raise click.BadParameter("must be >= 0", param_hint="--limit")
    try:
        pattern = re.compile(tag_regex) if tag_regex else None
    except re.error as exc:
        raise click.BadParameter(f"invalid regex: {exc}", param_hint="--tags") from exc
    max_cuda_tuple = parse_max_cuda(max_cuda) if max_cuda else None
    now = datetime.now(timezone.utc)
    try:
        since_ts = now - parse_duration(since) if since else None
        until_ts = now - parse_duration(until) if until else None
    except OverflowError as exc:
        # The duration, or now minus the duration, is out of range.
        raise click.BadParameter(
            f"duration out of range: {exc}", param_hint="--since/--until"
        ) from exc

    try:
        listing = run_crane("ls", repo)
    except (RuntimeError, OSError) as exc:
        raise click.ClickException(f"cannot list tags of {repo}: {exc}") from exc
    tags = [t.strip() for t in listing.splitlines() if t.strip()]
    if pattern is not None:
        tags = [t for t in tags if pattern.search(t)]

    def work(tag: str):
        # Best effort per tag: a tag whose config cannot be fetched or parsed
        # is skipped (and counted below) instead of aborting the scan.
        try:
            config = fetch_config(f"{repo}:{tag}")
        except (RuntimeError, OSError, ValueError):
            return None
        return tag, created_at(config), env_value(config, var_name), min_driver_allowed(config)

    rows = []
    skipped = 0
    with ThreadPoolExecutor(max_workers=parallel) as pool:
        futures = [pool.submit(work, t) for t in tags]
        for fut in as_completed(futures):
            r = fut.result()
            if r is None:
                skipped += 1
            else:
                rows.append(r)
    if skipped:
        # Goes to stderr so the TSV on stdout stays machine-readable.
        click.echo(
            f"warning: skipped {skipped} of {len(tags)} tags (config fetch failed)",
            err=True,
        )

    def keep(row) -> bool:
        _, created, value, drv = row
        if max_cuda_tuple is not None:
            if value is None or not cuda_within(value, max_cuda_tuple):
                return False
        if min_driver is not None:
            # Keep only images whose lowest advertised driver floor is at or
            # below the requested driver; unknown floors are dropped.
            if drv is None or drv > min_driver:
                return False
        if since_ts is not None and (created is None or created < since_ts):
            return False
        if until_ts is not None and (created is None or created > until_ts):
            return False
        return True

    rows = [r for r in rows if keep(r)]
    # Rows without a creation date sort as the oldest possible date; the tag
    # name breaks ties.
    rows.sort(
        key=lambda r: (r[1] or datetime.min.replace(tzinfo=timezone.utc), r[0]),
        reverse=not oldest_first,
    )
    if limit is not None:
        rows = rows[:limit]
    for tag, created, value, drv in rows:
        created_s = created.strftime("%Y-%m-%d") if created else "-"
        drv_s = str(drv) if drv is not None else "-"
        click.echo(f"{tag}\t{created_s}\t{drv_s}\t{value or '-'}")
# Run the click command group only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    cli()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment