Created December 8, 2025 18:49
#!/bin/bash
#
# Check that current Google models on Vertex AI work with the global endpoint.
#
# Usage:
#   ./check_global_endpoint.sh PROJECT_ID
#
# Prerequisites:
#   - gcloud CLI installed and authenticated
#   - curl, jq
#
# Reference:
#   https://cloud.google.com/vertex-ai/generative-ai/docs/learn/model-versions

set -euo pipefail

PROJECT_ID="${1:-}"
if [[ -z "$PROJECT_ID" ]]; then
  echo "Usage: $0 PROJECT_ID" >&2
  exit 1
fi

ENDPOINT="https://aiplatform.googleapis.com/v1/projects/${PROJECT_ID}/locations/global/publishers/google/models"
TOKEN=$(gcloud auth print-access-token)

# Current stable Google models on Vertex AI (as of 2025-12)
# Reference: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/model-versions
#
# Note: gemini-2.5-pro, gemini-2.5-flash, and gemini-3-pro-preview use "thinking" mode,
# which consumes tokens for internal reasoning, so maxOutputTokens must be large enough
# to leave room for a visible response.
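# (Assumption worth verifying: if a thinking model spends its whole budget on reasoning,
# candidates[0].content.parts comes back empty; the response's usageMetadata, including
# thoughtsTokenCount where present, shows where the tokens went.)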
LLM_MODELS=(
  "gemini-2.5-pro"
  "gemini-2.5-flash"
  "gemini-2.5-flash-lite"
  "gemini-2.0-flash-001"
  "gemini-2.0-flash-lite-001"
  # Preview
  "gemini-3-pro-preview"
)

EMBEDDING_MODELS=(
  "gemini-embedding-001"
  "text-embedding-005"
  "text-multilingual-embedding-002"
)

echo "Project: ${PROJECT_ID}"
echo "Location: global"
echo "Endpoint: aiplatform.googleapis.com"
echo ""

FAILED=0

check_llm() {
  local model="$1"
  local url="${ENDPOINT}/${model}:generateContent"
  local response
  # "|| true": under set -e, a transient curl/network failure would otherwise
  # abort the whole run instead of being reported as a failed model below.
  response=$(curl -s -X POST "$url" \
    -H "Authorization: Bearer ${TOKEN}" \
    -H "Content-Type: application/json" \
    -d '{
      "contents": [{"role": "user", "parts": [{"text": "Say ok"}]}],
      "generationConfig": {"maxOutputTokens": 200, "temperature": 0}
    }' 2>&1) || true
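  # A successful generateContent response looks roughly like this (abridged):
  #   {"candidates":[{"content":{"role":"model","parts":[{"text":"ok"}]}}],"usageMetadata":{...}}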
| if echo "$response" | jq -e '.candidates[0].content.parts[0].text' > /dev/null 2>&1; then | |
| local text | |
| text=$(echo "$response" | jq -r '.candidates[0].content.parts[0].text' | head -c 30) | |
| echo " ✓ ${model}: ${text}" | |
| else | |
| local error | |
| error=$(echo "$response" | jq -r '.error.message // .error // "unknown error"' 2>/dev/null | head -c 60) | |
| echo " ✗ ${model}: ${error}" | |
| FAILED=1 | |
| fi | |
| } | |
| check_embedding() { | |
| local model="$1" | |
| local url="${ENDPOINT}/${model}:predict" | |
| local response | |
| response=$(curl -s -X POST "$url" \ | |
| -H "Authorization: Bearer ${TOKEN}" \ | |
| -H "Content-Type: application/json" \ | |
| -d '{ | |
| "instances": [{"content": "Hello, world!"}] | |
| }' 2>&1) | |
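  # A successful predict response looks roughly like this (abridged):
  #   {"predictions":[{"embeddings":{"values":[0.0123, ...],"statistics":{...}}}]}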
| if echo "$response" | jq -e '.predictions[0].embeddings.values' > /dev/null 2>&1; then | |
| local dim | |
| dim=$(echo "$response" | jq '.predictions[0].embeddings.values | length') | |
| echo " ✓ ${model}: dim=${dim}" | |
| else | |
| local error | |
| error=$(echo "$response" | jq -r '.error.message // .error // "unknown error"' 2>/dev/null | head -c 60) | |
| echo " ✗ ${model}: ${error}" | |
| FAILED=1 | |
| fi | |
| } | |
| echo "=== LLM Models ===" | |
| for model in "${LLM_MODELS[@]}"; do | |
| check_llm "$model" | |
| done | |
| echo "" | |
| echo "=== Embedding Models ===" | |
| for model in "${EMBEDDING_MODELS[@]}"; do | |
| check_embedding "$model" | |
| done | |
| echo "" | |
| if [[ $FAILED -eq 0 ]]; then | |
| echo "All models passed!" | |
| exit 0 | |
| else | |
| echo "Some models failed." | |
| exit 1 | |
| fi |
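An example run (illustrative only: the project ID is a placeholder, and the exact response snippets and embedding dimensions depend on which models your project can access; models you lack access to show ✗ lines instead):

  $ ./check_global_endpoint.sh my-project
  Project: my-project
  Location: global
  Endpoint: aiplatform.googleapis.com

  === LLM Models ===
   ✓ gemini-2.5-pro: Ok
   ✓ gemini-2.5-flash: Ok
   ...

  === Embedding Models ===
   ✓ gemini-embedding-001: dim=3072
   ...

  All models passed!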