# GitHub Gist by @amosgyamfi, created September 30, 2025 11:39.
# Gist ID: amosgyamfi/0111174190c4c7015bd0e10f3a8f8db1
from google import genai
from google.genai import types
from PIL import Image
# Ask a Gemini Robotics-ER model to point at objects in a local image and
# print the raw text of the reply.

# Model to query and the pointing prompt sent alongside the image.
MODEL_ID = "gemini-robotics-er-1.5-preview"
PROMPT = """
Point to no more than 10 items in the image. The label returned
should be an identifying name for the object detected.
The answer should follow the json format: [{"point": <point>,
"label": <label1>}, ...]. The points are in [y, x] format
normalized to 0-1000.
"""

# Initialize the GenAI client.
client = genai.Client()

# Load the image and scale it to 800 px wide, keeping the aspect ratio.
# The context manager closes the source file as soon as the resized copy
# (an independent image object) has been produced.
with Image.open("my-image.png") as source_image:
    width, height = source_image.size
    # Clamp to 1 px: for extremely wide images the scaled height would
    # truncate to 0, which is not a valid resize target.
    target_size = (800, max(1, int(800 * height / width)))
    # Resizing to speed-up rendering
    img = source_image.resize(target_size, Image.Resampling.LANCZOS)

# Send the resized image followed by the prompt in a single request.
image_response = client.models.generate_content(
    model=MODEL_ID,
    contents=[
        img,
        PROMPT,
    ],
    config=types.GenerateContentConfig(
        temperature=0.5,
        thinking_config=types.ThinkingConfig(thinking_budget=0),
    ),
)
print(image_response.text)
# (GitHub page footer: sign up or sign in on GitHub to comment on this gist.)