Created
September 30, 2025 11:39
-
-
Save amosgyamfi/0111174190c4c7015bd0e10f3a8f8db1 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Ask a Gemini Robotics-ER model to point at objects in a local image.

The script loads an image from disk, downsizes it to a fixed width while
keeping its aspect ratio, sends it together with a pointing prompt to the
model, and prints the raw text of the model's reply.
"""
from google import genai
from google.genai import types
from PIL import Image

# Model that receives the request.
MODEL_ID = "gemini-robotics-er-1.5-preview"

# Instruction sent alongside the image; it asks for a JSON list of labelled
# points in [y, x] order, normalized to 0-1000.
PROMPT = """
Point to no more than 10 items in the image. The label returned
should be an identifying name for the object detected.
The answer should follow the json format: [{"point": <point>,
"label": <label1>}, ...]. The points are in [y, x] format
normalized to 0-1000.
"""

# Image to analyse and the width (in pixels) it is scaled to before sending.
IMAGE_PATH = "my-image.png"
TARGET_WIDTH = 800

# Initialize the GenAI client.
client = genai.Client()

# Load the image and resize it (the original note gives "speed-up rendering"
# as the reason). The context manager releases the source file handle as soon
# as the resized copy exists.
with Image.open(IMAGE_PATH) as source_image:
    source_width, source_height = source_image.size
    # Clamp to 1 so an extremely wide image cannot truncate to a zero height,
    # which resize() would reject.
    target_height = max(1, int(TARGET_WIDTH * source_height / source_width))
    img = source_image.resize(
        (TARGET_WIDTH, target_height),
        Image.Resampling.LANCZOS,
    )

image_response = client.models.generate_content(
    model=MODEL_ID,
    contents=[
        img,
        PROMPT,
    ],
    config=types.GenerateContentConfig(
        temperature=0.5,
        # A thinking budget of 0 is requested for this call.
        thinking_config=types.ThinkingConfig(thinking_budget=0),
    ),
)

print(image_response.text)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment