Created
November 6, 2025 15:18
-
-
Save amosgyamfi/ca7a24538d18b078cfbb232efe493027 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import asyncio | |
| import logging | |
| from uuid import uuid4 | |
| from dotenv import load_dotenv | |
| from vision_agents.core.edge.types import User | |
| from vision_agents.core.agents import Agent | |
| from vision_agents.plugins import getstream, deepgram, smart_turn, gemini, cartesia | |
# Load environment variables (API keys etc.) from a local .env file, if any.
load_dotenv()

# NOTE(review): "call_id" is not a standard LogRecord attribute; presumably
# vision_agents injects it into every record — confirm, otherwise formatting
# a record without it will fail inside the logging handler.
_LOG_FORMAT = "%(asctime)s %(levelname)s [call_id=%(call_id)s] %(name)s: %(message)s"
logging.basicConfig(
    level=logging.INFO,
    format=_LOG_FORMAT,
)

# Module-level logger named after this module.
logger = logging.getLogger(__name__)
async def start_agent() -> None:
    """Run a demo voice agent that speaks through Cartesia Sonic 3.

    The agent is assembled from:
    - Cartesia Sonic 3 for text-to-speech (TTS)
    - Deepgram for speech-to-text (STT)
    - Gemini ("gemini-2.0-flash") as the language model
    - Smart Turn for turn detection
    - GetStream for edge/real-time communication

    Requirements:
    - CARTESIA_API_KEY environment variable
    - STREAM_API_KEY and STREAM_API_SECRET environment variables
    - GEMINI_API_KEY environment variable
    - DEEPGRAM_API_KEY environment variable
    """
    # Build each component first, in the same order the Agent receives them.
    transport = getstream.Edge()
    bot_identity = User(name="Friendly AI", id="agent")
    prompt = "You're a voice AI assistant. Short replies only, no special characters. Respond only in English using Sarah's voice."
    speech_out = cartesia.TTS()  # Uses Cartesia Sonic 3 for text-to-speech
    speech_in = deepgram.STT()
    language_model = gemini.LLM("gemini-2.0-flash")
    turn_detector = smart_turn.TurnDetection()

    voice_agent = Agent(
        edge=transport,
        agent_user=bot_identity,
        instructions=prompt,
        tts=speech_out,
        stt=speech_in,
        llm=language_model,
        turn_detection=turn_detector,
    )
    await voice_agent.create_user()

    # Create a call of type "default" with a fresh random id, then open the demo
    # for it (presumably a browser UI for joining the call — confirm).
    video_client = voice_agent.edge.client.video
    call = video_client.call("default", str(uuid4()))
    await voice_agent.edge.open_demo(call)

    greeting = "Hello! I'm using Cartesia Sonic 3 for text-to-speech. How can I help you today?"
    with await voice_agent.join(call):
        # Wait five seconds before greeting (presumably so a participant has
        # time to join — confirm).
        await asyncio.sleep(5)
        await voice_agent.llm.simple_response(text=greeting)
        # Await the agent's finish() before leaving the call context.
        await voice_agent.finish()
# Script entry point: run the demo agent only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    demo_coroutine = start_agent()
    asyncio.run(demo_coroutine)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment