Created
October 29, 2025 10:18
-
-
Save amosgyamfi/955b1031ed26bcd4461ea3efb1d1a0cb to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import asyncio | |
| import logging | |
| from uuid import uuid4 | |
| from dotenv import load_dotenv | |
| from vision_agents.core.edge.types import User | |
| from vision_agents.core.agents import Agent | |
| from vision_agents.plugins import fish, getstream, deepgram, smart_turn, xai | |
# Load variables from a .env file (if one is found) into the process environment
# before any plugin below is constructed; the demo expects its API keys there.
load_dotenv()
# Layout of every log line. ``call_id`` is NOT a standard LogRecord attribute,
# so a record that reaches a handler without it cannot be formatted with this
# string: logging reports "--- Logging error ---" instead of the message.
LOG_FORMAT = "%(asctime)s %(levelname)s [call_id=%(call_id)s] %(name)s: %(message)s"


class CallIdDefaultFilter(logging.Filter):
    """Give records that lack a ``call_id`` attribute a placeholder value.

    Records that already carry ``call_id`` are left untouched, so this only
    affects log lines that would otherwise fail to format.
    NOTE(review): vision_agents may already inject ``call_id`` through its own
    logging hooks; if so this filter is a harmless no-op for those records.
    """

    def filter(self, record: logging.LogRecord) -> bool:
        """Fill in the placeholder when needed; never drop the record."""
        if not hasattr(record, "call_id"):
            record.call_id = "-"
        return True


def _install_call_id_default() -> None:
    """Attach the placeholder filter to every handler on the root logger."""
    fallback = CallIdDefaultFilter()
    # Handler-level (rather than logger-level) filters also see records that
    # propagate up from child loggers, e.g. third-party libraries.
    for handler in logging.getLogger().handlers:
        handler.addFilter(fallback)


logging.basicConfig(level=logging.INFO, format=LOG_FORMAT)
_install_call_id_default()
logger = logging.getLogger(__name__)
async def start_agent() -> None:
    """Run a short voice-agent demo that speaks through Fish Audio TTS.

    The agent is assembled from these plugins:
        - getstream: edge / real-time communication
        - fish: text-to-speech (TTS)
        - deepgram: speech-to-text (STT)
        - xai: language model ("grok-4")
        - smart_turn: turn detection

    Environment variables required by those services:
        - FISH_AUDIO_API_KEY
        - DEEPGRAM_API_KEY
        - STREAM_API_KEY and STREAM_API_SECRET
        - XAI_API_KEY
    """
    # Build each component up front, in the order the Agent receives them.
    transport = getstream.Edge()
    persona = User(name="Friendly AI", id="agent")
    persona_prompt = "You're a voice AI assistant. Short replies only no special characters. Don't mention asterisk characters when speaking"
    speech_out = fish.TTS()  # Fish Audio performs the text-to-speech
    speech_in = deepgram.STT()
    model = xai.LLM("grok-4")
    turn_taking = smart_turn.TurnDetection(buffer_duration=2.0, confidence_threshold=0.5)

    agent = Agent(
        edge=transport,
        agent_user=persona,
        instructions=persona_prompt,
        tts=speech_out,
        stt=speech_in,
        llm=model,
        turn_detection=turn_taking,
    )
    await agent.create_user()

    # Every run uses a new call of type "default" with a random UUID as its id.
    video_api = agent.edge.client.video
    demo_call = video_api.call("default", str(uuid4()))
    await agent.edge.open_demo(demo_call)

    session = await agent.join(demo_call)
    with session:
        # NOTE(review): presumably this pause gives a participant time to
        # connect before the greeting is spoken -- confirm.
        await asyncio.sleep(5)
        # The agent greets the user; the reply is voiced by Fish Audio TTS.
        greeting = "Hello! I'm using Fish Audio for text-to-speech. How can I help you today?"
        await agent.llm.simple_response(text=greeting)
        await agent.finish()
if __name__ == "__main__":
    # Script entry point: drive the demo coroutine to completion on a fresh
    # event loop. Nothing runs when this module is merely imported.
    demo = start_agent()
    asyncio.run(demo)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment