amosgyamfi · October 29, 2025 10:18
diff --git a/fish_tts_grok_example.py b/fish_tts_grok_example.py
 import asyncio
 import logging
 from uuid import uuid4

 from dotenv import load_dotenv

 from vision_agents.core.edge.types import User
 from vision_agents.core.agents import Agent
 from vision_agents.plugins import fish, getstream, deepgram, smart_turn, xai

 load_dotenv()

 # NOTE(review): "call_id" is not a standard LogRecord attribute; this format
 # presumably relies on vision_agents attaching it to every record. Confirm,
 # otherwise records without it will fail to format.
 logging.basicConfig(
     format="%(asctime)s %(levelname)s [call_id=%(call_id)s] %(name)s: %(message)s",
     level=logging.INFO,
 )
 logger = logging.getLogger(__name__)


 async def start_agent() -> None:
    """
    Run a voice-agent demo that speaks through Fish Audio TTS.

    The agent is assembled from these plugins:
    - fish.TTS for text-to-speech
    - deepgram.STT for speech-to-text
    - getstream.Edge as the edge/real-time transport
    - smart_turn.TurnDetection for turn detection
    - xai.LLM("grok-4") as the language model

    Requirements (environment variables):
    - FISH_AUDIO_API_KEY
    - DEEPGRAM_API_KEY
    - STREAM_API_KEY and STREAM_API_SECRET
    - XAI_API_KEY
    """
    # Build every component first, in the same order the Agent receives them.
    edge = getstream.Edge()
    agent_user = User(name="Friendly AI", id="agent")
    prompt = "You're a voice AI assistant. Short replies only no special characters. Don't mention asterisk characters when speaking"
    speech_out = fish.TTS()
    speech_in = deepgram.STT()
    model = xai.LLM("grok-4")
    turn_detector = smart_turn.TurnDetection(buffer_duration=2.0, confidence_threshold=0.5)

    agent = Agent(
        edge=edge,
        agent_user=agent_user,
        instructions=prompt,
        tts=speech_out,
        stt=speech_in,
        llm=model,
        turn_detection=turn_detector,
    )
    await agent.create_user()

    # A fresh random id gives each run its own call of type "default".
    call_id = str(uuid4())
    call = agent.edge.client.video.call("default", call_id)
    await agent.edge.open_demo(call)

    greeting = "Hello! I'm using Fish Audio for text-to-speech. How can I help you today?"
    with await agent.join(call):
        # Five-second pause before speaking; presumably gives a participant
        # time to join the demo call (confirm).
        await asyncio.sleep(5)
        # Ask the LLM for the opening line; per the original note the agent
        # greets the user using Fish Audio TTS.
        await agent.llm.simple_response(text=greeting)
        await agent.finish()


 if __name__ == "__main__":
    # Script entry point: drive the start_agent() coroutine to completion on a
    # new event loop when this file is executed directly.
    asyncio.run(start_agent())
	import asyncio
	import logging
	from uuid import uuid4

	from dotenv import load_dotenv

	from vision_agents.core.edge.types import User
	from vision_agents.core.agents import Agent
	from vision_agents.plugins import fish, getstream, deepgram, smart_turn, xai

	load_dotenv()

	# NOTE(review): "call_id" is not a standard LogRecord attribute; this format
	# presumably relies on vision_agents attaching it to every record. Confirm,
	# otherwise records without it will fail to format.
	logging.basicConfig(
	    format="%(asctime)s %(levelname)s [call_id=%(call_id)s] %(name)s: %(message)s",
	    level=logging.INFO,
	)
	logger = logging.getLogger(__name__)


	async def start_agent() -> None:
	    """
	    Example demonstrating Fish Audio TTS integration with Vision Agents.

	    This example creates an agent that uses:
	    - Fish Audio for text-to-speech (TTS)
	    - Deepgram for speech-to-text (STT)
	    - GetStream for edge/real-time communication
	    - Smart Turn for turn detection
	    - XAI Grok-4 for language model

	    Requirements:
	    - FISH_AUDIO_API_KEY environment variable
	    - DEEPGRAM_API_KEY environment variable
	    - STREAM_API_KEY and STREAM_API_SECRET environment variables
	    - XAI_API_KEY environment variable
	    """
	    agent = Agent(
	        edge=getstream.Edge(),
	        agent_user=User(name="Friendly AI", id="agent"),
	        instructions="You're a voice AI assistant. Short replies only no special characters. Don't mention asterisk characters when speaking",
	        tts=fish.TTS(),  # Uses Fish Audio for text-to-speech
	        stt=deepgram.STT(),
	        llm=xai.LLM("grok-4"),
	        turn_detection=smart_turn.TurnDetection(buffer_duration=2.0, confidence_threshold=0.5),
	    )
	    await agent.create_user()

	    # A random UUID gives each run its own call of type "default".
	    call = agent.edge.client.video.call("default", str(uuid4()))
	    await agent.edge.open_demo(call)

	    with await agent.join(call):
	        # Five-second pause before speaking; presumably gives a participant
	        # time to join the demo call (confirm).
	        await asyncio.sleep(5)
	        # The agent will greet the user using Fish Audio TTS
	        await agent.llm.simple_response(text="Hello! I'm using Fish Audio for text-to-speech. How can I help you today?")
	        await agent.finish()


	if __name__ == "__main__":
	    # Script entry point: drive the start_agent() coroutine to completion on
	    # a new event loop when this file is executed directly.
	    asyncio.run(start_agent())