Skip to content

Instantly share code, notes, and snippets.

@amosgyamfi
Last active October 17, 2025 06:33
Show Gist options
  • Save amosgyamfi/5b38c8024652e20ce763cf0ecc1ad566 to your computer and use it in GitHub Desktop.
import asyncio
import logging
from uuid import uuid4

from dotenv import load_dotenv
from vision_agents.core import agents, cli
from vision_agents.core.edge.types import User
from vision_agents.plugins import getstream, gemini

# Load API credentials (Stream / Gemini keys) from a local .env file.
# BUG FIX: load_dotenv was imported but never called, so environment
# variables from .env were silently ignored.
load_dotenv()

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
async def start_agent() -> None:
    """Create a realtime voice agent, join a fresh Stream call, and run until the call ends.

    Builds an Agent backed by Stream's edge network and a Gemini Realtime LLM
    (speech-to-speech capable, so no separate TTS/STT/VAD is needed), opens the
    demo UI for the call, has the agent join, sends one opening prompt, and then
    blocks until the call finishes.
    """
    llm = gemini.Realtime()
    # create an agent to run with Stream's edge, Gemini llm
    agent = agents.Agent(
        edge=getstream.Edge(),  # low latency edge. clients for React, iOS, Android, RN, Flutter etc.
        agent_user=User(name="My happy AI friend", id="agent"),  # the user object for the agent (name, image etc)
        instructions="You're a voice AI assistant. Keep responses short and conversational. Don't use special characters or formatting. Be friendly and helpful.",
        processors=[],  # processors can fetch extra data, check images/audio data or transform video
        # llm with tts & stt. if you use a realtime (sts capable) llm the tts, stt and vad aren't needed
        llm=llm,
    )
    await agent.create_user()

    # Create a call with a random unique id
    call = agent.edge.client.video.call("default", str(uuid4()))

    # Open the demo UI in the browser so a human can join the call
    await agent.edge.open_demo(call)

    # Have the agent join the call/room; the context manager handles leaving/cleanup
    with await agent.join(call):
        # Example 1: standardized simple response — kick off the conversation
        await agent.llm.simple_response("chat with the user about the weather.")
        # run till the call ends
        await agent.finish()
if __name__ == "__main__":
    # Hand the agent factory to the CLI dispatcher, which manages the event loop
    # and agent lifecycle.
    asyncio.run(cli.start_dispatcher(start_agent))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment