Skip to content

Instantly share code, notes, and snippets.

@patrickloeber
Last active September 23, 2025 08:16
Show Gist options
  • Select an option

  • Save patrickloeber/3bf3e5d5329aa45b242b3b2218973772 to your computer and use it in GitHub Desktop.

Select an option

Save patrickloeber/3bf3e5d5329aa45b242b3b2218973772 to your computer and use it in GitHub Desktop.
Native audio example
"""
Install dependencies:
brew install portaudio
pip install -U google-genai pyaudio
Use headphones to avoid echo (this script does no echo cancellation of its own), then talk to Gemini.
"""
import asyncio
import sys
import traceback
import pyaudio
from google import genai
if sys.version_info < (3, 11, 0):
import taskgroup, exceptiongroup
asyncio.TaskGroup = taskgroup.TaskGroup
asyncio.ExceptionGroup = exceptiongroup.ExceptionGroup
# Audio stream parameters shared by the input and output streams.
FORMAT = pyaudio.paInt16
CHANNELS = 1
CHUNK_SIZE = 1024  # frames per buffer / per microphone read

# Sample rates passed to pya.open() for the input and output streams.
SEND_SAMPLE_RATE = 16000
RECEIVE_SAMPLE_RATE = 24000

pya = pyaudio.PyAudio()

# GEMINI_API_KEY must be set as env variable
client = genai.Client(http_options={"api_version": "v1alpha"})

MODEL = "gemini-2.5-flash-native-audio-preview-09-2025"

system_instruction = """You are a helpful and friendly AI assistant.
Your default tone is helpful, engaging, and clear, with a touch of optimistic wit.
Anticipate user needs by clarifying ambiguous questions and always conclude your responses
with an engaging follow-up question to keep the conversation flowing."""

# Session configuration handed to client.aio.live.connect().
CONFIG = {
    "system_instruction": system_instruction,
    "response_modalities": ["AUDIO"],
    "proactivity": {"proactive_audio": True},
}
class AudioLoop:
    """Full-duplex voice loop between the local audio devices and a live session.

    ``run()`` starts four concurrent tasks that cooperate through two queues:

    * ``listen_audio`` reads microphone chunks into ``out_queue``.
    * ``send_realtime`` forwards ``out_queue`` items to the session.
    * ``receive_audio`` puts audio received from the session on
      ``audio_in_queue`` and prints any text it receives.
    * ``play_audio`` writes ``audio_in_queue`` chunks to the output stream.
    """

    def __init__(self):
        # Queues and session are created in run(), inside the event loop.
        self.audio_in_queue = None
        self.out_queue = None
        self.session = None
        # Input (microphone) and output (playback) streams; tracked on the
        # instance so run() can close them however it terminates.
        self.audio_stream = None
        self.output_stream = None
        # Not assigned anywhere in this class; kept so existing readers of
        # these attributes keep working.
        self.receive_audio_task = None
        self.play_audio_task = None

    def _close_streams(self):
        """Close any open audio streams and forget them.

        Safe to call repeatedly and on an instance whose streams were never
        opened. A failing ``close()`` is reported but not propagated, so
        cleanup of the second stream still happens.
        """
        for attr in ("audio_stream", "output_stream"):
            stream = getattr(self, attr, None)
            setattr(self, attr, None)
            if stream is None:
                continue
            try:
                stream.close()
            except Exception:
                # Best-effort cleanup: report, but keep closing the rest.
                print(traceback.format_exc())

    async def listen_audio(self):
        """Open the default input device and enqueue microphone chunks forever."""
        mic_info = pya.get_default_input_device_info()
        self.audio_stream = await asyncio.to_thread(
            pya.open,
            format=FORMAT,
            channels=CHANNELS,
            rate=SEND_SAMPLE_RATE,
            input=True,
            input_device_index=mic_info["index"],
            frames_per_buffer=CHUNK_SIZE,
        )
        # Unless Python runs with -O, pass exception_on_overflow=False to read().
        read_kwargs = {"exception_on_overflow": False} if __debug__ else {}
        while True:
            # The blocking read runs in a worker thread to keep the loop free.
            data = await asyncio.to_thread(
                self.audio_stream.read, CHUNK_SIZE, **read_kwargs
            )
            await self.out_queue.put({"data": data, "mime_type": "audio/pcm"})

    async def send_realtime(self):
        """Forward every queued microphone message to the session."""
        while True:
            msg = await self.out_queue.get()
            await self.session.send_realtime_input(audio=msg)

    async def receive_audio(self):
        """Background task that reads from the session and queues PCM chunks for playback."""
        while True:
            turn = self.session.receive()
            async for response in turn:
                if data := response.data:
                    self.audio_in_queue.put_nowait(data)
                    continue
                if text := response.text:
                    print(text, end="")
            # The turn is over: drop audio that is still queued but unplayed.
            # NOTE(review): presumably this is what stops playback when the
            # user interrupts the model -- confirm against the Live API docs.
            while not self.audio_in_queue.empty():
                self.audio_in_queue.get_nowait()

    async def play_audio(self):
        """Open an output stream and play queued audio chunks forever."""
        self.output_stream = await asyncio.to_thread(
            pya.open,
            format=FORMAT,
            channels=CHANNELS,
            rate=RECEIVE_SAMPLE_RATE,
            output=True,
        )
        stream = self.output_stream
        while True:
            bytestream = await self.audio_in_queue.get()
            await asyncio.to_thread(stream.write, bytestream)

    async def run(self):
        """Connect to the model and run the four audio tasks until they stop.

        Cancellation is swallowed and any other exception is printed as a
        traceback rather than raised. The audio streams are closed on every
        exit path, including cancellation.
        """
        try:
            async with (
                client.aio.live.connect(model=MODEL, config=CONFIG) as session,
                asyncio.TaskGroup() as tg,
            ):
                self.session = session
                self.audio_in_queue = asyncio.Queue()
                # Small bound so the microphone cannot run far ahead of sending.
                self.out_queue = asyncio.Queue(maxsize=5)
                tg.create_task(self.send_realtime())
                tg.create_task(self.listen_audio())
                tg.create_task(self.receive_audio())
                tg.create_task(self.play_audio())
        except asyncio.CancelledError:
            pass
        except Exception:
            print(traceback.format_exc())
        finally:
            self._close_streams()
if __name__ == "__main__":
    # Script entry point: drive one AudioLoop session to completion.
    asyncio.run(AudioLoop().run())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment