Last active
September 23, 2025 08:16
-
-
Save patrickloeber/3bf3e5d5329aa45b242b3b2218973772 to your computer and use it in GitHub Desktop.
Native audio example
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| """ | |
| Install dependencies: | |
| brew install portaudio | |
| pip install -U google-genai pyaudio | |
| Use headphones to avoid echo (this script performs no echo cancellation), then talk to Gemini. | |
| """ | |
| import asyncio | |
| import sys | |
| import traceback | |
| import pyaudio | |
| from google import genai | |
| if sys.version_info < (3, 11, 0): | |
| import taskgroup, exceptiongroup | |
| asyncio.TaskGroup = taskgroup.TaskGroup | |
| asyncio.ExceptionGroup = exceptiongroup.ExceptionGroup | |
# Audio parameters shared by the capture and playback streams.
FORMAT = pyaudio.paInt16
CHANNELS = 1
# Sample rate (Hz) used when opening the microphone stream.
SEND_SAMPLE_RATE = 16000
# Sample rate (Hz) used when opening the playback stream.
RECEIVE_SAMPLE_RATE = 24000
# Frames per buffer for the microphone stream, and frames requested per read.
CHUNK_SIZE = 1024

# Single PyAudio instance used to open every stream in this script.
pya = pyaudio.PyAudio()

# GEMINI_API_KEY must be set as env variable
client = genai.Client(http_options=dict(api_version="v1alpha"))

system_instruction = """You are a helpful and friendly AI assistant.
Your default tone is helpful, engaging, and clear, with a touch of optimistic wit.
Anticipate user needs by clarifying ambiguous questions and always conclude your responses
with an engaging follow-up question to keep the conversation flowing."""

MODEL = "gemini-2.5-flash-native-audio-preview-09-2025"

# Session configuration handed to ``client.aio.live.connect``.
CONFIG = dict(
    system_instruction=system_instruction,
    response_modalities=["AUDIO"],
    proactivity=dict(proactive_audio=True),
)
class AudioLoop:
    """Stream microphone audio to a live session and play back its audio replies.

    ``run`` opens the session and starts four concurrent tasks that talk to
    each other through two asyncio queues:

    * ``listen_audio``  -> ``out_queue``      (microphone chunks to send)
    * ``send_realtime`` <- ``out_queue``      (forwards chunks to the session)
    * ``receive_audio`` -> ``audio_in_queue`` (audio chunks from the session)
    * ``play_audio``    <- ``audio_in_queue`` (writes chunks to the speaker)
    """

    def __init__(self):
        # Queues and session are created inside ``run`` once a loop is running.
        self.audio_in_queue = None
        self.out_queue = None
        self.session = None
        # Microphone (input) stream, opened by ``listen_audio``.
        self.audio_stream = None
        # Speaker (output) stream, opened by ``play_audio``; kept on the
        # instance so ``run`` can close it on shutdown.
        self.output_stream = None
        self.receive_audio_task = None
        self.play_audio_task = None

    async def listen_audio(self):
        """Open the default input device and queue microphone chunks forever."""
        mic_info = pya.get_default_input_device_info()
        self.audio_stream = await asyncio.to_thread(
            pya.open,
            format=FORMAT,
            channels=CHANNELS,
            rate=SEND_SAMPLE_RATE,
            input=True,
            input_device_index=mic_info["index"],
            frames_per_buffer=CHUNK_SIZE,
        )
        # In a normal (non ``-O``) run, ask the stream not to raise on input
        # overflow; under ``-O`` no keyword is passed and the library default
        # applies.
        kwargs = {"exception_on_overflow": False} if __debug__ else {}
        while True:
            # The blocking read runs in a worker thread so the event loop
            # stays responsive.
            data = await asyncio.to_thread(self.audio_stream.read, CHUNK_SIZE, **kwargs)
            await self.out_queue.put({"data": data, "mime_type": "audio/pcm"})

    async def send_realtime(self):
        """Forward every queued microphone chunk to the session."""
        while True:
            msg = await self.out_queue.get()
            await self.session.send_realtime_input(audio=msg)

    async def receive_audio(self):
        """Read session responses and queue their audio chunks for playback.

        Responses that carry audio data are put on ``audio_in_queue``; text
        responses are printed to stdout without a trailing newline.
        """
        while True:
            turn = self.session.receive()
            async for response in turn:
                if data := response.data:
                    self.audio_in_queue.put_nowait(data)
                    continue
                if text := response.text:
                    print(text, end="")
            # The turn's iterator is exhausted: discard audio that is still
            # queued but not yet played.
            # NOTE(review): presumably this is what stops playback promptly
            # when the model is interrupted -- confirm against the Live API
            # documentation.
            while not self.audio_in_queue.empty():
                self.audio_in_queue.get_nowait()

    async def play_audio(self):
        """Open an output stream and play queued audio chunks forever."""
        stream = self.output_stream = await asyncio.to_thread(
            pya.open,
            format=FORMAT,
            channels=CHANNELS,
            rate=RECEIVE_SAMPLE_RATE,
            output=True,
        )
        while True:
            bytestream = await self.audio_in_queue.get()
            # Blocking write is pushed to a worker thread.
            await asyncio.to_thread(stream.write, bytestream)

    def _close_streams(self):
        """Close whichever audio streams are open; safe to call repeatedly."""
        for attr in ("audio_stream", "output_stream"):
            stream = getattr(self, attr, None)
            if stream is not None:
                # Clear the reference first so a second call is a no-op.
                setattr(self, attr, None)
                stream.close()

    async def run(self):
        """Connect to the model and run the four audio tasks until they stop.

        Cancellation is absorbed silently. Any other exception is printed as
        a traceback rather than propagated. The audio streams are closed on
        every exit path.
        """
        try:
            async with (
                client.aio.live.connect(model=MODEL, config=CONFIG) as session,
                asyncio.TaskGroup() as tg,
            ):
                self.session = session
                self.audio_in_queue = asyncio.Queue()
                # Small bound so the microphone reader waits on ``put`` when
                # the sender falls behind instead of queueing without limit.
                self.out_queue = asyncio.Queue(maxsize=5)
                tg.create_task(self.send_realtime())
                tg.create_task(self.listen_audio())
                tg.create_task(self.receive_audio())
                tg.create_task(self.play_audio())
        except asyncio.CancelledError:
            pass
        except Exception:
            print(traceback.format_exc())
        finally:
            # Previously only the microphone stream was closed, and only on
            # the ``except Exception`` path; the playback stream was never
            # closed at all.
            # NOTE(review): a worker thread may still be inside a blocking
            # read/write when this runs (the original error path had the same
            # property) -- confirm PyAudio tolerates close() in that state.
            self._close_streams()
if __name__ == "__main__":
    # Script entry point: build the audio loop and drive its ``run``
    # coroutine to completion on a fresh event loop.
    loop = AudioLoop()
    asyncio.run(
        loop.run()
    )
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment