Last active
May 1, 2025 09:35
-
-
Save mongonta0716/ac89d80d0b22aadb6aedee191ff851c6 to your computer and use it in GitHub Desktop.
M5Stack ModuleLLMでGeminiAPIを使うスクリプト
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os
import subprocess
import tempfile

import google.generativeai as genai
import speech_recognition as sr
from gtts import gTTS
# ==== 設定 ==== | |
# Prefer the GEMINI_API_KEY environment variable so the secret does not have
# to live in the source file; the literal placeholder is kept as a fallback
# and can still be replaced by hand with a real key.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY", "YOUR_API_KEY")
genai.configure(api_key=GEMINI_API_KEY)
# Module-level model handle used by ask_gemini().
model = genai.GenerativeModel(model_name="models/gemini-2.0-flash")
# ==== 録音(arecord) ==== | |
def record_with_arecord(filename="input.wav", duration=10, device="plughw:0,0"):
    """Record audio into a WAV file by invoking the ``arecord`` command.

    Args:
        filename: Path of the WAV file that ``arecord`` writes.
        duration: Value handed to ``arecord -d`` (converted with ``str``).
        device: Capture device name handed to ``arecord -D``.

    The exit status of ``arecord`` is not inspected.
    """
    print("🎙 arecordで録音中...")
    command = [
        "arecord",
        "-D", device,
        "-f", "cd",
        "-t", "wav",
        "-d", str(duration),
        "-r", "16000",
        filename,
    ]
    subprocess.run(command)
# ==== 音声認識 ==== | |
def recognize_from_file(filename="input.wav"):
    """Transcribe an audio file using ``Recognizer.recognize_google`` (ja-JP).

    Args:
        filename: Path of the audio file to load through ``sr.AudioFile``.

    Returns:
        The recognized text, or ``None`` when the recognizer raises
        ``sr.UnknownValueError`` or ``sr.RequestError`` (the latter is also
        reported on stdout).
    """
    recognizer = sr.Recognizer()
    with sr.AudioFile(filename) as audio_source:
        captured = recognizer.record(audio_source)

    try:
        transcript = recognizer.recognize_google(captured, language="ja-JP")
    except sr.UnknownValueError:
        return None
    except sr.RequestError as err:
        print(f"🔌 Google音声認識エラー: {err}")
        return None
    return transcript
# ==== Geminiへの問い合わせ ==== | |
def ask_gemini(user_prompt, system_prompt):
    """Send the system and user prompts to the module-level Gemini ``model``.

    The two prompts are joined with a single newline (system prompt first) and
    submitted as one piece of content.

    Args:
        user_prompt: Text of the user's request.
        system_prompt: Instruction text placed ahead of the user prompt.

    Returns:
        The ``text`` attribute of the response from ``model.generate_content``.
    """
    combined_prompt = "\n".join((system_prompt, user_prompt))
    return model.generate_content(combined_prompt).text
# ==== 音声合成と再生 ==== | |
def speak_text(text):
    """Synthesize *text* as Japanese speech with gTTS and play it with ``aplay``.

    gTTS writes an MP3, which ``ffmpeg`` resamples to a 44100 Hz WAV before
    ``aplay`` plays it. Both intermediate files are created inside a private
    temporary directory, so they never collide with files in the working
    directory and are removed even if synthesis, conversion or playback fails.

    Args:
        text: The text to speak. Empty or whitespace-only text is skipped
            because there is nothing to synthesize.

    Returns:
        None.
    """
    if not text or not text.strip():
        return

    print("🗣 gTTSで音声合成...")
    with tempfile.TemporaryDirectory() as workdir:
        mp3_path = os.path.join(workdir, "response.mp3")
        wav_path = os.path.join(workdir, "response.wav")

        gTTS(text=text, lang="ja").save(mp3_path)

        conversion = subprocess.run(
            ["ffmpeg", "-y", "-i", mp3_path, "-ar", "44100", wav_path],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
        if conversion.returncode != 0:
            # Without a WAV there is nothing to play; report and bail out
            # rather than failing later on a missing file.
            print("❌ ffmpegでの変換に失敗しました")
            return

        subprocess.run(["aplay", wav_path])
# ==== メインループ ==== | |
def main():
    """Run the voice-assistant loop: record, recognize, ask Gemini, speak.

    Each iteration records from the capture device, transcribes the recording,
    and, when text was recognized, sends it to Gemini together with the system
    prompt and speaks the reply. The loop runs until the user presses Ctrl+C,
    which is handled so the program exits without a traceback.
    """
    device = "plughw:0,0"
    system_prompt = "あなたは可愛い手乗りロボットです。回答は語尾に「ござる」を付けて30秒以内に収めてください。"
    print("✅ Ctrl+C で終了します\n")
    try:
        while True:
            record_with_arecord(device=device)
            user_text = recognize_from_file()
            if not user_text:
                print("❌ 音声が認識できませんでした")
                continue
            print(f"📝 認識: {user_text}")
            reply = ask_gemini(user_text, system_prompt)
            print(f"🤖 Gemini: {reply}")
            speak_text(reply)
    except KeyboardInterrupt:
        # The banner promises that Ctrl+C ends the program, so leave quietly
        # instead of surfacing the interrupt as an error.
        print("\n👋 終了します")
# Start the assistant loop only when this file is executed as a script.
if __name__ == "__main__":
    main()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# 他にもあるかもしれませんが、エラーを生成AIに聞くなどしてください。(教えていただけると助かりますm(_ _)m)
apt install ffmpeg portaudio19-dev alsa-utils flac python3-pip
pip install pyaudio speechrecognition gtts playsound google-generativeai
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment