mongonta0716 · May 1, 2025 09:35
diff --git a/gemini_arecord.py b/gemini_arecord.py
 import subprocess
 import speech_recognition as sr
 from gtts import gTTS
 import os
 import google.generativeai as genai

 # ==== Settings ====
 # Prefer the GEMINI_API_KEY environment variable so the real key does not have
 # to be written into this file; the placeholder below remains the fallback.
 GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY", "YOUR_API_KEY")  # <- or replace with your API key
 genai.configure(api_key=GEMINI_API_KEY)
 # Shared model handle used by ask_gemini().
 model = genai.GenerativeModel(model_name="models/gemini-2.0-flash")

 # ==== 録音（arecord） ====
 def record_with_arecord(filename="input.wav", duration=10, device="plughw:0,0"):
    """Capture audio into a WAV file by invoking the ``arecord`` command.

    Args:
        filename: Path of the WAV file that ``arecord`` writes.
        duration: Value passed to ``-d`` (converted with ``str``).
        device: Capture device name passed to ``-D``.
    """
    print("🎙 arecordで録音中...")
    # Build the argument list piece by piece; the option order is unchanged
    # ("-f cd" first, "-r 16000" after it).
    command = ["arecord"]
    command += ["-D", device]
    command += ["-f", "cd"]
    command += ["-t", "wav"]
    command += ["-d", str(duration)]
    command += ["-r", "16000"]
    command.append(filename)
    subprocess.run(command)

 # ==== 音声認識 ====
 def recognize_from_file(filename="input.wav"):
    """Transcribe an audio file with Google speech recognition (``ja-JP``).

    Args:
        filename: Path of the audio file to load.

    Returns:
        The recognized text, or ``None`` when the speech could not be
        understood or the recognition request failed.
    """
    recognizer = sr.Recognizer()
    with sr.AudioFile(filename) as audio_source:
        captured = recognizer.record(audio_source)

    try:
        transcript = recognizer.recognize_google(captured, language="ja-JP")
    except sr.UnknownValueError:
        # Nothing intelligible was heard.
        transcript = None
    except sr.RequestError as err:
        print(f"🔌 Google音声認識エラー: {err}")
        transcript = None
    return transcript

 # ==== Geminiへの問い合わせ ====
 def ask_gemini(user_prompt, system_prompt):
    """Ask the module-level Gemini ``model`` and return the reply text.

    Args:
        user_prompt: The user's utterance.
        system_prompt: Instructions placed before the user prompt.

    Returns:
        The ``text`` attribute of the generated response.
    """
    # System prompt first, then the user prompt, separated by one newline.
    combined = "\n".join((system_prompt, user_prompt))
    return model.generate_content(combined).text

 # ==== 音声合成と再生 ====
 def speak_text(text):
    """Synthesize Japanese speech for ``text`` with gTTS and play it via ``aplay``.

    The text is saved to ``response.mp3``, converted by ffmpeg to
    ``response.wav`` (``-ar 44100``), played, and both files are removed
    afterwards.

    Args:
        text: Text to speak. Empty or whitespace-only text is skipped.
    """
    # gTTS rejects input with nothing to speak, so skip synthesis entirely.
    if not text or not text.strip():
        return

    print("🗣 gTTSで音声合成...")
    mp3_path = "response.mp3"
    wav_path = "response.wav"
    try:
        gTTS(text=text, lang="ja").save(mp3_path)
        subprocess.run(
            ["ffmpeg", "-y", "-i", mp3_path, "-ar", "44100", wav_path],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
        subprocess.run(["aplay", wav_path])
    finally:
        # Clean up even when synthesis, conversion or playback failed. A file
        # that was never created (e.g. ffmpeg failed) must not raise here.
        for path in (mp3_path, wav_path):
            try:
                os.remove(path)
            except FileNotFoundError:
                pass

 # ==== メインループ ====
 def main():
    """Run the assistant loop: record, transcribe, ask Gemini, speak the reply.

    The loop repeats until interrupted with Ctrl+C, which ends it cleanly
    instead of surfacing a ``KeyboardInterrupt`` traceback.
    """
    device = "plughw:0,0"
    system_prompt = "あなたは可愛い手乗りロボットです。回答は語尾に「ござる」を付けて３０秒以内に収めてください。"
    print("✅ Ctrl+C で終了します\n")

    try:
        while True:
            record_with_arecord(device=device)
            user_text = recognize_from_file()
            if not user_text:
                print("❌ 音声が認識できませんでした")
                continue
            print(f"📝 認識: {user_text}")
            reply = ask_gemini(user_text, system_prompt)
            print(f"🤖 Gemini: {reply}")
            speak_text(reply)
    except KeyboardInterrupt:
        # The banner promises that Ctrl+C exits; honor it without a traceback.
        print("\n👋 終了します")

 # Start the assistant loop only when this file is executed as a script.
 if __name__ == "__main__":
    main()
diff --git a/インストール.txt b/インストール.txt
 # 他にも必要なパッケージがあるかもしれません。エラーが出た場合は生成AIに聞くなどして対処してください。（不足分を教えていただけると助かりますm(_ _)m）

 apt install ffmpeg portaudio19-dev alsa-utils flac python3-pip 



 pip install pyaudio speechrecognition gtts playsound google-generativeai
	import subprocess
	import speech_recognition as sr
	from gtts import gTTS
	import os
	import google.generativeai as genai

	# ==== Settings ====
	# Prefer the GEMINI_API_KEY environment variable so the real key does not have
	# to be written into this file; the placeholder below remains the fallback.
	GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY", "YOUR_API_KEY")  # <- or replace with your API key
	genai.configure(api_key=GEMINI_API_KEY)
	# Shared model handle used by ask_gemini().
	model = genai.GenerativeModel(model_name="models/gemini-2.0-flash")

	# ==== 録音（arecord） ====
	def record_with_arecord(filename="input.wav", duration=10, device="plughw:0,0"):
	    """Capture audio into a WAV file by invoking the ``arecord`` command.

	    Args:
	        filename: Path of the WAV file that ``arecord`` writes.
	        duration: Value passed to ``-d`` (converted with ``str``).
	        device: Capture device name passed to ``-D``.
	    """
	    print("🎙 arecordで録音中...")
	    # Option order is kept: "-f cd" first, "-r 16000" after it.
	    command = [
	        "arecord",
	        "-D", device,
	        "-f", "cd",
	        "-t", "wav",
	        "-d", str(duration),
	        "-r", "16000",
	        filename,
	    ]
	    subprocess.run(command)

	# ==== 音声認識 ====
	def recognize_from_file(filename="input.wav"):
	    """Transcribe an audio file with Google speech recognition (``ja-JP``).

	    Args:
	        filename: Path of the audio file to load.

	    Returns:
	        The recognized text, or ``None`` when the speech could not be
	        understood or the recognition request failed.
	    """
	    recognizer = sr.Recognizer()
	    with sr.AudioFile(filename) as audio_source:
	        captured = recognizer.record(audio_source)

	    try:
	        return recognizer.recognize_google(captured, language="ja-JP")
	    except sr.UnknownValueError:
	        # Nothing intelligible was heard.
	        return None
	    except sr.RequestError as err:
	        print(f"🔌 Google音声認識エラー: {err}")
	        return None

	# ==== Geminiへの問い合わせ ====
	def ask_gemini(user_prompt, system_prompt):
	    """Ask the module-level Gemini ``model`` and return the reply text.

	    Args:
	        user_prompt: The user's utterance.
	        system_prompt: Instructions placed before the user prompt.

	    Returns:
	        The ``text`` attribute of the generated response.
	    """
	    # System prompt first, then the user prompt, separated by one newline.
	    full_prompt = system_prompt + "\n" + user_prompt
	    response = model.generate_content(full_prompt)
	    return response.text

	# ==== 音声合成と再生 ====
	def speak_text(text):
	    """Synthesize Japanese speech for ``text`` with gTTS and play it via ``aplay``.

	    The text is saved to ``response.mp3``, converted by ffmpeg to
	    ``response.wav`` (``-ar 44100``), played, and both files are removed
	    afterwards.

	    Args:
	        text: Text to speak. Empty or whitespace-only text is skipped.
	    """
	    # gTTS rejects input with nothing to speak, so skip synthesis entirely.
	    if not text or not text.strip():
	        return

	    print("🗣 gTTSで音声合成...")
	    mp3_path = "response.mp3"
	    wav_path = "response.wav"
	    try:
	        gTTS(text=text, lang="ja").save(mp3_path)
	        subprocess.run(
	            ["ffmpeg", "-y", "-i", mp3_path, "-ar", "44100", wav_path],
	            stdout=subprocess.DEVNULL,
	            stderr=subprocess.DEVNULL,
	        )
	        subprocess.run(["aplay", wav_path])
	    finally:
	        # Clean up even when synthesis, conversion or playback failed. A file
	        # that was never created (e.g. ffmpeg failed) must not raise here.
	        for path in (mp3_path, wav_path):
	            try:
	                os.remove(path)
	            except FileNotFoundError:
	                pass

	# ==== メインループ ====
	def main():
	    """Run the assistant loop: record, transcribe, ask Gemini, speak the reply.

	    The loop repeats until interrupted with Ctrl+C, which ends it cleanly
	    instead of surfacing a ``KeyboardInterrupt`` traceback.
	    """
	    device = "plughw:0,0"
	    system_prompt = "あなたは可愛い手乗りロボットです。回答は語尾に「ござる」を付けて３０秒以内に収めてください。"
	    print("✅ Ctrl+C で終了します\n")

	    try:
	        while True:
	            record_with_arecord(device=device)
	            user_text = recognize_from_file()
	            if not user_text:
	                print("❌ 音声が認識できませんでした")
	                continue
	            print(f"📝 認識: {user_text}")
	            reply = ask_gemini(user_text, system_prompt)
	            print(f"🤖 Gemini: {reply}")
	            speak_text(reply)
	    except KeyboardInterrupt:
	        # The banner promises that Ctrl+C exits; honor it without a traceback.
	        print("\n👋 終了します")

	# Start the assistant loop only when this file is executed as a script.
	if __name__ == "__main__":
	    main()
	# 他にも必要なパッケージがあるかもしれません。エラーが出た場合は生成AIに聞くなどして対処してください。（不足分を教えていただけると助かりますm(_ _)m）

	apt install ffmpeg portaudio19-dev alsa-utils flac python3-pip



	pip install pyaudio speechrecognition gtts playsound google-generativeai