Skip to content

Instantly share code, notes, and snippets.

@7shi
Created November 12, 2024 15:49
Show Gist options
  • Save 7shi/10557dcd1a354b6213228dde641db0ab to your computer and use it in GitHub Desktop.
Save 7shi/10557dcd1a354b6213228dde641db0ab to your computer and use it in GitHub Desktop.
[py] Upload a wave file and transcribe it
import os
import google.generativeai as genai
# Audio file to transcribe; the output Markdown file name is derived from it.
wav = "src.wav"

# Instruction sent to the model. The small table at the end shows the model
# the expected output layout. The leading newline that follows the opening
# quotes is removed by lstrip().
prompt = """
Please transcribe this conversation, in the table format of timecode, speaker, caption. Use speaker A, speaker B, etc. to identify speakers.
|timecode|speaker|caption|
|---|---|---|
|00:00|A|text|
""".lstrip()

# The API key is read from the environment; a missing GEMINI_API_KEY raises
# KeyError here, before any request is made.
genai.configure(api_key=os.environ["GEMINI_API_KEY"])

# Generation settings passed to the model; plain-text output is requested.
generation_config = {
    "temperature": 1,
    "top_p": 0.95,
    "top_k": 40,
    "max_output_tokens": 8192,
    "response_mime_type": "text/plain",
}

model = genai.GenerativeModel(
    generation_config=generation_config,
    model_name="models/gemini-1.5-flash-exp-0827",
)
# Handle of the uploaded audio. It stays None when the upload itself fails,
# so the cleanup step in `finally` knows there is nothing to delete.
file = None
try:
    file = genai.upload_file(wav, mime_type="audio/wav")
    print(f"Uploaded file '{file.display_name}' as: {file.uri}")

    # Put the uploaded audio into the chat history, then send the
    # transcription instruction as the next user message.
    chat_session = model.start_chat(history=[
        {"role": "user", "parts": [file]}
    ])
    response = chat_session.send_message(prompt)

    # Read the text before touching the filesystem: if accessing it raises,
    # no empty output file is left behind and no file name is used up.
    text = response.text

    # Pick the first unused output name: <stem>.md, <stem>_2.md, <stem>_3.md, ...
    fn = os.path.splitext(wav)[0]
    i = 1
    md = f"{fn}.md"
    while os.path.exists(md):
        i += 1
        md = f"{fn}_{i}.md"

    # Explicit UTF-8 so non-ASCII transcripts are written identically on every
    # platform instead of depending on the locale's default encoding.
    with open(md, "w", encoding="utf-8") as f:
        f.write(text)
    print(f"Transcription saved to: {md}")
except Exception as e:
    # Best-effort script: report the problem and fall through to cleanup.
    print(e)
finally:
    # Always remove the uploaded audio from the service, on success or failure.
    if file is not None:
        genai.delete_file(file.name)
        print(f"Deleted file '{file.display_name}' from: {file.uri}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment