Forked from theoknock/Transcribing live audio using Speech.swift
Created
March 19, 2026 16:31
-
-
Save mvandermeulen/6f9bb0da98931e97ba1beb2a5f5d458f to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import SwiftUI | |
| import Speech | |
| import AVFoundation | |
| import Combine | |
| import Observation | |
/// Streams microphone audio into Apple's Speech framework and publishes a
/// live transcription plus an "is running" flag for SwiftUI to observe.
@Observable
class SpeechRecognizer {
    /// Best transcription produced so far; updated as partial results arrive.
    var transcription: String = ""
    /// True while the audio engine is capturing and a recognition task is active.
    var isTranscribing: Bool = false
    private var speechRecognizer = SFSpeechRecognizer(locale: Locale(identifier: "en-US"))
    private var recognitionRequest: SFSpeechAudioBufferRecognitionRequest?
    private var recognitionTask: SFSpeechRecognitionTask?
    private var audioEngine = AVAudioEngine()

    /// Requests speech-recognition authorization and, if granted, begins capture.
    /// A call while already transcribing is ignored (a second tap on the same
    /// input bus would crash the audio engine).
    func startTranscribing() {
        guard !audioEngine.isRunning else { return }
        // [weak self]: the authorization callback escapes; don't keep the
        // recognizer alive just to start a session nobody can see.
        SFSpeechRecognizer.requestAuthorization { [weak self] authStatus in
            guard let self else { return }
            switch authStatus {
            case .authorized:
                DispatchQueue.main.async {
                    self.isTranscribing = true
                }
                self.startRecording()
            case .denied,
                 .restricted,
                 .notDetermined:
                print("Speech recognition not authorized")
            @unknown default:
                // Log rather than fatalError: a future OS may add cases and
                // crashing the app on them is not recoverable behavior.
                print("Unknown speech authorization status")
            }
        }
    }

    /// Configures the audio session, taps the microphone, and starts a
    /// partial-results recognition task. Called only after authorization.
    private func startRecording() {
        // Tear down any in-flight task before starting a new one.
        recognitionTask?.cancel()
        recognitionTask = nil
        let audioSession = AVAudioSession.sharedInstance()
        do {
            try audioSession.setCategory(.record, mode: .measurement, options: .duckOthers)
            try audioSession.setActive(true, options: .notifyOthersOnDeactivation)
        } catch {
            print("Failed to set up audio session: \(error)")
            // Roll back the flag set in startTranscribing(); we never went live.
            DispatchQueue.main.async { self.isTranscribing = false }
            return
        }
        let request = SFSpeechAudioBufferRecognitionRequest()
        // Stream partial hypotheses so the UI updates while the user speaks.
        request.shouldReportPartialResults = true
        recognitionRequest = request
        let inputNode = audioEngine.inputNode
        let recordingFormat = inputNode.outputFormat(forBus: 0)
        // A stale tap on bus 0 would make installTap throw an exception.
        inputNode.removeTap(onBus: 0)
        // [weak self]: the engine retains this closure; a strong capture would
        // create a retain cycle (self -> engine -> closure -> self).
        inputNode.installTap(onBus: 0, bufferSize: 1024, format: recordingFormat) { [weak self] buffer, _ in
            self?.recognitionRequest?.append(buffer)
        }
        audioEngine.prepare()
        do {
            try audioEngine.start()
        } catch {
            print("Audio engine couldn't start: \(error)")
            DispatchQueue.main.async { self.isTranscribing = false }
            return
        }
        recognitionTask = speechRecognizer?.recognitionTask(with: request) { [weak self] result, error in
            guard let self else { return }
            if let result {
                DispatchQueue.main.async {
                    self.transcription = result.bestTranscription.formattedString
                }
            }
            if error != nil || result?.isFinal == true {
                self.stopTranscribing()
            }
        }
    }

    /// Stops capture, finalizes the recognition request, releases the audio
    /// session, and flips `isTranscribing` off. Safe to call when idle.
    func stopTranscribing() {
        audioEngine.stop()
        audioEngine.inputNode.removeTap(onBus: 0)
        // Tell the recognizer no more audio is coming before cancelling.
        recognitionRequest?.endAudio()
        recognitionTask?.cancel()
        recognitionTask = nil
        recognitionRequest = nil
        // Deactivate the shared session so other apps' audio can resume.
        try? AVAudioSession.sharedInstance().setActive(false, options: .notifyOthersOnDeactivation)
        DispatchQueue.main.async {
            self.isTranscribing = false
        }
    }
}
/// Live-transcription demo screen: the transcript readout stacked above
/// Start/Stop controls that enable and disable in opposition.
struct ContentView: View {
    @State private var speechRecognizer = SpeechRecognizer()

    var body: some View {
        VStack {
            Text(speechRecognizer.transcription)
                .padding()
            HStack {
                startButton
                stopButton
            }
        }
        .padding()
    }

    /// Enabled only while idle; kicks off authorization and capture.
    private var startButton: some View {
        Button {
            speechRecognizer.startTranscribing()
        } label: {
            Text("Start Transcribing")
                .padding()
                .background(speechRecognizer.isTranscribing ? Color.gray : Color.blue)
                .foregroundColor(.white)
                .cornerRadius(10)
        }
        .disabled(speechRecognizer.isTranscribing)
    }

    /// Enabled only while transcribing; ends the capture session.
    private var stopButton: some View {
        Button {
            speechRecognizer.stopTranscribing()
        } label: {
            Text("Stop Transcribing")
                .padding()
                .background(speechRecognizer.isTranscribing ? Color.red : Color.gray)
                .foregroundColor(.white)
                .cornerRadius(10)
        }
        .disabled(!speechRecognizer.isTranscribing)
    }
}
// Xcode canvas preview for ContentView.
struct ContentView_Previews: PreviewProvider {
    static var previews: some View { ContentView() }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.