Last active
March 24, 2024 21:16
-
-
Save aminnj/b521069ad7b98fc1cf8f34bb08aa442e to your computer and use it in GitHub Desktop.
TTS on Mac
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import time | |
import objc | |
from Foundation import NSObject, NSRunLoop, NSDate | |
import AVFoundation | |
import Cocoa | |
# https://gitlab.com/gltd/say-cocoa/-/blob/master/test.py?ref_type=heads | |
def _speech_delegate_class():
    """Return the AVSpeechSynthesizer delegate class, creating it on first use.

    PyObjC registers every NSObject subclass with the Objective-C runtime under
    its Python class name, and registering the same name twice fails. The class
    is therefore built once and cached on this function so that main1() can be
    called more than once per process.
    """
    cached = getattr(_speech_delegate_class, "_cls", None)
    if cached is not None:
        return cached

    class SpeechSynthesizerDelegate(NSObject):
        def init(self):
            self = objc.super(SpeechSynthesizerDelegate, self).init()
            if self is None:
                return None
            # Flipped to True by the callbacks below once speech has ended.
            self.finished = False
            return self

        def speechSynthesizer_didFinishSpeechUtterance_(self, synthesizer, utterance):
            self.finished = True

        def speechSynthesizer_didCancelSpeechUtterance_(self, synthesizer, utterance):
            # A cancelled utterance never reports "finished"; without this the
            # wait loop in main1() would spin forever.
            self.finished = True

    _speech_delegate_class._cls = SpeechSynthesizerDelegate
    return SpeechSynthesizerDelegate


def main1(
    text="The quick brown fox jumped over the lazy dog.",
    voice="com.apple.voice.premium.en-US.Zoe",
):
    """Speak *text* with AVSpeechSynthesizer and block until it is done.

    Args:
        text: The sentence to speak.
        voice: Identifier of the voice to use. Premium voices have to be
            enabled via Accessibility > Spoken Content > System voice.
            Pass None (or an identifier that is not installed) to leave the
            utterance on its default voice.
    """
    synth = AVFoundation.AVSpeechSynthesizer.alloc().init()
    delegate = _speech_delegate_class().alloc().init()
    synth.setDelegate_(delegate)

    utterance = AVFoundation.AVSpeechUtterance.speechUtteranceWithString_(text)
    if voice is not None:
        voice_obj = AVFoundation.AVSpeechSynthesisVoice.voiceWithIdentifier_(voice)
        # voiceWithIdentifier_ yields None for an unknown identifier; in that
        # case keep the utterance's default voice.
        if voice_obj is not None:
            utterance.setVoice_(voice_obj)
    synth.speakUtterance_(utterance)

    # Run the loop until the speech is finished. Each pass waits a bounded
    # amount of time so the finished flag is re-checked regularly instead of
    # only when a run-loop source happens to fire.
    poll_interval = 0.2
    run_loop = NSRunLoop.currentRunLoop()
    while not delegate.finished:
        ran = run_loop.runMode_beforeDate_(
            Cocoa.NSDefaultRunLoopMode,
            NSDate.dateWithTimeIntervalSinceNow_(poll_interval),
        )
        if not ran:
            # The run loop returned immediately without running; sleep so this
            # wait does not turn into a busy loop.
            time.sleep(poll_interval)
# NSSpeechSynthesizer is deprecated according to the docs, but its API is friendlier.
# https://developer.apple.com/documentation/appkit/nsspeechsynthesizer
def main2(
    text="The quick brown fox jumped over the lazy dog.",
    voice="com.apple.voice.premium.en-US.Zoe",
):
    """Speak *text* with NSSpeechSynthesizer and block until it is done.

    Args:
        text: The sentence to speak.
        voice: Identifier of the voice to use. Premium voices have to be
            enabled via Accessibility > Spoken Content > System voice.
            Pass None to skip voice selection, which gives a better fallback
            voice. Installed identifiers can be listed with
            NSSpeechSynthesizer.availableVoices() (filter on "premium" to
            find the premium ones).

    Apple doesn't allow picking a Siri voice for privacy reasons (an app could
    impersonate Siri and phish user info).
    """
    from AppKit import NSSpeechSynthesizer

    synthesizer = NSSpeechSynthesizer.alloc().init()
    if voice is not None:
        synthesizer.setVoice_(voice)
    synthesizer.startSpeakingString_(text)

    # Poll until the synthesizer reports that it has stopped speaking.
    while synthesizer.isSpeaking():
        time.sleep(0.2)
if __name__ == "__main__":
    # Run both demos back to back: the AVFoundation one first, then the
    # AppKit (NSSpeechSynthesizer) one.
    for demo in (main1, main2):
        demo()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Personal voice
List the voices and you'll see one like `com.apple.speech.personalvoice.<UUID>`. In order to use it, you also need to give the terminal permission to use Personal Voice: https://apple.stackexchange.com/questions/464984/how-do-i-access-macos-sonoma-personal-voice-from-command-line
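A Python-ified version of the above is to make a new, separate script (the snippet itself is not preserved here; presumably it calls `AVFoundation.AVSpeechSynthesizer.requestPersonalVoiceAuthorizationWithCompletionHandler_`, as in the linked answer) and run it once to trigger the permission popup.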
For some reason, if I just do this in the script that does the TTS, then there will be a crash.