from json import dumps as json_stringify, loads as json_parse
from ws4py.client.threadedclient import WebSocketClient
from cryptography.fernet import Fernet
from xml.etree import ElementTree
from datetime import datetime
import ws4py.messaging
import time
import os
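# Text-to-speech via the Microsoft Edge "read aloud" WebSocket service:
# connect, send a speech.config message and an SSML payload, collect the
# binary audio chunks streamed back, save them as an MP3 and open it with
# the default player (os.startfile, so Windows only).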
def get_file(path):
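    """Return the contents of the UTF-8 text file at path."""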
with open(path, 'r', encoding='utf-8') as f:
r = f.read()
return r
def decode_token(key, enc_token):
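    """Decrypt the Fernet-encrypted token with the given key."""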
fernet = Fernet(key)
token = fernet.decrypt(enc_token).decode()
return token
# current date and time as a Unix timestamp (not used further below)
now = str(datetime.timestamp(datetime.now()))
# encryption key
key = b'f5mFTzPJ7g-5ZyzmbM5wWVeJpIG5GtPkLxwizeKKCZ4='
# encrypted token
enc_token = \
b'gAAAAABglyvGOx1ZHrms20ckxjAgVqhC' \
+ b'eyLqOFXy3uQ0HyfVpkqfc3Yo18vgXY' \
+ b'Ids2jp2bYyVR2JheA9b3jAZmK2Pxhm' \
+ b'm7hfwGa_xfsgqjHQLq32LYIM4eRI4v' \
+ b'kaskhiOb2apeadLaOZ'
# decrypt the token
token = decode_token(key, enc_token)
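# WebSocket endpoint of the Edge read-aloud service; the decrypted token is
# passed as the TrustedClientToken query parameter.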
host = "wss://speech.platform.bing.com/"
path = "consumer/speech/synthesize/readaloud/edge/v1"
endpoint_url = host + path + "?TrustedClientToken=" + token
timestr = time.strftime("%Y%m%d-%H%M")
filename = 'sample-' + timestr + '.mp3'
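# Buffer for the binary audio chunks streamed back by the service.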
file = []
class MicrosoftTTS(WebSocketClient):
    """WebSocket client that synthesizes `text` with the given neural voice."""
    def __init__(self, url, text, voice_name='AvriNeural'):
        self.text = text
        self.voice_name = voice_name
        super().__init__(url)
def opened(self):
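        # Once connected, first send the audio output configuration,
        # then the SSML message with the text to synthesize.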
conf_txt = self.set_configs()
text = self.set_text(self.text)
self.send(conf_txt)
self.send(text)
    def received_message(self, m):
        r = self.make_response(m)
        if r["headers"].get("Content-Type") == "audio/mpeg":
            # Audio chunk: buffer it until the turn ends.
            file.append(r["body"])
        if r["headers"].get("Content-Type") == \
                "application/json; charset=utf-8" and \
                r["headers"].get("Path") == "turn.end":
            # "turn.end" marks the end of synthesis: write the MP3 and play it.
            with open(filename, "wb") as f:
                for row in file:
                    f.write(row)
            self.close()
            # os.startfile is Windows-only.
            os.startfile(os.path.join(os.getcwd(), filename), 'open')
    def make_response(self, m):
        """Parse a WebSocket message into a dict with "headers" and "body"."""
        res = {
            "headers": {},
            "body": None
        }
        if isinstance(m, ws4py.messaging.TextMessage):
            # Text frames carry headers and a JSON body separated by a blank line.
            m_str = str(m)
            split_data = m_str.split("\r\n\r\n", 1)
            headers = {}
            for header_line in split_data[0].split("\r\n"):
                name, _, value = header_line.partition(":")
                headers[name.strip()] = value.strip()
            res["headers"] = headers
            res["body"] = json_parse(split_data[1])
            return res
        if isinstance(m, ws4py.messaging.BinaryMessage):
            # Binary frames start with a 2-byte big-endian header length,
            # followed by the headers and then the raw audio payload.
            data = m.data
            header_len = int.from_bytes(data[:2], "big")
            headers = {}
            for header_line in data[2:2 + header_len].split(b"\r\n"):
                if not header_line:
                    continue
                name, _, value = header_line.decode("utf-8").partition(":")
                headers[name.strip()] = value.strip()
            res["headers"] = headers
            if headers.get("Content-Type"):
                res["body"] = data[2 + header_len:]
            return res
def set_text(self, text):
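        """Build the SSML request (headers + XML body) for the given text."""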
        # Full voice names look like '(he-IL, Asaf)' or '(zh-CN, XiaoxiaoNeural)'.
        lang = 'he-IL'
        engine = '(' + lang + ', ' + self.voice_name + ')'
xml_body = ElementTree.Element('speak', version='1.0')
        xml_body.set('xmlns', 'http://www.w3.org/2001/10/synthesis')
xml_body.set('xml:lang', lang)
voice = ElementTree.SubElement(xml_body, 'voice')
voice.set(
'name', 'Microsoft Server Speech Text to Speech Voice '
+ engine)
prosody = ElementTree.SubElement(voice, 'prosody')
prosody.set('rate', '+0%')
prosody.set('pitch', '+0Hz')
prosody.set('volume', '+0%')
p = ElementTree.SubElement(prosody, 'p')
p.text = text
body = ElementTree.tostring(xml_body, 'unicode')
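        # The X-RequestId below is a fixed value reused for every request.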
return "X-RequestId:fe83fbefb15c7739fe674d9f3e81d38f" + "\r\n" \
+ "Content-Type:application/ssml+xml" + "\r\n" \
+ "Path:ssml" + "\r\n" + "\r\n" + body
def set_configs(self, codec="audio-24khz-48kbitrate-mono-mp3"):
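        """Build the speech.config message selecting the audio output format."""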
conf_obj = {
"context": {
"synthesis": {
"audio": {
"metadataoptions": {
"sentenceBoundaryEnabled": False,
"wordBoundaryEnabled": False
},
"outputFormat": codec
}
}
}
}
return "Content-Type:application/json; charset=utf-8" + "\r\n" \
+ "Path:speech.config" + "\r\n" + "\r\n" \
+ json_stringify(conf_obj)
if __name__ == '__main__':
    text = "שלום ילדים, לילה טוב"  # Hebrew: "Hello children, good night"
# text = get_file(os.getcwd() + '\\' + "test.txt")
engine = 'AvriNeural'
    ws = MicrosoftTTS(endpoint_url, text, engine)
ws.connect()
ws.run_forever()