Created
August 30, 2016 09:42
-
-
Save RichLogan/60c7bbfda833387dadea5b6a80870ae6 to your computer and use it in GitHub Desktop.
HoloLens Spark -> Speech
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#if UNITY_WSA | |
using Cisco.Spark; | |
using HoloToolkit.Unity; | |
using System.Collections; | |
using System.Text; | |
using UnityEngine; | |
using UnityEngine.Windows.Speech; | |
/// <summary>
/// Listens for dictated speech with a <see cref="DictationRecognizer"/> and sends each
/// recognized phrase to a Spark room as a Markdown message. Keyword recognition is shut
/// down while dictation runs and restarted once a phrase has been sent.
/// </summary>
public class SparkSpeechRecognition : MonoBehaviour
{
    [Tooltip("The parsed speech")]
    public string Message;

    [Tooltip("The Spark room that recognized speech is posted to")]
    public string RoomId = "Y2lzY29zcGFyazovL3VzL1JPT00vMjgyOTIyYTAtNTNmYi0xMWU2LThmZmMtYWJmMDM5MDkyZDM5";

    // Length, in seconds, of the audio clip recorded for one dictation session.
    private const int messageLength = 10;

    // Sampling rate used when the microphone reports that it accepts any frequency.
    private const int defaultSamplingRate = 44100;

    // Using an empty string specifies the default microphone.
    private static string deviceName = string.Empty;

    private DictationRecognizer dictationRecognizer;

    // Holds the most recent final recognition; hypotheses are displayed after it.
    private StringBuilder textSoFar;

    private int samplingRate;

    /// <summary>
    /// Creates the dictation recognizer, subscribes to its events and queries the
    /// sampling rate to record with.
    /// </summary>
    private void Awake()
    {
        dictationRecognizer = new DictationRecognizer();

        // Fired while the user is talking, with the text heard so far.
        dictationRecognizer.DictationHypothesis += DictationRecognizer_DictationHypothesis;

        // Fired after the user pauses, typically at the end of a sentence, with the full recognized string.
        dictationRecognizer.DictationResult += DictationRecognizer_DictationResult;

        // Fired when the recognizer stops, whether from Stop() being called, a timeout occurring, or some other error.
        dictationRecognizer.DictationComplete += DictationRecognizer_DictationComplete;

        // Fired when an error occurs.
        dictationRecognizer.DictationError += DictationRecognizer_DictationError;

        // Query the frequency range of the default microphone and record at the maximum.
        int minFrequency;
        Microphone.GetDeviceCaps(deviceName, out minFrequency, out samplingRate);

        // GetDeviceCaps reports 0 for both values when the device supports any frequency;
        // 0 is not a usable recording frequency, so fall back to a common default.
        if (minFrequency == 0 && samplingRate == 0)
        {
            samplingRate = defaultSamplingRate;
        }

        textSoFar = new StringBuilder();
    }

    /// <summary>
    /// Unsubscribes from the recognizer's events and releases it so that the underlying
    /// speech resources are not leaked when this component is destroyed.
    /// </summary>
    private void OnDestroy()
    {
        if (dictationRecognizer == null)
        {
            return;
        }

        dictationRecognizer.DictationHypothesis -= DictationRecognizer_DictationHypothesis;
        dictationRecognizer.DictationResult -= DictationRecognizer_DictationResult;
        dictationRecognizer.DictationComplete -= DictationRecognizer_DictationComplete;
        dictationRecognizer.DictationError -= DictationRecognizer_DictationError;

        if (dictationRecognizer.Status == SpeechSystemStatus.Running)
        {
            dictationRecognizer.Stop();
        }

        dictationRecognizer.Dispose();
        dictationRecognizer = null;
    }

    /// <summary>
    /// Turns on the dictation recognizer and begins recording audio from the default microphone.
    /// </summary>
    /// <returns>The audio clip recorded from the microphone.</returns>
    public AudioClip StartRecording()
    {
        // Stop keyword recognition and start speech recognition.
        PhraseRecognitionSystem.Shutdown();
        dictationRecognizer.Start();

        // Record a single, non-looping clip of messageLength seconds.
        return Microphone.Start(deviceName, false, messageLength, samplingRate);
    }

    /// <summary>
    /// Ends the recording session.
    /// </summary>
    public void StopRecording()
    {
        if (dictationRecognizer.Status == SpeechSystemStatus.Running)
        {
            dictationRecognizer.Stop();
        }

        Microphone.End(deviceName);
    }

    /// <summary>
    /// This event is fired while the user is talking. As the recognizer listens, it provides text of what it's heard so far.
    /// </summary>
    /// <param name="text">The currently hypothesized recognition.</param>
    private void DictationRecognizer_DictationHypothesis(string text)
    {
        // Do not append to textSoFar yet, because the hypothesis may change on the next event.
        Message = textSoFar.ToString() + " " + text;
    }

    /// <summary>
    /// This event is fired after the user pauses, typically at the end of a sentence.
    /// The recognized text is sent to the Spark room, recording is stopped and keyword
    /// recognition is restarted.
    /// </summary>
    /// <param name="text">The text that was heard by the recognizer.</param>
    /// <param name="confidence">A representation of how confident (rejected, low, medium, high) the recognizer is of this recognition.</param>
    private void DictationRecognizer_DictationResult(string text, ConfidenceLevel confidence)
    {
        // Every result is sent as its own message, so the buffer is reset rather than
        // appended to. Stripping the previous message out with Replace() also removed
        // matching text from the new message and missed messages containing uppercase.
        // (Length = 0 is used instead of Clear() to stay compatible with older runtimes.)
        textSoFar.Length = 0;
        textSoFar.Append(text);

        // Save final message.
        Message = textSoFar.ToString();

        // Build the Spark message for the configured room.
        Message speechRecognizedMessage = new Message();
        speechRecognizedMessage.RoomId = RoomId;

        // Note that the message was recognised via Speech Recognition (and show off some Markdown!)
        speechRecognizedMessage.Markdown = Message + "\n\n *(Via speech (Confidence: " + confidence.ToString() + "))*";

        // Send to Spark; the completion callback is intentionally a no-op.
        Debug.Log("Sending to Spark");
        StartCoroutine(speechRecognizedMessage.Commit(message => { }));

        // Stop recording, then bring the keyword listener back once dictation has stopped.
        StopRecording();
        StartCoroutine(RestartSpeechSystem());
    }

    /// <summary>
    /// This event is fired when the recognizer stops, whether from Stop() being called, a timeout occurring, or some other error.
    /// Only the timeout case is handled here.
    /// </summary>
    /// <param name="cause">An enumerated reason for the session completing.</param>
    private void DictationRecognizer_DictationComplete(DictationCompletionCause cause)
    {
        // A timeout means the user has been silent for too long.
        if (cause == DictationCompletionCause.TimeoutExceeded)
        {
            Microphone.End(deviceName);

            // NOTE(review): no ResetAfterTimeout method exists in this class; presumably
            // another component on the same GameObject receives it - confirm.
            SendMessage("ResetAfterTimeout");
        }
    }

    /// <summary>
    /// This event is fired when an error occurs.
    /// </summary>
    /// <param name="error">The string representation of the error reason.</param>
    /// <param name="hresult">The int representation of the hresult.</param>
    private void DictationRecognizer_DictationError(string error, int hresult)
    {
        Debug.LogError(error + "\nHRESULT: " + hresult);
    }

    /// <summary>
    /// Waits until the dictation recognizer is no longer running, then starts the given keyword manager.
    /// </summary>
    /// <param name="keywordToStart">The keyword manager whose recognizer should be started.</param>
    private IEnumerator RestartSpeechSystem(KeywordManager keywordToStart)
    {
        while (dictationRecognizer != null && dictationRecognizer.Status == SpeechSystemStatus.Running)
        {
            yield return null;
        }

        keywordToStart.StartKeywordRecognizer();
    }

    /// <summary>
    /// Waits until the dictation recognizer is no longer running, then starts the keyword
    /// recognizer of the SpeechManager found in the scene.
    /// </summary>
    private IEnumerator RestartSpeechSystem()
    {
        while (dictationRecognizer != null && dictationRecognizer.Status == SpeechSystemStatus.Running)
        {
            yield return null;
        }

        Debug.Log("Ready to restart keywords");

        SpeechManager speechManager = FindObjectOfType<SpeechManager>();
        if (speechManager == null || speechManager.keywordRecognizer == null)
        {
            // Without a SpeechManager there is nothing to restart; report it instead of throwing.
            Debug.LogError("Cannot restart keywords: no SpeechManager with a keyword recognizer was found");
            yield break;
        }

        speechManager.keywordRecognizer.Start();
        Debug.Log("Successfully restarted keywords");
    }
}
#endif |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment