// io.spokestack.spokestack.profile.TFWakewordGoogleASR Maven / Gradle / Ivy
package io.spokestack.spokestack.profile;
import io.spokestack.spokestack.PipelineProfile;
import io.spokestack.spokestack.SpeechPipeline;
import java.util.ArrayList;
import java.util.List;
/**
* A speech pipeline profile that uses TensorFlow Lite for wakeword detection
* and Google Speech for ASR. Properties related to signal processing are tuned
* for the "Spokestack" wakeword.
*
*
* Wakeword detection requires configuration to locate the models used for
* classification; these properties must be set separately from this profile:
*
*
* <ul>
* <li>
* <b>wake-filter-path</b> (string, required): file system path to the
* "filter" Tensorflow-Lite model, which is used to calculate a mel
* spectrogram frame from the linear STFT; its inputs should be shaped
* [fft-width], and its outputs [mel-width]
* </li>
* <li>
* <b>wake-encode-path</b> (string, required): file system path to the
* "encode" Tensorflow-Lite model, which is used to perform each
* autoregressive step over the mel frames; its inputs should be shaped
* [mel-length, mel-width], and its outputs [encode-width], with an
* additional state input/output shaped [state-width]
* </li>
* <li>
* <b>wake-detect-path</b> (string, required): file system path to the
* "detect" Tensorflow-Lite model; its inputs should be shaped
* [encode-length, encode-width], and its outputs [1]
* </li>
* </ul>
*
*
* Google Speech also requires configuration:
*
*
* <ul>
* <li>
* <b>google-credentials</b> (string): JSON-stringified Google service
* account credentials, used to authenticate with the speech API
* </li>
* <li>
* <b>locale</b> (string): language code for speech recognition
* </li>
* </ul>
*
* @see io.spokestack.spokestack.wakeword.WakewordTrigger
* @see io.spokestack.spokestack.google.GoogleSpeechRecognizer
*/
public class TFWakewordGoogleASR implements PipelineProfile {

    /**
     * Applies this profile to the supplied pipeline builder: sets the input
     * class, a fixed set of tuning properties, and the ordered list of stage
     * class names.
     *
     * <p>
     * The model paths and Google credentials described in the class
     * documentation are not set here; the caller must supply them on the
     * builder separately.
     * </p>
     *
     * @param builder the pipeline builder to configure
     * @return the builder produced by the final call in the configuration
     *         chain
     */
    @Override
    public SpeechPipeline.Builder apply(SpeechPipeline.Builder builder) {
        // Parameterized as List<String> (not a raw List) so the compiler
        // verifies that only class-name strings are added and no unchecked
        // conversion happens when the list is handed to the builder.
        List<String> stages = new ArrayList<>();

        // Fully-qualified stage class names, in the order they are added.
        stages.add("io.spokestack.spokestack.webrtc.AutomaticGainControl");
        stages.add("io.spokestack.spokestack.webrtc.AcousticNoiseSuppressor");
        stages.add("io.spokestack.spokestack.webrtc.VoiceActivityDetector");
        stages.add("io.spokestack.spokestack.wakeword.WakewordTrigger");
        stages.add("io.spokestack.spokestack.ActivationTimeout");
        stages.add("io.spokestack.spokestack.google.GoogleSpeechRecognizer");

        return builder
            .setInputClass(
                "io.spokestack.spokestack.android.MicrophoneInput")
            .setProperty("ans-policy", "aggressive")
            .setProperty("vad-mode", "very-aggressive")
            .setProperty("vad-fall-delay", 800)
            .setProperty("wake-threshold", 0.9)
            .setProperty("pre-emphasis", 0.97)
            .setProperty("wake-active-min", 2000)
            .setStageClasses(stages);
    }
}