All Downloads are FREE. Search and download functionalities are using the official Maven repository.

io.spokestack.spokestack.profile.TFWakewordGoogleASR Maven / Gradle / Ivy

package io.spokestack.spokestack.profile;

import io.spokestack.spokestack.PipelineProfile;
import io.spokestack.spokestack.SpeechPipeline;

import java.util.ArrayList;
import java.util.List;

/**
 * A speech pipeline profile that uses TensorFlow Lite for wakeword detection
 * and Google Speech for ASR. Properties related to signal processing are tuned
 * for the "Spokestack" wakeword.
 *
 * <p>
 * Wakeword detection requires configuration to locate the models used for
 * classification; these properties must be set separately from this profile:
 * </p>
 * <ul>
 *   <li>
 *      <b>wake-filter-path</b> (string, required): file system path to the
 *      "filter" Tensorflow-Lite model, which is used to calculate a mel
 *      spectrogram frame from the linear STFT; its inputs should be shaped
 *      [fft-width], and its outputs [mel-width]
 *   </li>
 *   <li>
 *      <b>wake-encode-path</b> (string, required): file system path to the
 *      "encode" Tensorflow-Lite model, which is used to perform each
 *      autoregressive step over the mel frames; its inputs should be shaped
 *      [mel-length, mel-width], and its outputs [encode-width], with an
 *      additional state input/output shaped [state-width]
 *   </li>
 *   <li>
 *      <b>wake-detect-path</b> (string, required): file system path to the
 *      "detect" Tensorflow-Lite model; its inputs should be shaped
 *      [encode-length, encode-width], and its outputs [1]
 *   </li>
 * </ul>
 *
 * <p>
 * Google Speech also requires configuration:
 * </p>
 * <ul>
 *   <li>
 *      <b>google-credentials</b> (string): json-stringified google service
 *      account credentials, used to authenticate with the speech API
 *   </li>
 *   <li>
 *      <b>locale</b> (string): language code for speech recognition
 *   </li>
 * </ul>
 *
 * @see io.spokestack.spokestack.wakeword.WakewordTrigger
 * @see io.spokestack.spokestack.google.GoogleSpeechRecognizer
 */
public class TFWakewordGoogleASR implements PipelineProfile {

    /**
     * Configures the supplied builder with this profile's input class,
     * tuning properties, and ordered list of stage class names.
     *
     * @param builder the pipeline builder to configure
     * @return the builder produced by the chained configuration calls
     */
    @Override
    public SpeechPipeline.Builder apply(SpeechPipeline.Builder builder) {
        // Stage class names are listed in the order they are handed to the
        // builder: audio preprocessing, wakeword trigger, timeout, then ASR.
        List<String> stages = new ArrayList<>();
        stages.add("io.spokestack.spokestack.webrtc.AutomaticGainControl");
        stages.add("io.spokestack.spokestack.webrtc.AcousticNoiseSuppressor");
        stages.add("io.spokestack.spokestack.webrtc.VoiceActivityDetector");
        stages.add("io.spokestack.spokestack.wakeword.WakewordTrigger");
        stages.add("io.spokestack.spokestack.ActivationTimeout");
        stages.add("io.spokestack.spokestack.google.GoogleSpeechRecognizer");

        return builder
              .setInputClass(
                    "io.spokestack.spokestack.android.MicrophoneInput")
              .setProperty("ans-policy", "aggressive")
              .setProperty("vad-mode", "very-aggressive")
              .setProperty("vad-fall-delay", 800)
              .setProperty("wake-threshold", 0.9)
              .setProperty("pre-emphasis", 0.97)
              .setProperty("wake-active-min", 2000)
              .setStageClasses(stages);
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy