/*
* Copyright 2024 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Generated by the protocol buffer compiler. DO NOT EDIT!
// source: google/cloud/speech/v1/cloud_speech.proto
// Protobuf Java Version: 3.25.2
package com.google.cloud.speech.v1;
public interface RecognitionConfigOrBuilder
extends
// @@protoc_insertion_point(interface_extends:google.cloud.speech.v1.RecognitionConfig)
com.google.protobuf.MessageOrBuilder {
/**
*
*
*
* Encoding of audio data sent in all `RecognitionAudio` messages.
* This field is optional for `FLAC` and `WAV` audio files and required
* for all other audio formats. For details, see
* [AudioEncoding][google.cloud.speech.v1.RecognitionConfig.AudioEncoding].
*
*
* .google.cloud.speech.v1.RecognitionConfig.AudioEncoding encoding = 1;
*
* @return The enum numeric value on the wire for encoding.
*/
int getEncodingValue();
/**
*
*
*
* Encoding of audio data sent in all `RecognitionAudio` messages.
* This field is optional for `FLAC` and `WAV` audio files and required
* for all other audio formats. For details, see
* [AudioEncoding][google.cloud.speech.v1.RecognitionConfig.AudioEncoding].
*
*
* .google.cloud.speech.v1.RecognitionConfig.AudioEncoding encoding = 1;
*
* @return The encoding.
*/
com.google.cloud.speech.v1.RecognitionConfig.AudioEncoding getEncoding();
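// Illustrative sketch, not part of the generated file: the two encoding
// accessors above expose the same field in two forms. `getEncodingValue()`
// returns the raw wire number (useful when the enum constant is not known to
// this client version), while `getEncoding()` maps it to the AudioEncoding
// enum. Assumes a config built with the generated RecognitionConfig.Builder:
//
//   com.google.cloud.speech.v1.RecognitionConfig config =
//       com.google.cloud.speech.v1.RecognitionConfig.newBuilder()
//           .setEncoding(com.google.cloud.speech.v1.RecognitionConfig.AudioEncoding.LINEAR16)
//           .build();
//   int wireValue = config.getEncodingValue();  // the enum's numeric value, e.g. 1 for LINEAR16
//   com.google.cloud.speech.v1.RecognitionConfig.AudioEncoding enc = config.getEncoding();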
/**
*
*
*
* Sample rate in Hertz of the audio data sent in all
* `RecognitionAudio` messages. Valid values are: 8000-48000.
* 16000 is optimal. For best results, set the sampling rate of the audio
* source to 16000 Hz. If that's not possible, use the native sample rate of
* the audio source (instead of re-sampling).
* This field is optional for FLAC and WAV audio files, but is
* required for all other audio formats. For details, see
* [AudioEncoding][google.cloud.speech.v1.RecognitionConfig.AudioEncoding].
*
*
* int32 sample_rate_hertz = 2;
*
* @return The sampleRateHertz.
*/
int getSampleRateHertz();
/**
*
*
*
* The number of channels in the input audio data.
* ONLY set this for MULTI-CHANNEL recognition.
* Valid values for LINEAR16, OGG_OPUS and FLAC are `1`-`8`.
* The only valid value for MULAW, AMR, AMR_WB and SPEEX_WITH_HEADER_BYTE is `1`.
* If `0` or omitted, defaults to one channel (mono).
* Note: We only recognize the first channel by default.
* To perform independent recognition on each channel, set
* `enable_separate_recognition_per_channel` to 'true'.
*
*
* int32 audio_channel_count = 7;
*
* @return The audioChannelCount.
*/
int getAudioChannelCount();
/**
*
*
*
* This must be set to `true` explicitly, with `audio_channel_count` greater
* than 1, to get each channel recognized separately. The recognition result
* will contain a `channel_tag` field stating which channel the result belongs
* to. If this is not true, we will only recognize the first channel. The
* request is billed cumulatively for all channels recognized:
* `audio_channel_count` multiplied by the length of the audio.
*
*
* bool enable_separate_recognition_per_channel = 12;
*
* @return The enableSeparateRecognitionPerChannel.
*/
boolean getEnableSeparateRecognitionPerChannel();
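// Illustrative sketch, not part of the generated file: configuring
// multi-channel recognition via the two fields above. Assumes stereo input;
// note the billing caveat in the docs (audio_channel_count multiplied by the
// length of the audio):
//
//   com.google.cloud.speech.v1.RecognitionConfig config =
//       com.google.cloud.speech.v1.RecognitionConfig.newBuilder()
//           .setAudioChannelCount(2)                       // stereo input
//           .setEnableSeparateRecognitionPerChannel(true)  // recognize every channel
//           .setLanguageCode("en-US")
//           .build();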
/**
*
*
*
* Required. The language of the supplied audio as a
* [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag.
* Example: "en-US".
* See [Language
* Support](https://cloud.google.com/speech-to-text/docs/languages) for a list
* of the currently supported language codes.
*
*
* string language_code = 3 [(.google.api.field_behavior) = REQUIRED];
*
* @return The languageCode.
*/
java.lang.String getLanguageCode();
/**
*
*
*
* Required. The language of the supplied audio as a
* [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag.
* Example: "en-US".
* See [Language
* Support](https://cloud.google.com/speech-to-text/docs/languages) for a list
* of the currently supported language codes.
*
*
* string language_code = 3 [(.google.api.field_behavior) = REQUIRED];
*
* @return The bytes for languageCode.
*/
com.google.protobuf.ByteString getLanguageCodeBytes();
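// Illustrative sketch, not part of the generated file: the minimal config the
// docs above imply for raw PCM audio, where language_code is required and
// encoding/sample_rate_hertz are required for non-FLAC/WAV formats:
//
//   com.google.cloud.speech.v1.RecognitionConfig config =
//       com.google.cloud.speech.v1.RecognitionConfig.newBuilder()
//           .setEncoding(com.google.cloud.speech.v1.RecognitionConfig.AudioEncoding.LINEAR16)
//           .setSampleRateHertz(16000)        // 16000 Hz is optimal per the docs
//           .setLanguageCode("en-US")         // required BCP-47 tag
//           .build();
//   java.lang.String tag = config.getLanguageCode();                      // "en-US"
//   com.google.protobuf.ByteString raw = config.getLanguageCodeBytes();   // UTF-8 bytes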
/**
*
*
*
* A list of up to 3 additional
* [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tags,
* listing possible alternative languages of the supplied audio.
* See [Language
* Support](https://cloud.google.com/speech-to-text/docs/languages) for a list
* of the currently supported language codes. If alternative languages are
* listed, the recognition result will contain the transcription in the most
* likely language detected, including the main language_code. The recognition
* result will include the language tag of the language detected in the audio.
* Note: This feature is only supported for Voice Command and Voice Search use
* cases, and performance may vary for other use cases (e.g., phone call
* transcription).
*
*
* repeated string alternative_language_codes = 18;
*
* @return A list containing the alternativeLanguageCodes.
*/
java.util.List<java.lang.String> getAlternativeLanguageCodesList();
/**
*
*
*
* A list of up to 3 additional
* [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tags,
* listing possible alternative languages of the supplied audio.
* See [Language
* Support](https://cloud.google.com/speech-to-text/docs/languages) for a list
* of the currently supported language codes. If alternative languages are
* listed, the recognition result will contain the transcription in the most
* likely language detected, including the main language_code. The recognition
* result will include the language tag of the language detected in the audio.
* Note: This feature is only supported for Voice Command and Voice Search use
* cases, and performance may vary for other use cases (e.g., phone call
* transcription).
*
*
* repeated string alternative_language_codes = 18;
*
* @return The count of alternativeLanguageCodes.
*/
int getAlternativeLanguageCodesCount();
/**
*
*
*
* A list of up to 3 additional
* [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tags,
* listing possible alternative languages of the supplied audio.
* See [Language
* Support](https://cloud.google.com/speech-to-text/docs/languages) for a list
* of the currently supported language codes. If alternative languages are
* listed, the recognition result will contain the transcription in the most
* likely language detected, including the main language_code. The recognition
* result will include the language tag of the language detected in the audio.
* Note: This feature is only supported for Voice Command and Voice Search use
* cases, and performance may vary for other use cases (e.g., phone call
* transcription).
*
*
* repeated string alternative_language_codes = 18;
*
* @param index The index of the element to return.
* @return The alternativeLanguageCodes at the given index.
*/
java.lang.String getAlternativeLanguageCodes(int index);
/**
*
*
*
* A list of up to 3 additional
* [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tags,
* listing possible alternative languages of the supplied audio.
* See [Language
* Support](https://cloud.google.com/speech-to-text/docs/languages) for a list
* of the currently supported language codes. If alternative languages are
* listed, the recognition result will contain the transcription in the most
* likely language detected, including the main language_code. The recognition
* result will include the language tag of the language detected in the audio.
* Note: This feature is only supported for Voice Command and Voice Search use
* cases, and performance may vary for other use cases (e.g., phone call
* transcription).
*
*
* repeated string alternative_language_codes = 18;
*
* @param index The index of the value to return.
* @return The bytes of the alternativeLanguageCodes at the given index.
*/
com.google.protobuf.ByteString getAlternativeLanguageCodesBytes(int index);
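// Illustrative sketch, not part of the generated file: how the repeated
// string accessors above behave for alternative_language_codes, assuming a
// builder that added up to 3 alternatives:
//
//   com.google.cloud.speech.v1.RecognitionConfig config =
//       com.google.cloud.speech.v1.RecognitionConfig.newBuilder()
//           .setLanguageCode("en-US")
//           .addAlternativeLanguageCodes("es-ES")
//           .addAlternativeLanguageCodes("fr-FR")
//           .build();
//   config.getAlternativeLanguageCodesCount();   // 2
//   config.getAlternativeLanguageCodes(0);       // "es-ES"
//   config.getAlternativeLanguageCodesList();    // ["es-ES", "fr-FR"]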
/**
*
*
*
* Maximum number of recognition hypotheses to be returned.
* Specifically, the maximum number of `SpeechRecognitionAlternative` messages
* within each `SpeechRecognitionResult`.
* The server may return fewer than `max_alternatives`.
* Valid values are `0`-`30`. A value of `0` or `1` will return a maximum of
* one. If omitted, a maximum of one is returned.
*
*
* int32 max_alternatives = 4;
*
* @return The maxAlternatives.
*/
int getMaxAlternatives();
/**
*
*
*
* If set to `true`, the server will attempt to filter out
* profanities, replacing all but the initial character in each filtered word
* with asterisks, e.g. "f***". If set to `false` or omitted, profanities
* won't be filtered out.
*
*
* bool profanity_filter = 5;
*
* @return The profanityFilter.
*/
boolean getProfanityFilter();
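// Illustrative sketch, not part of the generated file: requesting several
// hypotheses and profanity masking via the two scalar fields above:
//
//   com.google.cloud.speech.v1.RecognitionConfig config =
//       com.google.cloud.speech.v1.RecognitionConfig.newBuilder()
//           .setLanguageCode("en-US")
//           .setMaxAlternatives(3)     // up to 3 SpeechRecognitionAlternatives per result
//           .setProfanityFilter(true)  // "f***"-style masking
//           .build();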
/**
*
*
*
* Speech adaptation configuration improves the accuracy of speech
* recognition. For more information, see the [speech
* adaptation](https://cloud.google.com/speech-to-text/docs/adaptation)
* documentation.
* When speech adaptation is set, it supersedes the `speech_contexts` field.
*
*
* .google.cloud.speech.v1.SpeechAdaptation adaptation = 20;
*
* @return Whether the adaptation field is set.
*/
boolean hasAdaptation();
/**
*
*
*
* Speech adaptation configuration improves the accuracy of speech
* recognition. For more information, see the [speech
* adaptation](https://cloud.google.com/speech-to-text/docs/adaptation)
* documentation.
* When speech adaptation is set, it supersedes the `speech_contexts` field.
*
*
* .google.cloud.speech.v1.SpeechAdaptation adaptation = 20;
*
* @return The adaptation.
*/
com.google.cloud.speech.v1.SpeechAdaptation getAdaptation();
/**
*
*
*
* Speech adaptation configuration improves the accuracy of speech
* recognition. For more information, see the [speech
* adaptation](https://cloud.google.com/speech-to-text/docs/adaptation)
* documentation.
* When speech adaptation is set, it supersedes the `speech_contexts` field.
*
*
* .google.cloud.speech.v1.SpeechAdaptation adaptation = 20;
*/
com.google.cloud.speech.v1.SpeechAdaptationOrBuilder getAdaptationOrBuilder();
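// Illustrative sketch, not part of the generated file: attaching a speech
// adaptation config. The PhraseSet/Phrase shapes below are assumptions about
// the v1 SpeechAdaptation message; verify against the generated classes:
//
//   com.google.cloud.speech.v1.SpeechAdaptation adaptation =
//       com.google.cloud.speech.v1.SpeechAdaptation.newBuilder()
//           .addPhraseSets(
//               com.google.cloud.speech.v1.PhraseSet.newBuilder()
//                   .addPhrases(
//                       com.google.cloud.speech.v1.PhraseSet.Phrase.newBuilder()
//                           .setValue("Kubernetes")
//                           .setBoost(10.0f)))
//           .build();
//   com.google.cloud.speech.v1.RecognitionConfig config =
//       com.google.cloud.speech.v1.RecognitionConfig.newBuilder()
//           .setLanguageCode("en-US")
//           .setAdaptation(adaptation)   // supersedes speech_contexts when set
//           .build();
//   boolean set = config.hasAdaptation();  // true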
/**
*
*
*
* Optional. Use transcription normalization to automatically replace parts of
* the transcript with phrases of your choosing. For StreamingRecognize, this
* normalization only applies to stable partial transcripts (stability > 0.8)
* and final transcripts.
*
*
*
* .google.cloud.speech.v1.TranscriptNormalization transcript_normalization = 24 [(.google.api.field_behavior) = OPTIONAL];
*
*
* @return Whether the transcriptNormalization field is set.
*/
boolean hasTranscriptNormalization();
/**
*
*
*
* Optional. Use transcription normalization to automatically replace parts of
* the transcript with phrases of your choosing. For StreamingRecognize, this
* normalization only applies to stable partial transcripts (stability > 0.8)
* and final transcripts.
*
*
*
* .google.cloud.speech.v1.TranscriptNormalization transcript_normalization = 24 [(.google.api.field_behavior) = OPTIONAL];
*
*
* @return The transcriptNormalization.
*/
com.google.cloud.speech.v1.TranscriptNormalization getTranscriptNormalization();
/**
*
*
*
* Optional. Use transcription normalization to automatically replace parts of
* the transcript with phrases of your choosing. For StreamingRecognize, this
* normalization only applies to stable partial transcripts (stability > 0.8)
* and final transcripts.
*
*
*
* .google.cloud.speech.v1.TranscriptNormalization transcript_normalization = 24 [(.google.api.field_behavior) = OPTIONAL];
*
*/
com.google.cloud.speech.v1.TranscriptNormalizationOrBuilder getTranscriptNormalizationOrBuilder();
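// Illustrative sketch, not part of the generated file: one transcript
// normalization rule. The Entry fields (search/replace/caseSensitive) are
// assumptions about the v1 TranscriptNormalization message; check the
// generated classes before relying on them:
//
//   com.google.cloud.speech.v1.TranscriptNormalization norm =
//       com.google.cloud.speech.v1.TranscriptNormalization.newBuilder()
//           .addEntries(
//               com.google.cloud.speech.v1.TranscriptNormalization.Entry.newBuilder()
//                   .setSearch("cloud speech api")
//                   .setReplace("Cloud Speech API")
//                   .setCaseSensitive(false))
//           .build();
//   com.google.cloud.speech.v1.RecognitionConfig config =
//       com.google.cloud.speech.v1.RecognitionConfig.newBuilder()
//           .setLanguageCode("en-US")
//           .setTranscriptNormalization(norm)
//           .build();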
/**
*
*
*
* Array of [SpeechContext][google.cloud.speech.v1.SpeechContext].
* A means to provide context to assist the speech recognition. For more
* information, see
* [speech
* adaptation](https://cloud.google.com/speech-to-text/docs/adaptation).
*
*
* repeated .google.cloud.speech.v1.SpeechContext speech_contexts = 6;
*/
java.util.List<com.google.cloud.speech.v1.SpeechContext> getSpeechContextsList();
/**
*
*
*
* Array of [SpeechContext][google.cloud.speech.v1.SpeechContext].
* A means to provide context to assist the speech recognition. For more
* information, see
* [speech
* adaptation](https://cloud.google.com/speech-to-text/docs/adaptation).
*
*
* repeated .google.cloud.speech.v1.SpeechContext speech_contexts = 6;
*/
com.google.cloud.speech.v1.SpeechContext getSpeechContexts(int index);
/**
*
*
*
* Array of [SpeechContext][google.cloud.speech.v1.SpeechContext].
* A means to provide context to assist the speech recognition. For more
* information, see
* [speech
* adaptation](https://cloud.google.com/speech-to-text/docs/adaptation).
*
*
* repeated .google.cloud.speech.v1.SpeechContext speech_contexts = 6;
*/
int getSpeechContextsCount();
/**
*
*
*
* Array of [SpeechContext][google.cloud.speech.v1.SpeechContext].
* A means to provide context to assist the speech recognition. For more
* information, see
* [speech
* adaptation](https://cloud.google.com/speech-to-text/docs/adaptation).
*
*
* repeated .google.cloud.speech.v1.SpeechContext speech_contexts = 6;
*/
java.util.List<? extends com.google.cloud.speech.v1.SpeechContextOrBuilder>
getSpeechContextsOrBuilderList();
/**
*
*
*
* Array of [SpeechContext][google.cloud.speech.v1.SpeechContext].
* A means to provide context to assist the speech recognition. For more
* information, see
* [speech
* adaptation](https://cloud.google.com/speech-to-text/docs/adaptation).
*
*
* repeated .google.cloud.speech.v1.SpeechContext speech_contexts = 6;
*/
com.google.cloud.speech.v1.SpeechContextOrBuilder getSpeechContextsOrBuilder(int index);
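// Illustrative sketch, not part of the generated file: supplying classic
// speech_contexts hints (superseded by `adaptation` when both are set), then
// reading them back through the repeated-message accessors above:
//
//   com.google.cloud.speech.v1.RecognitionConfig config =
//       com.google.cloud.speech.v1.RecognitionConfig.newBuilder()
//           .setLanguageCode("en-US")
//           .addSpeechContexts(
//               com.google.cloud.speech.v1.SpeechContext.newBuilder()
//                   .addPhrases("weather")
//                   .addPhrases("weekend forecast"))
//           .build();
//   config.getSpeechContextsCount();            // 1
//   config.getSpeechContexts(0).getPhrases(0);  // "weather"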
/**
*
*
*
* If `true`, the top result includes a list of words and
* the start and end time offsets (timestamps) for those words. If
* `false`, no word-level time offset information is returned. The default is
* `false`.
*
*
* bool enable_word_time_offsets = 8;
*
* @return The enableWordTimeOffsets.
*/
boolean getEnableWordTimeOffsets();
/**
*
*
*
* If `true`, the top result includes a list of words and the
* confidence for those words. If `false`, no word-level confidence
* information is returned. The default is `false`.
*
*
* bool enable_word_confidence = 15;
*
* @return The enableWordConfidence.
*/
boolean getEnableWordConfidence();
/**
*
*
*
* If 'true', adds punctuation to recognition result hypotheses.
* This feature is only available in select languages. Setting this for
* requests in other languages has no effect at all.
* The default 'false' value does not add punctuation to result hypotheses.
*
*
* bool enable_automatic_punctuation = 11;
*
* @return The enableAutomaticPunctuation.
*/
boolean getEnableAutomaticPunctuation();
/**
*
*
*
* The spoken punctuation behavior for the call.
* If not set, the default behavior based on the chosen model is used;
* e.g. command_and_search will enable spoken punctuation by default.
* If 'true', replaces spoken punctuation with the corresponding symbols in
* the request. For example, "how are you question mark" becomes "how are
* you?". See https://cloud.google.com/speech-to-text/docs/spoken-punctuation
* for support. If 'false', spoken punctuation is not replaced.
*
*
* .google.protobuf.BoolValue enable_spoken_punctuation = 22;
*
* @return Whether the enableSpokenPunctuation field is set.
*/
boolean hasEnableSpokenPunctuation();
/**
*
*
*
* The spoken punctuation behavior for the call.
* If not set, the default behavior based on the chosen model is used;
* e.g. command_and_search will enable spoken punctuation by default.
* If 'true', replaces spoken punctuation with the corresponding symbols in
* the request. For example, "how are you question mark" becomes "how are
* you?". See https://cloud.google.com/speech-to-text/docs/spoken-punctuation
* for support. If 'false', spoken punctuation is not replaced.
*
*
* .google.protobuf.BoolValue enable_spoken_punctuation = 22;
*
* @return The enableSpokenPunctuation.
*/
com.google.protobuf.BoolValue getEnableSpokenPunctuation();
/**
*
*
*
* The spoken punctuation behavior for the call.
* If not set, the default behavior based on the chosen model is used;
* e.g. command_and_search will enable spoken punctuation by default.
* If 'true', replaces spoken punctuation with the corresponding symbols in
* the request. For example, "how are you question mark" becomes "how are
* you?". See https://cloud.google.com/speech-to-text/docs/spoken-punctuation
* for support. If 'false', spoken punctuation is not replaced.
*
*
* .google.protobuf.BoolValue enable_spoken_punctuation = 22;
*/
com.google.protobuf.BoolValueOrBuilder getEnableSpokenPunctuationOrBuilder();
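// Illustrative sketch, not part of the generated file: because
// enable_spoken_punctuation is a google.protobuf.BoolValue wrapper rather
// than a plain bool, "unset" is distinguishable from "false" via has*():
//
//   com.google.cloud.speech.v1.RecognitionConfig config =
//       com.google.cloud.speech.v1.RecognitionConfig.newBuilder()
//           .setLanguageCode("en-US")
//           .setEnableSpokenPunctuation(com.google.protobuf.BoolValue.of(true))
//           .build();
//   if (config.hasEnableSpokenPunctuation()) {
//     boolean on = config.getEnableSpokenPunctuation().getValue();  // true
//   }
//
// The enable_spoken_emojis field below follows the same wrapper pattern.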
/**
*
*
*
* The spoken emoji behavior for the call.
* If not set, the default behavior based on the chosen model is used.
* If 'true', adds spoken emoji formatting for the request. This will replace
* spoken emojis with the corresponding Unicode symbols in the final
* transcript. If 'false', spoken emojis are not replaced.
*
*
* .google.protobuf.BoolValue enable_spoken_emojis = 23;
*
* @return Whether the enableSpokenEmojis field is set.
*/
boolean hasEnableSpokenEmojis();
/**
*
*
*
* The spoken emoji behavior for the call.
* If not set, the default behavior based on the chosen model is used.
* If 'true', adds spoken emoji formatting for the request. This will replace
* spoken emojis with the corresponding Unicode symbols in the final
* transcript. If 'false', spoken emojis are not replaced.
*
*
* .google.protobuf.BoolValue enable_spoken_emojis = 23;
*
* @return The enableSpokenEmojis.
*/
com.google.protobuf.BoolValue getEnableSpokenEmojis();
/**
*
*
*
* The spoken emoji behavior for the call.
* If not set, the default behavior based on the chosen model is used.
* If 'true', adds spoken emoji formatting for the request. This will replace
* spoken emojis with the corresponding Unicode symbols in the final
* transcript. If 'false', spoken emojis are not replaced.
*
*
* .google.protobuf.BoolValue enable_spoken_emojis = 23;
*/
com.google.protobuf.BoolValueOrBuilder getEnableSpokenEmojisOrBuilder();
/**
*
*
*
* Config to enable speaker diarization and set additional
* parameters to make diarization better suited for your application.
* Note: When this is enabled, we send all the words from the beginning of the
* audio for the top alternative in every consecutive STREAMING response.
* This is done in order to improve our speaker tags as our models learn to
* identify the speakers in the conversation over time.
* For non-streaming requests, the diarization results will be provided only
* in the top alternative of the FINAL SpeechRecognitionResult.
*
*
* .google.cloud.speech.v1.SpeakerDiarizationConfig diarization_config = 19;
*
* @return Whether the diarizationConfig field is set.
*/
boolean hasDiarizationConfig();
/**
*
*
*
* Config to enable speaker diarization and set additional
* parameters to make diarization better suited for your application.
* Note: When this is enabled, we send all the words from the beginning of the
* audio for the top alternative in every consecutive STREAMING response.
* This is done in order to improve our speaker tags as our models learn to
* identify the speakers in the conversation over time.
* For non-streaming requests, the diarization results will be provided only
* in the top alternative of the FINAL SpeechRecognitionResult.
*
*
* .google.cloud.speech.v1.SpeakerDiarizationConfig diarization_config = 19;
*
* @return The diarizationConfig.
*/
com.google.cloud.speech.v1.SpeakerDiarizationConfig getDiarizationConfig();
/**
*
*
*
* Config to enable speaker diarization and set additional
* parameters to make diarization better suited for your application.
* Note: When this is enabled, we send all the words from the beginning of the
* audio for the top alternative in every consecutive STREAMING response.
* This is done in order to improve our speaker tags as our models learn to
* identify the speakers in the conversation over time.
* For non-streaming requests, the diarization results will be provided only
* in the top alternative of the FINAL SpeechRecognitionResult.
*
*
* .google.cloud.speech.v1.SpeakerDiarizationConfig diarization_config = 19;
*/
com.google.cloud.speech.v1.SpeakerDiarizationConfigOrBuilder getDiarizationConfigOrBuilder();
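// Illustrative sketch, not part of the generated file: enabling diarization.
// The SpeakerDiarizationConfig field names below (enableSpeakerDiarization,
// min/maxSpeakerCount) are assumptions to verify against the generated class:
//
//   com.google.cloud.speech.v1.RecognitionConfig config =
//       com.google.cloud.speech.v1.RecognitionConfig.newBuilder()
//           .setLanguageCode("en-US")
//           .setDiarizationConfig(
//               com.google.cloud.speech.v1.SpeakerDiarizationConfig.newBuilder()
//                   .setEnableSpeakerDiarization(true)
//                   .setMinSpeakerCount(2)
//                   .setMaxSpeakerCount(4))
//           .build();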
/**
*
*
*
* Metadata regarding this request.
*
*
* .google.cloud.speech.v1.RecognitionMetadata metadata = 9;
*
* @return Whether the metadata field is set.
*/
boolean hasMetadata();
/**
*
*
*
* Metadata regarding this request.
*
*
* .google.cloud.speech.v1.RecognitionMetadata metadata = 9;
*
* @return The metadata.
*/
com.google.cloud.speech.v1.RecognitionMetadata getMetadata();
/**
*
*
*
* Metadata regarding this request.
*
*
* .google.cloud.speech.v1.RecognitionMetadata metadata = 9;
*/
com.google.cloud.speech.v1.RecognitionMetadataOrBuilder getMetadataOrBuilder();
/**
*
*
*
* Which model to select for the given request. Select the model
* best suited to your domain to get the best results. If a model is not
* explicitly specified, then we auto-select a model based on the parameters
* in the RecognitionConfig.
* <table>
* <tr>
* <td><b>Model</b></td>
* <td><b>Description</b></td>
* </tr>
* <tr>
* <td><code>latest_long</code></td>
* <td>Best for long form content like media or conversation.</td>
* </tr>
* <tr>
* <td><code>latest_short</code></td>
* <td>Best for short form content like commands or single shot directed
* speech.</td>
* </tr>
* <tr>
* <td><code>command_and_search</code></td>
* <td>Best for short queries such as voice commands or voice search.</td>
* </tr>
* <tr>
* <td><code>phone_call</code></td>
* <td>Best for audio that originated from a phone call (typically
* recorded at an 8 kHz sampling rate).</td>
* </tr>
* <tr>
* <td><code>video</code></td>
* <td>Best for audio that originated from video or includes multiple
* speakers. Ideally the audio is recorded at a 16 kHz or greater
* sampling rate. This is a premium model that costs more than the
* standard rate.</td>
* </tr>
* <tr>
* <td><code>default</code></td>
* <td>Best for audio that does not fit one of the other specific models,
* for example, long-form audio. Ideally the audio is high-fidelity,
* recorded at a 16 kHz or greater sampling rate.</td>
* </tr>
* <tr>
* <td><code>medical_conversation</code></td>
* <td>Best for audio that originated from a conversation between a
* medical provider and patient.</td>
* </tr>
* <tr>
* <td><code>medical_dictation</code></td>
* <td>Best for audio that originated from dictation notes by a medical
* provider.</td>
* </tr>
* </table>
*
*
* string model = 13;
*
* @return The model.
*/
java.lang.String getModel();
/**
*
*
*
* Which model to select for the given request. Select the model
* best suited to your domain to get the best results. If a model is not
* explicitly specified, then we auto-select a model based on the parameters
* in the RecognitionConfig.
* <table>
* <tr>
* <td><b>Model</b></td>
* <td><b>Description</b></td>
* </tr>
* <tr>
* <td><code>latest_long</code></td>
* <td>Best for long form content like media or conversation.</td>
* </tr>
* <tr>
* <td><code>latest_short</code></td>
* <td>Best for short form content like commands or single shot directed
* speech.</td>
* </tr>
* <tr>
* <td><code>command_and_search</code></td>
* <td>Best for short queries such as voice commands or voice search.</td>
* </tr>
* <tr>
* <td><code>phone_call</code></td>
* <td>Best for audio that originated from a phone call (typically
* recorded at an 8 kHz sampling rate).</td>
* </tr>
* <tr>
* <td><code>video</code></td>
* <td>Best for audio that originated from video or includes multiple
* speakers. Ideally the audio is recorded at a 16 kHz or greater
* sampling rate. This is a premium model that costs more than the
* standard rate.</td>
* </tr>
* <tr>
* <td><code>default</code></td>
* <td>Best for audio that does not fit one of the other specific models,
* for example, long-form audio. Ideally the audio is high-fidelity,
* recorded at a 16 kHz or greater sampling rate.</td>
* </tr>
* <tr>
* <td><code>medical_conversation</code></td>
* <td>Best for audio that originated from a conversation between a
* medical provider and patient.</td>
* </tr>
* <tr>
* <td><code>medical_dictation</code></td>
* <td>Best for audio that originated from dictation notes by a medical
* provider.</td>
* </tr>
* </table>
*
*
* string model = 13;
*
* @return The bytes for model.
*/
com.google.protobuf.ByteString getModelBytes();
/**
*
*
*
* Set to true to use an enhanced model for speech recognition.
* If `use_enhanced` is set to true and the `model` field is not set, then
* an appropriate enhanced model is chosen if an enhanced model exists for
* the audio.
*
* If `use_enhanced` is true and an enhanced version of the specified model
* does not exist, then the speech is recognized using the standard version
* of the specified model.
*
*
* bool use_enhanced = 14;
*
* @return The useEnhanced.
*/
boolean getUseEnhanced();
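// Illustrative sketch, not part of the generated file: pinning a model and
// opting into an enhanced variant per the docs above. If no enhanced version
// of the chosen model exists, recognition falls back to the standard version:
//
//   com.google.cloud.speech.v1.RecognitionConfig config =
//       com.google.cloud.speech.v1.RecognitionConfig.newBuilder()
//           .setLanguageCode("en-US")
//           .setModel("phone_call")   // see the model table above
//           .setUseEnhanced(true)     // enhanced phone_call model, if available
//           .build();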
}