All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.ibm.watson.speech_to_text.v1.SpeechToText Maven / Gradle / Ivy

There is a newer version: 13.0.0
Show newest version
/*
 * (C) Copyright IBM Corp. 2016, 2020.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
 * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations under the License.
 */
package com.ibm.watson.speech_to_text.v1;

import com.google.gson.JsonObject;
import com.ibm.cloud.sdk.core.http.RequestBuilder;
import com.ibm.cloud.sdk.core.http.ResponseConverter;
import com.ibm.cloud.sdk.core.http.ServiceCall;
import com.ibm.cloud.sdk.core.security.Authenticator;
import com.ibm.cloud.sdk.core.security.ConfigBasedAuthenticatorFactory;
import com.ibm.cloud.sdk.core.service.BaseService;
import com.ibm.cloud.sdk.core.util.RequestUtils;
import com.ibm.cloud.sdk.core.util.ResponseConverterUtils;
import com.ibm.watson.common.SdkCommon;
import com.ibm.watson.speech_to_text.v1.model.AcousticModel;
import com.ibm.watson.speech_to_text.v1.model.AcousticModels;
import com.ibm.watson.speech_to_text.v1.model.AddAudioOptions;
import com.ibm.watson.speech_to_text.v1.model.AddCorpusOptions;
import com.ibm.watson.speech_to_text.v1.model.AddGrammarOptions;
import com.ibm.watson.speech_to_text.v1.model.AddWordOptions;
import com.ibm.watson.speech_to_text.v1.model.AddWordsOptions;
import com.ibm.watson.speech_to_text.v1.model.AudioListing;
import com.ibm.watson.speech_to_text.v1.model.AudioResources;
import com.ibm.watson.speech_to_text.v1.model.CheckJobOptions;
import com.ibm.watson.speech_to_text.v1.model.CheckJobsOptions;
import com.ibm.watson.speech_to_text.v1.model.Corpora;
import com.ibm.watson.speech_to_text.v1.model.Corpus;
import com.ibm.watson.speech_to_text.v1.model.CreateAcousticModelOptions;
import com.ibm.watson.speech_to_text.v1.model.CreateJobOptions;
import com.ibm.watson.speech_to_text.v1.model.CreateLanguageModelOptions;
import com.ibm.watson.speech_to_text.v1.model.DeleteAcousticModelOptions;
import com.ibm.watson.speech_to_text.v1.model.DeleteAudioOptions;
import com.ibm.watson.speech_to_text.v1.model.DeleteCorpusOptions;
import com.ibm.watson.speech_to_text.v1.model.DeleteGrammarOptions;
import com.ibm.watson.speech_to_text.v1.model.DeleteJobOptions;
import com.ibm.watson.speech_to_text.v1.model.DeleteLanguageModelOptions;
import com.ibm.watson.speech_to_text.v1.model.DeleteUserDataOptions;
import com.ibm.watson.speech_to_text.v1.model.DeleteWordOptions;
import com.ibm.watson.speech_to_text.v1.model.GetAcousticModelOptions;
import com.ibm.watson.speech_to_text.v1.model.GetAudioOptions;
import com.ibm.watson.speech_to_text.v1.model.GetCorpusOptions;
import com.ibm.watson.speech_to_text.v1.model.GetGrammarOptions;
import com.ibm.watson.speech_to_text.v1.model.GetLanguageModelOptions;
import com.ibm.watson.speech_to_text.v1.model.GetModelOptions;
import com.ibm.watson.speech_to_text.v1.model.GetWordOptions;
import com.ibm.watson.speech_to_text.v1.model.Grammar;
import com.ibm.watson.speech_to_text.v1.model.Grammars;
import com.ibm.watson.speech_to_text.v1.model.LanguageModel;
import com.ibm.watson.speech_to_text.v1.model.LanguageModels;
import com.ibm.watson.speech_to_text.v1.model.ListAcousticModelsOptions;
import com.ibm.watson.speech_to_text.v1.model.ListAudioOptions;
import com.ibm.watson.speech_to_text.v1.model.ListCorporaOptions;
import com.ibm.watson.speech_to_text.v1.model.ListGrammarsOptions;
import com.ibm.watson.speech_to_text.v1.model.ListLanguageModelsOptions;
import com.ibm.watson.speech_to_text.v1.model.ListModelsOptions;
import com.ibm.watson.speech_to_text.v1.model.ListWordsOptions;
import com.ibm.watson.speech_to_text.v1.model.RecognitionJob;
import com.ibm.watson.speech_to_text.v1.model.RecognitionJobs;
import com.ibm.watson.speech_to_text.v1.model.RecognizeOptions;
import com.ibm.watson.speech_to_text.v1.model.RegisterCallbackOptions;
import com.ibm.watson.speech_to_text.v1.model.RegisterStatus;
import com.ibm.watson.speech_to_text.v1.model.ResetAcousticModelOptions;
import com.ibm.watson.speech_to_text.v1.model.ResetLanguageModelOptions;
import com.ibm.watson.speech_to_text.v1.model.SpeechModel;
import com.ibm.watson.speech_to_text.v1.model.SpeechModels;
import com.ibm.watson.speech_to_text.v1.model.SpeechRecognitionResults;
import com.ibm.watson.speech_to_text.v1.model.TrainAcousticModelOptions;
import com.ibm.watson.speech_to_text.v1.model.TrainLanguageModelOptions;
import com.ibm.watson.speech_to_text.v1.model.TrainingResponse;
import com.ibm.watson.speech_to_text.v1.model.UnregisterCallbackOptions;
import com.ibm.watson.speech_to_text.v1.model.UpgradeAcousticModelOptions;
import com.ibm.watson.speech_to_text.v1.model.UpgradeLanguageModelOptions;
import com.ibm.watson.speech_to_text.v1.model.Word;
import com.ibm.watson.speech_to_text.v1.model.Words;
import com.ibm.watson.speech_to_text.v1.websocket.RecognizeCallback;
import com.ibm.watson.speech_to_text.v1.websocket.SpeechToTextWebSocketListener;
import java.util.Map;
import java.util.Map.Entry;
import okhttp3.HttpUrl;
import okhttp3.OkHttpClient;
import okhttp3.Request;
import okhttp3.WebSocket;

/**
 * The IBM® Speech to Text service provides APIs that use IBM's speech-recognition capabilities
 * to produce transcripts of spoken audio. The service can transcribe speech from various languages
 * and audio formats. In addition to basic transcription, the service can produce detailed
 * information about many different aspects of the audio. For most languages, the service supports
 * two sampling rates, broadband and narrowband. It returns all JSON response content in the UTF-8
 * character set.
 *
 * 

 * <p>For speech recognition, the service supports synchronous and asynchronous HTTP
 * Representational State Transfer (REST) interfaces. It also supports a WebSocket interface that
 * provides a full-duplex, low-latency communication channel: Clients send requests and audio to the
 * service and receive results over a single connection asynchronously.
 *

 * <p>The service also offers two customization interfaces. Use language model customization to
 * expand the vocabulary of a base model with domain-specific terminology. Use acoustic model
 * customization to adapt a base model for the acoustic characteristics of your audio. For language
 * model customization, the service also supports grammars. A grammar is a formal language
 * specification that lets you restrict the phrases that the service can recognize.
 *

Language model customization and acoustic model customization are generally available for * production use with all language models that are generally available. Grammars are beta * functionality for all language models that support language model customization. * * @version v1 * @see Speech to Text */ public class SpeechToText extends BaseService { private static final String DEFAULT_SERVICE_NAME = "speech_to_text"; private static final String DEFAULT_SERVICE_URL = "https://stream.watsonplatform.net/speech-to-text/api"; /** Constructs a new `SpeechToText` client using the DEFAULT_SERVICE_NAME. */ public SpeechToText() { this( DEFAULT_SERVICE_NAME, ConfigBasedAuthenticatorFactory.getAuthenticator(DEFAULT_SERVICE_NAME)); } /** * Constructs a new `SpeechToText` client with the DEFAULT_SERVICE_NAME and the specified * Authenticator. * * @param authenticator the Authenticator instance to be configured for this service */ public SpeechToText(Authenticator authenticator) { this(DEFAULT_SERVICE_NAME, authenticator); } /** * Constructs a new `SpeechToText` client with the specified serviceName. * * @param serviceName The name of the service to configure. */ public SpeechToText(String serviceName) { this(serviceName, ConfigBasedAuthenticatorFactory.getAuthenticator(serviceName)); } /** * Constructs a new `SpeechToText` client with the specified Authenticator and serviceName. * * @param serviceName The name of the service to configure. * @param authenticator the Authenticator instance to be configured for this service */ public SpeechToText(String serviceName, Authenticator authenticator) { super(serviceName, authenticator); setServiceUrl(DEFAULT_SERVICE_URL); this.configureService(serviceName); } /** * List models. * *

   * <p>Lists all language models that are available for use with the service. The information
   * includes the name of the model and its minimum sampling rate in Hertz, among other things.
   *

**See also:** [Languages and * models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-models#models). * * @param listModelsOptions the {@link ListModelsOptions} containing the options for the call * @return a {@link ServiceCall} with a response type of {@link SpeechModels} */ public ServiceCall listModels(ListModelsOptions listModelsOptions) { String[] pathSegments = {"v1/models"}; RequestBuilder builder = RequestBuilder.get(RequestBuilder.constructHttpUrl(getServiceUrl(), pathSegments)); Map sdkHeaders = SdkCommon.getSdkHeaders("speech_to_text", "v1", "listModels"); for (Entry header : sdkHeaders.entrySet()) { builder.header(header.getKey(), header.getValue()); } builder.header("Accept", "application/json"); if (listModelsOptions != null) {} ResponseConverter responseConverter = ResponseConverterUtils.getValue( new com.google.gson.reflect.TypeToken() {}.getType()); return createServiceCall(builder.build(), responseConverter); } /** * List models. * *

   * <p>Lists all language models that are available for use with the service. The information
   * includes the name of the model and its minimum sampling rate in Hertz, among other things.
   *

**See also:** [Languages and * models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-models#models). * * @return a {@link ServiceCall} with a response type of {@link SpeechModels} */ public ServiceCall listModels() { return listModels(null); } /** * Get a model. * *

   * <p>Gets information for a single specified language model that is available for use with the
   * service. The information includes the name of the model and its minimum sampling rate in Hertz,
   * among other things.
   *

**See also:** [Languages and * models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-models#models). * * @param getModelOptions the {@link GetModelOptions} containing the options for the call * @return a {@link ServiceCall} with a response type of {@link SpeechModel} */ public ServiceCall getModel(GetModelOptions getModelOptions) { com.ibm.cloud.sdk.core.util.Validator.notNull( getModelOptions, "getModelOptions cannot be null"); String[] pathSegments = {"v1/models"}; String[] pathParameters = {getModelOptions.modelId()}; RequestBuilder builder = RequestBuilder.get( RequestBuilder.constructHttpUrl(getServiceUrl(), pathSegments, pathParameters)); Map sdkHeaders = SdkCommon.getSdkHeaders("speech_to_text", "v1", "getModel"); for (Entry header : sdkHeaders.entrySet()) { builder.header(header.getKey(), header.getValue()); } builder.header("Accept", "application/json"); ResponseConverter responseConverter = ResponseConverterUtils.getValue( new com.google.gson.reflect.TypeToken() {}.getType()); return createServiceCall(builder.build(), responseConverter); } /** * Recognize audio. * *

Sends audio and returns transcription results for a recognition request. You can pass a * maximum of 100 MB and a minimum of 100 bytes of audio with a request. The service automatically * detects the endianness of the incoming audio and, for audio that includes multiple channels, * downmixes the audio to one-channel mono during transcoding. The method returns only final * results; to enable interim results, use the WebSocket API. (With the `curl` command, use the * `--data-binary` option to upload the file for the request.) * *

**See also:** [Making a basic HTTP * request](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-http#HTTP-basic). * *

### Streaming mode * *

For requests to transcribe live audio as it becomes available, you must set the * `Transfer-Encoding` header to `chunked` to use streaming mode. In streaming mode, the service * closes the connection (status code 408) if it does not receive at least 15 seconds of audio * (including silence) in any 30-second period. The service also closes the connection (status * code 400) if it detects no speech for `inactivity_timeout` seconds of streaming audio; use the * `inactivity_timeout` parameter to change the default of 30 seconds. * *

**See also:** * [Audio * transmission](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#transmission) * * [Timeouts](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#timeouts) * *

### Audio formats (content types) * *

The service accepts audio in the following formats (MIME types). * For formats that are * labeled **Required**, you must use the `Content-Type` header with the request to specify the * format of the audio. * For all other formats, you can omit the `Content-Type` header or specify * `application/octet-stream` with the header to have the service automatically detect the format * of the audio. (With the `curl` command, you can specify either `"Content-Type:"` or * `"Content-Type: application/octet-stream"`.) * *

Where indicated, the format that you specify must include the sampling rate and can * optionally include the number of channels and the endianness of the audio. * `audio/alaw` * (**Required.** Specify the sampling rate (`rate`) of the audio.) * `audio/basic` (**Required.** * Use only with narrowband models.) * `audio/flac` * `audio/g729` (Use only with narrowband * models.) * `audio/l16` (**Required.** Specify the sampling rate (`rate`) and optionally the * number of channels (`channels`) and endianness (`endianness`) of the audio.) * `audio/mp3` * * `audio/mpeg` * `audio/mulaw` (**Required.** Specify the sampling rate (`rate`) of the audio.) * * `audio/ogg` (The service automatically detects the codec of the input audio.) * * `audio/ogg;codecs=opus` * `audio/ogg;codecs=vorbis` * `audio/wav` (Provide audio with a maximum * of nine channels.) * `audio/webm` (The service automatically detects the codec of the input * audio.) * `audio/webm;codecs=opus` * `audio/webm;codecs=vorbis` * *

The sampling rate of the audio must match the sampling rate of the model for the recognition * request: for broadband models, at least 16 kHz; for narrowband models, at least 8 kHz. If the * sampling rate of the audio is higher than the minimum required rate, the service down-samples * the audio to the appropriate rate. If the sampling rate of the audio is lower than the minimum * required rate, the request fails. * *

**See also:** [Audio * formats](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-audio-formats#audio-formats). * *

### Multipart speech recognition * *

**Note:** The Watson SDKs do not support multipart speech recognition. * *

The HTTP `POST` method of the service also supports multipart speech recognition. With * multipart requests, you pass all audio data as multipart form data. You specify some parameters * as request headers and query parameters, but you pass JSON metadata as form data to control * most aspects of the transcription. You can use multipart recognition to pass multiple audio * files with a single request. * *

Use the multipart approach with browsers for which JavaScript is disabled or when the * parameters used with the request are greater than the 8 KB limit imposed by most HTTP servers * and proxies. You can encounter this limit, for example, if you want to spot a very large number * of keywords. * *

**See also:** [Making a multipart HTTP * request](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-http#HTTP-multi). * * @param recognizeOptions the {@link RecognizeOptions} containing the options for the call * @return a {@link ServiceCall} with a response type of {@link SpeechRecognitionResults} */ public ServiceCall recognize(RecognizeOptions recognizeOptions) { com.ibm.cloud.sdk.core.util.Validator.notNull( recognizeOptions, "recognizeOptions cannot be null"); String[] pathSegments = {"v1/recognize"}; RequestBuilder builder = RequestBuilder.post(RequestBuilder.constructHttpUrl(getServiceUrl(), pathSegments)); Map sdkHeaders = SdkCommon.getSdkHeaders("speech_to_text", "v1", "recognize"); for (Entry header : sdkHeaders.entrySet()) { builder.header(header.getKey(), header.getValue()); } builder.header("Accept", "application/json"); if (recognizeOptions.contentType() != null) { builder.header("Content-Type", recognizeOptions.contentType()); } if (recognizeOptions.model() != null) { builder.query("model", recognizeOptions.model()); } if (recognizeOptions.languageCustomizationId() != null) { builder.query("language_customization_id", recognizeOptions.languageCustomizationId()); } if (recognizeOptions.acousticCustomizationId() != null) { builder.query("acoustic_customization_id", recognizeOptions.acousticCustomizationId()); } if (recognizeOptions.baseModelVersion() != null) { builder.query("base_model_version", recognizeOptions.baseModelVersion()); } if (recognizeOptions.customizationWeight() != null) { builder.query("customization_weight", String.valueOf(recognizeOptions.customizationWeight())); } if (recognizeOptions.inactivityTimeout() != null) { builder.query("inactivity_timeout", String.valueOf(recognizeOptions.inactivityTimeout())); } if (recognizeOptions.keywords() != null) { builder.query("keywords", RequestUtils.join(recognizeOptions.keywords(), ",")); } if (recognizeOptions.keywordsThreshold() != null) { builder.query("keywords_threshold", 
String.valueOf(recognizeOptions.keywordsThreshold())); } if (recognizeOptions.maxAlternatives() != null) { builder.query("max_alternatives", String.valueOf(recognizeOptions.maxAlternatives())); } if (recognizeOptions.wordAlternativesThreshold() != null) { builder.query( "word_alternatives_threshold", String.valueOf(recognizeOptions.wordAlternativesThreshold())); } if (recognizeOptions.wordConfidence() != null) { builder.query("word_confidence", String.valueOf(recognizeOptions.wordConfidence())); } if (recognizeOptions.timestamps() != null) { builder.query("timestamps", String.valueOf(recognizeOptions.timestamps())); } if (recognizeOptions.profanityFilter() != null) { builder.query("profanity_filter", String.valueOf(recognizeOptions.profanityFilter())); } if (recognizeOptions.smartFormatting() != null) { builder.query("smart_formatting", String.valueOf(recognizeOptions.smartFormatting())); } if (recognizeOptions.speakerLabels() != null) { builder.query("speaker_labels", String.valueOf(recognizeOptions.speakerLabels())); } if (recognizeOptions.customizationId() != null) { builder.query("customization_id", recognizeOptions.customizationId()); } if (recognizeOptions.grammarName() != null) { builder.query("grammar_name", recognizeOptions.grammarName()); } if (recognizeOptions.redaction() != null) { builder.query("redaction", String.valueOf(recognizeOptions.redaction())); } if (recognizeOptions.audioMetrics() != null) { builder.query("audio_metrics", String.valueOf(recognizeOptions.audioMetrics())); } if (recognizeOptions.endOfPhraseSilenceTime() != null) { builder.query( "end_of_phrase_silence_time", String.valueOf(recognizeOptions.endOfPhraseSilenceTime())); } if (recognizeOptions.splitTranscriptAtPhraseEnd() != null) { builder.query( "split_transcript_at_phrase_end", String.valueOf(recognizeOptions.splitTranscriptAtPhraseEnd())); } if (recognizeOptions.speechDetectorSensitivity() != null) { builder.query( "speech_detector_sensitivity", 
String.valueOf(recognizeOptions.speechDetectorSensitivity())); } if (recognizeOptions.backgroundAudioSuppression() != null) { builder.query( "background_audio_suppression", String.valueOf(recognizeOptions.backgroundAudioSuppression())); } builder.bodyContent(recognizeOptions.contentType(), null, null, recognizeOptions.audio()); ResponseConverter responseConverter = ResponseConverterUtils.getValue( new com.google.gson.reflect.TypeToken() {}.getType()); return createServiceCall(builder.build(), responseConverter); } /** * Sends audio and returns transcription results for recognition requests over a WebSocket * connection. Requests and responses are enabled over a single TCP connection that abstracts much * of the complexity of the request to offer efficient implementation, low latency, high * throughput, and an asynchronous response. By default, only final results are returned for any * request; to enable interim results, set the interimResults parameter to true. * *

The service imposes a data size limit of 100 MB per utterance (per recognition request). You * can send multiple utterances over a single WebSocket connection. The service automatically * detects the endianness of the incoming audio and, for audio that includes multiple channels, * downmixes the audio to one-channel mono during transcoding. (For the audio/l16 format, you can * specify the endianness.) * * @param recognizeOptions the recognize options * @param callback the {@link RecognizeCallback} instance where results will be sent * @return the {@link WebSocket} */ public WebSocket recognizeUsingWebSocket( RecognizeOptions recognizeOptions, RecognizeCallback callback) { com.ibm.cloud.sdk.core.util.Validator.notNull( recognizeOptions, "recognizeOptions cannot be null"); com.ibm.cloud.sdk.core.util.Validator.notNull(recognizeOptions.audio(), "audio cannot be null"); com.ibm.cloud.sdk.core.util.Validator.notNull(callback, "callback cannot be null"); HttpUrl.Builder urlBuilder = HttpUrl.parse(getServiceUrl() + "/v1/recognize").newBuilder(); if (recognizeOptions.model() != null) { urlBuilder.addQueryParameter("model", recognizeOptions.model()); } if (recognizeOptions.customizationId() != null) { urlBuilder.addQueryParameter("customization_id", recognizeOptions.customizationId()); } if (recognizeOptions.languageCustomizationId() != null) { urlBuilder.addQueryParameter( "language_customization_id", recognizeOptions.languageCustomizationId()); } if (recognizeOptions.acousticCustomizationId() != null) { urlBuilder.addQueryParameter( "acoustic_customization_id", recognizeOptions.acousticCustomizationId()); } if (recognizeOptions.baseModelVersion() != null) { urlBuilder.addQueryParameter("base_model_version", recognizeOptions.baseModelVersion()); } String url = urlBuilder.toString().replace("https://", "wss://"); Request.Builder builder = new Request.Builder().url(url); setAuthentication(builder); setDefaultHeaders(builder); OkHttpClient client = configureHttpClient(); 
return client.newWebSocket( builder.build(), new SpeechToTextWebSocketListener(recognizeOptions, callback)); } /** * Register a callback. * *

Registers a callback URL with the service for use with subsequent asynchronous recognition * requests. The service attempts to register, or white-list, the callback URL if it is not * already registered by sending a `GET` request to the callback URL. The service passes a random * alphanumeric challenge string via the `challenge_string` parameter of the request. The request * includes an `Accept` header that specifies `text/plain` as the required response type. * *

To be registered successfully, the callback URL must respond to the `GET` request from the * service. The response must send status code 200 and must include the challenge string in its * body. Set the `Content-Type` response header to `text/plain`. Upon receiving this response, the * service responds to the original registration request with response code 201. * *

The service sends only a single `GET` request to the callback URL. If the service does not * receive a reply with a response code of 200 and a body that echoes the challenge string sent by * the service within five seconds, it does not white-list the URL; it instead sends status code * 400 in response to the **Register a callback** request. If the requested callback URL is * already white-listed, the service responds to the initial registration request with response * code 200. * *

If you specify a user secret with the request, the service uses it as a key to calculate an * HMAC-SHA1 signature of the challenge string in its response to the `POST` request. It sends * this signature in the `X-Callback-Signature` header of its `GET` request to the URL during * registration. It also uses the secret to calculate a signature over the payload of every * callback notification that uses the URL. The signature provides authentication and data * integrity for HTTP communications. * *

After you successfully register a callback URL, you can use it with an indefinite number of * recognition requests. You can register a maximum of 20 callback URLS in a one-hour span of * time. * *

**See also:** [Registering a callback * URL](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-async#register). * * @param registerCallbackOptions the {@link RegisterCallbackOptions} containing the options for * the call * @return a {@link ServiceCall} with a response type of {@link RegisterStatus} */ public ServiceCall registerCallback( RegisterCallbackOptions registerCallbackOptions) { com.ibm.cloud.sdk.core.util.Validator.notNull( registerCallbackOptions, "registerCallbackOptions cannot be null"); String[] pathSegments = {"v1/register_callback"}; RequestBuilder builder = RequestBuilder.post(RequestBuilder.constructHttpUrl(getServiceUrl(), pathSegments)); Map sdkHeaders = SdkCommon.getSdkHeaders("speech_to_text", "v1", "registerCallback"); for (Entry header : sdkHeaders.entrySet()) { builder.header(header.getKey(), header.getValue()); } builder.header("Accept", "application/json"); builder.query("callback_url", registerCallbackOptions.callbackUrl()); if (registerCallbackOptions.userSecret() != null) { builder.query("user_secret", registerCallbackOptions.userSecret()); } ResponseConverter responseConverter = ResponseConverterUtils.getValue( new com.google.gson.reflect.TypeToken() {}.getType()); return createServiceCall(builder.build(), responseConverter); } /** * Unregister a callback. * *

Unregisters a callback URL that was previously white-listed with a **Register a callback** * request for use with the asynchronous interface. Once unregistered, the URL can no longer be * used with asynchronous recognition requests. * *

**See also:** [Unregistering a callback * URL](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-async#unregister). * * @param unregisterCallbackOptions the {@link UnregisterCallbackOptions} containing the options * for the call * @return a {@link ServiceCall} with a response type of Void */ public ServiceCall unregisterCallback(UnregisterCallbackOptions unregisterCallbackOptions) { com.ibm.cloud.sdk.core.util.Validator.notNull( unregisterCallbackOptions, "unregisterCallbackOptions cannot be null"); String[] pathSegments = {"v1/unregister_callback"}; RequestBuilder builder = RequestBuilder.post(RequestBuilder.constructHttpUrl(getServiceUrl(), pathSegments)); Map sdkHeaders = SdkCommon.getSdkHeaders("speech_to_text", "v1", "unregisterCallback"); for (Entry header : sdkHeaders.entrySet()) { builder.header(header.getKey(), header.getValue()); } builder.query("callback_url", unregisterCallbackOptions.callbackUrl()); ResponseConverter responseConverter = ResponseConverterUtils.getVoid(); return createServiceCall(builder.build(), responseConverter); } /** * Create a job. * *

Creates a job for a new asynchronous recognition request. The job is owned by the instance * of the service whose credentials are used to create it. How you learn the status and results of * a job depends on the parameters you include with the job creation request: * By callback * notification: Include the `callback_url` parameter to specify a URL to which the service is to * send callback notifications when the status of the job changes. Optionally, you can also * include the `events` and `user_token` parameters to subscribe to specific events and to specify * a string that is to be included with each notification for the job. * By polling the service: * Omit the `callback_url`, `events`, and `user_token` parameters. You must then use the **Check * jobs** or **Check a job** methods to check the status of the job, using the latter to retrieve * the results when the job is complete. * *

The two approaches are not mutually exclusive. You can poll the service for job status or * obtain results from the service manually even if you include a callback URL. In both cases, you * can include the `results_ttl` parameter to specify how long the results are to remain available * after the job is complete. Using the HTTPS **Check a job** method to retrieve results is more * secure than receiving them via callback notification over HTTP because it provides * confidentiality in addition to authentication and data integrity. * *

The method supports the same basic parameters as other HTTP and WebSocket recognition * requests. It also supports the following parameters specific to the asynchronous interface: * * `callback_url` * `events` * `user_token` * `results_ttl` * *

You can pass a maximum of 1 GB and a minimum of 100 bytes of audio with a request. The * service automatically detects the endianness of the incoming audio and, for audio that includes * multiple channels, downmixes the audio to one-channel mono during transcoding. The method * returns only final results; to enable interim results, use the WebSocket API. (With the `curl` * command, use the `--data-binary` option to upload the file for the request.) * *

**See also:** [Creating a * job](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-async#create). * *

### Streaming mode * *

For requests to transcribe live audio as it becomes available, you must set the * `Transfer-Encoding` header to `chunked` to use streaming mode. In streaming mode, the service * closes the connection (status code 408) if it does not receive at least 15 seconds of audio * (including silence) in any 30-second period. The service also closes the connection (status * code 400) if it detects no speech for `inactivity_timeout` seconds of streaming audio; use the * `inactivity_timeout` parameter to change the default of 30 seconds. * *

**See also:** * [Audio * transmission](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#transmission) * * [Timeouts](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#timeouts) * *

### Audio formats (content types) * *

The service accepts audio in the following formats (MIME types). * For formats that are * labeled **Required**, you must use the `Content-Type` header with the request to specify the * format of the audio. * For all other formats, you can omit the `Content-Type` header or specify * `application/octet-stream` with the header to have the service automatically detect the format * of the audio. (With the `curl` command, you can specify either `"Content-Type:"` or * `"Content-Type: application/octet-stream"`.) * *

Where indicated, the format that you specify must include the sampling rate and can * optionally include the number of channels and the endianness of the audio. * `audio/alaw` * (**Required.** Specify the sampling rate (`rate`) of the audio.) * `audio/basic` (**Required.** * Use only with narrowband models.) * `audio/flac` * `audio/g729` (Use only with narrowband * models.) * `audio/l16` (**Required.** Specify the sampling rate (`rate`) and optionally the * number of channels (`channels`) and endianness (`endianness`) of the audio.) * `audio/mp3` * * `audio/mpeg` * `audio/mulaw` (**Required.** Specify the sampling rate (`rate`) of the audio.) * * `audio/ogg` (The service automatically detects the codec of the input audio.) * * `audio/ogg;codecs=opus` * `audio/ogg;codecs=vorbis` * `audio/wav` (Provide audio with a maximum * of nine channels.) * `audio/webm` (The service automatically detects the codec of the input * audio.) * `audio/webm;codecs=opus` * `audio/webm;codecs=vorbis` * *

The sampling rate of the audio must match the sampling rate of the model for the recognition * request: for broadband models, at least 16 kHz; for narrowband models, at least 8 kHz. If the * sampling rate of the audio is higher than the minimum required rate, the service down-samples * the audio to the appropriate rate. If the sampling rate of the audio is lower than the minimum * required rate, the request fails. * *

**See also:** [Audio * formats](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-audio-formats#audio-formats). * * @param createJobOptions the {@link CreateJobOptions} containing the options for the call * @return a {@link ServiceCall} with a response type of {@link RecognitionJob} */ public ServiceCall createJob(CreateJobOptions createJobOptions) { com.ibm.cloud.sdk.core.util.Validator.notNull( createJobOptions, "createJobOptions cannot be null"); String[] pathSegments = {"v1/recognitions"}; RequestBuilder builder = RequestBuilder.post(RequestBuilder.constructHttpUrl(getServiceUrl(), pathSegments)); Map sdkHeaders = SdkCommon.getSdkHeaders("speech_to_text", "v1", "createJob"); for (Entry header : sdkHeaders.entrySet()) { builder.header(header.getKey(), header.getValue()); } builder.header("Accept", "application/json"); if (createJobOptions.contentType() != null) { builder.header("Content-Type", createJobOptions.contentType()); } if (createJobOptions.model() != null) { builder.query("model", createJobOptions.model()); } if (createJobOptions.callbackUrl() != null) { builder.query("callback_url", createJobOptions.callbackUrl()); } if (createJobOptions.events() != null) { builder.query("events", createJobOptions.events()); } if (createJobOptions.userToken() != null) { builder.query("user_token", createJobOptions.userToken()); } if (createJobOptions.resultsTtl() != null) { builder.query("results_ttl", String.valueOf(createJobOptions.resultsTtl())); } if (createJobOptions.languageCustomizationId() != null) { builder.query("language_customization_id", createJobOptions.languageCustomizationId()); } if (createJobOptions.acousticCustomizationId() != null) { builder.query("acoustic_customization_id", createJobOptions.acousticCustomizationId()); } if (createJobOptions.baseModelVersion() != null) { builder.query("base_model_version", createJobOptions.baseModelVersion()); } if (createJobOptions.customizationWeight() != null) { builder.query("customization_weight", 
String.valueOf(createJobOptions.customizationWeight())); } if (createJobOptions.inactivityTimeout() != null) { builder.query("inactivity_timeout", String.valueOf(createJobOptions.inactivityTimeout())); } if (createJobOptions.keywords() != null) { builder.query("keywords", RequestUtils.join(createJobOptions.keywords(), ",")); } if (createJobOptions.keywordsThreshold() != null) { builder.query("keywords_threshold", String.valueOf(createJobOptions.keywordsThreshold())); } if (createJobOptions.maxAlternatives() != null) { builder.query("max_alternatives", String.valueOf(createJobOptions.maxAlternatives())); } if (createJobOptions.wordAlternativesThreshold() != null) { builder.query( "word_alternatives_threshold", String.valueOf(createJobOptions.wordAlternativesThreshold())); } if (createJobOptions.wordConfidence() != null) { builder.query("word_confidence", String.valueOf(createJobOptions.wordConfidence())); } if (createJobOptions.timestamps() != null) { builder.query("timestamps", String.valueOf(createJobOptions.timestamps())); } if (createJobOptions.profanityFilter() != null) { builder.query("profanity_filter", String.valueOf(createJobOptions.profanityFilter())); } if (createJobOptions.smartFormatting() != null) { builder.query("smart_formatting", String.valueOf(createJobOptions.smartFormatting())); } if (createJobOptions.speakerLabels() != null) { builder.query("speaker_labels", String.valueOf(createJobOptions.speakerLabels())); } if (createJobOptions.customizationId() != null) { builder.query("customization_id", createJobOptions.customizationId()); } if (createJobOptions.grammarName() != null) { builder.query("grammar_name", createJobOptions.grammarName()); } if (createJobOptions.redaction() != null) { builder.query("redaction", String.valueOf(createJobOptions.redaction())); } if (createJobOptions.processingMetrics() != null) { builder.query("processing_metrics", String.valueOf(createJobOptions.processingMetrics())); } if 
(createJobOptions.processingMetricsInterval() != null) { builder.query( "processing_metrics_interval", String.valueOf(createJobOptions.processingMetricsInterval())); } if (createJobOptions.audioMetrics() != null) { builder.query("audio_metrics", String.valueOf(createJobOptions.audioMetrics())); } if (createJobOptions.endOfPhraseSilenceTime() != null) { builder.query( "end_of_phrase_silence_time", String.valueOf(createJobOptions.endOfPhraseSilenceTime())); } if (createJobOptions.splitTranscriptAtPhraseEnd() != null) { builder.query( "split_transcript_at_phrase_end", String.valueOf(createJobOptions.splitTranscriptAtPhraseEnd())); } if (createJobOptions.speechDetectorSensitivity() != null) { builder.query( "speech_detector_sensitivity", String.valueOf(createJobOptions.speechDetectorSensitivity())); } if (createJobOptions.backgroundAudioSuppression() != null) { builder.query( "background_audio_suppression", String.valueOf(createJobOptions.backgroundAudioSuppression())); } builder.bodyContent(createJobOptions.contentType(), null, null, createJobOptions.audio()); ResponseConverter responseConverter = ResponseConverterUtils.getValue( new com.google.gson.reflect.TypeToken() {}.getType()); return createServiceCall(builder.build(), responseConverter); } /** * Check jobs. * *

Returns the ID and status of the latest 100 outstanding jobs associated with the credentials * with which it is called. The method also returns the creation and update times of each job, * and, if a job was created with a callback URL and a user token, the user token for the job. To * obtain the results for a job whose status is `completed` or not one of the latest 100 * outstanding jobs, use the **Check a job** method. A job and its results remain available until * you delete them with the **Delete a job** method or until the job's time to live expires, * whichever comes first. * *

**See also:** [Checking the status of the latest * jobs](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-async#jobs). * * @param checkJobsOptions the {@link CheckJobsOptions} containing the options for the call * @return a {@link ServiceCall} with a response type of {@link RecognitionJobs} */ public ServiceCall checkJobs(CheckJobsOptions checkJobsOptions) { String[] pathSegments = {"v1/recognitions"}; RequestBuilder builder = RequestBuilder.get(RequestBuilder.constructHttpUrl(getServiceUrl(), pathSegments)); Map sdkHeaders = SdkCommon.getSdkHeaders("speech_to_text", "v1", "checkJobs"); for (Entry header : sdkHeaders.entrySet()) { builder.header(header.getKey(), header.getValue()); } builder.header("Accept", "application/json"); if (checkJobsOptions != null) {} ResponseConverter responseConverter = ResponseConverterUtils.getValue( new com.google.gson.reflect.TypeToken() {}.getType()); return createServiceCall(builder.build(), responseConverter); } /** * Check jobs. * *

Returns the ID and status of the latest 100 outstanding jobs associated with the credentials * with which it is called. The method also returns the creation and update times of each job, * and, if a job was created with a callback URL and a user token, the user token for the job. To * obtain the results for a job whose status is `completed` or not one of the latest 100 * outstanding jobs, use the **Check a job** method. A job and its results remain available until * you delete them with the **Delete a job** method or until the job's time to live expires, * whichever comes first. * *

**See also:** [Checking the status of the latest * jobs](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-async#jobs). * * @return a {@link ServiceCall} with a response type of {@link RecognitionJobs} */ public ServiceCall checkJobs() { return checkJobs(null); } /** * Check a job. * *

Returns information about the specified job. The response always includes the status of the * job and its creation and update times. If the status is `completed`, the response includes the * results of the recognition request. You must use credentials for the instance of the service * that owns a job to list information about it. * *

You can use the method to retrieve the results of any job, regardless of whether it was * submitted with a callback URL and the `recognitions.completed_with_results` event, and you can * retrieve the results multiple times for as long as they remain available. Use the **Check * jobs** method to request information about the most recent jobs associated with the calling * credentials. * *

**See also:** [Checking the status and retrieving the results of a * job](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-async#job). * * @param checkJobOptions the {@link CheckJobOptions} containing the options for the call * @return a {@link ServiceCall} with a response type of {@link RecognitionJob} */ public ServiceCall checkJob(CheckJobOptions checkJobOptions) { com.ibm.cloud.sdk.core.util.Validator.notNull( checkJobOptions, "checkJobOptions cannot be null"); String[] pathSegments = {"v1/recognitions"}; String[] pathParameters = {checkJobOptions.id()}; RequestBuilder builder = RequestBuilder.get( RequestBuilder.constructHttpUrl(getServiceUrl(), pathSegments, pathParameters)); Map sdkHeaders = SdkCommon.getSdkHeaders("speech_to_text", "v1", "checkJob"); for (Entry header : sdkHeaders.entrySet()) { builder.header(header.getKey(), header.getValue()); } builder.header("Accept", "application/json"); ResponseConverter responseConverter = ResponseConverterUtils.getValue( new com.google.gson.reflect.TypeToken() {}.getType()); return createServiceCall(builder.build(), responseConverter); } /** * Delete a job. * *

Deletes the specified job. You cannot delete a job that the service is actively processing. * Once you delete a job, its results are no longer available. The service automatically deletes a * job and its results when the time to live for the results expires. You must use credentials for * the instance of the service that owns a job to delete it. * *

**See also:** [Deleting a * job](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-async#delete-async). * * @param deleteJobOptions the {@link DeleteJobOptions} containing the options for the call * @return a {@link ServiceCall} with a response type of Void */ public ServiceCall deleteJob(DeleteJobOptions deleteJobOptions) { com.ibm.cloud.sdk.core.util.Validator.notNull( deleteJobOptions, "deleteJobOptions cannot be null"); String[] pathSegments = {"v1/recognitions"}; String[] pathParameters = {deleteJobOptions.id()}; RequestBuilder builder = RequestBuilder.delete( RequestBuilder.constructHttpUrl(getServiceUrl(), pathSegments, pathParameters)); Map sdkHeaders = SdkCommon.getSdkHeaders("speech_to_text", "v1", "deleteJob"); for (Entry header : sdkHeaders.entrySet()) { builder.header(header.getKey(), header.getValue()); } ResponseConverter responseConverter = ResponseConverterUtils.getVoid(); return createServiceCall(builder.build(), responseConverter); } /** * Create a custom language model. * *

Creates a new custom language model for a specified base model. The custom language model * can be used only with the base model for which it is created. The model is owned by the * instance of the service whose credentials are used to create it. * *

You can create a maximum of 1024 custom language models per owning credentials. The service * returns an error if you attempt to create more than 1024 models. You do not lose any models, * but you cannot create any more until your model count is below the limit. * *

**See also:** [Create a custom language * model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-languageCreate#createModel-language). * * @param createLanguageModelOptions the {@link CreateLanguageModelOptions} containing the options * for the call * @return a {@link ServiceCall} with a response type of {@link LanguageModel} */ public ServiceCall createLanguageModel( CreateLanguageModelOptions createLanguageModelOptions) { com.ibm.cloud.sdk.core.util.Validator.notNull( createLanguageModelOptions, "createLanguageModelOptions cannot be null"); String[] pathSegments = {"v1/customizations"}; RequestBuilder builder = RequestBuilder.post(RequestBuilder.constructHttpUrl(getServiceUrl(), pathSegments)); Map sdkHeaders = SdkCommon.getSdkHeaders("speech_to_text", "v1", "createLanguageModel"); for (Entry header : sdkHeaders.entrySet()) { builder.header(header.getKey(), header.getValue()); } builder.header("Accept", "application/json"); final JsonObject contentJson = new JsonObject(); contentJson.addProperty("name", createLanguageModelOptions.name()); contentJson.addProperty("base_model_name", createLanguageModelOptions.baseModelName()); if (createLanguageModelOptions.dialect() != null) { contentJson.addProperty("dialect", createLanguageModelOptions.dialect()); } if (createLanguageModelOptions.description() != null) { contentJson.addProperty("description", createLanguageModelOptions.description()); } builder.bodyJson(contentJson); ResponseConverter responseConverter = ResponseConverterUtils.getValue( new com.google.gson.reflect.TypeToken() {}.getType()); return createServiceCall(builder.build(), responseConverter); } /** * List custom language models. * *

Lists information about all custom language models that are owned by an instance of the * service. Use the `language` parameter to see all custom language models for the specified * language. Omit the parameter to see all custom language models for all languages. You must use * credentials for the instance of the service that owns a model to list information about it. * *

**See also:** [Listing custom language * models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-manageLanguageModels#listModels-language). * * @param listLanguageModelsOptions the {@link ListLanguageModelsOptions} containing the options * for the call * @return a {@link ServiceCall} with a response type of {@link LanguageModels} */ public ServiceCall listLanguageModels( ListLanguageModelsOptions listLanguageModelsOptions) { String[] pathSegments = {"v1/customizations"}; RequestBuilder builder = RequestBuilder.get(RequestBuilder.constructHttpUrl(getServiceUrl(), pathSegments)); Map sdkHeaders = SdkCommon.getSdkHeaders("speech_to_text", "v1", "listLanguageModels"); for (Entry header : sdkHeaders.entrySet()) { builder.header(header.getKey(), header.getValue()); } builder.header("Accept", "application/json"); if (listLanguageModelsOptions != null) { if (listLanguageModelsOptions.language() != null) { builder.query("language", listLanguageModelsOptions.language()); } } ResponseConverter responseConverter = ResponseConverterUtils.getValue( new com.google.gson.reflect.TypeToken() {}.getType()); return createServiceCall(builder.build(), responseConverter); } /** * List custom language models. * *

Lists information about all custom language models that are owned by an instance of the * service. Use the `language` parameter to see all custom language models for the specified * language. Omit the parameter to see all custom language models for all languages. You must use * credentials for the instance of the service that owns a model to list information about it. * *

**See also:** [Listing custom language * models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-manageLanguageModels#listModels-language). * * @return a {@link ServiceCall} with a response type of {@link LanguageModels} */ public ServiceCall listLanguageModels() { return listLanguageModels(null); } /** * Get a custom language model. * *

Gets information about a specified custom language model. You must use credentials for the * instance of the service that owns a model to list information about it. * *

**See also:** [Listing custom language * models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-manageLanguageModels#listModels-language). * * @param getLanguageModelOptions the {@link GetLanguageModelOptions} containing the options for * the call * @return a {@link ServiceCall} with a response type of {@link LanguageModel} */ public ServiceCall getLanguageModel( GetLanguageModelOptions getLanguageModelOptions) { com.ibm.cloud.sdk.core.util.Validator.notNull( getLanguageModelOptions, "getLanguageModelOptions cannot be null"); String[] pathSegments = {"v1/customizations"}; String[] pathParameters = {getLanguageModelOptions.customizationId()}; RequestBuilder builder = RequestBuilder.get( RequestBuilder.constructHttpUrl(getServiceUrl(), pathSegments, pathParameters)); Map sdkHeaders = SdkCommon.getSdkHeaders("speech_to_text", "v1", "getLanguageModel"); for (Entry header : sdkHeaders.entrySet()) { builder.header(header.getKey(), header.getValue()); } builder.header("Accept", "application/json"); ResponseConverter responseConverter = ResponseConverterUtils.getValue( new com.google.gson.reflect.TypeToken() {}.getType()); return createServiceCall(builder.build(), responseConverter); } /** * Delete a custom language model. * *

Deletes an existing custom language model. The custom model cannot be deleted if another * request, such as adding a corpus or grammar to the model, is currently being processed. You * must use credentials for the instance of the service that owns a model to delete it. * *

**See also:** [Deleting a custom language * model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-manageLanguageModels#deleteModel-language). * * @param deleteLanguageModelOptions the {@link DeleteLanguageModelOptions} containing the options * for the call * @return a {@link ServiceCall} with a response type of Void */ public ServiceCall deleteLanguageModel( DeleteLanguageModelOptions deleteLanguageModelOptions) { com.ibm.cloud.sdk.core.util.Validator.notNull( deleteLanguageModelOptions, "deleteLanguageModelOptions cannot be null"); String[] pathSegments = {"v1/customizations"}; String[] pathParameters = {deleteLanguageModelOptions.customizationId()}; RequestBuilder builder = RequestBuilder.delete( RequestBuilder.constructHttpUrl(getServiceUrl(), pathSegments, pathParameters)); Map sdkHeaders = SdkCommon.getSdkHeaders("speech_to_text", "v1", "deleteLanguageModel"); for (Entry header : sdkHeaders.entrySet()) { builder.header(header.getKey(), header.getValue()); } builder.header("Accept", "application/json"); ResponseConverter responseConverter = ResponseConverterUtils.getVoid(); return createServiceCall(builder.build(), responseConverter); } /** * Train a custom language model. * *

Initiates the training of a custom language model with new resources such as corpora, * grammars, and custom words. After adding, modifying, or deleting resources for a custom * language model, use this method to begin the actual training of the model on the latest data. * You can specify whether the custom language model is to be trained with all words from its * words resource or only with words that were added or modified by the user directly. You must * use credentials for the instance of the service that owns a model to train it. * *

The training method is asynchronous. It can take on the order of minutes to complete * depending on the amount of data on which the service is being trained and the current load on * the service. The method returns an HTTP 200 response code to indicate that the training process * has begun. * *

You can monitor the status of the training by using the **Get a custom language model** * method to poll the model's status. Use a loop to check the status every 10 seconds. The method * returns a `LanguageModel` object that includes `status` and `progress` fields. A status of * `available` means that the custom model is trained and ready to use. The service cannot accept * subsequent training requests or requests to add new resources until the existing request * completes. * *

**See also:** [Train the custom language * model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-languageCreate#trainModel-language). * *

### Training failures * *

Training can fail to start for the following reasons: * The service is currently handling * another request for the custom model, such as another training request or a request to add a * corpus or grammar to the model. * No training data have been added to the custom model. * The * custom model contains one or more invalid corpora, grammars, or words (for example, a custom * word has an invalid sounds-like pronunciation). You can correct the invalid resources or set * the `strict` parameter to `false` to exclude the invalid resources from the training. The model * must contain at least one valid resource for training to succeed. * * @param trainLanguageModelOptions the {@link TrainLanguageModelOptions} containing the options * for the call * @return a {@link ServiceCall} with a response type of {@link TrainingResponse} */ public ServiceCall trainLanguageModel( TrainLanguageModelOptions trainLanguageModelOptions) { com.ibm.cloud.sdk.core.util.Validator.notNull( trainLanguageModelOptions, "trainLanguageModelOptions cannot be null"); String[] pathSegments = {"v1/customizations", "train"}; String[] pathParameters = {trainLanguageModelOptions.customizationId()}; RequestBuilder builder = RequestBuilder.post( RequestBuilder.constructHttpUrl(getServiceUrl(), pathSegments, pathParameters)); Map sdkHeaders = SdkCommon.getSdkHeaders("speech_to_text", "v1", "trainLanguageModel"); for (Entry header : sdkHeaders.entrySet()) { builder.header(header.getKey(), header.getValue()); } builder.header("Accept", "application/json"); if (trainLanguageModelOptions.wordTypeToAdd() != null) { builder.query("word_type_to_add", trainLanguageModelOptions.wordTypeToAdd()); } if (trainLanguageModelOptions.customizationWeight() != null) { builder.query( "customization_weight", String.valueOf(trainLanguageModelOptions.customizationWeight())); } ResponseConverter responseConverter = ResponseConverterUtils.getValue( new com.google.gson.reflect.TypeToken() {}.getType()); return 
createServiceCall(builder.build(), responseConverter); } /** * Reset a custom language model. * *

Resets a custom language model by removing all corpora, grammars, and words from the model. * Resetting a custom language model initializes the model to its state when it was first created. * Metadata such as the name and language of the model are preserved, but the model's words * resource is removed and must be re-created. You must use credentials for the instance of the * service that owns a model to reset it. * *

**See also:** [Resetting a custom language * model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-manageLanguageModels#resetModel-language). * * @param resetLanguageModelOptions the {@link ResetLanguageModelOptions} containing the options * for the call * @return a {@link ServiceCall} with a response type of Void */ public ServiceCall resetLanguageModel(ResetLanguageModelOptions resetLanguageModelOptions) { com.ibm.cloud.sdk.core.util.Validator.notNull( resetLanguageModelOptions, "resetLanguageModelOptions cannot be null"); String[] pathSegments = {"v1/customizations", "reset"}; String[] pathParameters = {resetLanguageModelOptions.customizationId()}; RequestBuilder builder = RequestBuilder.post( RequestBuilder.constructHttpUrl(getServiceUrl(), pathSegments, pathParameters)); Map sdkHeaders = SdkCommon.getSdkHeaders("speech_to_text", "v1", "resetLanguageModel"); for (Entry header : sdkHeaders.entrySet()) { builder.header(header.getKey(), header.getValue()); } builder.header("Accept", "application/json"); ResponseConverter responseConverter = ResponseConverterUtils.getVoid(); return createServiceCall(builder.build(), responseConverter); } /** * Upgrade a custom language model. * *

Initiates the upgrade of a custom language model to the latest version of its base language * model. The upgrade method is asynchronous. It can take on the order of minutes to complete * depending on the amount of data in the custom model and the current load on the service. A * custom model must be in the `ready` or `available` state to be upgraded. You must use * credentials for the instance of the service that owns a model to upgrade it. * *

The method returns an HTTP 200 response code to indicate that the upgrade process has begun * successfully. You can monitor the status of the upgrade by using the **Get a custom language * model** method to poll the model's status. The method returns a `LanguageModel` object that * includes `status` and `progress` fields. Use a loop to check the status every 10 seconds. While * it is being upgraded, the custom model has the status `upgrading`. When the upgrade is * complete, the model resumes the status that it had prior to upgrade. The service cannot accept * subsequent requests for the model until the upgrade completes. * *

**See also:** [Upgrading a custom language * model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-customUpgrade#upgradeLanguage). * * @param upgradeLanguageModelOptions the {@link UpgradeLanguageModelOptions} containing the * options for the call * @return a {@link ServiceCall} with a response type of Void */ public ServiceCall upgradeLanguageModel( UpgradeLanguageModelOptions upgradeLanguageModelOptions) { com.ibm.cloud.sdk.core.util.Validator.notNull( upgradeLanguageModelOptions, "upgradeLanguageModelOptions cannot be null"); String[] pathSegments = {"v1/customizations", "upgrade_model"}; String[] pathParameters = {upgradeLanguageModelOptions.customizationId()}; RequestBuilder builder = RequestBuilder.post( RequestBuilder.constructHttpUrl(getServiceUrl(), pathSegments, pathParameters)); Map sdkHeaders = SdkCommon.getSdkHeaders("speech_to_text", "v1", "upgradeLanguageModel"); for (Entry header : sdkHeaders.entrySet()) { builder.header(header.getKey(), header.getValue()); } builder.header("Accept", "application/json"); ResponseConverter responseConverter = ResponseConverterUtils.getVoid(); return createServiceCall(builder.build(), responseConverter); } /** * List corpora. * *

Lists information about all corpora from a custom language model. The information includes * the total number of words and out-of-vocabulary (OOV) words, name, and status of each corpus. * You must use credentials for the instance of the service that owns a model to list its corpora. * *

**See also:** [Listing corpora for a custom language * model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-manageCorpora#listCorpora). * * @param listCorporaOptions the {@link ListCorporaOptions} containing the options for the call * @return a {@link ServiceCall} with a response type of {@link Corpora} */ public ServiceCall listCorpora(ListCorporaOptions listCorporaOptions) { com.ibm.cloud.sdk.core.util.Validator.notNull( listCorporaOptions, "listCorporaOptions cannot be null"); String[] pathSegments = {"v1/customizations", "corpora"}; String[] pathParameters = {listCorporaOptions.customizationId()}; RequestBuilder builder = RequestBuilder.get( RequestBuilder.constructHttpUrl(getServiceUrl(), pathSegments, pathParameters)); Map sdkHeaders = SdkCommon.getSdkHeaders("speech_to_text", "v1", "listCorpora"); for (Entry header : sdkHeaders.entrySet()) { builder.header(header.getKey(), header.getValue()); } builder.header("Accept", "application/json"); ResponseConverter responseConverter = ResponseConverterUtils.getValue( new com.google.gson.reflect.TypeToken() {}.getType()); return createServiceCall(builder.build(), responseConverter); } /** * Add a corpus. * *

Adds a single corpus text file of new training data to a custom language model. Use multiple * requests to submit multiple corpus text files. You must use credentials for the instance of the * service that owns a model to add a corpus to it. Adding a corpus does not affect the custom * language model until you train the model for the new data by using the **Train a custom * language model** method. * *

Submit a plain text file that contains sample sentences from the domain of interest to * enable the service to extract words in context. The more sentences you add that represent the * context in which speakers use words from the domain, the better the service's recognition * accuracy. * *

The call returns an HTTP 201 response code if the corpus is valid. The service then * asynchronously processes the contents of the corpus and automatically extracts new words that * it finds. This operation can take on the order of minutes to complete depending on the total * number of words and the number of new words in the corpus, as well as the current load on the * service. You cannot submit requests to add additional resources to the custom model or to train * the model until the service's analysis of the corpus for the current request completes. Use the * **Get a corpus** method to check the status of the analysis. * *

The service auto-populates the model's words resource with words from the corpus that are * not found in its base vocabulary. These words are referred to as out-of-vocabulary (OOV) words. * After adding a corpus, you must validate the words resource to ensure that each OOV word's * definition is complete and valid. You can use the **List custom words** method to examine the * words resource. You can use other words methods to eliminate typos and modify how words are * pronounced as needed. * *

To add a corpus file that has the same name as an existing corpus, set the `allow_overwrite` * parameter to `true`; otherwise, the request fails. Overwriting an existing corpus causes the * service to process the corpus text file and extract OOV words anew. Before doing so, it removes * any OOV words associated with the existing corpus from the model's words resource unless they * were also added by another corpus or grammar, or they have been modified in some way with the * **Add custom words** or **Add a custom word** method. * *

The service limits the overall amount of data that you can add to a custom model to a * maximum of 10 million total words from all sources combined. Also, you can add no more than 90 * thousand custom (OOV) words to a model. This includes words that the service extracts from * corpora and grammars, and words that you add directly. * *

**See also:** * [Add a corpus to the custom language * model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-languageCreate#addCorpus) * * [Working with * corpora](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-corporaWords#workingCorpora) * * [Validating a words * resource](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-corporaWords#validateModel). * * @param addCorpusOptions the {@link AddCorpusOptions} containing the options for the call * @return a {@link ServiceCall} with a response type of Void */ public ServiceCall addCorpus(AddCorpusOptions addCorpusOptions) { com.ibm.cloud.sdk.core.util.Validator.notNull( addCorpusOptions, "addCorpusOptions cannot be null"); String[] pathSegments = {"v1/customizations", "corpora"}; String[] pathParameters = {addCorpusOptions.customizationId(), addCorpusOptions.corpusName()}; RequestBuilder builder = RequestBuilder.post( RequestBuilder.constructHttpUrl(getServiceUrl(), pathSegments, pathParameters)); Map sdkHeaders = SdkCommon.getSdkHeaders("speech_to_text", "v1", "addCorpus"); for (Entry header : sdkHeaders.entrySet()) { builder.header(header.getKey(), header.getValue()); } builder.header("Accept", "application/json"); if (addCorpusOptions.allowOverwrite() != null) { builder.query("allow_overwrite", String.valueOf(addCorpusOptions.allowOverwrite())); } builder.body(RequestUtils.inputStreamBody(addCorpusOptions.corpusFile(), "text/plain")); ResponseConverter responseConverter = ResponseConverterUtils.getVoid(); return createServiceCall(builder.build(), responseConverter); } /** * Get a corpus. * *

Gets information about a corpus from a custom language model. The information includes the * total number of words and out-of-vocabulary (OOV) words, name, and status of the corpus. You * must use credentials for the instance of the service that owns a model to list its corpora. * *

**See also:** [Listing corpora for a custom language * model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-manageCorpora#listCorpora). * * @param getCorpusOptions the {@link GetCorpusOptions} containing the options for the call * @return a {@link ServiceCall} with a response type of {@link Corpus} */ public ServiceCall getCorpus(GetCorpusOptions getCorpusOptions) { com.ibm.cloud.sdk.core.util.Validator.notNull( getCorpusOptions, "getCorpusOptions cannot be null"); String[] pathSegments = {"v1/customizations", "corpora"}; String[] pathParameters = {getCorpusOptions.customizationId(), getCorpusOptions.corpusName()}; RequestBuilder builder = RequestBuilder.get( RequestBuilder.constructHttpUrl(getServiceUrl(), pathSegments, pathParameters)); Map sdkHeaders = SdkCommon.getSdkHeaders("speech_to_text", "v1", "getCorpus"); for (Entry header : sdkHeaders.entrySet()) { builder.header(header.getKey(), header.getValue()); } builder.header("Accept", "application/json"); ResponseConverter responseConverter = ResponseConverterUtils.getValue( new com.google.gson.reflect.TypeToken() {}.getType()); return createServiceCall(builder.build(), responseConverter); } /** * Delete a corpus. * *

Deletes an existing corpus from a custom language model. The service removes any * out-of-vocabulary (OOV) words that are associated with the corpus from the custom model's words * resource unless they were also added by another corpus or grammar, or they were modified in * some way with the **Add custom words** or **Add a custom word** method. Removing a corpus does * not affect the custom model until you train the model with the **Train a custom language * model** method. You must use credentials for the instance of the service that owns a model to * delete its corpora. * *

**See also:** [Deleting a corpus from a custom language * model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-manageCorpora#deleteCorpus). * * @param deleteCorpusOptions the {@link DeleteCorpusOptions} containing the options for the call * @return a {@link ServiceCall} with a response type of Void */ public ServiceCall deleteCorpus(DeleteCorpusOptions deleteCorpusOptions) { com.ibm.cloud.sdk.core.util.Validator.notNull( deleteCorpusOptions, "deleteCorpusOptions cannot be null"); String[] pathSegments = {"v1/customizations", "corpora"}; String[] pathParameters = { deleteCorpusOptions.customizationId(), deleteCorpusOptions.corpusName() }; RequestBuilder builder = RequestBuilder.delete( RequestBuilder.constructHttpUrl(getServiceUrl(), pathSegments, pathParameters)); Map sdkHeaders = SdkCommon.getSdkHeaders("speech_to_text", "v1", "deleteCorpus"); for (Entry header : sdkHeaders.entrySet()) { builder.header(header.getKey(), header.getValue()); } builder.header("Accept", "application/json"); ResponseConverter responseConverter = ResponseConverterUtils.getVoid(); return createServiceCall(builder.build(), responseConverter); } /** * List custom words. * *

Lists information about custom words from a custom language model. You can list all words * from the custom model's words resource, only custom words that were added or modified by the * user, or only out-of-vocabulary (OOV) words that were extracted from corpora or are recognized * by grammars. You can also indicate the order in which the service is to return words; by * default, the service lists words in ascending alphabetical order. You must use credentials for * the instance of the service that owns a model to list information about its words. * *

**See also:** [Listing words from a custom language * model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-manageWords#listWords). * * @param listWordsOptions the {@link ListWordsOptions} containing the options for the call * @return a {@link ServiceCall} with a response type of {@link Words} */ public ServiceCall listWords(ListWordsOptions listWordsOptions) { com.ibm.cloud.sdk.core.util.Validator.notNull( listWordsOptions, "listWordsOptions cannot be null"); String[] pathSegments = {"v1/customizations", "words"}; String[] pathParameters = {listWordsOptions.customizationId()}; RequestBuilder builder = RequestBuilder.get( RequestBuilder.constructHttpUrl(getServiceUrl(), pathSegments, pathParameters)); Map sdkHeaders = SdkCommon.getSdkHeaders("speech_to_text", "v1", "listWords"); for (Entry header : sdkHeaders.entrySet()) { builder.header(header.getKey(), header.getValue()); } builder.header("Accept", "application/json"); if (listWordsOptions.wordType() != null) { builder.query("word_type", listWordsOptions.wordType()); } if (listWordsOptions.sort() != null) { builder.query("sort", listWordsOptions.sort()); } ResponseConverter responseConverter = ResponseConverterUtils.getValue( new com.google.gson.reflect.TypeToken() {}.getType()); return createServiceCall(builder.build(), responseConverter); } /** * Add custom words. * *

Adds one or more custom words to a custom language model. The service populates the words * resource for a custom model with out-of-vocabulary (OOV) words from each corpus or grammar that * is added to the model. You can use this method to add additional words or to modify existing * words in the words resource. The words resource for a model can contain a maximum of 90 * thousand custom (OOV) words. This includes words that the service extracts from corpora and * grammars and words that you add directly. * *

You must use credentials for the instance of the service that owns a model to add or modify * custom words for the model. Adding or modifying custom words does not affect the custom model * until you train the model for the new data by using the **Train a custom language model** * method. * *

You add custom words by providing a `CustomWords` object, which is an array of `CustomWord` * objects, one per word. You must use the object's `word` parameter to identify the word that is * to be added. You can also provide one or both of the optional `sounds_like` and `display_as` * fields for each word. * The `sounds_like` field provides an array of one or more pronunciations * for the word. Use the parameter to specify how the word can be pronounced by users. Use the * parameter for words that are difficult to pronounce, foreign words, acronyms, and so on. For * example, you might specify that the word `IEEE` can sound like `i triple e`. You can specify a * maximum of five sounds-like pronunciations for a word. If you omit the `sounds_like` field, the * service attempts to set the field to its pronunciation of the word. It cannot generate a * pronunciation for all words, so you must review the word's definition to ensure that it is * complete and valid. * The `display_as` field provides a different way of spelling the word in a * transcript. Use the parameter when you want the word to appear different from its usual * representation or from its spelling in training data. For example, you might indicate that the * word `IBM(trademark)` is to be displayed as `IBM™`. * *

If you add a custom word that already exists in the words resource for the custom model, the * new definition overwrites the existing data for the word. If the service encounters an error * with the input data, it returns a failure code and does not add any of the words to the words * resource. * *

The call returns an HTTP 201 response code if the input data is valid. It then * asynchronously processes the words to add them to the model's words resource. The time that it * takes for the analysis to complete depends on the number of new words that you add but is * generally faster than adding a corpus or grammar. * *

You can monitor the status of the request by using the **List a custom language model** * method to poll the model's status. Use a loop to check the status every 10 seconds. The method * returns a `Customization` object that includes a `status` field. A status of `ready` means that * the words have been added to the custom model. The service cannot accept requests to add new * data or to train the model until the existing request completes. * *

You can use the **List custom words** or **List a custom word** method to review the words * that you add. Words with an invalid `sounds_like` field include an `error` field that describes * the problem. You can use other words-related methods to correct errors, eliminate typos, and * modify how words are pronounced as needed. * *

**See also:** * [Add words to the custom language * model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-languageCreate#addWords) * * [Working with custom * words](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-corporaWords#workingWords) * * [Validating a words * resource](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-corporaWords#validateModel). * * @param addWordsOptions the {@link AddWordsOptions} containing the options for the call * @return a {@link ServiceCall} with a response type of Void */ public ServiceCall addWords(AddWordsOptions addWordsOptions) { com.ibm.cloud.sdk.core.util.Validator.notNull( addWordsOptions, "addWordsOptions cannot be null"); String[] pathSegments = {"v1/customizations", "words"}; String[] pathParameters = {addWordsOptions.customizationId()}; RequestBuilder builder = RequestBuilder.post( RequestBuilder.constructHttpUrl(getServiceUrl(), pathSegments, pathParameters)); Map sdkHeaders = SdkCommon.getSdkHeaders("speech_to_text", "v1", "addWords"); for (Entry header : sdkHeaders.entrySet()) { builder.header(header.getKey(), header.getValue()); } builder.header("Accept", "application/json"); final JsonObject contentJson = new JsonObject(); contentJson.add( "words", com.ibm.cloud.sdk.core.util.GsonSingleton.getGson().toJsonTree(addWordsOptions.words())); builder.bodyJson(contentJson); ResponseConverter responseConverter = ResponseConverterUtils.getVoid(); return createServiceCall(builder.build(), responseConverter); } /** * Add a custom word. * *

Adds a custom word to a custom language model. The service populates the words resource for * a custom model with out-of-vocabulary (OOV) words from each corpus or grammar that is added to * the model. You can use this method to add a word or to modify an existing word in the words * resource. The words resource for a model can contain a maximum of 90 thousand custom (OOV) * words. This includes words that the service extracts from corpora and grammars and words that * you add directly. * *

You must use credentials for the instance of the service that owns a model to add or modify * a custom word for the model. Adding or modifying a custom word does not affect the custom model * until you train the model for the new data by using the **Train a custom language model** * method. * *

Use the `word_name` parameter to specify the custom word that is to be added or modified. * Use the `CustomWord` object to provide one or both of the optional `sounds_like` and * `display_as` fields for the word. * The `sounds_like` field provides an array of one or more * pronunciations for the word. Use the parameter to specify how the word can be pronounced by * users. Use the parameter for words that are difficult to pronounce, foreign words, acronyms, * and so on. For example, you might specify that the word `IEEE` can sound like `i triple e`. You * can specify a maximum of five sounds-like pronunciations for a word. If you omit the * `sounds_like` field, the service attempts to set the field to its pronunciation of the word. It * cannot generate a pronunciation for all words, so you must review the word's definition to * ensure that it is complete and valid. * The `display_as` field provides a different way of * spelling the word in a transcript. Use the parameter when you want the word to appear different * from its usual representation or from its spelling in training data. For example, you might * indicate that the word `IBM(trademark)` is to be displayed as `IBM™`. * *

If you add a custom word that already exists in the words resource for the custom model, the * new definition overwrites the existing data for the word. If the service encounters an error, * it does not add the word to the words resource. Use the **List a custom word** method to review * the word that you add. * *

**See also:** * [Add words to the custom language * model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-languageCreate#addWords) * * [Working with custom * words](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-corporaWords#workingWords) * * [Validating a words * resource](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-corporaWords#validateModel). * * @param addWordOptions the {@link AddWordOptions} containing the options for the call * @return a {@link ServiceCall} with a response type of Void */ public ServiceCall addWord(AddWordOptions addWordOptions) { com.ibm.cloud.sdk.core.util.Validator.notNull(addWordOptions, "addWordOptions cannot be null"); String[] pathSegments = {"v1/customizations", "words"}; String[] pathParameters = {addWordOptions.customizationId(), addWordOptions.wordName()}; RequestBuilder builder = RequestBuilder.put( RequestBuilder.constructHttpUrl(getServiceUrl(), pathSegments, pathParameters)); Map sdkHeaders = SdkCommon.getSdkHeaders("speech_to_text", "v1", "addWord"); for (Entry header : sdkHeaders.entrySet()) { builder.header(header.getKey(), header.getValue()); } builder.header("Accept", "application/json"); final JsonObject contentJson = new JsonObject(); if (addWordOptions.word() != null) { contentJson.addProperty("word", addWordOptions.word()); } if (addWordOptions.soundsLike() != null) { contentJson.add( "sounds_like", com.ibm.cloud.sdk.core.util.GsonSingleton.getGson() .toJsonTree(addWordOptions.soundsLike())); } if (addWordOptions.displayAs() != null) { contentJson.addProperty("display_as", addWordOptions.displayAs()); } builder.bodyJson(contentJson); ResponseConverter responseConverter = ResponseConverterUtils.getVoid(); return createServiceCall(builder.build(), responseConverter); } /** * Get a custom word. * *

Gets information about a custom word from a custom language model. You must use credentials * for the instance of the service that owns a model to list information about its words. * *

**See also:** [Listing words from a custom language * model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-manageWords#listWords). * * @param getWordOptions the {@link GetWordOptions} containing the options for the call * @return a {@link ServiceCall} with a response type of {@link Word} */ public ServiceCall getWord(GetWordOptions getWordOptions) { com.ibm.cloud.sdk.core.util.Validator.notNull(getWordOptions, "getWordOptions cannot be null"); String[] pathSegments = {"v1/customizations", "words"}; String[] pathParameters = {getWordOptions.customizationId(), getWordOptions.wordName()}; RequestBuilder builder = RequestBuilder.get( RequestBuilder.constructHttpUrl(getServiceUrl(), pathSegments, pathParameters)); Map sdkHeaders = SdkCommon.getSdkHeaders("speech_to_text", "v1", "getWord"); for (Entry header : sdkHeaders.entrySet()) { builder.header(header.getKey(), header.getValue()); } builder.header("Accept", "application/json"); ResponseConverter responseConverter = ResponseConverterUtils.getValue(new com.google.gson.reflect.TypeToken() {}.getType()); return createServiceCall(builder.build(), responseConverter); } /** * Delete a custom word. * *

Deletes a custom word from a custom language model. You can remove any word that you added * to the custom model's words resource via any means. However, if the word also exists in the * service's base vocabulary, the service removes only the custom pronunciation for the word; the * word remains in the base vocabulary. Removing a custom word does not affect the custom model * until you train the model with the **Train a custom language model** method. You must use * credentials for the instance of the service that owns a model to delete its words. * *

**See also:** [Deleting a word from a custom language * model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-manageWords#deleteWord). * * @param deleteWordOptions the {@link DeleteWordOptions} containing the options for the call * @return a {@link ServiceCall} with a response type of Void */ public ServiceCall deleteWord(DeleteWordOptions deleteWordOptions) { com.ibm.cloud.sdk.core.util.Validator.notNull( deleteWordOptions, "deleteWordOptions cannot be null"); String[] pathSegments = {"v1/customizations", "words"}; String[] pathParameters = {deleteWordOptions.customizationId(), deleteWordOptions.wordName()}; RequestBuilder builder = RequestBuilder.delete( RequestBuilder.constructHttpUrl(getServiceUrl(), pathSegments, pathParameters)); Map sdkHeaders = SdkCommon.getSdkHeaders("speech_to_text", "v1", "deleteWord"); for (Entry header : sdkHeaders.entrySet()) { builder.header(header.getKey(), header.getValue()); } builder.header("Accept", "application/json"); ResponseConverter responseConverter = ResponseConverterUtils.getVoid(); return createServiceCall(builder.build(), responseConverter); } /** * List grammars. * *

Lists information about all grammars from a custom language model. The information includes * the total number of out-of-vocabulary (OOV) words, name, and status of each grammar. You must * use credentials for the instance of the service that owns a model to list its grammars. * *

**See also:** [Listing grammars from a custom language * model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-manageGrammars#listGrammars). * * @param listGrammarsOptions the {@link ListGrammarsOptions} containing the options for the call * @return a {@link ServiceCall} with a response type of {@link Grammars} */ public ServiceCall listGrammars(ListGrammarsOptions listGrammarsOptions) { com.ibm.cloud.sdk.core.util.Validator.notNull( listGrammarsOptions, "listGrammarsOptions cannot be null"); String[] pathSegments = {"v1/customizations", "grammars"}; String[] pathParameters = {listGrammarsOptions.customizationId()}; RequestBuilder builder = RequestBuilder.get( RequestBuilder.constructHttpUrl(getServiceUrl(), pathSegments, pathParameters)); Map sdkHeaders = SdkCommon.getSdkHeaders("speech_to_text", "v1", "listGrammars"); for (Entry header : sdkHeaders.entrySet()) { builder.header(header.getKey(), header.getValue()); } builder.header("Accept", "application/json"); ResponseConverter responseConverter = ResponseConverterUtils.getValue( new com.google.gson.reflect.TypeToken() {}.getType()); return createServiceCall(builder.build(), responseConverter); } /** * Add a grammar. * *

Adds a single grammar file to a custom language model. Submit a plain text file in UTF-8 * format that defines the grammar. Use multiple requests to submit multiple grammar files. You * must use credentials for the instance of the service that owns a model to add a grammar to it. * Adding a grammar does not affect the custom language model until you train the model for the * new data by using the **Train a custom language model** method. * *

The call returns an HTTP 201 response code if the grammar is valid. The service then * asynchronously processes the contents of the grammar and automatically extracts new words that * it finds. This operation can take a few seconds or minutes to complete depending on the size * and complexity of the grammar, as well as the current load on the service. You cannot submit * requests to add additional resources to the custom model or to train the model until the * service's analysis of the grammar for the current request completes. Use the **Get a grammar** * method to check the status of the analysis. * *

The service populates the model's words resource with any word that is recognized by the * grammar that is not found in the model's base vocabulary. These are referred to as * out-of-vocabulary (OOV) words. You can use the **List custom words** method to examine the * words resource and use other words-related methods to eliminate typos and modify how words are * pronounced as needed. * *

To add a grammar that has the same name as an existing grammar, set the `allow_overwrite` * parameter to `true`; otherwise, the request fails. Overwriting an existing grammar causes the * service to process the grammar file and extract OOV words anew. Before doing so, it removes any * OOV words associated with the existing grammar from the model's words resource unless they were * also added by another resource or they have been modified in some way with the **Add custom * words** or **Add a custom word** method. * *

The service limits the overall amount of data that you can add to a custom model to a * maximum of 10 million total words from all sources combined. Also, you can add no more than 90 * thousand OOV words to a model. This includes words that the service extracts from corpora and * grammars and words that you add directly. * *

**See also:** * [Understanding * grammars](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-grammarUnderstand#grammarUnderstand) * * [Add a grammar to the custom language * model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-grammarAdd#addGrammar). * * @param addGrammarOptions the {@link AddGrammarOptions} containing the options for the call * @return a {@link ServiceCall} with a response type of Void */ public ServiceCall addGrammar(AddGrammarOptions addGrammarOptions) { com.ibm.cloud.sdk.core.util.Validator.notNull( addGrammarOptions, "addGrammarOptions cannot be null"); String[] pathSegments = {"v1/customizations", "grammars"}; String[] pathParameters = { addGrammarOptions.customizationId(), addGrammarOptions.grammarName() }; RequestBuilder builder = RequestBuilder.post( RequestBuilder.constructHttpUrl(getServiceUrl(), pathSegments, pathParameters)); Map sdkHeaders = SdkCommon.getSdkHeaders("speech_to_text", "v1", "addGrammar"); for (Entry header : sdkHeaders.entrySet()) { builder.header(header.getKey(), header.getValue()); } builder.header("Accept", "application/json"); builder.header("Content-Type", addGrammarOptions.contentType()); if (addGrammarOptions.allowOverwrite() != null) { builder.query("allow_overwrite", String.valueOf(addGrammarOptions.allowOverwrite())); } builder.bodyContent( addGrammarOptions.contentType(), null, null, addGrammarOptions.grammarFile()); ResponseConverter responseConverter = ResponseConverterUtils.getVoid(); return createServiceCall(builder.build(), responseConverter); } /** * Get a grammar. * *

Gets information about a grammar from a custom language model. The information includes the * total number of out-of-vocabulary (OOV) words, name, and status of the grammar. You must use * credentials for the instance of the service that owns a model to list its grammars. * *

**See also:** [Listing grammars from a custom language * model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-manageGrammars#listGrammars). * * @param getGrammarOptions the {@link GetGrammarOptions} containing the options for the call * @return a {@link ServiceCall} with a response type of {@link Grammar} */ public ServiceCall getGrammar(GetGrammarOptions getGrammarOptions) { com.ibm.cloud.sdk.core.util.Validator.notNull( getGrammarOptions, "getGrammarOptions cannot be null"); String[] pathSegments = {"v1/customizations", "grammars"}; String[] pathParameters = { getGrammarOptions.customizationId(), getGrammarOptions.grammarName() }; RequestBuilder builder = RequestBuilder.get( RequestBuilder.constructHttpUrl(getServiceUrl(), pathSegments, pathParameters)); Map sdkHeaders = SdkCommon.getSdkHeaders("speech_to_text", "v1", "getGrammar"); for (Entry header : sdkHeaders.entrySet()) { builder.header(header.getKey(), header.getValue()); } builder.header("Accept", "application/json"); ResponseConverter responseConverter = ResponseConverterUtils.getValue( new com.google.gson.reflect.TypeToken() {}.getType()); return createServiceCall(builder.build(), responseConverter); } /** * Delete a grammar. * *

Deletes an existing grammar from a custom language model. The service removes any * out-of-vocabulary (OOV) words associated with the grammar from the custom model's words * resource unless they were also added by another resource or they were modified in some way with * the **Add custom words** or **Add a custom word** method. Removing a grammar does not affect * the custom model until you train the model with the **Train a custom language model** method. * You must use credentials for the instance of the service that owns a model to delete its * grammar. * *

**See also:** [Deleting a grammar from a custom language * model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-manageGrammars#deleteGrammar). * * @param deleteGrammarOptions the {@link DeleteGrammarOptions} containing the options for the * call * @return a {@link ServiceCall} with a response type of Void */ public ServiceCall deleteGrammar(DeleteGrammarOptions deleteGrammarOptions) { com.ibm.cloud.sdk.core.util.Validator.notNull( deleteGrammarOptions, "deleteGrammarOptions cannot be null"); String[] pathSegments = {"v1/customizations", "grammars"}; String[] pathParameters = { deleteGrammarOptions.customizationId(), deleteGrammarOptions.grammarName() }; RequestBuilder builder = RequestBuilder.delete( RequestBuilder.constructHttpUrl(getServiceUrl(), pathSegments, pathParameters)); Map sdkHeaders = SdkCommon.getSdkHeaders("speech_to_text", "v1", "deleteGrammar"); for (Entry header : sdkHeaders.entrySet()) { builder.header(header.getKey(), header.getValue()); } builder.header("Accept", "application/json"); ResponseConverter responseConverter = ResponseConverterUtils.getVoid(); return createServiceCall(builder.build(), responseConverter); } /** * Create a custom acoustic model. * *

Creates a new custom acoustic model for a specified base model. The custom acoustic model * can be used only with the base model for which it is created. The model is owned by the * instance of the service whose credentials are used to create it. * *

You can create a maximum of 1024 custom acoustic models per owning credentials. The service * returns an error if you attempt to create more than 1024 models. You do not lose any models, * but you cannot create any more until your model count is below the limit. * *

**See also:** [Create a custom acoustic * model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-acoustic#createModel-acoustic). * * @param createAcousticModelOptions the {@link CreateAcousticModelOptions} containing the options * for the call * @return a {@link ServiceCall} with a response type of {@link AcousticModel} */ public ServiceCall createAcousticModel( CreateAcousticModelOptions createAcousticModelOptions) { com.ibm.cloud.sdk.core.util.Validator.notNull( createAcousticModelOptions, "createAcousticModelOptions cannot be null"); String[] pathSegments = {"v1/acoustic_customizations"}; RequestBuilder builder = RequestBuilder.post(RequestBuilder.constructHttpUrl(getServiceUrl(), pathSegments)); Map sdkHeaders = SdkCommon.getSdkHeaders("speech_to_text", "v1", "createAcousticModel"); for (Entry header : sdkHeaders.entrySet()) { builder.header(header.getKey(), header.getValue()); } builder.header("Accept", "application/json"); final JsonObject contentJson = new JsonObject(); contentJson.addProperty("name", createAcousticModelOptions.name()); contentJson.addProperty("base_model_name", createAcousticModelOptions.baseModelName()); if (createAcousticModelOptions.description() != null) { contentJson.addProperty("description", createAcousticModelOptions.description()); } builder.bodyJson(contentJson); ResponseConverter responseConverter = ResponseConverterUtils.getValue( new com.google.gson.reflect.TypeToken() {}.getType()); return createServiceCall(builder.build(), responseConverter); } /** * List custom acoustic models. * *

Lists information about all custom acoustic models that are owned by an instance of the * service. Use the `language` parameter to see all custom acoustic models for the specified * language. Omit the parameter to see all custom acoustic models for all languages. You must use * credentials for the instance of the service that owns a model to list information about it. * *

**See also:** [Listing custom acoustic * models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-manageAcousticModels#listModels-acoustic). * * @param listAcousticModelsOptions the {@link ListAcousticModelsOptions} containing the options * for the call * @return a {@link ServiceCall} with a response type of {@link AcousticModels} */ public ServiceCall listAcousticModels( ListAcousticModelsOptions listAcousticModelsOptions) { String[] pathSegments = {"v1/acoustic_customizations"}; RequestBuilder builder = RequestBuilder.get(RequestBuilder.constructHttpUrl(getServiceUrl(), pathSegments)); Map sdkHeaders = SdkCommon.getSdkHeaders("speech_to_text", "v1", "listAcousticModels"); for (Entry header : sdkHeaders.entrySet()) { builder.header(header.getKey(), header.getValue()); } builder.header("Accept", "application/json"); if (listAcousticModelsOptions != null) { if (listAcousticModelsOptions.language() != null) { builder.query("language", listAcousticModelsOptions.language()); } } ResponseConverter responseConverter = ResponseConverterUtils.getValue( new com.google.gson.reflect.TypeToken() {}.getType()); return createServiceCall(builder.build(), responseConverter); } /** * List custom acoustic models. * *

Lists information about all custom acoustic models that are owned by an instance of the * service. Use the `language` parameter to see all custom acoustic models for the specified * language. Omit the parameter to see all custom acoustic models for all languages. You must use * credentials for the instance of the service that owns a model to list information about it. * *

**See also:** [Listing custom acoustic * models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-manageAcousticModels#listModels-acoustic). * * @return a {@link ServiceCall} with a response type of {@link AcousticModels} */ public ServiceCall listAcousticModels() { return listAcousticModels(null); } /** * Get a custom acoustic model. * *

Gets information about a specified custom acoustic model. You must use credentials for the * instance of the service that owns a model to list information about it. * *

**See also:** [Listing custom acoustic * models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-manageAcousticModels#listModels-acoustic). * * @param getAcousticModelOptions the {@link GetAcousticModelOptions} containing the options for * the call * @return a {@link ServiceCall} with a response type of {@link AcousticModel} */ public ServiceCall getAcousticModel( GetAcousticModelOptions getAcousticModelOptions) { com.ibm.cloud.sdk.core.util.Validator.notNull( getAcousticModelOptions, "getAcousticModelOptions cannot be null"); String[] pathSegments = {"v1/acoustic_customizations"}; String[] pathParameters = {getAcousticModelOptions.customizationId()}; RequestBuilder builder = RequestBuilder.get( RequestBuilder.constructHttpUrl(getServiceUrl(), pathSegments, pathParameters)); Map sdkHeaders = SdkCommon.getSdkHeaders("speech_to_text", "v1", "getAcousticModel"); for (Entry header : sdkHeaders.entrySet()) { builder.header(header.getKey(), header.getValue()); } builder.header("Accept", "application/json"); ResponseConverter responseConverter = ResponseConverterUtils.getValue( new com.google.gson.reflect.TypeToken() {}.getType()); return createServiceCall(builder.build(), responseConverter); } /** * Delete a custom acoustic model. * *

Deletes an existing custom acoustic model. The custom model cannot be deleted if another * request, such as adding an audio resource to the model, is currently being processed. You must * use credentials for the instance of the service that owns a model to delete it. * *

**See also:** [Deleting a custom acoustic * model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-manageAcousticModels#deleteModel-acoustic). * * @param deleteAcousticModelOptions the {@link DeleteAcousticModelOptions} containing the options * for the call * @return a {@link ServiceCall} with a response type of Void */ public ServiceCall deleteAcousticModel( DeleteAcousticModelOptions deleteAcousticModelOptions) { com.ibm.cloud.sdk.core.util.Validator.notNull( deleteAcousticModelOptions, "deleteAcousticModelOptions cannot be null"); String[] pathSegments = {"v1/acoustic_customizations"}; String[] pathParameters = {deleteAcousticModelOptions.customizationId()}; RequestBuilder builder = RequestBuilder.delete( RequestBuilder.constructHttpUrl(getServiceUrl(), pathSegments, pathParameters)); Map sdkHeaders = SdkCommon.getSdkHeaders("speech_to_text", "v1", "deleteAcousticModel"); for (Entry header : sdkHeaders.entrySet()) { builder.header(header.getKey(), header.getValue()); } builder.header("Accept", "application/json"); ResponseConverter responseConverter = ResponseConverterUtils.getVoid(); return createServiceCall(builder.build(), responseConverter); } /** * Train a custom acoustic model. * *

Initiates the training of a custom acoustic model with new or changed audio resources. After * adding or deleting audio resources for a custom acoustic model, use this method to begin the * actual training of the model on the latest audio data. The custom acoustic model does not * reflect its changed data until you train it. You must use credentials for the instance of the * service that owns a model to train it. * *

The training method is asynchronous. It can take on the order of minutes or hours to * complete depending on the total amount of audio data on which the custom acoustic model is * being trained and the current load on the service. Typically, training a custom acoustic model * takes approximately two to four times the length of its audio data. The actual time depends on * the model being trained and the nature of the audio, such as whether the audio is clean or * noisy. The method returns an HTTP 200 response code to indicate that the training process has * begun. * *

You can monitor the status of the training by using the **Get a custom acoustic model** * method to poll the model's status. Use a loop to check the status once a minute. The method * returns an `AcousticModel` object that includes `status` and `progress` fields. A status of * `available` indicates that the custom model is trained and ready to use. The service cannot * train a model while it is handling another request for the model. The service cannot accept * subsequent training requests, or requests to add new audio resources, until the existing * training request completes. * *

You can use the optional `custom_language_model_id` parameter to specify the GUID of a * separately created custom language model that is to be used during training. Train with a * custom language model if you have verbatim transcriptions of the audio files that you have * added to the custom model or you have either corpora (text files) or a list of words that are * relevant to the contents of the audio files. For training to succeed, both of the custom models * must be based on the same version of the same base model, and the custom language model must be * fully trained and available. * *

**See also:** * [Train the custom acoustic * model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-acoustic#trainModel-acoustic) * * [Using custom acoustic and custom language models * together](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-useBoth#useBoth) * *

### Training failures * *

Training can fail to start for the following reasons: * The service is currently handling * another request for the custom model, such as another training request or a request to add * audio resources to the model. * The custom model contains less than 10 minutes or more than 200 * hours of audio data. * You passed a custom language model with the `custom_language_model_id` * query parameter that is not in the available state. A custom language model must be fully * trained and available to be used to train a custom acoustic model. * You passed an incompatible * custom language model with the `custom_language_model_id` query parameter. Both custom models * must be based on the same version of the same base model. * The custom model contains one or * more invalid audio resources. You can correct the invalid audio resources or set the `strict` * parameter to `false` to exclude the invalid resources from the training. The model must contain * at least one valid resource for training to succeed. 
* * @param trainAcousticModelOptions the {@link TrainAcousticModelOptions} containing the options * for the call * @return a {@link ServiceCall} with a response type of {@link TrainingResponse} */ public ServiceCall trainAcousticModel( TrainAcousticModelOptions trainAcousticModelOptions) { com.ibm.cloud.sdk.core.util.Validator.notNull( trainAcousticModelOptions, "trainAcousticModelOptions cannot be null"); String[] pathSegments = {"v1/acoustic_customizations", "train"}; String[] pathParameters = {trainAcousticModelOptions.customizationId()}; RequestBuilder builder = RequestBuilder.post( RequestBuilder.constructHttpUrl(getServiceUrl(), pathSegments, pathParameters)); Map sdkHeaders = SdkCommon.getSdkHeaders("speech_to_text", "v1", "trainAcousticModel"); for (Entry header : sdkHeaders.entrySet()) { builder.header(header.getKey(), header.getValue()); } builder.header("Accept", "application/json"); if (trainAcousticModelOptions.customLanguageModelId() != null) { builder.query("custom_language_model_id", trainAcousticModelOptions.customLanguageModelId()); } ResponseConverter responseConverter = ResponseConverterUtils.getValue( new com.google.gson.reflect.TypeToken() {}.getType()); return createServiceCall(builder.build(), responseConverter); } /** * Reset a custom acoustic model. * *

Resets a custom acoustic model by removing all audio resources from the model. Resetting a * custom acoustic model initializes the model to its state when it was first created. Metadata * such as the name and language of the model are preserved, but the model's audio resources are * removed and must be re-created. The service cannot reset a model while it is handling another * request for the model. The service cannot accept subsequent requests for the model until the * existing reset request completes. You must use credentials for the instance of the service that * owns a model to reset it. * *

**See also:** [Resetting a custom acoustic * model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-manageAcousticModels#resetModel-acoustic). * * @param resetAcousticModelOptions the {@link ResetAcousticModelOptions} containing the options * for the call * @return a {@link ServiceCall} with a response type of Void */ public ServiceCall resetAcousticModel(ResetAcousticModelOptions resetAcousticModelOptions) { com.ibm.cloud.sdk.core.util.Validator.notNull( resetAcousticModelOptions, "resetAcousticModelOptions cannot be null"); String[] pathSegments = {"v1/acoustic_customizations", "reset"}; String[] pathParameters = {resetAcousticModelOptions.customizationId()}; RequestBuilder builder = RequestBuilder.post( RequestBuilder.constructHttpUrl(getServiceUrl(), pathSegments, pathParameters)); Map sdkHeaders = SdkCommon.getSdkHeaders("speech_to_text", "v1", "resetAcousticModel"); for (Entry header : sdkHeaders.entrySet()) { builder.header(header.getKey(), header.getValue()); } builder.header("Accept", "application/json"); ResponseConverter responseConverter = ResponseConverterUtils.getVoid(); return createServiceCall(builder.build(), responseConverter); } /** * Upgrade a custom acoustic model. * *

Initiates the upgrade of a custom acoustic model to the latest version of its base language * model. The upgrade method is asynchronous. It can take on the order of minutes or hours to * complete depending on the amount of data in the custom model and the current load on the * service; typically, upgrade takes approximately twice the length of the total audio contained * in the custom model. A custom model must be in the `ready` or `available` state to be upgraded. * You must use credentials for the instance of the service that owns a model to upgrade it. * *

The method returns an HTTP 200 response code to indicate that the upgrade process has begun * successfully. You can monitor the status of the upgrade by using the **Get a custom acoustic * model** method to poll the model's status. The method returns an `AcousticModel` object that * includes `status` and `progress` fields. Use a loop to check the status once a minute. While it * is being upgraded, the custom model has the status `upgrading`. When the upgrade is complete, * the model resumes the status that it had prior to upgrade. The service cannot upgrade a model * while it is handling another request for the model. The service cannot accept subsequent * requests for the model until the existing upgrade request completes. * *

If the custom acoustic model was trained with a separately created custom language model, * you must use the `custom_language_model_id` parameter to specify the GUID of that custom * language model. The custom language model must be upgraded before the custom acoustic model can * be upgraded. Omit the parameter if the custom acoustic model was not trained with a custom * language model. * *

**See also:** [Upgrading a custom acoustic * model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-customUpgrade#upgradeAcoustic). * * @param upgradeAcousticModelOptions the {@link UpgradeAcousticModelOptions} containing the * options for the call * @return a {@link ServiceCall} with a response type of Void */ public ServiceCall upgradeAcousticModel( UpgradeAcousticModelOptions upgradeAcousticModelOptions) { com.ibm.cloud.sdk.core.util.Validator.notNull( upgradeAcousticModelOptions, "upgradeAcousticModelOptions cannot be null"); String[] pathSegments = {"v1/acoustic_customizations", "upgrade_model"}; String[] pathParameters = {upgradeAcousticModelOptions.customizationId()}; RequestBuilder builder = RequestBuilder.post( RequestBuilder.constructHttpUrl(getServiceUrl(), pathSegments, pathParameters)); Map sdkHeaders = SdkCommon.getSdkHeaders("speech_to_text", "v1", "upgradeAcousticModel"); for (Entry header : sdkHeaders.entrySet()) { builder.header(header.getKey(), header.getValue()); } builder.header("Accept", "application/json"); if (upgradeAcousticModelOptions.customLanguageModelId() != null) { builder.query( "custom_language_model_id", upgradeAcousticModelOptions.customLanguageModelId()); } if (upgradeAcousticModelOptions.force() != null) { builder.query("force", String.valueOf(upgradeAcousticModelOptions.force())); } ResponseConverter responseConverter = ResponseConverterUtils.getVoid(); return createServiceCall(builder.build(), responseConverter); } /** * List audio resources. * *

Lists information about all audio resources from a custom acoustic model. The information * includes the name of the resource and information about its audio data, such as its duration. * It also includes the status of the audio resource, which is important for checking the * service's analysis of the resource in response to a request to add it to the custom acoustic * model. You must use credentials for the instance of the service that owns a model to list its * audio resources. * *

**See also:** [Listing audio resources for a custom acoustic * model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-manageAudio#listAudio). * * @param listAudioOptions the {@link ListAudioOptions} containing the options for the call * @return a {@link ServiceCall} with a response type of {@link AudioResources} */ public ServiceCall listAudio(ListAudioOptions listAudioOptions) { com.ibm.cloud.sdk.core.util.Validator.notNull( listAudioOptions, "listAudioOptions cannot be null"); String[] pathSegments = {"v1/acoustic_customizations", "audio"}; String[] pathParameters = {listAudioOptions.customizationId()}; RequestBuilder builder = RequestBuilder.get( RequestBuilder.constructHttpUrl(getServiceUrl(), pathSegments, pathParameters)); Map sdkHeaders = SdkCommon.getSdkHeaders("speech_to_text", "v1", "listAudio"); for (Entry header : sdkHeaders.entrySet()) { builder.header(header.getKey(), header.getValue()); } builder.header("Accept", "application/json"); ResponseConverter responseConverter = ResponseConverterUtils.getValue( new com.google.gson.reflect.TypeToken() {}.getType()); return createServiceCall(builder.build(), responseConverter); } /** * Add an audio resource. * *

Adds an audio resource to a custom acoustic model. Add audio content that reflects the * acoustic characteristics of the audio that you plan to transcribe. You must use credentials for * the instance of the service that owns a model to add an audio resource to it. Adding audio data * does not affect the custom acoustic model until you train the model for the new data by using * the **Train a custom acoustic model** method. * *

You can add individual audio files or an archive file that contains multiple audio files. * Adding multiple audio files via a single archive file is significantly more efficient than * adding each file individually. You can add audio resources in any format that the service * supports for speech recognition. * *

You can use this method to add any number of audio resources to a custom model by calling * the method once for each audio or archive file. You can add multiple different audio resources * at the same time. You must add a minimum of 10 minutes and a maximum of 200 hours of audio that * includes speech, not just silence, to a custom acoustic model before you can train it. No audio * resource, audio- or archive-type, can be larger than 100 MB. To add an audio resource that has * the same name as an existing audio resource, set the `allow_overwrite` parameter to `true`; * otherwise, the request fails. * *

The method is asynchronous. It can take several seconds or minutes to complete depending on * the duration of the audio and, in the case of an archive file, the total number of audio files * being processed. The service returns a 201 response code if the audio is valid. It then * asynchronously analyzes the contents of the audio file or files and automatically extracts * information about the audio such as its length, sampling rate, and encoding. You cannot submit * requests to train or upgrade the model until the service's analysis of all audio resources for * current requests completes. * *

To determine the status of the service's analysis of the audio, use the **Get an audio * resource** method to poll the status of the audio. The method accepts the customization ID of * the custom model and the name of the audio resource, and it returns the status of the resource. * Use a loop to check the status of the audio every few seconds until it becomes `ok`. * *

**See also:** [Add audio to the custom acoustic * model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-acoustic#addAudio). * *

### Content types for audio-type resources * *

You can add an individual audio file in any format that the service supports for speech * recognition. For an audio-type resource, use the `Content-Type` parameter to specify the audio * format (MIME type) of the audio file, including specifying the sampling rate, channels, and * endianness where indicated. * `audio/alaw` (Specify the sampling rate (`rate`) of the audio.) * * `audio/basic` (Use only with narrowband models.) * `audio/flac` * `audio/g729` (Use only with * narrowband models.) * `audio/l16` (Specify the sampling rate (`rate`) and optionally the number * of channels (`channels`) and endianness (`endianness`) of the audio.) * `audio/mp3` * * `audio/mpeg` * `audio/mulaw` (Specify the sampling rate (`rate`) of the audio.) * `audio/ogg` * (The service automatically detects the codec of the input audio.) * `audio/ogg;codecs=opus` * * `audio/ogg;codecs=vorbis` * `audio/wav` (Provide audio with a maximum of nine channels.) * * `audio/webm` (The service automatically detects the codec of the input audio.) * * `audio/webm;codecs=opus` * `audio/webm;codecs=vorbis` * *

The sampling rate of an audio file must match the sampling rate of the base model for the * custom model: for broadband models, at least 16 kHz; for narrowband models, at least 8 kHz. If * the sampling rate of the audio is higher than the minimum required rate, the service * down-samples the audio to the appropriate rate. If the sampling rate of the audio is lower than * the minimum required rate, the service labels the audio file as `invalid`. * *

**See also:** [Audio * formats](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-audio-formats#audio-formats). * *

### Content types for archive-type resources * *

You can add an archive file (**.zip** or **.tar.gz** file) that contains audio files in any * format that the service supports for speech recognition. For an archive-type resource, use the * `Content-Type` parameter to specify the media type of the archive file: * `application/zip` for * a **.zip** file * `application/gzip` for a **.tar.gz** file. * *

When you add an archive-type resource, the `Contained-Content-Type` header is optional * depending on the format of the files that you are adding: * For audio files of type * `audio/alaw`, `audio/basic`, `audio/l16`, or `audio/mulaw`, you must use the * `Contained-Content-Type` header to specify the format of the contained audio files. Include the * `rate`, `channels`, and `endianness` parameters where necessary. In this case, all audio files * contained in the archive file must have the same audio format. * For audio files of all other * types, you can omit the `Contained-Content-Type` header. In this case, the audio files * contained in the archive file can have any of the formats not listed in the previous bullet. * The audio files do not need to have the same format. * *

Do not use the `Contained-Content-Type` header when adding an audio-type resource. * *

### Naming restrictions for embedded audio files * *

The name of an audio file that is contained in an archive-type resource can include a * maximum of 128 characters. This includes the file extension and all elements of the name (for * example, slashes). * * @param addAudioOptions the {@link AddAudioOptions} containing the options for the call * @return a {@link ServiceCall} with a response type of Void */ public ServiceCall addAudio(AddAudioOptions addAudioOptions) { com.ibm.cloud.sdk.core.util.Validator.notNull( addAudioOptions, "addAudioOptions cannot be null"); String[] pathSegments = {"v1/acoustic_customizations", "audio"}; String[] pathParameters = {addAudioOptions.customizationId(), addAudioOptions.audioName()}; RequestBuilder builder = RequestBuilder.post( RequestBuilder.constructHttpUrl(getServiceUrl(), pathSegments, pathParameters)); Map sdkHeaders = SdkCommon.getSdkHeaders("speech_to_text", "v1", "addAudio"); for (Entry header : sdkHeaders.entrySet()) { builder.header(header.getKey(), header.getValue()); } builder.header("Accept", "application/json"); if (addAudioOptions.contentType() != null) { builder.header("Content-Type", addAudioOptions.contentType()); } if (addAudioOptions.containedContentType() != null) { builder.header("Contained-Content-Type", addAudioOptions.containedContentType()); } if (addAudioOptions.allowOverwrite() != null) { builder.query("allow_overwrite", String.valueOf(addAudioOptions.allowOverwrite())); } builder.bodyContent(addAudioOptions.contentType(), null, null, addAudioOptions.audioResource()); ResponseConverter responseConverter = ResponseConverterUtils.getVoid(); return createServiceCall(builder.build(), responseConverter); } /** * Get an audio resource. * *

Gets information about an audio resource from a custom acoustic model. The method returns an * `AudioListing` object whose fields depend on the type of audio resource that you specify with * the method's `audio_name` parameter: * **For an audio-type resource,** the object's fields * match those of an `AudioResource` object: `duration`, `name`, `details`, and `status`. * **For * an archive-type resource,** the object includes a `container` field whose fields match those of * an `AudioResource` object. It also includes an `audio` field, which contains an array of * `AudioResource` objects that provides information about the audio files that are contained in * the archive. * *

The information includes the status of the specified audio resource. The status is important * for checking the service's analysis of a resource that you add to the custom model. * For an * audio-type resource, the `status` field is located in the `AudioListing` object. * For an * archive-type resource, the `status` field is located in the `AudioResource` object that is * returned in the `container` field. * *

You must use credentials for the instance of the service that owns a model to list its audio * resources. * *

**See also:** [Listing audio resources for a custom acoustic * model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-manageAudio#listAudio). * * @param getAudioOptions the {@link GetAudioOptions} containing the options for the call * @return a {@link ServiceCall} with a response type of {@link AudioListing} */ public ServiceCall getAudio(GetAudioOptions getAudioOptions) { com.ibm.cloud.sdk.core.util.Validator.notNull( getAudioOptions, "getAudioOptions cannot be null"); String[] pathSegments = {"v1/acoustic_customizations", "audio"}; String[] pathParameters = {getAudioOptions.customizationId(), getAudioOptions.audioName()}; RequestBuilder builder = RequestBuilder.get( RequestBuilder.constructHttpUrl(getServiceUrl(), pathSegments, pathParameters)); Map sdkHeaders = SdkCommon.getSdkHeaders("speech_to_text", "v1", "getAudio"); for (Entry header : sdkHeaders.entrySet()) { builder.header(header.getKey(), header.getValue()); } builder.header("Accept", "application/json"); ResponseConverter responseConverter = ResponseConverterUtils.getValue( new com.google.gson.reflect.TypeToken() {}.getType()); return createServiceCall(builder.build(), responseConverter); } /** * Delete an audio resource. * *

Deletes an existing audio resource from a custom acoustic model. Deleting an archive-type * audio resource removes the entire archive of files. The service does not allow deletion of * individual files from an archive resource. * *

Removing an audio resource does not affect the custom model until you train the model on its * updated data by using the **Train a custom acoustic model** method. You can delete an existing * audio resource from a model while a different resource is being added to the model. You must * use credentials for the instance of the service that owns a model to delete its audio * resources. * *

**See also:** [Deleting an audio resource from a custom acoustic * model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-manageAudio#deleteAudio). * * @param deleteAudioOptions the {@link DeleteAudioOptions} containing the options for the call * @return a {@link ServiceCall} with a response type of Void */ public ServiceCall deleteAudio(DeleteAudioOptions deleteAudioOptions) { com.ibm.cloud.sdk.core.util.Validator.notNull( deleteAudioOptions, "deleteAudioOptions cannot be null"); String[] pathSegments = {"v1/acoustic_customizations", "audio"}; String[] pathParameters = { deleteAudioOptions.customizationId(), deleteAudioOptions.audioName() }; RequestBuilder builder = RequestBuilder.delete( RequestBuilder.constructHttpUrl(getServiceUrl(), pathSegments, pathParameters)); Map sdkHeaders = SdkCommon.getSdkHeaders("speech_to_text", "v1", "deleteAudio"); for (Entry header : sdkHeaders.entrySet()) { builder.header(header.getKey(), header.getValue()); } builder.header("Accept", "application/json"); ResponseConverter responseConverter = ResponseConverterUtils.getVoid(); return createServiceCall(builder.build(), responseConverter); } /** * Delete labeled data. * *

Deletes all data that is associated with a specified customer ID. The method deletes all * data for the customer ID, regardless of the method by which the information was added. The * method has no effect if no data is associated with the customer ID. You must issue the request * with credentials for the same instance of the service that was used to associate the customer * ID with the data. * *

You associate a customer ID with data by passing the `X-Watson-Metadata` header with a * request that passes the data. * *

**See also:** [Information * security](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-information-security#information-security). * * @param deleteUserDataOptions the {@link DeleteUserDataOptions} containing the options for the * call * @return a {@link ServiceCall} with a response type of Void */ public ServiceCall deleteUserData(DeleteUserDataOptions deleteUserDataOptions) { com.ibm.cloud.sdk.core.util.Validator.notNull( deleteUserDataOptions, "deleteUserDataOptions cannot be null"); String[] pathSegments = {"v1/user_data"}; RequestBuilder builder = RequestBuilder.delete(RequestBuilder.constructHttpUrl(getServiceUrl(), pathSegments)); Map sdkHeaders = SdkCommon.getSdkHeaders("speech_to_text", "v1", "deleteUserData"); for (Entry header : sdkHeaders.entrySet()) { builder.header(header.getKey(), header.getValue()); } builder.query("customer_id", deleteUserDataOptions.customerId()); ResponseConverter responseConverter = ResponseConverterUtils.getVoid(); return createServiceCall(builder.build(), responseConverter); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy