All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.ibm.watson.text_to_speech.v1.TextToSpeech Maven / Gradle / Ivy

There is a newer version: 13.0.0
Show newest version
/*
 * (C) Copyright IBM Corp. 2019, 2021.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
 * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations under the License.
 */

/*
 * IBM OpenAPI SDK Code Generator Version: 3.38.0-07189efd-20210827-205025
 */

package com.ibm.watson.text_to_speech.v1;

import com.google.gson.JsonObject;
import com.ibm.cloud.sdk.core.http.RequestBuilder;
import com.ibm.cloud.sdk.core.http.ResponseConverter;
import com.ibm.cloud.sdk.core.http.ServiceCall;
import com.ibm.cloud.sdk.core.security.Authenticator;
import com.ibm.cloud.sdk.core.security.ConfigBasedAuthenticatorFactory;
import com.ibm.cloud.sdk.core.service.BaseService;
import com.ibm.cloud.sdk.core.util.RequestUtils;
import com.ibm.cloud.sdk.core.util.ResponseConverterUtils;
import com.ibm.watson.common.SdkCommon;
import com.ibm.watson.text_to_speech.v1.model.AddCustomPromptOptions;
import com.ibm.watson.text_to_speech.v1.model.AddWordOptions;
import com.ibm.watson.text_to_speech.v1.model.AddWordsOptions;
import com.ibm.watson.text_to_speech.v1.model.CreateCustomModelOptions;
import com.ibm.watson.text_to_speech.v1.model.CreateSpeakerModelOptions;
import com.ibm.watson.text_to_speech.v1.model.CustomModel;
import com.ibm.watson.text_to_speech.v1.model.CustomModels;
import com.ibm.watson.text_to_speech.v1.model.DeleteCustomModelOptions;
import com.ibm.watson.text_to_speech.v1.model.DeleteCustomPromptOptions;
import com.ibm.watson.text_to_speech.v1.model.DeleteSpeakerModelOptions;
import com.ibm.watson.text_to_speech.v1.model.DeleteUserDataOptions;
import com.ibm.watson.text_to_speech.v1.model.DeleteWordOptions;
import com.ibm.watson.text_to_speech.v1.model.GetCustomModelOptions;
import com.ibm.watson.text_to_speech.v1.model.GetCustomPromptOptions;
import com.ibm.watson.text_to_speech.v1.model.GetPronunciationOptions;
import com.ibm.watson.text_to_speech.v1.model.GetSpeakerModelOptions;
import com.ibm.watson.text_to_speech.v1.model.GetVoiceOptions;
import com.ibm.watson.text_to_speech.v1.model.GetWordOptions;
import com.ibm.watson.text_to_speech.v1.model.ListCustomModelsOptions;
import com.ibm.watson.text_to_speech.v1.model.ListCustomPromptsOptions;
import com.ibm.watson.text_to_speech.v1.model.ListSpeakerModelsOptions;
import com.ibm.watson.text_to_speech.v1.model.ListVoicesOptions;
import com.ibm.watson.text_to_speech.v1.model.ListWordsOptions;
import com.ibm.watson.text_to_speech.v1.model.Prompt;
import com.ibm.watson.text_to_speech.v1.model.Prompts;
import com.ibm.watson.text_to_speech.v1.model.Pronunciation;
import com.ibm.watson.text_to_speech.v1.model.SpeakerCustomModels;
import com.ibm.watson.text_to_speech.v1.model.SpeakerModel;
import com.ibm.watson.text_to_speech.v1.model.Speakers;
import com.ibm.watson.text_to_speech.v1.model.SynthesizeOptions;
import com.ibm.watson.text_to_speech.v1.model.Translation;
import com.ibm.watson.text_to_speech.v1.model.UpdateCustomModelOptions;
import com.ibm.watson.text_to_speech.v1.model.Voice;
import com.ibm.watson.text_to_speech.v1.model.Voices;
import com.ibm.watson.text_to_speech.v1.model.Words;
import com.ibm.watson.text_to_speech.v1.websocket.SynthesizeCallback;
import com.ibm.watson.text_to_speech.v1.websocket.TextToSpeechWebSocketListener;
import java.io.InputStream;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
import okhttp3.HttpUrl;
import okhttp3.MultipartBody;
import okhttp3.OkHttpClient;
import okhttp3.Request;
import okhttp3.WebSocket;

/**
 * The IBM Watson™ Text to Speech service provides APIs that use IBM's speech-synthesis
 * capabilities to synthesize text into natural-sounding speech in a variety of languages, dialects,
 * and voices. The service supports at least one male or female voice, sometimes both, for each
 * language. The audio is streamed back to the client with minimal delay.
 *
 * 

For speech synthesis, the service supports a synchronous HTTP Representational State Transfer * (REST) interface and a WebSocket interface. Both interfaces support plain text and SSML input. * SSML is an XML-based markup language that provides text annotation for speech-synthesis * applications. The WebSocket interface also supports the SSML * <code>&lt;mark&gt;</code> element and word timings. * *

The service offers a customization interface that you can use to define sounds-like or * phonetic translations for words. A sounds-like translation consists of one or more words that, * when combined, sound like the word. A phonetic translation is based on the SSML phoneme format * for representing a word. You can specify a phonetic translation in standard International * Phonetic Alphabet (IPA) representation or in the proprietary IBM Symbolic Phonetic Representation * (SPR). * *

The service also offers a Tune by Example feature that lets you define custom prompts. You can * also define speaker models to improve the quality of your custom prompts. The service support * custom prompts only for US English custom models and voices. * *

**IBM Cloud&reg;.** The Arabic, Chinese, Dutch, Australian English, and Korean languages * and voices are supported only for IBM Cloud. For phonetic translation, they support only IPA, not * SPR. * *

API Version: 1.0.0 See: https://cloud.ibm.com/docs/text-to-speech */ public class TextToSpeech extends BaseService { public static final String DEFAULT_SERVICE_NAME = "text_to_speech"; public static final String DEFAULT_SERVICE_URL = "https://api.us-south.text-to-speech.watson.cloud.ibm.com"; /** * Constructs an instance of the `TextToSpeech` client. The default service name is used to * configure the client instance. */ public TextToSpeech() { this( DEFAULT_SERVICE_NAME, ConfigBasedAuthenticatorFactory.getAuthenticator(DEFAULT_SERVICE_NAME)); } /** * Constructs an instance of the `TextToSpeech` client. The default service name and specified * authenticator are used to configure the client instance. * * @param authenticator the {@link Authenticator} instance to be configured for this client */ public TextToSpeech(Authenticator authenticator) { this(DEFAULT_SERVICE_NAME, authenticator); } /** * Constructs an instance of the `TextToSpeech` client. The specified service name is used to * configure the client instance. * * @param serviceName the service name to be used when configuring the client instance */ public TextToSpeech(String serviceName) { this(serviceName, ConfigBasedAuthenticatorFactory.getAuthenticator(serviceName)); } /** * Constructs an instance of the `TextToSpeech` client. The specified service name and * authenticator are used to configure the client instance. * * @param serviceName the service name to be used when configuring the client instance * @param authenticator the {@link Authenticator} instance to be configured for this client */ public TextToSpeech(String serviceName, Authenticator authenticator) { super(serviceName, authenticator); setServiceUrl(DEFAULT_SERVICE_URL); this.configureService(serviceName); } /** * List voices. * *

Lists all voices available for use with the service. The information includes the name, * language, gender, and other details about the voice. The ordering of the list of voices can * change from call to call; do not rely on an alphabetized or static list of voices. To see * information about a specific voice, use the [Get a voice](#getvoice). * *

**See also:** [Listing all available * voices](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-voices#listVoices). * * @param listVoicesOptions the {@link ListVoicesOptions} containing the options for the call * @return a {@link ServiceCall} with a result of type {@link Voices} */ public ServiceCall listVoices(ListVoicesOptions listVoicesOptions) { RequestBuilder builder = RequestBuilder.get(RequestBuilder.resolveRequestUrl(getServiceUrl(), "/v1/voices")); Map sdkHeaders = SdkCommon.getSdkHeaders("text_to_speech", "v1", "listVoices"); for (Entry header : sdkHeaders.entrySet()) { builder.header(header.getKey(), header.getValue()); } builder.header("Accept", "application/json"); ResponseConverter responseConverter = ResponseConverterUtils.getValue( new com.google.gson.reflect.TypeToken() {}.getType()); return createServiceCall(builder.build(), responseConverter); } /** * List voices. * *

Lists all voices available for use with the service. The information includes the name, * language, gender, and other details about the voice. The ordering of the list of voices can * change from call to call; do not rely on an alphabetized or static list of voices. To see * information about a specific voice, use the [Get a voice](#getvoice). * *

**See also:** [Listing all available * voices](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-voices#listVoices). * * @return a {@link ServiceCall} with a result of type {@link Voices} */ public ServiceCall listVoices() { return listVoices(null); } /** * Get a voice. * *

Gets information about the specified voice. The information includes the name, language, * gender, and other details about the voice. Specify a customization ID to obtain information for * a custom model that is defined for the language of the specified voice. To list information * about all available voices, use the [List voices](#listvoices) method. * *

**See also:** [Listing a specific * voice](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-voices#listVoice). * *

### Important voice updates for IBM Cloud * *

The service's voices underwent significant change on 2 December 2020. * The Arabic, Chinese, * Dutch, Australian English, and Korean voices are now neural instead of concatenative. * The * `ar-AR_OmarVoice` voice is deprecated. Use `ar-MS_OmarVoice` voice instead. * The `ar-AR` * language identifier cannot be used to create a custom model. Use the `ar-MS` identifier * instead. * The standard concatenative voices for the following languages are now deprecated: * Brazilian Portuguese, United Kingdom and United States English, French, German, Italian, * Japanese, and Spanish (all dialects). * The features expressive SSML, voice transformation * SSML, and use of the `volume` attribute of the `<prosody>` element are deprecated and are * not supported with any of the service's neural voices. * All of the service's voices are now * customizable and generally available (GA) for production use. * *

The deprecated voices and features will continue to function for at least one year but might * be removed at a future date. You are encouraged to migrate to the equivalent neural voices at * your earliest convenience. For more information about all voice updates, see the [2 December * 2020 service * update](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-release-notes#December2020) * in the release notes for IBM Cloud. * * @param getVoiceOptions the {@link GetVoiceOptions} containing the options for the call * @return a {@link ServiceCall} with a result of type {@link Voice} */ public ServiceCall getVoice(GetVoiceOptions getVoiceOptions) { com.ibm.cloud.sdk.core.util.Validator.notNull( getVoiceOptions, "getVoiceOptions cannot be null"); Map pathParamsMap = new HashMap(); pathParamsMap.put("voice", getVoiceOptions.voice()); RequestBuilder builder = RequestBuilder.get( RequestBuilder.resolveRequestUrl(getServiceUrl(), "/v1/voices/{voice}", pathParamsMap)); Map sdkHeaders = SdkCommon.getSdkHeaders("text_to_speech", "v1", "getVoice"); for (Entry header : sdkHeaders.entrySet()) { builder.header(header.getKey(), header.getValue()); } builder.header("Accept", "application/json"); if (getVoiceOptions.customizationId() != null) { builder.query("customization_id", String.valueOf(getVoiceOptions.customizationId())); } ResponseConverter responseConverter = ResponseConverterUtils.getValue( new com.google.gson.reflect.TypeToken() {}.getType()); return createServiceCall(builder.build(), responseConverter); } /** * Synthesize audio. * *

Synthesizes text to audio that is spoken in the specified voice. The service bases its * understanding of the language for the input text on the specified voice. Use a voice that * matches the language of the input text. * *

The method accepts a maximum of 5 KB of input text in the body of the request, and 8 KB for * the URL and headers. The 5 KB limit includes any SSML tags that you specify. The service * returns the synthesized audio stream as an array of bytes. * *

**See also:** [The HTTP * interface](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-usingHTTP#usingHTTP). * *

### Audio formats (accept types) * *

The service can return audio in the following formats (MIME types). * Where indicated, you * can optionally specify the sampling rate (`rate`) of the audio. You must specify a sampling * rate for the `audio/l16` and `audio/mulaw` formats. A specified sampling rate must lie in the * range of 8 kHz to 192 kHz. Some formats restrict the sampling rate to certain values, as noted. * * For the `audio/l16` format, you can optionally specify the endianness (`endianness`) of the * audio: `endianness=big-endian` or `endianness=little-endian`. * *

Use the `Accept` header or the `accept` parameter to specify the requested format of the * response audio. If you omit an audio format altogether, the service returns the audio in Ogg * format with the Opus codec (`audio/ogg;codecs=opus`). The service always returns single-channel * audio. * `audio/basic` - The service returns audio with a sampling rate of 8000 Hz. * * `audio/flac` - You can optionally specify the `rate` of the audio. The default sampling rate is * 22,050 Hz. * `audio/l16` - You must specify the `rate` of the audio. You can optionally specify * the `endianness` of the audio. The default endianness is `little-endian`. * `audio/mp3` - You * can optionally specify the `rate` of the audio. The default sampling rate is 22,050 Hz. * * `audio/mpeg` - You can optionally specify the `rate` of the audio. The default sampling rate is * 22,050 Hz. * `audio/mulaw` - You must specify the `rate` of the audio. * `audio/ogg` - The * service returns the audio in the `vorbis` codec. You can optionally specify the `rate` of the * audio. The default sampling rate is 22,050 Hz. * `audio/ogg;codecs=opus` - You can optionally * specify the `rate` of the audio. Only the following values are valid sampling rates: `48000`, * `24000`, `16000`, `12000`, or `8000`. If you specify a value other than one of these, the * service returns an error. The default sampling rate is 48,000 Hz. * `audio/ogg;codecs=vorbis` - * You can optionally specify the `rate` of the audio. The default sampling rate is 22,050 Hz. * * `audio/wav` - You can optionally specify the `rate` of the audio. The default sampling rate is * 22,050 Hz. * `audio/webm` - The service returns the audio in the `opus` codec. The service * returns audio with a sampling rate of 48,000 Hz. * `audio/webm;codecs=opus` - The service * returns audio with a sampling rate of 48,000 Hz. * `audio/webm;codecs=vorbis` - You can * optionally specify the `rate` of the audio. The default sampling rate is 22,050 Hz. * *

For more information about specifying an audio format, including additional details about * some of the formats, see [Using audio * formats](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-audio-formats). * *

### Important voice updates for IBM Cloud * *

The service's voices underwent significant change on 2 December 2020. * The Arabic, Chinese, * Dutch, Australian English, and Korean voices are now neural instead of concatenative. * The * `ar-AR_OmarVoice` voice is deprecated. Use `ar-MS_OmarVoice` voice instead. * The `ar-AR` * language identifier cannot be used to create a custom model. Use the `ar-MS` identifier * instead. * The standard concatenative voices for the following languages are now deprecated: * Brazilian Portuguese, United Kingdom and United States English, French, German, Italian, * Japanese, and Spanish (all dialects). * The features expressive SSML, voice transformation * SSML, and use of the `volume` attribute of the `<prosody>` element are deprecated and are * not supported with any of the service's neural voices. * All of the service's voices are now * customizable and generally available (GA) for production use. * *

The deprecated voices and features will continue to function for at least one year but might * be removed at a future date. You are encouraged to migrate to the equivalent neural voices at * your earliest convenience. For more information about all voice updates, see the [2 December * 2020 service * update](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-release-notes#December2020) * in the release notes for IBM Cloud. * *

### Warning messages * *

If a request includes invalid query parameters, the service returns a `Warnings` response * header that provides messages about the invalid parameters. The warning includes a descriptive * message and a list of invalid argument strings. For example, a message such as `"Unknown * arguments:"` or `"Unknown url query arguments:"` followed by a list of the form * `"{invalid_arg_1}, {invalid_arg_2}."` The request succeeds despite the warnings. * * @param synthesizeOptions the {@link SynthesizeOptions} containing the options for the call * @return a {@link ServiceCall} with a result of type {@link InputStream} */ public ServiceCall synthesize(SynthesizeOptions synthesizeOptions) { com.ibm.cloud.sdk.core.util.Validator.notNull( synthesizeOptions, "synthesizeOptions cannot be null"); RequestBuilder builder = RequestBuilder.post(RequestBuilder.resolveRequestUrl(getServiceUrl(), "/v1/synthesize")); Map sdkHeaders = SdkCommon.getSdkHeaders("text_to_speech", "v1", "synthesize"); for (Entry header : sdkHeaders.entrySet()) { builder.header(header.getKey(), header.getValue()); } if (synthesizeOptions.accept() != null) { builder.header("Accept", synthesizeOptions.accept()); } if (synthesizeOptions.voice() != null) { builder.query("voice", String.valueOf(synthesizeOptions.voice())); } if (synthesizeOptions.customizationId() != null) { builder.query("customization_id", String.valueOf(synthesizeOptions.customizationId())); } final JsonObject contentJson = new JsonObject(); contentJson.addProperty("text", synthesizeOptions.text()); builder.bodyJson(contentJson); ResponseConverter responseConverter = ResponseConverterUtils.getInputStream(); return createServiceCall(builder.build(), responseConverter); } /** * Synthesize audio. * *

Synthesizes text to audio that is spoken in the specified voice. The service bases its * understanding of the language for the input text on the specified voice. Use a voice that * matches the language of the input text. * *

The method accepts a maximum of 5 KB of input text in the body of the request, and 8 KB for * the URL and headers. The 5 KB limit includes any SSML tags that you specify. The service * returns the synthesized audio stream as an array of bytes. * *

### Audio formats (accept types) * *

For more information about specifying an audio format, including additional details about * some of the formats, see [Audio * formats](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-audioFormats#audioFormats). * * @param synthesizeOptions the {@link SynthesizeOptions} containing the options for the call * @param callback the {@link SynthesizeCallback} callback * @return a {@link WebSocket} instance */ public WebSocket synthesizeUsingWebSocket( SynthesizeOptions synthesizeOptions, SynthesizeCallback callback) { com.ibm.cloud.sdk.core.util.Validator.notNull( synthesizeOptions, "synthesizeOptions cannot be null"); com.ibm.cloud.sdk.core.util.Validator.notNull(callback, "callback cannot be null"); HttpUrl.Builder urlBuilder = HttpUrl.parse(getServiceUrl() + "/v1/synthesize").newBuilder(); if (synthesizeOptions.voice() != null) { urlBuilder.addQueryParameter("voice", synthesizeOptions.voice()); } if (synthesizeOptions.customizationId() != null) { urlBuilder.addQueryParameter("customization_id", synthesizeOptions.customizationId()); } String url = urlBuilder.toString().replace("https://", "wss://"); Request.Builder builder = new Request.Builder().url(url); setAuthentication(builder); setDefaultHeaders(builder); OkHttpClient client = configureHttpClient(); return client.newWebSocket( builder.build(), new TextToSpeechWebSocketListener(synthesizeOptions, callback)); } /** * Get pronunciation. * *

Gets the phonetic pronunciation for the specified word. You can request the pronunciation * for a specific format. You can also request the pronunciation for a specific voice to see the * default translation for the language of that voice or for a specific custom model to see the * translation for that model. * *

**See also:** [Querying a word from a * language](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-customWords#cuWordsQueryLanguage). * *

### Important voice updates for IBM Cloud * *

The service's voices underwent significant change on 2 December 2020. * The Arabic, Chinese, * Dutch, Australian English, and Korean voices are now neural instead of concatenative. * The * `ar-AR_OmarVoice` voice is deprecated. Use `ar-MS_OmarVoice` voice instead. * The `ar-AR` * language identifier cannot be used to create a custom model. Use the `ar-MS` identifier * instead. * The standard concatenative voices for the following languages are now deprecated: * Brazilian Portuguese, United Kingdom and United States English, French, German, Italian, * Japanese, and Spanish (all dialects). * The features expressive SSML, voice transformation * SSML, and use of the `volume` attribute of the `<prosody>` element are deprecated and are * not supported with any of the service's neural voices. * All of the service's voices are now * customizable and generally available (GA) for production use. * *

The deprecated voices and features will continue to function for at least one year but might * be removed at a future date. You are encouraged to migrate to the equivalent neural voices at * your earliest convenience. For more information about all voice updates, see the [2 December * 2020 service * update](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-release-notes#December2020) * in the release notes for IBM Cloud. * * @param getPronunciationOptions the {@link GetPronunciationOptions} containing the options for * the call * @return a {@link ServiceCall} with a result of type {@link Pronunciation} */ public ServiceCall getPronunciation( GetPronunciationOptions getPronunciationOptions) { com.ibm.cloud.sdk.core.util.Validator.notNull( getPronunciationOptions, "getPronunciationOptions cannot be null"); RequestBuilder builder = RequestBuilder.get(RequestBuilder.resolveRequestUrl(getServiceUrl(), "/v1/pronunciation")); Map sdkHeaders = SdkCommon.getSdkHeaders("text_to_speech", "v1", "getPronunciation"); for (Entry header : sdkHeaders.entrySet()) { builder.header(header.getKey(), header.getValue()); } builder.header("Accept", "application/json"); builder.query("text", String.valueOf(getPronunciationOptions.text())); if (getPronunciationOptions.voice() != null) { builder.query("voice", String.valueOf(getPronunciationOptions.voice())); } if (getPronunciationOptions.format() != null) { builder.query("format", String.valueOf(getPronunciationOptions.format())); } if (getPronunciationOptions.customizationId() != null) { builder.query("customization_id", String.valueOf(getPronunciationOptions.customizationId())); } ResponseConverter responseConverter = ResponseConverterUtils.getValue( new com.google.gson.reflect.TypeToken() {}.getType()); return createServiceCall(builder.build(), responseConverter); } /** * Create a custom model. * *

Creates a new empty custom model. You must specify a name for the new custom model. You can * optionally specify the language and a description for the new model. The model is owned by the * instance of the service whose credentials are used to create it. * *

**See also:** [Creating a custom * model](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-customModels#cuModelsCreate). * *

### Important voice updates for IBM Cloud * *

The service's voices underwent significant change on 2 December 2020. * The Arabic, Chinese, * Dutch, Australian English, and Korean voices are now neural instead of concatenative. * The * `ar-AR_OmarVoice` voice is deprecated. Use `ar-MS_OmarVoice` voice instead. * The `ar-AR` * language identifier cannot be used to create a custom model. Use the `ar-MS` identifier * instead. * The standard concatenative voices for the following languages are now deprecated: * Brazilian Portuguese, United Kingdom and United States English, French, German, Italian, * Japanese, and Spanish (all dialects). * The features expressive SSML, voice transformation * SSML, and use of the `volume` attribute of the `<prosody>` element are deprecated and are * not supported with any of the service's neural voices. * All of the service's voices are now * customizable and generally available (GA) for production use. * *

The deprecated voices and features will continue to function for at least one year but might * be removed at a future date. You are encouraged to migrate to the equivalent neural voices at * your earliest convenience. For more information about all voice updates, see the [2 December * 2020 service * update](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-release-notes#December2020) * in the release notes for IBM Cloud. * * @param createCustomModelOptions the {@link CreateCustomModelOptions} containing the options for * the call * @return a {@link ServiceCall} with a result of type {@link CustomModel} */ public ServiceCall createCustomModel( CreateCustomModelOptions createCustomModelOptions) { com.ibm.cloud.sdk.core.util.Validator.notNull( createCustomModelOptions, "createCustomModelOptions cannot be null"); RequestBuilder builder = RequestBuilder.post( RequestBuilder.resolveRequestUrl(getServiceUrl(), "/v1/customizations")); Map sdkHeaders = SdkCommon.getSdkHeaders("text_to_speech", "v1", "createCustomModel"); for (Entry header : sdkHeaders.entrySet()) { builder.header(header.getKey(), header.getValue()); } builder.header("Accept", "application/json"); final JsonObject contentJson = new JsonObject(); contentJson.addProperty("name", createCustomModelOptions.name()); if (createCustomModelOptions.language() != null) { contentJson.addProperty("language", createCustomModelOptions.language()); } if (createCustomModelOptions.description() != null) { contentJson.addProperty("description", createCustomModelOptions.description()); } builder.bodyJson(contentJson); ResponseConverter responseConverter = ResponseConverterUtils.getValue( new com.google.gson.reflect.TypeToken() {}.getType()); return createServiceCall(builder.build(), responseConverter); } /** * List custom models. * *

Lists metadata such as the name and description for all custom models that are owned by an * instance of the service. Specify a language to list the custom models for that language only. * To see the words and prompts in addition to the metadata for a specific custom model, use the * [Get a custom model](#getcustommodel) method. You must use credentials for the instance of the * service that owns a model to list information about it. * *

**See also:** [Querying all custom * models](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-customModels#cuModelsQueryAll). * * @param listCustomModelsOptions the {@link ListCustomModelsOptions} containing the options for * the call * @return a {@link ServiceCall} with a result of type {@link CustomModels} */ public ServiceCall listCustomModels( ListCustomModelsOptions listCustomModelsOptions) { if (listCustomModelsOptions == null) { listCustomModelsOptions = new ListCustomModelsOptions.Builder().build(); } RequestBuilder builder = RequestBuilder.get(RequestBuilder.resolveRequestUrl(getServiceUrl(), "/v1/customizations")); Map sdkHeaders = SdkCommon.getSdkHeaders("text_to_speech", "v1", "listCustomModels"); for (Entry header : sdkHeaders.entrySet()) { builder.header(header.getKey(), header.getValue()); } builder.header("Accept", "application/json"); if (listCustomModelsOptions.language() != null) { builder.query("language", String.valueOf(listCustomModelsOptions.language())); } ResponseConverter responseConverter = ResponseConverterUtils.getValue( new com.google.gson.reflect.TypeToken() {}.getType()); return createServiceCall(builder.build(), responseConverter); } /** * List custom models. * *

Lists metadata such as the name and description for all custom models that are owned by an * instance of the service. Specify a language to list the custom models for that language only. * To see the words and prompts in addition to the metadata for a specific custom model, use the * [Get a custom model](#getcustommodel) method. You must use credentials for the instance of the * service that owns a model to list information about it. * *

**See also:** [Querying all custom * models](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-customModels#cuModelsQueryAll). * * @return a {@link ServiceCall} with a result of type {@link CustomModels} */ public ServiceCall listCustomModels() { return listCustomModels(null); } /** * Update a custom model. * *

Updates information for the specified custom model. You can update metadata such as the name * and description of the model. You can also update the words in the model and their * translations. Adding a new translation for a word that already exists in a custom model * overwrites the word's existing translation. A custom model can contain no more than 20,000 * entries. You must use credentials for the instance of the service that owns a model to update * it. * *

You can define sounds-like or phonetic translations for words. A sounds-like translation * consists of one or more words that, when combined, sound like the word. Phonetic translations * are based on the SSML phoneme format for representing a word. You can specify them in standard * International Phonetic Alphabet (IPA) representation * *

<code>&lt;phoneme alphabet="ipa" * ph="t&#601;m&#712;&#593;to"&gt;&lt;/phoneme&gt;</code> * *

or in the proprietary IBM Symbolic Phonetic Representation (SPR) * *

<code>&lt;phoneme alphabet="ibm" * ph="1gAstroEntxrYFXs"&gt;&lt;/phoneme&gt;</code> * *

**See also:** * [Updating a custom * model](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-customModels#cuModelsUpdate) * * [Adding words to a Japanese custom * model](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-customWords#cuJapaneseAdd) * * [Understanding * customization](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-customIntro#customIntro). * * @param updateCustomModelOptions the {@link UpdateCustomModelOptions} containing the options for * the call * @return a {@link ServiceCall} with a void result */ public ServiceCall updateCustomModel(UpdateCustomModelOptions updateCustomModelOptions) { com.ibm.cloud.sdk.core.util.Validator.notNull( updateCustomModelOptions, "updateCustomModelOptions cannot be null"); Map pathParamsMap = new HashMap(); pathParamsMap.put("customization_id", updateCustomModelOptions.customizationId()); RequestBuilder builder = RequestBuilder.post( RequestBuilder.resolveRequestUrl( getServiceUrl(), "/v1/customizations/{customization_id}", pathParamsMap)); Map sdkHeaders = SdkCommon.getSdkHeaders("text_to_speech", "v1", "updateCustomModel"); for (Entry header : sdkHeaders.entrySet()) { builder.header(header.getKey(), header.getValue()); } builder.header("Accept", "application/json"); final JsonObject contentJson = new JsonObject(); if (updateCustomModelOptions.name() != null) { contentJson.addProperty("name", updateCustomModelOptions.name()); } if (updateCustomModelOptions.description() != null) { contentJson.addProperty("description", updateCustomModelOptions.description()); } if (updateCustomModelOptions.words() != null) { contentJson.add( "words", com.ibm.cloud.sdk.core.util.GsonSingleton.getGson() .toJsonTree(updateCustomModelOptions.words())); } builder.bodyJson(contentJson); ResponseConverter responseConverter = ResponseConverterUtils.getVoid(); return createServiceCall(builder.build(), responseConverter); } /** * Get a custom model. * *

Gets all information about a specified custom model. In addition to metadata such as the * name and description of the custom model, the output includes the words and their translations * that are defined for the model, as well as any prompts that are defined for the model. To see * just the metadata for a model, use the [List custom models](#listcustommodels) method. * *

**See also:** [Querying a custom * model](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-customModels#cuModelsQuery). * * @param getCustomModelOptions the {@link GetCustomModelOptions} containing the options for the * call * @return a {@link ServiceCall} with a result of type {@link CustomModel} */ public ServiceCall getCustomModel(GetCustomModelOptions getCustomModelOptions) { com.ibm.cloud.sdk.core.util.Validator.notNull( getCustomModelOptions, "getCustomModelOptions cannot be null"); Map pathParamsMap = new HashMap(); pathParamsMap.put("customization_id", getCustomModelOptions.customizationId()); RequestBuilder builder = RequestBuilder.get( RequestBuilder.resolveRequestUrl( getServiceUrl(), "/v1/customizations/{customization_id}", pathParamsMap)); Map sdkHeaders = SdkCommon.getSdkHeaders("text_to_speech", "v1", "getCustomModel"); for (Entry header : sdkHeaders.entrySet()) { builder.header(header.getKey(), header.getValue()); } builder.header("Accept", "application/json"); ResponseConverter responseConverter = ResponseConverterUtils.getValue( new com.google.gson.reflect.TypeToken() {}.getType()); return createServiceCall(builder.build(), responseConverter); } /** * Delete a custom model. * *

Deletes the specified custom model. You must use credentials for the instance of the service * that owns a model to delete it. * *

**See also:** [Deleting a custom * model](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-customModels#cuModelsDelete). * * @param deleteCustomModelOptions the {@link DeleteCustomModelOptions} containing the options for * the call * @return a {@link ServiceCall} with a void result */ public ServiceCall deleteCustomModel(DeleteCustomModelOptions deleteCustomModelOptions) { com.ibm.cloud.sdk.core.util.Validator.notNull( deleteCustomModelOptions, "deleteCustomModelOptions cannot be null"); Map pathParamsMap = new HashMap(); pathParamsMap.put("customization_id", deleteCustomModelOptions.customizationId()); RequestBuilder builder = RequestBuilder.delete( RequestBuilder.resolveRequestUrl( getServiceUrl(), "/v1/customizations/{customization_id}", pathParamsMap)); Map sdkHeaders = SdkCommon.getSdkHeaders("text_to_speech", "v1", "deleteCustomModel"); for (Entry header : sdkHeaders.entrySet()) { builder.header(header.getKey(), header.getValue()); } ResponseConverter responseConverter = ResponseConverterUtils.getVoid(); return createServiceCall(builder.build(), responseConverter); } /** * Add custom words. * *

Adds one or more words and their translations to the specified custom model. Adding a new * translation for a word that already exists in a custom model overwrites the word's existing * translation. A custom model can contain no more than 20,000 entries. You must use credentials * for the instance of the service that owns a model to add words to it. * *

You can define sounds-like or phonetic translations for words. A sounds-like translation * consists of one or more words that, when combined, sound like the word. Phonetic translations * are based on the SSML phoneme format for representing a word. You can specify them in standard * International Phonetic Alphabet (IPA) representation * *

<code>&lt;phoneme alphabet="ipa" * ph="t&#601;m&#712;&#593;to"&gt;&lt;/phoneme&gt;</code> * *

or in the proprietary IBM Symbolic Phonetic Representation (SPR) * *

<code>&lt;phoneme alphabet="ibm" * ph="1gAstroEntxrYFXs"&gt;&lt;/phoneme&gt;</code> * *

**See also:** * [Adding multiple words to a custom * model](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-customWords#cuWordsAdd) * * [Adding words to a Japanese custom * model](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-customWords#cuJapaneseAdd) * * [Understanding * customization](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-customIntro#customIntro). * * @param addWordsOptions the {@link AddWordsOptions} containing the options for the call * @return a {@link ServiceCall} with a void result */ public ServiceCall addWords(AddWordsOptions addWordsOptions) { com.ibm.cloud.sdk.core.util.Validator.notNull( addWordsOptions, "addWordsOptions cannot be null"); Map pathParamsMap = new HashMap(); pathParamsMap.put("customization_id", addWordsOptions.customizationId()); RequestBuilder builder = RequestBuilder.post( RequestBuilder.resolveRequestUrl( getServiceUrl(), "/v1/customizations/{customization_id}/words", pathParamsMap)); Map sdkHeaders = SdkCommon.getSdkHeaders("text_to_speech", "v1", "addWords"); for (Entry header : sdkHeaders.entrySet()) { builder.header(header.getKey(), header.getValue()); } builder.header("Accept", "application/json"); final JsonObject contentJson = new JsonObject(); contentJson.add( "words", com.ibm.cloud.sdk.core.util.GsonSingleton.getGson().toJsonTree(addWordsOptions.words())); builder.bodyJson(contentJson); ResponseConverter responseConverter = ResponseConverterUtils.getVoid(); return createServiceCall(builder.build(), responseConverter); } /** * List custom words. * *

Lists all of the words and their translations for the specified custom model. The output * shows the translations as they are defined in the model. You must use credentials for the * instance of the service that owns a model to list its words. * *

**See also:** [Querying all words from a custom * model](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-customWords#cuWordsQueryModel). * * @param listWordsOptions the {@link ListWordsOptions} containing the options for the call * @return a {@link ServiceCall} with a result of type {@link Words} */ public ServiceCall listWords(ListWordsOptions listWordsOptions) { com.ibm.cloud.sdk.core.util.Validator.notNull( listWordsOptions, "listWordsOptions cannot be null"); Map pathParamsMap = new HashMap(); pathParamsMap.put("customization_id", listWordsOptions.customizationId()); RequestBuilder builder = RequestBuilder.get( RequestBuilder.resolveRequestUrl( getServiceUrl(), "/v1/customizations/{customization_id}/words", pathParamsMap)); Map sdkHeaders = SdkCommon.getSdkHeaders("text_to_speech", "v1", "listWords"); for (Entry header : sdkHeaders.entrySet()) { builder.header(header.getKey(), header.getValue()); } builder.header("Accept", "application/json"); ResponseConverter responseConverter = ResponseConverterUtils.getValue( new com.google.gson.reflect.TypeToken() {}.getType()); return createServiceCall(builder.build(), responseConverter); } /** * Add a custom word. * *

Adds a single word and its translation to the specified custom model. Adding a new * translation for a word that already exists in a custom model overwrites the word's existing * translation. A custom model can contain no more than 20,000 entries. You must use credentials * for the instance of the service that owns a model to add a word to it. * *

You can define sounds-like or phonetic translations for words. A sounds-like translation * consists of one or more words that, when combined, sound like the word. Phonetic translations * are based on the SSML phoneme format for representing a word. You can specify them in standard * International Phonetic Alphabet (IPA) representation * *

<code>&lt;phoneme alphabet="ipa" * ph="t&#601;m&#712;&#593;to"&gt;&lt;/phoneme&gt;</code> * *

or in the proprietary IBM Symbolic Phonetic Representation (SPR) * *

<code>&lt;phoneme alphabet="ibm" * ph="1gAstroEntxrYFXs"&gt;&lt;/phoneme&gt;</code> * *

**See also:** * [Adding a single word to a custom * model](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-customWords#cuWordAdd) * * [Adding words to a Japanese custom * model](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-customWords#cuJapaneseAdd) * * [Understanding * customization](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-customIntro#customIntro). * * @param addWordOptions the {@link AddWordOptions} containing the options for the call * @return a {@link ServiceCall} with a void result */ public ServiceCall addWord(AddWordOptions addWordOptions) { com.ibm.cloud.sdk.core.util.Validator.notNull(addWordOptions, "addWordOptions cannot be null"); Map pathParamsMap = new HashMap(); pathParamsMap.put("customization_id", addWordOptions.customizationId()); pathParamsMap.put("word", addWordOptions.word()); RequestBuilder builder = RequestBuilder.put( RequestBuilder.resolveRequestUrl( getServiceUrl(), "/v1/customizations/{customization_id}/words/{word}", pathParamsMap)); Map sdkHeaders = SdkCommon.getSdkHeaders("text_to_speech", "v1", "addWord"); for (Entry header : sdkHeaders.entrySet()) { builder.header(header.getKey(), header.getValue()); } final JsonObject contentJson = new JsonObject(); contentJson.addProperty("translation", addWordOptions.translation()); if (addWordOptions.partOfSpeech() != null) { contentJson.addProperty("part_of_speech", addWordOptions.partOfSpeech()); } builder.bodyJson(contentJson); ResponseConverter responseConverter = ResponseConverterUtils.getVoid(); return createServiceCall(builder.build(), responseConverter); } /** * Get a custom word. * *

Gets the translation for a single word from the specified custom model. The output shows the * translation as it is defined in the model. You must use credentials for the instance of the * service that owns a model to list its words. * *

**See also:** [Querying a single word from a custom * model](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-customWords#cuWordQueryModel). * * @param getWordOptions the {@link GetWordOptions} containing the options for the call * @return a {@link ServiceCall} with a result of type {@link Translation} */ public ServiceCall getWord(GetWordOptions getWordOptions) { com.ibm.cloud.sdk.core.util.Validator.notNull(getWordOptions, "getWordOptions cannot be null"); Map pathParamsMap = new HashMap(); pathParamsMap.put("customization_id", getWordOptions.customizationId()); pathParamsMap.put("word", getWordOptions.word()); RequestBuilder builder = RequestBuilder.get( RequestBuilder.resolveRequestUrl( getServiceUrl(), "/v1/customizations/{customization_id}/words/{word}", pathParamsMap)); Map sdkHeaders = SdkCommon.getSdkHeaders("text_to_speech", "v1", "getWord"); for (Entry header : sdkHeaders.entrySet()) { builder.header(header.getKey(), header.getValue()); } builder.header("Accept", "application/json"); ResponseConverter responseConverter = ResponseConverterUtils.getValue( new com.google.gson.reflect.TypeToken() {}.getType()); return createServiceCall(builder.build(), responseConverter); } /** * Delete a custom word. * *

Deletes a single word from the specified custom model. You must use credentials for the * instance of the service that owns a model to delete its words. * *

**See also:** [Deleting a word from a custom * model](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-customWords#cuWordDelete). * * @param deleteWordOptions the {@link DeleteWordOptions} containing the options for the call * @return a {@link ServiceCall} with a void result */ public ServiceCall deleteWord(DeleteWordOptions deleteWordOptions) { com.ibm.cloud.sdk.core.util.Validator.notNull( deleteWordOptions, "deleteWordOptions cannot be null"); Map pathParamsMap = new HashMap(); pathParamsMap.put("customization_id", deleteWordOptions.customizationId()); pathParamsMap.put("word", deleteWordOptions.word()); RequestBuilder builder = RequestBuilder.delete( RequestBuilder.resolveRequestUrl( getServiceUrl(), "/v1/customizations/{customization_id}/words/{word}", pathParamsMap)); Map sdkHeaders = SdkCommon.getSdkHeaders("text_to_speech", "v1", "deleteWord"); for (Entry header : sdkHeaders.entrySet()) { builder.header(header.getKey(), header.getValue()); } ResponseConverter responseConverter = ResponseConverterUtils.getVoid(); return createServiceCall(builder.build(), responseConverter); } /** * List custom prompts. * *

Lists information about all custom prompts that are defined for a custom model. The * information includes the prompt ID, prompt text, status, and optional speaker ID for each * prompt of the custom model. You must use credentials for the instance of the service that owns * the custom model. The same information about all of the prompts for a custom model is also * provided by the [Get a custom model](#getcustommodel) method. That method provides complete * details about a specified custom model, including its language, owner, custom words, and more. * Custom prompts are supported only for use with US English custom models and voices. * *

**See also:** [Listing custom * prompts](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-tbe-custom-prompts#tbe-custom-prompts-list). * * @param listCustomPromptsOptions the {@link ListCustomPromptsOptions} containing the options for * the call * @return a {@link ServiceCall} with a result of type {@link Prompts} */ public ServiceCall listCustomPrompts(ListCustomPromptsOptions listCustomPromptsOptions) { com.ibm.cloud.sdk.core.util.Validator.notNull( listCustomPromptsOptions, "listCustomPromptsOptions cannot be null"); Map pathParamsMap = new HashMap(); pathParamsMap.put("customization_id", listCustomPromptsOptions.customizationId()); RequestBuilder builder = RequestBuilder.get( RequestBuilder.resolveRequestUrl( getServiceUrl(), "/v1/customizations/{customization_id}/prompts", pathParamsMap)); Map sdkHeaders = SdkCommon.getSdkHeaders("text_to_speech", "v1", "listCustomPrompts"); for (Entry header : sdkHeaders.entrySet()) { builder.header(header.getKey(), header.getValue()); } builder.header("Accept", "application/json"); ResponseConverter responseConverter = ResponseConverterUtils.getValue( new com.google.gson.reflect.TypeToken() {}.getType()); return createServiceCall(builder.build(), responseConverter); } /** * Add a custom prompt. * *

Adds a custom prompt to a custom model. A prompt is defined by the text that is to be * spoken, the audio for that text, a unique user-specified ID for the prompt, and an optional * speaker ID. The information is used to generate prosodic data that is not visible to the user. * This data is used by the service to produce the synthesized audio upon request. You must use * credentials for the instance of the service that owns a custom model to add a prompt to it. You * can add a maximum of 1000 custom prompts to a single custom model. * *

You are recommended to assign meaningful values for prompt IDs. For example, use `goodbye` * to identify a prompt that speaks a farewell message. Prompt IDs must be unique within a given * custom model. You cannot define two prompts with the same name for the same custom model. If * you provide the ID of an existing prompt, the previously uploaded prompt is replaced by the new * information. The existing prompt is reprocessed by using the new text and audio and, if * provided, new speaker model, and the prosody data associated with the prompt is updated. * *

The quality of a prompt is undefined if the language of a prompt does not match the language * of its custom model. This is consistent with any text or SSML that is specified for a speech * synthesis request. The service makes a best-effort attempt to render the specified text for the * prompt; it does not validate that the language of the text matches the language of the model. * *

Adding a prompt is an asynchronous operation. Although it accepts less audio than speaker * enrollment, the service must align the audio with the provided text. The time that it takes to * process a prompt depends on the prompt itself. The processing time for a reasonably sized * prompt generally matches the length of the audio (for example, it takes 20 seconds to process a * 20-second prompt). * *

For shorter prompts, you can wait for a reasonable amount of time and then check the status * of the prompt with the [Get a custom prompt](#getcustomprompt) method. For longer prompts, * consider using that method to poll the service every few seconds to determine when the prompt * becomes available. No prompt can be used for speech synthesis if it is in the `processing` or * `failed` state. Only prompts that are in the `available` state can be used for speech * synthesis. * *

When it processes a request, the service attempts to align the text and the audio that are * provided for the prompt. The text that is passed with a prompt must match the spoken audio as * closely as possible. Optimally, the text and audio match exactly. The service does its best to * align the specified text with the audio, and it can often compensate for mismatches between the * two. But if the service cannot effectively align the text and the audio, possibly because the * magnitude of mismatches between the two is too great, processing of the prompt fails. * *

### Evaluating a prompt * *

Always listen to and evaluate a prompt to determine its quality before using it in * production. To evaluate a prompt, include only the single prompt in a speech synthesis request * by using the following SSML extension, in this case for a prompt whose ID is `goodbye`: * *

`<ibm:prompt id="goodbye"/>` * *

In some cases, you might need to rerecord and resubmit a prompt as many as five times to * address the following possible problems: * The service might fail to detect a mismatch between * the prompt’s text and audio. The longer the prompt, the greater the chance for misalignment * between its text and audio. Therefore, multiple shorter prompts are preferable to a single long * prompt. * The text of a prompt might include a word that the service does not recognize. In * this case, you can create a custom word and pronunciation pair to tell the service how to * pronounce the word. You must then re-create the prompt. * The quality of the input audio might * be insufficient or the service’s processing of the audio might fail to detect the intended * prosody. Submitting new audio for the prompt can correct these issues. * *

If a prompt that is created without a speaker ID does not adequately reflect the intended * prosody, enrolling the speaker and providing a speaker ID for the prompt is one recommended * means of potentially improving the quality of the prompt. This is especially important for * shorter prompts such as "good-bye" or "thank you," where less audio data makes it more * difficult to match the prosody of the speaker. Custom prompts are supported only for use with * US English custom models and voices. * *

**See also:** * [Add a custom * prompt](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-tbe-create#tbe-create-add-prompt) * * [Evaluate a custom * prompt](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-tbe-create#tbe-create-evaluate-prompt) * * [Rules for creating custom * prompts](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-tbe-rules#tbe-rules-prompts). * * @param addCustomPromptOptions the {@link AddCustomPromptOptions} containing the options for the * call * @return a {@link ServiceCall} with a result of type {@link Prompt} */ public ServiceCall addCustomPrompt(AddCustomPromptOptions addCustomPromptOptions) { com.ibm.cloud.sdk.core.util.Validator.notNull( addCustomPromptOptions, "addCustomPromptOptions cannot be null"); Map pathParamsMap = new HashMap(); pathParamsMap.put("customization_id", addCustomPromptOptions.customizationId()); pathParamsMap.put("prompt_id", addCustomPromptOptions.promptId()); RequestBuilder builder = RequestBuilder.post( RequestBuilder.resolveRequestUrl( getServiceUrl(), "/v1/customizations/{customization_id}/prompts/{prompt_id}", pathParamsMap)); Map sdkHeaders = SdkCommon.getSdkHeaders("text_to_speech", "v1", "addCustomPrompt"); for (Entry header : sdkHeaders.entrySet()) { builder.header(header.getKey(), header.getValue()); } builder.header("Accept", "application/json"); MultipartBody.Builder multipartBuilder = new MultipartBody.Builder(); multipartBuilder.setType(MultipartBody.FORM); multipartBuilder.addFormDataPart("metadata", addCustomPromptOptions.metadata().toString()); okhttp3.RequestBody fileBody = RequestUtils.inputStreamBody(addCustomPromptOptions.file(), "audio/wav"); multipartBuilder.addFormDataPart("file", "filename", fileBody); builder.body(multipartBuilder.build()); ResponseConverter responseConverter = ResponseConverterUtils.getValue( new com.google.gson.reflect.TypeToken() {}.getType()); return createServiceCall(builder.build(), responseConverter); } /** * Get a custom prompt. * *

Gets information about a specified custom prompt for a specified custom model. The * information includes the prompt ID, prompt text, status, and optional speaker ID for each * prompt of the custom model. You must use credentials for the instance of the service that owns * the custom model. Custom prompts are supported only for use with US English custom models and * voices. * *

**See also:** [Listing custom * prompts](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-tbe-custom-prompts#tbe-custom-prompts-list). * * @param getCustomPromptOptions the {@link GetCustomPromptOptions} containing the options for the * call * @return a {@link ServiceCall} with a result of type {@link Prompt} */ public ServiceCall getCustomPrompt(GetCustomPromptOptions getCustomPromptOptions) { com.ibm.cloud.sdk.core.util.Validator.notNull( getCustomPromptOptions, "getCustomPromptOptions cannot be null"); Map pathParamsMap = new HashMap(); pathParamsMap.put("customization_id", getCustomPromptOptions.customizationId()); pathParamsMap.put("prompt_id", getCustomPromptOptions.promptId()); RequestBuilder builder = RequestBuilder.get( RequestBuilder.resolveRequestUrl( getServiceUrl(), "/v1/customizations/{customization_id}/prompts/{prompt_id}", pathParamsMap)); Map sdkHeaders = SdkCommon.getSdkHeaders("text_to_speech", "v1", "getCustomPrompt"); for (Entry header : sdkHeaders.entrySet()) { builder.header(header.getKey(), header.getValue()); } builder.header("Accept", "application/json"); ResponseConverter responseConverter = ResponseConverterUtils.getValue( new com.google.gson.reflect.TypeToken() {}.getType()); return createServiceCall(builder.build(), responseConverter); } /** * Delete a custom prompt. * *

Deletes an existing custom prompt from a custom model. The service deletes the prompt with * the specified ID. You must use credentials for the instance of the service that owns the custom * model from which the prompt is to be deleted. * *

**Caution:** Deleting a custom prompt elicits a 400 response code from synthesis requests * that attempt to use the prompt. Make sure that you do not attempt to use a deleted prompt in a * production application. Custom prompts are supported only for use with US English custom models * and voices. * *

**See also:** [Deleting a custom * prompt](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-tbe-custom-prompts#tbe-custom-prompts-delete). * * @param deleteCustomPromptOptions the {@link DeleteCustomPromptOptions} containing the options * for the call * @return a {@link ServiceCall} with a void result */ public ServiceCall deleteCustomPrompt(DeleteCustomPromptOptions deleteCustomPromptOptions) { com.ibm.cloud.sdk.core.util.Validator.notNull( deleteCustomPromptOptions, "deleteCustomPromptOptions cannot be null"); Map pathParamsMap = new HashMap(); pathParamsMap.put("customization_id", deleteCustomPromptOptions.customizationId()); pathParamsMap.put("prompt_id", deleteCustomPromptOptions.promptId()); RequestBuilder builder = RequestBuilder.delete( RequestBuilder.resolveRequestUrl( getServiceUrl(), "/v1/customizations/{customization_id}/prompts/{prompt_id}", pathParamsMap)); Map sdkHeaders = SdkCommon.getSdkHeaders("text_to_speech", "v1", "deleteCustomPrompt"); for (Entry header : sdkHeaders.entrySet()) { builder.header(header.getKey(), header.getValue()); } ResponseConverter responseConverter = ResponseConverterUtils.getVoid(); return createServiceCall(builder.build(), responseConverter); } /** * List speaker models. * *

Lists information about all speaker models that are defined for a service instance. The * information includes the speaker ID and speaker name of each defined speaker. You must use * credentials for the instance of a service to list its speakers. Speaker models and the custom * prompts with which they are used are supported only for use with US English custom models and * voices. * *

**See also:** [Listing speaker * models](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-tbe-speaker-models#tbe-speaker-models-list). * * @param listSpeakerModelsOptions the {@link ListSpeakerModelsOptions} containing the options for * the call * @return a {@link ServiceCall} with a result of type {@link Speakers} */ public ServiceCall listSpeakerModels( ListSpeakerModelsOptions listSpeakerModelsOptions) { RequestBuilder builder = RequestBuilder.get(RequestBuilder.resolveRequestUrl(getServiceUrl(), "/v1/speakers")); Map sdkHeaders = SdkCommon.getSdkHeaders("text_to_speech", "v1", "listSpeakerModels"); for (Entry header : sdkHeaders.entrySet()) { builder.header(header.getKey(), header.getValue()); } builder.header("Accept", "application/json"); ResponseConverter responseConverter = ResponseConverterUtils.getValue( new com.google.gson.reflect.TypeToken() {}.getType()); return createServiceCall(builder.build(), responseConverter); } /** * List speaker models. * *

Lists information about all speaker models that are defined for a service instance. The * information includes the speaker ID and speaker name of each defined speaker. You must use * credentials for the instance of a service to list its speakers. Speaker models and the custom * prompts with which they are used are supported only for use with US English custom models and * voices. * *

**See also:** [Listing speaker * models](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-tbe-speaker-models#tbe-speaker-models-list). * * @return a {@link ServiceCall} with a result of type {@link Speakers} */ public ServiceCall listSpeakerModels() { return listSpeakerModels(null); } /** * Create a speaker model. * *

Creates a new speaker model, which is an optional enrollment token for users who are to add * prompts to custom models. A speaker model contains information about a user's voice. The * service extracts this information from a WAV audio sample that you pass as the body of the * request. Associating a speaker model with a prompt is optional, but the information that is * extracted from the speaker model helps the service learn about the speaker's voice. * *

A speaker model can make an appreciable difference in the quality of prompts, especially * short prompts with relatively little audio, that are associated with that speaker. A speaker * model can help the service produce a prompt with more confidence; the lack of a speaker model * can potentially compromise the quality of a prompt. * *

The gender of the speaker who creates a speaker model does not need to match the gender of a * voice that is used with prompts that are associated with that speaker model. For example, a * speaker model that is created by a male speaker can be associated with prompts that are spoken * by female voices. * *

You create a speaker model for a given instance of the service. The new speaker model is * owned by the service instance whose credentials are used to create it. That same speaker can * then be used to create prompts for all custom models within that service instance. No language * is associated with a speaker model, but each custom model has a single specified language. You * can add prompts only to US English models. * *

You specify a name for the speaker when you create it. The name must be unique among all * speaker names for the owning service instance. To re-create a speaker model for an existing * speaker name, you must first delete the existing speaker model that has that name. * *

Speaker enrollment is a synchronous operation. Although it accepts more audio data than a * prompt, the process of adding a speaker is very fast. The service simply extracts information * about the speaker’s voice from the audio. Unlike prompts, speaker models neither need nor * accept a transcription of the audio. When the call returns, the audio is fully processed and * the speaker enrollment is complete. * *

The service returns a speaker ID with the request. A speaker ID is globally unique * identifier (GUID) that you use to identify the speaker in subsequent requests to the service. * Speaker models and the custom prompts with which they are used are supported only for use with * US English custom models and voices. * *

**See also:** * [Create a speaker * model](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-tbe-create#tbe-create-speaker-model) * * [Rules for creating speaker * models](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-tbe-rules#tbe-rules-speakers). * * @param createSpeakerModelOptions the {@link CreateSpeakerModelOptions} containing the options * for the call * @return a {@link ServiceCall} with a result of type {@link SpeakerModel} */ public ServiceCall createSpeakerModel( CreateSpeakerModelOptions createSpeakerModelOptions) { com.ibm.cloud.sdk.core.util.Validator.notNull( createSpeakerModelOptions, "createSpeakerModelOptions cannot be null"); RequestBuilder builder = RequestBuilder.post(RequestBuilder.resolveRequestUrl(getServiceUrl(), "/v1/speakers")); Map sdkHeaders = SdkCommon.getSdkHeaders("text_to_speech", "v1", "createSpeakerModel"); for (Entry header : sdkHeaders.entrySet()) { builder.header(header.getKey(), header.getValue()); } builder.header("Accept", "application/json"); builder.query("speaker_name", String.valueOf(createSpeakerModelOptions.speakerName())); builder.bodyContent(createSpeakerModelOptions.audio(), "audio/wav"); ResponseConverter responseConverter = ResponseConverterUtils.getValue( new com.google.gson.reflect.TypeToken() {}.getType()); return createServiceCall(builder.build(), responseConverter); } /** * Get a speaker model. * *

Gets information about all prompts that are defined by a specified speaker for all custom * models that are owned by a service instance. The information is grouped by the customization * IDs of the custom models. For each custom model, the information lists information about each * prompt that is defined for that custom model by the speaker. You must use credentials for the * instance of the service that owns a speaker model to list its prompts. Speaker models and the * custom prompts with which they are used are supported only for use with US English custom * models and voices. * *

**See also:** [Listing the custom prompts for a speaker * model](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-tbe-speaker-models#tbe-speaker-models-list-prompts). * * @param getSpeakerModelOptions the {@link GetSpeakerModelOptions} containing the options for the * call * @return a {@link ServiceCall} with a result of type {@link SpeakerCustomModels} */ public ServiceCall getSpeakerModel( GetSpeakerModelOptions getSpeakerModelOptions) { com.ibm.cloud.sdk.core.util.Validator.notNull( getSpeakerModelOptions, "getSpeakerModelOptions cannot be null"); Map pathParamsMap = new HashMap(); pathParamsMap.put("speaker_id", getSpeakerModelOptions.speakerId()); RequestBuilder builder = RequestBuilder.get( RequestBuilder.resolveRequestUrl( getServiceUrl(), "/v1/speakers/{speaker_id}", pathParamsMap)); Map sdkHeaders = SdkCommon.getSdkHeaders("text_to_speech", "v1", "getSpeakerModel"); for (Entry header : sdkHeaders.entrySet()) { builder.header(header.getKey(), header.getValue()); } builder.header("Accept", "application/json"); ResponseConverter responseConverter = ResponseConverterUtils.getValue( new com.google.gson.reflect.TypeToken() {}.getType()); return createServiceCall(builder.build(), responseConverter); } /** * Delete a speaker model. * *

Deletes an existing speaker model from the service instance. The service deletes the * enrolled speaker with the specified speaker ID. You must use credentials for the instance of * the service that owns a speaker model to delete the speaker. * *

Any prompts that are associated with the deleted speaker are not affected by the speaker's * deletion. The prosodic data that defines the quality of a prompt is established when the prompt * is created. A prompt is static and remains unaffected by deletion of its associated speaker. * However, the prompt cannot be resubmitted or updated with its original speaker once that * speaker is deleted. Speaker models and the custom prompts with which they are used are * supported only for use with US English custom models and voices. * *

**See also:** [Deleting a speaker * model](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-tbe-speaker-models#tbe-speaker-models-delete). * * @param deleteSpeakerModelOptions the {@link DeleteSpeakerModelOptions} containing the options * for the call * @return a {@link ServiceCall} with a void result */ public ServiceCall deleteSpeakerModel(DeleteSpeakerModelOptions deleteSpeakerModelOptions) { com.ibm.cloud.sdk.core.util.Validator.notNull( deleteSpeakerModelOptions, "deleteSpeakerModelOptions cannot be null"); Map pathParamsMap = new HashMap(); pathParamsMap.put("speaker_id", deleteSpeakerModelOptions.speakerId()); RequestBuilder builder = RequestBuilder.delete( RequestBuilder.resolveRequestUrl( getServiceUrl(), "/v1/speakers/{speaker_id}", pathParamsMap)); Map sdkHeaders = SdkCommon.getSdkHeaders("text_to_speech", "v1", "deleteSpeakerModel"); for (Entry header : sdkHeaders.entrySet()) { builder.header(header.getKey(), header.getValue()); } ResponseConverter responseConverter = ResponseConverterUtils.getVoid(); return createServiceCall(builder.build(), responseConverter); } /** * Delete labeled data. * *

Deletes all data that is associated with a specified customer ID. The method deletes all * data for the customer ID, regardless of the method by which the information was added. The * method has no effect if no data is associated with the customer ID. You must issue the request * with credentials for the same instance of the service that was used to associate the customer * ID with the data. You associate a customer ID with data by passing the `X-Watson-Metadata` * header with a request that passes the data. * *

**Note:** If you delete an instance of the service from the service console, all data * associated with that service instance is automatically deleted. This includes all custom models * and word/translation pairs, and all data related to speech synthesis requests. * *

**See also:** [Information * security](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-information-security#information-security). * * @param deleteUserDataOptions the {@link DeleteUserDataOptions} containing the options for the * call * @return a {@link ServiceCall} with a void result */ public ServiceCall deleteUserData(DeleteUserDataOptions deleteUserDataOptions) { com.ibm.cloud.sdk.core.util.Validator.notNull( deleteUserDataOptions, "deleteUserDataOptions cannot be null"); RequestBuilder builder = RequestBuilder.delete(RequestBuilder.resolveRequestUrl(getServiceUrl(), "/v1/user_data")); Map sdkHeaders = SdkCommon.getSdkHeaders("text_to_speech", "v1", "deleteUserData"); for (Entry header : sdkHeaders.entrySet()) { builder.header(header.getKey(), header.getValue()); } builder.query("customer_id", String.valueOf(deleteUserDataOptions.customerId())); ResponseConverter responseConverter = ResponseConverterUtils.getVoid(); return createServiceCall(builder.build(), responseConverter); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy