All downloads are free. Search and download functionality uses the official Maven repository.

com.alibaba.dashscope.audio.tts.SpeechSynthesisParam Maven / Gradle / Ivy

package com.alibaba.dashscope.audio.tts;

import static com.alibaba.dashscope.utils.ApiKeywords.TEXT;

import com.alibaba.dashscope.common.*;
import com.alibaba.dashscope.protocol.Request;
import com.alibaba.dashscope.utils.JsonUtils;
import com.google.gson.JsonObject;
import lombok.Builder;
import lombok.Data;
import lombok.EqualsAndHashCode;
import lombok.experimental.SuperBuilder;

@EqualsAndHashCode(callSuper = true)
@Data
@SuperBuilder
public class SpeechSynthesisParam extends Param {

  /** The input text. */
  private String text;

  /** Input text type. */
  @Builder.Default private SpeechSynthesisTextType textType = SpeechSynthesisTextType.PLAIN_TEXT;

  /** synthesis audio format. */
  @Builder.Default private SpeechSynthesisAudioFormat format = SpeechSynthesisAudioFormat.WAV;

  /** synthesis audio sample rate. */
  @Builder.Default private int sampleRate = 16000;

  /** synthesis audio volume. */
  @Builder.Default private int volume = 50;

  /** synthesis audio speed. */
  @Builder.Default private float rate = 1.0f;

  /** synthesis audio pitch. */
  @Builder.Default private float pitch = 1.0f;

  /** enable word level timestamp. */
  @Builder.Default private boolean enableWordTimestamp = false;

  /** enable phoneme level timestamp. */
  @Builder.Default private boolean enablePhonemeTimestamp = false;

  private JsonObject toParameters() {
    JsonObject parameters = new JsonObject();
    parameters.addProperty(SpeechSynthesisApiKeywords.TEXT_TYPE, getTextType().getValue());
    parameters.addProperty(SpeechSynthesisApiKeywords.FORMAT, getFormat().getValue());
    parameters.addProperty(SpeechSynthesisApiKeywords.SAMPLE_RATE, getSampleRate());
    parameters.addProperty(SpeechSynthesisApiKeywords.VOLUME, getVolume());
    parameters.addProperty(SpeechSynthesisApiKeywords.SPEECH_RATE, getRate());
    parameters.addProperty(SpeechSynthesisApiKeywords.PITCH_RATE, getPitch());
    parameters.addProperty(SpeechSynthesisApiKeywords.WORD_TIMESTAMP, isEnableWordTimestamp());
    parameters.addProperty(
        SpeechSynthesisApiKeywords.PHONEME_TIMESTAMP, isEnablePhonemeTimestamp());
    return parameters;
  }

  @Override
  public String url() {
    return String.format(
        "/services/%s/%s/%s?request_id=%s",
        TaskGroup.AUDIO.getValue(),
        Task.TEXT_TO_SPEECH.getValue(),
        Function.SPEECH_SYNTHESIZER.getValue(),
        this.getRequestId());
  }

  @Override
  public Request toRequest(Protocol protocol) {
    JsonObject jsonObject = new JsonObject();
    jsonObject.addProperty(TEXT, getText());
    SpeechSynthesisMessagePayload payload =
        SpeechSynthesisMessagePayload.buildInputPayload(getModel(), jsonObject);
    payload.setParameters(toParameters());
    return Request.builder().message(JsonUtils.toJson(payload)).build();
  }

  @Override
  public Class resultType() {
    return SpeechSynthesisResult.class;
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy