com.alibaba.nls.client.protocol.tts.FlowingSpeechSynthesizer Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of nls-sdk-tts Show documentation
There is a newer version: 2.2.16
/*
 * Copyright 2015 Alibaba Group Holding Limited
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.alibaba.nls.client.protocol.tts;

import com.alibaba.nls.client.protocol.*;
import com.alibaba.nls.client.transport.Connection;
import com.alibaba.nls.client.util.IdGen;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.HashMap;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;

import static com.alibaba.nls.client.protocol.SpeechReqProtocol.State.*;

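/**
 * @author lengjiayi.ljy
 * @date 2024/01/02
 *
 * Real-time (streaming) speech synthesizer: configures and sends synthesis requests and
 * handles the synthesis-result callbacks.
 * Not thread-safe.
 */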
public class FlowingSpeechSynthesizer extends SpeechReqProtocol {
    static final Logger logger = LoggerFactory.getLogger(FlowingSpeechSynthesizer.class);

    /** Audio format placed in the start request when {@link #setFormat} is never called. */
    private static final String DEFAULT_FORMAT = "pcm";
    /** Sample rate placed in the start request when no sample rate is set explicitly. */
    private static final Integer DEFAULT_SAMPLE_RATE = 16000;
    /** Volume placed in the start request when {@link #setVolume} is never called. */
    private static final Integer DEFAULT_VOICE_VOLUME = 50;

    protected FlowingSpeechSynthesizerListener StreamTTSListener;
    // The latches are created in start() and counted down by the mark*() methods. Nothing in
    // this class calls mark*(); presumably the listener does so from the connection's callback
    // thread (confirm against FlowingSpeechSynthesizerListener), hence volatile.
    private volatile CountDownLatch completeLatch;
    private volatile CountDownLatch readyLatch;
    /** Wall-clock time (ms) of the last successful {@link #send}, or -1 if nothing was sent yet. */
    protected long lastSendTime = -1;
    protected String currentSessionId;
    /** Minimum gap, in milliseconds, enforced between two consecutive {@link #send} calls. */
    protected long minSendIntervalMS = 100;
    // Watchdog used by stop(): when it fires it marks the request as failed, which releases
    // the latch stop() is blocked on.
    private final ResettableCountdown countdown = new ResettableCountdown(new Runnable() {
        @Override
        public void run() {
            markFail();
        }
    });

    /**
     * Connects through {@code client} and prepares a synthesizer bound to {@code listener}.
     *
     * @param client   client used to open the connection
     * @param listener receiver of synthesis callbacks
     * @throws Exception if the connection cannot be established
     */
    public FlowingSpeechSynthesizer(NlsClient client, FlowingSpeechSynthesizerListener listener) throws Exception {
        this.conn = client.connect(listener);
        afterConnection(listener);
    }

    /**
     * Connects through {@code client} with an explicit token and prepares a synthesizer bound
     * to {@code listener}.
     *
     * @param client   client used to open the connection
     * @param token    access token passed to {@code client.connect}
     * @param listener receiver of synthesis callbacks
     * @throws Exception if the connection cannot be established
     */
    public FlowingSpeechSynthesizer(NlsClient client, String token, FlowingSpeechSynthesizerListener listener) throws Exception {
        this.conn = client.connect(token, listener);
        afterConnection(listener);
    }

    /**
     * @return the listener this synthesizer was created with
     */
    public FlowingSpeechSynthesizerListener getStreamTTSListener() {
        return this.StreamTTSListener;
    }

    /**
     * @return the session id generated by the most recent {@link #start(long)} call,
     *         or {@code null} if the synthesizer was never started
     */
    public String getCurrentSessionId() {
        return this.currentSessionId;
    }

    /**
     * Initializes the start-request header and payload with their defaults, links the listener
     * back to this synthesizer and moves the state to {@code STATE_CONNECTED}.
     *
     * @param listener listener to bind to this synthesizer
     */
    protected void afterConnection(FlowingSpeechSynthesizerListener listener) {
        payload = new HashMap();
        header.put(Constant.PROP_NAMESPACE, TTSConstant.VALUE_NAMESPACE_STREAM_REALTIME_TTS);
        header.put(Constant.PROP_NAME, TTSConstant.VALUE_NAME_TTS_START);
        payload.put(TTSConstant.PROP_TTS_FORMAT, DEFAULT_FORMAT);
        payload.put(TTSConstant.PROP_TTS_SAMPLE_RATE, DEFAULT_SAMPLE_RATE);
        payload.put(TTSConstant.PROP_TTS_VOLUME, DEFAULT_VOICE_VOLUME);
        listener.setFlowingSpeechSynthesizer(this);
        StreamTTSListener = listener;
        state = STATE_CONNECTED;
    }

    /**
     * Sets the voice (speaker) used for synthesis.
     *
     * @param voice voice name
     */
    public void setVoice(String voice) {
        payload.put(TTSConstant.PROP_TTS_VOICE, voice);
    }

    /**
     * Sets the encoding of the synthesized audio. Supported formats are pcm, wav and mp3;
     * the default is pcm.
     *
     * @param format output format
     */
    public void setFormat(OutputFormatEnum format) {
        // Write under the same key afterConnection() uses for the default, so the setter
        // always overrides the default instead of depending on two constants being equal.
        payload.put(TTSConstant.PROP_TTS_FORMAT, format.getName());
    }

    /**
     * Sets the sample rate of the synthesized audio. Optional; the default is 16000.
     *
     * @param sampleRate sample rate
     */
    public void setSampleRate(SampleRateEnum sampleRate) {
        payload.put(TTSConstant.PROP_TTS_SAMPLE_RATE, sampleRate.value);
    }

    /**
     * Sets a sample rate that is not covered by {@code SampleRateEnum} (for example 32000).
     *
     * @param sampleRate sample rate in Hz
     */
    public void setSampleRate(int sampleRate) {
        payload.put(TTSConstant.PROP_TTS_SAMPLE_RATE, sampleRate);
    }

    /**
     * Sets the volume. Range 0~100, optional, default 50.
     *
     * @param volume volume
     */
    public void setVolume(int volume) {
        payload.put(TTSConstant.PROP_TTS_VOLUME, volume);
    }

    /**
     * Sets the speech rate. Range -500~500, optional, default 0.
     *
     * @param speechRate speech rate
     */
    public void setSpeechRate(int speechRate) {
        payload.put(TTSConstant.PROP_TTS_SPEECH_RATE, speechRate);
    }

    /**
     * Sets the pitch. Range -500~500, optional, default 0.
     *
     * @param pitchRate pitch rate
     */
    public void setPitchRate(int pitchRate) {
        payload.put(TTSConstant.PROP_TTS_PITCH_RATE, pitchRate);
    }

    /**
     * Sets the minimum interval between two consecutive {@link #send} calls.
     *
     * @param sendInterval interval in milliseconds
     */
    public void setMinSendIntervalMS(long sendInterval) {
        this.minSendIntervalMS = sendInterval;
    }

    /**
     * Sends a piece of text to be synthesized.
     * If less than {@code minSendIntervalMS} has elapsed since the previous send, the call
     * blocks until the interval has passed and then sends the text.
     *
     * @param text text to synthesize
     * @throws RuntimeException if the wait is interrupted (the thread's interrupt flag is
     *                          restored first) or if building/sending the request fails
     */
    public void send(String text) {
        long msNeedToSleep = minSendIntervalMS - (System.currentTimeMillis() - lastSendTime);
        if (lastSendTime != -1 && msNeedToSleep > 0) {
            logger.info("too short send interval, sleep {} milliseconds", msNeedToSleep);
            try {
                Thread.sleep(msNeedToSleep);
            } catch (InterruptedException e) {
                // Keep the interruption observable to callers further up the stack.
                Thread.currentThread().interrupt();
                throw new RuntimeException(e);
            }
        }
        state.checkSend();
        try {
            SpeechReqProtocol req = new SpeechReqProtocol();
            req.header.put(Constant.PROP_TASK_ID, currentTaskId);
            req.header.put(Constant.PROP_NAMESPACE, TTSConstant.VALUE_NAMESPACE_STREAM_REALTIME_TTS);
            req.header.put(Constant.PROP_NAME, TTSConstant.VALUE_NAME_RUN_SYNTHESIS);
            req.setAppKey(getAppKey());
            req.payload = new HashMap();
            req.payload.put(TTSConstant.PROP_TTS_TEXT, text);
            conn.sendText(req.serialize());
            // Only a successful send moves the throttle reference point.
            lastSendTime = System.currentTimeMillis();
        } catch (Exception e) {
            logger.error("fail to send text, current_task_id:{},state:{}", currentTaskId, state, e);
            throw new RuntimeException(e);
        }
    }

    /**
     * Marks that the server is ready for synthesis and releases a caller blocked in
     * {@link #start(long)}.
     */
    void markSynthesisReady() {
        state = STATE_REQUEST_CONFIRMED;
        release(readyLatch);
    }

    /**
     * Marks that the server has finished synthesis and releases a caller blocked in
     * {@link #stop()}.
     */
    void markSynthesisComplete() {
        state = STATE_COMPLETE;
        release(completeLatch);
    }

    /**
     * Marks the request as failed and releases any caller blocked in {@link #start(long)} or
     * {@link #stop()}. Also used as the expiry action of the stop countdown.
     */
    void markFail() {
        state = STATE_FAIL;
        release(readyLatch);
        release(completeLatch);
    }

    /**
     * Internal: marks the request as closed and releases any blocked caller.
     */
    void markClosed() {
        state = STATE_CLOSED;
        release(readyLatch);
        release(completeLatch);
    }

    /** Counts down {@code latch} if one has been created; a no-op before the first start. */
    private static void release(CountDownLatch latch) {
        if (latch != null) {
            latch.countDown();
        }
    }

    /**
     * Starts synthesis using {@code Constant.DEFAULT_START_TIMEOUT_MILLISECONDS} as the
     * confirmation timeout. See {@link #start(long)}.
     *
     * @throws Exception see {@link #start(long)}
     */
    @Override
    public void start() throws Exception {
        start(Constant.DEFAULT_START_TIMEOUT_MILLISECONDS);
    }

    /**
     * Starts synthesis: generates a new session id, sends the start request and blocks until
     * the request is confirmed, fails or is closed, or until the timeout elapses.
     *
     * @param milliSeconds maximum time to wait for the confirmation, in milliseconds
     * @throws Exception if no confirmation arrives within {@code milliSeconds}, if the wait is
     *                   interrupted, or if sending the start request fails
     */
    public void start(long milliSeconds) throws Exception {
        String sessionId = IdGen.genId();
        logger.info("start, gen session id: {}", sessionId);
        currentSessionId = sessionId;
        header.put(TTSConstant.PROP_SESSION_ID, currentSessionId);
        // The latches must exist BEFORE the request goes out: a confirmation that arrives
        // right after super.start() would otherwise count down a null/stale latch and this
        // method would time out although the server had answered.
        CountDownLatch ready = new CountDownLatch(1);
        completeLatch = new CountDownLatch(1);
        readyLatch = ready;
        super.start();
        boolean result = ready.await(milliSeconds, TimeUnit.MILLISECONDS);
        if (!result) {
            String msg = String.format("timeout after %d ms waiting for start confirmation.task_id:%s,state:%s",
                    milliSeconds, currentTaskId, state);
            logger.error(msg);
            throw new Exception(msg);
        }
    }

    /**
     * Resets the countdown that {@link #stop()} uses as its timeout (described as 10 s).
     */
    public void reSetCountdown() {
        countdown.resetCountdown();
    }

    /**
     * Ends synthesis: sends the stop request and blocks until the server confirms completion.
     * A countdown (described as 10 s, resettable through {@link #reSetCountdown()}) runs while
     * waiting; when it expires the request is marked as failed and this method throws.
     *
     * @throws Exception if the request ends in {@code STATE_FAIL}, if the wait is interrupted,
     *                   or if sending the stop request fails
     */
    public void stop() throws Exception {
        state.checkStop();
        SpeechReqProtocol req = new SpeechReqProtocol();
        req.header.put(Constant.PROP_TASK_ID, currentTaskId);
        req.header.put(Constant.PROP_NAMESPACE, TTSConstant.VALUE_NAMESPACE_STREAM_REALTIME_TTS);
        req.header.put(Constant.PROP_NAME, TTSConstant.VALUE_NAME_TTS_STOP);
        req.setAppKey(getAppKey());
        conn.sendText(req.serialize());
        state = STATE_STOP_SENT;
        //start count down 10s
        countdown.startCountdown();
        try {
            completeLatch.await();
        } finally {
            // Always disarm the watchdog: if the wait is interrupted, a still-running
            // countdown would otherwise fire markFail() later, outside of any stop() call.
            countdown.stopCountdown();
        }
        if (state == STATE_FAIL) {
            // NOTE(review): markFail() is not only the countdown's expiry action, so a failure
            // reported through another path is also worded as a timeout here — confirm whether
            // the message should distinguish the two.
            String msg = String.format("timeout after %d ms waiting for complete confirmation.task_id:%s,state:%s",
                    10000, currentTaskId, state);
            logger.error(msg);
            throw new Exception(msg);
        }
    }

    /**
     * Closes the underlying connection.
     */
    public void close() {
        conn.close();
    }

}