All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.oracle.bmc.aispeech.realtimespeech.RealtimeSpeechClient Maven / Gradle / Ivy

/**
 * Copyright (c) 2016, 2024, Oracle and/or its affiliates.  All rights reserved.
 * This software is dual-licensed to you under the Universal Permissive License (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl or Apache License 2.0 as shown at http://www.apache.org/licenses/LICENSE-2.0. You may choose either license.
 */
package com.oracle.bmc.aispeech.realtimespeech;

import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.ser.FilterProvider;
import com.fasterxml.jackson.databind.ser.impl.SimpleBeanPropertyFilter;
import com.fasterxml.jackson.databind.ser.impl.SimpleFilterProvider;
import com.oracle.bmc.aispeech.model.RealtimeMessage;
import com.oracle.bmc.aispeech.model.RealtimeMessageAckAudio;
import com.oracle.bmc.aispeech.model.RealtimeMessageAuthenticationCredentials;
import com.oracle.bmc.aispeech.model.RealtimeMessageConnect;
import com.oracle.bmc.aispeech.model.RealtimeMessageError;
import com.oracle.bmc.aispeech.model.RealtimeMessageResult;
import com.oracle.bmc.aispeech.model.RealtimeMessageSendFinalResult;
import com.oracle.bmc.aispeech.model.RealtimeParameters;
import com.oracle.bmc.auth.BasicAuthenticationDetailsProvider;
import com.oracle.bmc.http.signing.DefaultRequestSigner;
import com.oracle.bmc.http.signing.RequestSigner;
import com.oracle.bmc.serialization.jackson.JacksonSerializer;
import com.oracle.bmc.util.VisibleForTesting;
import com.oracle.bmc.util.internal.StringUtils;
import org.eclipse.jetty.websocket.api.Session;
import org.eclipse.jetty.websocket.api.annotations.OnWebSocketClose;
import org.eclipse.jetty.websocket.api.annotations.OnWebSocketConnect;
import org.eclipse.jetty.websocket.api.annotations.OnWebSocketError;
import org.eclipse.jetty.websocket.api.annotations.OnWebSocketMessage;
import org.eclipse.jetty.websocket.api.annotations.WebSocket;
import org.eclipse.jetty.websocket.client.ClientUpgradeRequest;
import org.eclipse.jetty.websocket.client.WebSocketClient;

import java.io.IOException;
import java.net.ConnectException;
import java.net.URI;
import java.net.URLEncoder;
import java.nio.ByteBuffer;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;

/**
 * This is the client to be used for connecting to the realtime speech service. Note that this
 * contains an instance of the RealtimeSpeechClientListener class which will be called on the
 * appropriate event (CONNECT, RESULT, etc). Make sure you use the right combination of the
 * endpoint, compartmentId, and authenticationDetailsProvider to use this client.
 */
@WebSocket
@SuppressWarnings({"PMD.AvoidCatchingGenericException", "PMD.AvoidThrowingRawExceptionTypes"})
public class RealtimeSpeechClient {

    private final String compartmentId;
    private WebSocketClient webSocketClient;
    private Session session;
    private boolean isConnected;
    private RealtimeSpeechClientListener listener;
    private BasicAuthenticationDetailsProvider authenticationDetailsProvider;
    private URI destUri;
    private Boolean isClosureClientInitiated = false;

    private Status status;

    private static final org.slf4j.Logger LOG =
            org.slf4j.LoggerFactory.getLogger(RealtimeSpeechClient.class);

    private final FilterProvider filters =
            new SimpleFilterProvider()
                    .setFailOnUnknownId(false)
                    .addFilter("explicitlySetFilter", SimpleBeanPropertyFilter.serializeAll());

    private final ObjectMapper objectMapper =
            JacksonSerializer.getDefaultObjectMapper().setFilterProvider(filters);

    /**
     * Constructor.
     *
     * @param listener for the RealtimeSpeechClientListener
     * @param authenticationDetailsProvider for the BasicAuthenticationDetailsProvider
     * @param compartmentId for the compartmentId
     * @param webSocketClient for the WebsocketClient
     */
    public RealtimeSpeechClient(
            RealtimeSpeechClientListener listener,
            BasicAuthenticationDetailsProvider authenticationDetailsProvider,
            String compartmentId,
            WebSocketClient webSocketClient) {

        this.isConnected = false;
        this.listener = listener;
        this.authenticationDetailsProvider = authenticationDetailsProvider;
        this.compartmentId = compartmentId;
        this.webSocketClient = webSocketClient;
    }

    @com.fasterxml.jackson.databind.annotation.JsonPOJOBuilder(withPrefix = "")
    public static class Builder {
        RealtimeSpeechClientListener listener;

        public Builder listener(RealtimeSpeechClientListener listener) {
            this.listener = listener;
            return this;
        }

        BasicAuthenticationDetailsProvider authenticationDetailsProvider;

        public Builder authenticationDetailsProvider(
                BasicAuthenticationDetailsProvider authenticationDetailsProvider) {
            this.authenticationDetailsProvider = authenticationDetailsProvider;
            return this;
        }

        WebSocketClient webSocketClient;

        public Builder webSocketClient(WebSocketClient webSocketClient) {
            this.webSocketClient = webSocketClient;
            return this;
        }

        String compartmentId;

        public Builder compartmentId(String compartmentId) {
            this.compartmentId = compartmentId;
            return this;
        }

        public RealtimeSpeechClient build() {
            if (StringUtils.isBlank(compartmentId)) {
                throw new IllegalStateException("Please initialize with a non-null compartmentId");
            }

            if (listener == null) {
                throw new IllegalStateException("Please initialize the listener parameter");
            }

            if (authenticationDetailsProvider == null) {
                throw new IllegalStateException(
                        "Please initialize the authentication details provider");
            }

            if (webSocketClient == null) {
                throw new IllegalStateException("Please initialize the websocket client");
            }

            return new RealtimeSpeechClient(
                    listener, authenticationDetailsProvider, compartmentId, webSocketClient);
        }
    }

    public static Builder builder() {
        return new Builder();
    }

    /**
     * the onClose event handler.
     *
     * @param statusCode the status code sent from remote
     * @param reason the close reason sent from remote
     */
    @OnWebSocketClose
    public void onClose(int statusCode, String reason) {
        LOG.info(
                "Session closed by {} : reason = {}, status code = {}",
                isClosureClientInitiated ? "client" : "server",
                reason,
                statusCode);

        isConnected = false;
        status = Status.DISCONNECTED;

        // The listener can implement their own closing logic
        this.listener.onClose(statusCode, reason);

        this.session = null;
        this.webSocketClient = null;
        this.listener = null;
    }

    /**
     * the openError event handler.
     *
     * @param error the error throwable sent from remote
     */
    @OnWebSocketError
    public void onError(Throwable error) {
        LOG.info("Error: {}", error.getMessage());
        isConnected = false;
        this.session = null;

        this.status = Status.ERROR;

        // Pass the exception down to the listener.
        if (listener != null) {
            listener.onError(error);
        }
    }

    /**
     * the onConnect event handler.
     *
     * @param session the session that got connected
     */
    @OnWebSocketConnect
    public void onConnect(Session session) {
        LOG.info("Connect: {}", session.getRemoteAddress().getAddress());
        synchronized (this) {
            this.session = session;
        }

        // We need to decide if we want to send tokens or credentials in the client
        // initialization
        sendCreds(compartmentId);

        status = Status.CONNECTED;

        isConnected = true;
        if (listener != null) {
            listener.onConnect();
        }
    }

    /**
     * The onMessage event handler.
     *
     * @param message the message sent from remote string of server
     * @throws JsonProcessingException if errors happens on processing json response
     */
    @OnWebSocketMessage
    public void onMessage(String message) throws JsonProcessingException {
        if (listener == null) {
            return;
        }

        try {
            final RealtimeMessage realtimeMessage =
                    objectMapper.readValue(message, RealtimeMessage.class);
            if (realtimeMessage instanceof RealtimeMessageAckAudio) {
                listener.onAckMessage((RealtimeMessageAckAudio) realtimeMessage);
            } else if (realtimeMessage instanceof RealtimeMessageConnect) {
                listener.onConnectMessage((RealtimeMessageConnect) realtimeMessage);
            } else if (realtimeMessage instanceof RealtimeMessageResult) {
                listener.onResult((RealtimeMessageResult) realtimeMessage);
            } else if (realtimeMessage instanceof RealtimeMessageError) {
                final RealtimeMessageError errorMessage = (RealtimeMessageError) realtimeMessage;
                LOG.error(
                        "Received RealtimeMessageError with message {}", errorMessage.getMessage());
                listener.onError(new ConnectException(errorMessage.getMessage()));
            }
        } catch (JsonProcessingException e) {
            LOG.error("Text Message: JsonProcessingException {}", e);
        }
    }

    /**
     * Opens a connection to the specified remote.
     *
     * @param realtimeSpeechEndpoint the URL string of server
     * @param parameters other additional connection parameters
     * @throws RealtimeSpeechConnectException If there are connection errors
     */
    public void open(String realtimeSpeechEndpoint, RealtimeParameters parameters)
            throws RealtimeSpeechConnectException {
        try {
            status = Status.CONNECTING;

            final String customizationsJson =
                    objectMapper.writeValueAsString(parameters.getCustomizations());
            String queryParameter = "";
            if (parameters.getIsAckEnabled() != null) {
                queryParameter +=
                        "isAckEnabled=" + (parameters.getIsAckEnabled() ? "true" : "false") + "&";
            }
            if (parameters.getShouldIgnoreInvalidCustomizations() != null) {
                queryParameter +=
                        "shouldIgnoreInvalidCustomizations="
                                + (parameters.getShouldIgnoreInvalidCustomizations()
                                        ? "true"
                                        : "false")
                                + "&";
            }
            if (parameters.getPartialSilenceThresholdInMs() != null) {
                queryParameter +=
                        "partialSilenceThresholdInMs="
                                + parameters.getPartialSilenceThresholdInMs()
                                + "&";
            }
            if (parameters.getFinalSilenceThresholdInMs() != null) {
                queryParameter +=
                        "finalSilenceThresholdInMs="
                                + parameters.getFinalSilenceThresholdInMs()
                                + "&";
            }
            if (parameters.getStabilizePartialResults() != null) {
                queryParameter +=
                        "stabilizePartialResults="
                                + parameters.getStabilizePartialResults().getValue()
                                + "&";
            }
            if (parameters.getLanguageCode() != null) {
                queryParameter += "languageCode=" + parameters.getLanguageCode() + "&";
            }
            if (parameters.getModelDomain() != null) {
                queryParameter += "modelDomain=" + parameters.getModelDomain().getValue() + "&";
            }
            if (parameters.getCustomizations() != null
                    && !parameters.getCustomizations().isEmpty()) {
                queryParameter +=
                        "customizations=" + URLEncoder.encode(customizationsJson, "UTF-8");
            }
            if (queryParameter.length() > 0
                    && queryParameter.charAt(queryParameter.length() - 1) == '&') {
                queryParameter = queryParameter.substring(0, queryParameter.length() - 1);
            }
            // The server should contain ws or wss
            destUri =
                    new URI(
                            realtimeSpeechEndpoint
                                    + "/ws/transcribe/stream?"
                                    + queryParameter); // TODO

            LOG.info("Connecting to {} \n", destUri);

            final ClientUpgradeRequest request = new ClientUpgradeRequest();
            LOG.info("Content-Type: {}", parameters.getEncoding());
            request.setHeader("Content-Type", parameters.getEncoding());

            if (!webSocketClient.isStarted()) {
                LOG.info("Client not started, starting it now");
                webSocketClient.start();
            }

            this.session =
                    webSocketClient.connect(this, destUri, request).get(10, TimeUnit.SECONDS);
        } catch (Exception e) {
            status = Status.ERROR;
            LOG.error("Open connection exception {}", e);
            throw new RealtimeSpeechConnectException(e);
        }
    }

    /**
     * Checks the connection status.
     *
     * @return true if connected
     */
    public boolean isConnected() {
        return isConnected;
    }

    /**
     * Sends the audio data of bytes to remote.
     *
     * @param audioBytes represeting the audio data
     * @throws RealtimeSpeechConnectException If there are errors while sending audio data
     */
    public void sendAudioData(byte[] audioBytes) throws RealtimeSpeechConnectException {
        if (this.session == null || this.status.equals(Status.DISCONNECTED)) {
            LOG.error("Session has been closed, cannot send audio anymore");
            throw new RealtimeSpeechConnectException(
                    "Session has been closed, cannot send audio anymore");
        } else {
            try {
                if (this.status.equals(Status.CONNECTED)) {
                    this.session.getRemote().sendBytes(ByteBuffer.wrap(audioBytes));

                } else {
                    this.status = Status.ERROR;
                    throw new RealtimeSpeechConnectException(
                            "Could not send audio data to the realtime speech service");
                }
            } catch (IOException e) {
                this.status = Status.ERROR;
                LOG.error("Send exception {}", e);
                throw new RealtimeSpeechConnectException(e);
            }
        }
    }

    /** Closes the connection. */
    public void close() {
        isClosureClientInitiated = true;
        LOG.info("Closing SDK connection");

        try {
            if (this.session != null) {
                LOG.info(
                        "Here are the sessions shared by the client: {}",
                        webSocketClient.getOpenSessions().stream().count());
                this.session.close(1000, "Session Closed by Client");
                // this.session = null;
            }
        } catch (Exception e) {
            LOG.warn("Encountered an issue while closing the session: {}", e.getMessage());
        }

        this.isConnected = false;
        this.authenticationDetailsProvider = null;
    }

    private void sendCreds(String compartmentId) {

        final RequestSigner requestSigner =
                getRequestSignerFromAuthenticationDetailsProvider(authenticationDetailsProvider);
        LOG.info("Sending credentials");
        final Map> headers = new HashMap<>();
        final Map newHeaders =
                requestSigner.signRequest(destUri, "GET", headers, null);
        newHeaders.put("uri", destUri.toString());

        final RealtimeMessageAuthenticationCredentials authenticationMessage =
                RealtimeMessageAuthenticationCredentials.builder()
                        .compartmentId(compartmentId)
                        .headers(newHeaders)
                        .build();

        try {
            sendMessage(objectMapper.writeValueAsString(authenticationMessage));
        } catch (JsonProcessingException e) {
            LOG.info("Could not serialize authentication credentials: {}", e);
            // TODO: Add better exceptions
            this.status = Status.ERROR;
        }

        LOG.info("Credentials sent");
    }

    public void sendMessage(String message) {
        try {
            session.getRemote().sendString(message);
        } catch (IOException e) {
            LOG.info("Could not send message to the remote server: {}", e);
            // TODO: Add better exceptions
            this.status = Status.ERROR;
        }
    }
    /**
     * Use this to request the final result without waiting for the timeout if you need to close the
     * session immediately
     */
    public void requestFinalResult() {
        try {
            String message =
                    objectMapper.writeValueAsString(
                            RealtimeMessageSendFinalResult.builder().build());
            LOG.info("Requesting final result: " + message);
            sendMessage(message);
        } catch (IOException e) {
            LOG.info("Could not request final result from the remote server: {}", e);
            this.status = Status.ERROR;
        }
    }

    @VisibleForTesting
    protected RequestSigner getRequestSignerFromAuthenticationDetailsProvider(
            BasicAuthenticationDetailsProvider authenticationDetailsProvider) {
        return DefaultRequestSigner.createRequestSigner(authenticationDetailsProvider);
    }

    public Status getStatus() {
        return status;
    }

    public static enum Status {
        CONNECTED, // When the connection is active
        CONNECTING, // When the connection is being initiated
        DISCONNECTED, // Connection closed by client/server
        ERROR // Something went wrong while connecting
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy