
// com.composum.ai.backend.base.service.chat.GPTChatCompletionService (Maven / Gradle / Ivy artifact listing header)
package com.composum.ai.backend.base.service.chat;
import java.util.List;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import com.composum.ai.backend.base.service.GPTException;
/**
* Raw abstraction of the ChatGPT chat interface, with only the details that are needed.
*
* This does deliberately not use the OpenAI API classes because we want to be able to switch to a different API implementation, and hide their complexity from the rest of the code. If we need special parameters,
* we will add new methods with more specific function.
*/
public interface GPTChatCompletionService {

    /**
     * The simplest case: give some messages and get a single response.
     * If the response can be more than a few words, do consider using {@link #streamingChatCompletion(GPTChatRequest, GPTCompletionCallback)} instead,
     * to give the user some feedback while waiting.
     *
     * @param request the messages and settings to send to the chat model
     * @return the model's single response text, or null if there was none
     * @throws GPTException if the request fails or the service is misconfigured
     */
    @Nullable
    String getSingleChatCompletion(@Nonnull GPTChatRequest request) throws GPTException;

    /**
     * Give some messages and receive the streaming response via callback, to reduce waiting time.
     * It possibly waits if a rate limit is reached, but otherwise returns immediately after scheduling an asynchronous call.
     *
     * @param request  the messages and settings to send to the chat model
     * @param callback receives the streamed response fragments
     * @throws GPTException if the request fails or the service is misconfigured
     */
    void streamingChatCompletion(@Nonnull GPTChatRequest request, @Nonnull GPTCompletionCallback callback) throws GPTException;

    /**
     * Give some messages and receive the streaming response via callback, to reduce waiting time.
     * This implementation also performs tool calls if tools are given in {@link GPTChatRequest#getConfiguration()}.
     * It possibly waits if a rate limit is reached, but otherwise returns immediately after scheduling an asynchronous call.
     *
     * @param request  the messages and settings to send to the chat model
     * @param callback receives the streamed response fragments and tool call notifications
     * @throws GPTException if the request fails or the service is misconfigured
     */
    void streamingChatCompletionWithToolCalls(@Nonnull GPTChatRequest request, @Nonnull GPTCompletionCallback callback) throws GPTException;

    /**
     * Retrieves a (usually cached) chat template with that name. Mostly for backend internal use.
     * The templates are retrieved from the bundle resources at "chattemplates/", and are cached.
     *
     * @param templateName the name of the template to retrieve, e.g. "singleTranslation" .
     * @return the template, never null
     * @throws GPTException if the template cannot be found or parsed
     */
    @Nonnull
    GPTChatMessagesTemplate getTemplate(@Nonnull String templateName) throws GPTException;

    /**
     * Helper method to shorten texts by taking out the middle if too long.
     * In texts longer than this many tokens we replace the middle with " ... (truncated) ... " since ChatGPT can only
     * process a limited number of words / tokens and in the introduction or summary there is probably the most
     * condensed information about the text. The output has then maxTokens tokens, including the ... marker.
     *
     * @param text      the text to shorten
     * @param maxTokens the maximum number of tokens in the output
     * @return the (possibly shortened) text; empty if text was null
     * @throws GPTException if tokenization fails
     */
    @Nonnull
    String shorten(@Nullable String text, int maxTokens) throws GPTException;

    /**
     * Helper for preprocessing HTML so that it can easily be read by ChatGPT.
     *
     * @param html the HTML to convert, may be null
     * @return the markdown rendering; empty if html was null
     */
    @Nonnull
    String htmlToMarkdown(@Nullable String html);

    /**
     * Opposite of {@link #htmlToMarkdown(String)}.
     *
     * @param markdown the markdown to convert, may be null
     * @return the HTML rendering
     */
    String markdownToHtml(@Nullable String markdown);

    /**
     * Counts the number of tokens for the text for the normally used model. Caution: message boundaries need some tokens
     * and slicing text might create a token or two, too, so do not exactly rely on that.
     *
     * @param text the text to count tokens for; null counts as zero
     * @return the approximate token count
     */
    int countTokens(@Nullable String text);

    /**
     * Whether ChatGPT completion is enabled. If not, calling the methods that access ChatGPT throws an IllegalStateException.
     *
     * @return true if the service is globally enabled
     */
    boolean isEnabled();

    /**
     * Checks whether {@link #isEnabled()} and whether gptConfig enables executing GPT calls.
     * (That is currently whether there is an api key either globally or in the gptConfig).
     *
     * @param gptConfig the configuration to check, may be null
     * @return true if GPT calls can be executed with this configuration
     */
    boolean isEnabled(GPTConfiguration gptConfig);

    /**
     * Returns true if vision is enabled.
     *
     * @return true if image inputs can be processed
     */
    boolean isVisionEnabled();

    /**
     * Calculates embeddings for the given list of texts.
     * (Generic types restored here — the scraped source had raw {@code List}; element type float[]
     * matches the embedding vectors the API returns. NOTE(review): confirm element type against the implementation.)
     *
     * @param texts         the texts to calculate embeddings for
     * @param configuration the configuration to use, may be null for the global configuration
     * @return one embedding vector per input text, in the same order
     * @throws GPTException if the request fails or the service is misconfigured
     */
    @Nonnull
    List<float[]> getEmbeddings(List<String> texts, GPTConfiguration configuration) throws GPTException;

    /**
     * Returns the model used for {@link #getEmbeddings(List, GPTConfiguration)}.
     *
     * @return the embeddings model identifier
     */
    String getEmbeddingsModel();

}