package dev.langchain4j.model.openai;
import dev.ai4j.openai4j.OpenAiClient;
import dev.ai4j.openai4j.chat.ChatCompletionChoice;
import dev.ai4j.openai4j.chat.ChatCompletionRequest;
import dev.ai4j.openai4j.chat.ChatCompletionResponse;
import dev.ai4j.openai4j.chat.Delta;
import dev.ai4j.openai4j.chat.ResponseFormat;
import dev.ai4j.openai4j.chat.ResponseFormatType;
import dev.ai4j.openai4j.chat.StreamOptions;
import dev.langchain4j.agent.tool.ToolSpecification;
import dev.langchain4j.data.message.AiMessage;
import dev.langchain4j.data.message.ChatMessage;
import dev.langchain4j.model.StreamingResponseHandler;
import dev.langchain4j.model.Tokenizer;
import dev.langchain4j.model.chat.StreamingChatLanguageModel;
import dev.langchain4j.model.chat.TokenCountEstimator;
import dev.langchain4j.model.chat.listener.ChatModelErrorContext;
import dev.langchain4j.model.chat.listener.ChatModelListener;
import dev.langchain4j.model.chat.listener.ChatModelRequest;
import dev.langchain4j.model.chat.listener.ChatModelRequestContext;
import dev.langchain4j.model.chat.listener.ChatModelResponse;
import dev.langchain4j.model.chat.listener.ChatModelResponseContext;
import dev.langchain4j.model.openai.spi.OpenAiStreamingChatModelBuilderFactory;
import dev.langchain4j.model.output.Response;
import lombok.Builder;
import lombok.extern.slf4j.Slf4j;
import java.net.Proxy;
import java.time.Duration;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicReference;
import static dev.langchain4j.internal.Utils.getOrDefault;
import static dev.langchain4j.internal.Utils.isNullOrBlank;
import static dev.langchain4j.internal.Utils.isNullOrEmpty;
import static dev.langchain4j.model.openai.InternalOpenAiHelper.DEFAULT_USER_AGENT;
import static dev.langchain4j.model.openai.InternalOpenAiHelper.OPENAI_URL;
import static dev.langchain4j.model.openai.InternalOpenAiHelper.createModelListenerRequest;
import static dev.langchain4j.model.openai.InternalOpenAiHelper.createModelListenerResponse;
import static dev.langchain4j.model.openai.InternalOpenAiHelper.isOpenAiModel;
import static dev.langchain4j.model.openai.InternalOpenAiHelper.removeTokenUsage;
import static dev.langchain4j.model.openai.InternalOpenAiHelper.toOpenAiMessages;
import static dev.langchain4j.model.openai.InternalOpenAiHelper.toTools;
import static dev.langchain4j.model.openai.OpenAiModelName.GPT_3_5_TURBO;
import static dev.langchain4j.spi.ServiceHelper.loadFactories;
import static java.time.Duration.ofSeconds;
import static java.util.Collections.emptyList;
import static java.util.Collections.singletonList;
/**
* Represents an OpenAI language model with a chat completion interface, such as gpt-3.5-turbo and gpt-4.
* The model's response is streamed token by token and should be handled with {@link StreamingResponseHandler}.
* You can find a description of the parameters in the OpenAI API documentation.
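* <p>
* A minimal usage sketch (illustrative only; reading the key from the {@code OPENAI_API_KEY}
* environment variable is an assumption, not a requirement of this class):
* <pre>{@code
* StreamingChatLanguageModel model = OpenAiStreamingChatModel.builder()
*         .apiKey(System.getenv("OPENAI_API_KEY"))
*         .build();
*
* model.generate("Tell me a joke", new StreamingResponseHandler<AiMessage>() {
*
*     @Override
*     public void onNext(String token) {
*         System.out.print(token);
*     }
*
*     @Override
*     public void onComplete(Response<AiMessage> response) {
*         System.out.println();
*     }
*
*     @Override
*     public void onError(Throwable error) {
*         error.printStackTrace();
*     }
* });
* }</pre>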
*/
@Slf4j
public class OpenAiStreamingChatModel implements StreamingChatLanguageModel, TokenCountEstimator {
private final OpenAiClient client;
private final String modelName;
private final Double temperature;
private final Double topP;
private final List<String> stop;
private final Integer maxTokens;
private final Double presencePenalty;
private final Double frequencyPenalty;
private final Map<String, Integer> logitBias;
private final ResponseFormat responseFormat;
private final Integer seed;
private final String user;
private final Boolean strictTools;
private final Boolean parallelToolCalls;
private final Tokenizer tokenizer;
private final boolean isOpenAiModel;
private final List<ChatModelListener> listeners;
@Builder
public OpenAiStreamingChatModel(String baseUrl,
String apiKey,
String organizationId,
String modelName,
Double temperature,
Double topP,
List<String> stop,
Integer maxTokens,
Double presencePenalty,
Double frequencyPenalty,
Map<String, Integer> logitBias,
String responseFormat,
Integer seed,
String user,
Boolean strictTools,
Boolean parallelToolCalls,
Duration timeout,
Proxy proxy,
Boolean logRequests,
Boolean logResponses,
Tokenizer tokenizer,
Map<String, String> customHeaders,
List<ChatModelListener> listeners) {
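// Defaults applied below: 60-second timeout, gpt-3.5-turbo model, temperature 0.7.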
timeout = getOrDefault(timeout, ofSeconds(60));
this.client = OpenAiClient.builder()
.baseUrl(getOrDefault(baseUrl, OPENAI_URL))
.openAiApiKey(apiKey)
.organizationId(organizationId)
.callTimeout(timeout)
.connectTimeout(timeout)
.readTimeout(timeout)
.writeTimeout(timeout)
.proxy(proxy)
.logRequests(logRequests)
.logStreamingResponses(logResponses)
.userAgent(DEFAULT_USER_AGENT)
.customHeaders(customHeaders)
.build();
this.modelName = getOrDefault(modelName, GPT_3_5_TURBO);
this.temperature = getOrDefault(temperature, 0.7);
this.topP = topP;
this.stop = stop;
this.maxTokens = maxTokens;
this.presencePenalty = presencePenalty;
this.frequencyPenalty = frequencyPenalty;
this.logitBias = logitBias;
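// Map the raw response-format string (e.g. "json_object") onto the openai4j ResponseFormatType enum.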
this.responseFormat = responseFormat == null ? null : ResponseFormat.builder()
.type(ResponseFormatType.valueOf(responseFormat.toUpperCase(Locale.ROOT)))
.build();
this.seed = seed;
this.user = user;
this.strictTools = getOrDefault(strictTools, false);
this.parallelToolCalls = parallelToolCalls;
this.tokenizer = getOrDefault(tokenizer, OpenAiTokenizer::new);
this.isOpenAiModel = isOpenAiModel(this.modelName);
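// Copy the listeners defensively, so later changes to the caller's list do not affect this model.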
this.listeners = listeners == null ? emptyList() : new ArrayList<>(listeners);
}
public String modelName() {
return modelName;
}
@Override
public void generate(List<ChatMessage> messages, StreamingResponseHandler<AiMessage> handler) {
generate(messages, null, null, handler);
}
@Override
public void generate(List<ChatMessage> messages, List<ToolSpecification> toolSpecifications, StreamingResponseHandler<AiMessage> handler) {
generate(messages, toolSpecifications, null, handler);
}
@Override
public void generate(List<ChatMessage> messages, ToolSpecification toolSpecification, StreamingResponseHandler<AiMessage> handler) {
generate(messages, null, toolSpecification, handler);
}
private void generate(List<ChatMessage> messages,
List<ToolSpecification> toolSpecifications,
ToolSpecification toolThatMustBeExecuted,
StreamingResponseHandler<AiMessage> handler
) {
ChatCompletionRequest.Builder requestBuilder = ChatCompletionRequest.builder()
.stream(true)
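// stream_options.include_usage = true makes OpenAI report token usage in the final stream chunk.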
.streamOptions(StreamOptions.builder()
.includeUsage(true)
.build())
.model(modelName)
.messages(toOpenAiMessages(messages))
.temperature(temperature)
.topP(topP)
.stop(stop)
.maxTokens(maxTokens)
.presencePenalty(presencePenalty)
.frequencyPenalty(frequencyPenalty)
.logitBias(logitBias)
.responseFormat(responseFormat)
.seed(seed)
.user(user)
.parallelToolCalls(parallelToolCalls);
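// A tool that must be executed is both registered and forced via toolChoice;
// otherwise all specifications are offered and the model chooses.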
if (toolThatMustBeExecuted != null) {
requestBuilder.tools(toTools(singletonList(toolThatMustBeExecuted), strictTools));
requestBuilder.toolChoice(toolThatMustBeExecuted.name());
} else if (!isNullOrEmpty(toolSpecifications)) {
requestBuilder.tools(toTools(toolSpecifications, strictTools));
}
ChatCompletionRequest request = requestBuilder.build();
ChatModelRequest modelListenerRequest = createModelListenerRequest(request, messages, toolSpecifications);
Map<Object, Object> attributes = new ConcurrentHashMap<>();