io.quarkiverse.langchain4j.ollama.OllamaRestApi Maven / Gradle / Ivy
package io.quarkiverse.langchain4j.ollama;
import static java.util.stream.Collectors.joining;
import static java.util.stream.StreamSupport.stream;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.List;
import jakarta.ws.rs.Consumes;
import jakarta.ws.rs.POST;
import jakarta.ws.rs.Path;
import jakarta.ws.rs.Produces;
import jakarta.ws.rs.WebApplicationException;
import jakarta.ws.rs.core.HttpHeaders;
import jakarta.ws.rs.core.MediaType;
import jakarta.ws.rs.core.MultivaluedMap;
import jakarta.ws.rs.ext.ReaderInterceptor;
import jakarta.ws.rs.ext.ReaderInterceptorContext;
import jakarta.ws.rs.ext.WriterInterceptor;
import jakarta.ws.rs.ext.WriterInterceptorContext;
import org.eclipse.microprofile.rest.client.annotation.RegisterProvider;
import org.jboss.logging.Logger;
import org.jboss.resteasy.reactive.ClientWebApplicationException;
import org.jboss.resteasy.reactive.RestStreamElementType;
import org.jboss.resteasy.reactive.client.api.ClientLogger;
import com.fasterxml.jackson.core.JsonParseException;
import com.fasterxml.jackson.databind.ObjectMapper;
import io.quarkiverse.langchain4j.QuarkusJsonCodecFactory;
import io.quarkus.rest.client.reactive.jackson.ClientObjectMapper;
import io.smallrye.mutiny.Multi;
import io.vertx.core.Handler;
import io.vertx.core.MultiMap;
import io.vertx.core.Vertx;
import io.vertx.core.buffer.Buffer;
import io.vertx.core.http.HttpClientRequest;
import io.vertx.core.http.HttpClientResponse;
/**
* This Microprofile REST client is used as the building block of all the API calls to HuggingFace.
* The implementation is provided by the Reactive REST Client in Quarkus.
*/
@Path("")
@Consumes(MediaType.APPLICATION_JSON)
@Produces(MediaType.APPLICATION_JSON)
@RegisterProvider(OllamaRestApi.OllamaRestApiReaderInterceptor.class)
@RegisterProvider(OllamaRestApi.OpenAiRestApiWriterInterceptor.class)
public interface OllamaRestApi {
@Path("/api/chat")
@POST
ChatResponse chat(ChatRequest request);
@Path("/api/chat")
@POST
@RestStreamElementType(MediaType.APPLICATION_JSON)
Multi streamingChat(ChatRequest request);
@Path("/api/embeddings")
@POST
EmbeddingResponse embeddings(EmbeddingRequest request);
@ClientObjectMapper
static ObjectMapper objectMapper(ObjectMapper defaultObjectMapper) {
return QuarkusJsonCodecFactory.SnakeCaseObjectMapperHolder.MAPPER;
}
/**
* This is needed because for some reason Vert.x is not giving us the entire content of the last chunk and this results
* in a json parsing exception.
*/
class OllamaRestApiReaderInterceptor implements ReaderInterceptor {
@Override
public Object aroundReadFrom(ReaderInterceptorContext context) throws IOException, WebApplicationException {
try {
return context.proceed();
} catch (ClientWebApplicationException e) {
if (e.getCause() instanceof JsonParseException) {
if (e.getResponse().getStatus() == 200) {
Object invokedMethod = context.getProperty("org.eclipse.microprofile.rest.client.invokedMethod");
if ((invokedMethod != null) && invokedMethod.toString().contains("OllamaRestApi.streamingChat")) {
InputStream is = context.getInputStream();
if (is instanceof ByteArrayInputStream bis) {
bis.reset();
String chunk = new String(bis.readAllBytes());
final var ctx = Vertx.currentContext();
if (ctx == null) {
throw e;
}
// This piece of code deals with is the case where a message from Ollama is not received as an entire line
// but in pieces (my guess is that it is a Vertx bug).
// There is nothing we can do in this case except for returning empty responses and in the meantime buffer the pieces
// by storing them in the Vertx Duplicated Context
String existingBuffer = ctx.getLocal("buffer");
if ((existingBuffer != null) && !existingBuffer.isEmpty()) {
if (chunk.endsWith("}")) {
ctx.putLocal("buffer", "");
String entireLine = existingBuffer + chunk;
return QuarkusJsonCodecFactory.SnakeCaseObjectMapperHolder.MAPPER.readValue(entireLine,
ChatResponse.class);
} else {
ctx.putLocal("buffer", existingBuffer + chunk);
return ChatResponse.emptyNotDone();
}
} else {
ctx.putLocal("buffer", chunk);
return ChatResponse.emptyNotDone();
}
}
}
}
}
throw e;
}
}
}
/**
* The point of this is to properly set the {@code stream} value of the request
* so users don't have to remember to set it manually
*/
class OpenAiRestApiWriterInterceptor implements WriterInterceptor {
@Override
public void aroundWriteTo(WriterInterceptorContext context) throws IOException, WebApplicationException {
Object entity = context.getEntity();
if (entity instanceof ChatRequest request) {
MultivaluedMap headers = context.getHeaders();
List
© 2015 - 2024 Weber Informatics LLC | Privacy Policy