package io.quarkiverse.langchain4j.vertexai.runtime.gemini.config;
import java.time.Duration;
import java.util.Optional;
import java.util.OptionalDouble;
import java.util.OptionalInt;
import io.quarkus.runtime.annotations.ConfigDocDefault;
import io.quarkus.runtime.annotations.ConfigGroup;
import io.smallrye.config.WithDefault;
@ConfigGroup
public interface ChatModelConfig {
/**
* The id of the model to use
*/
@WithDefault("gemini-pro")
String modelId();
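// Example (sketch): using the quarkus.langchain4j.vertexai.gemini prefix that also
// appears in the timeout() default below, the model can be overridden from
// application.properties roughly as follows; the "chat-model" segment and the
// kebab-case key are assumptions based on common quarkus-langchain4j naming:
//
//   quarkus.langchain4j.vertexai.gemini.chat-model.model-id=gemini-1.5-pro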
/**
* The temperature is used for sampling during response generation, which occurs when topP and topK are applied.
* Temperature controls the degree of randomness in token selection. Lower temperatures are good for prompts that require a
* less open-ended or creative response, while higher temperatures can lead to more diverse or creative results.
* A temperature of 0 means that the highest probability tokens are always selected. In this case, responses for a given
* prompt are mostly deterministic, but a small amount of variation is still possible.
*
* If the model returns a response that's too generic, too short, or the model gives a fallback response, try increasing the
* temperature.
*
* Range for gemini-1.0-pro-001: 0.0 - 1.0
*
* Range for gemini-1.0-pro-002, gemini-1.5-pro: 0.0 - 2.0
*
* Default for gemini-1.5-pro and gemini-1.0-pro-002: 1.0
*
* Default for gemini-1.0-pro-001: 0.9
*/
@WithDefault("0.0")
OptionalDouble temperature();
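// Illustration (assumed property name, same hypothetical prefix as above): the
// configured default of 0.0 keeps answers close to deterministic; raising it
// within the documented range trades determinism for variety:
//
//   # near-deterministic answers
//   quarkus.langchain4j.vertexai.gemini.chat-model.temperature=0.0
//   # more varied/creative answers (within 0.0 - 2.0 for gemini-1.5-pro)
//   quarkus.langchain4j.vertexai.gemini.chat-model.temperature=1.2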
/**
* Maximum number of tokens that can be generated in the response. A token is approximately four characters. 100 tokens
* correspond to roughly 60-80 words.
* Specify a lower value for shorter responses and a higher value for potentially longer responses.
*/
@WithDefault("8192")
Integer maxOutputTokens();
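// Rough arithmetic from the note above: at roughly 60-80 words per 100 tokens,
// the default of 8192 output tokens corresponds to on the order of
// 4,900 - 6,500 words (and, at ~4 characters per token, about 32,000 characters).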
/**
* Top-P changes how the model selects tokens for output. Tokens are selected from the most (see top-K) to least probable
* until the sum of their probabilities equals the top-P value.
* For example, if tokens A, B, and C have a probability of 0.3, 0.2, and 0.1 and the top-P value is 0.5, then the model
* will select either A or B as the next token by using temperature and exclude C as a candidate.
*
* Specify a lower value for less random responses and a higher value for more random responses.
*
* Range: 0.0 - 1.0
*
* gemini-1.0-pro and gemini-1.5-pro don't support topK
*/
OptionalDouble topP();
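// Worked example from the javadoc above: with sorted probabilities A=0.3, B=0.2,
// C=0.1 and a top-P of 0.5, the cumulative sum reaches 0.5 after A and B (0.3 + 0.2),
// so C is dropped and temperature sampling picks between A and B only. A typical
// override (assumed property name, same hypothetical prefix as above):
//
//   quarkus.langchain4j.vertexai.gemini.chat-model.top-p=0.95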
/**
* Top-K changes how the model selects tokens for output. A top-K of 1 means the next selected token is the most probable
* among all tokens in the model's vocabulary (also called greedy decoding),
* while a top-K of 3 means that the next token is selected from among the three most probable tokens by using temperature.
*
* For each token selection step, the top-K tokens with the highest probabilities are sampled. Then tokens are further
* filtered based on top-P with the final token selected using temperature sampling.
*
* Specify a lower value for less random responses and a higher value for more random responses.
*
* Range: 1-40
*
* Default for gemini-1.5-pro: 0.94
*
* Default for gemini-1.0-pro: 1
*/
OptionalInt topK();
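// Putting the two together, per the description above: first the top-K most
// probable tokens are kept, that set is then trimmed by top-P, and temperature
// sampling chooses among what remains; a top-K of 1 is therefore greedy decoding
// regardless of top-P. Assumed property name, same hypothetical prefix as above:
//
//   quarkus.langchain4j.vertexai.gemini.chat-model.top-k=3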
/**
* Whether chat model requests should be logged
*/
@ConfigDocDefault("false")
Optional<Boolean> logRequests();
/**
* Whether chat model responses should be logged
*/
@ConfigDocDefault("false")
Optional<Boolean> logResponses();
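// Example (assumed property names, same hypothetical prefix as above): enabling
// both flags logs full request and response payloads, which helps debugging but
// is verbose (and may expose prompt data) in production logs:
//
//   quarkus.langchain4j.vertexai.gemini.chat-model.log-requests=true
//   quarkus.langchain4j.vertexai.gemini.chat-model.log-responses=true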
/**
* Global timeout for requests to the Gemini APIs
*/
@ConfigDocDefault("10s")
@WithDefault("${quarkus.langchain4j.vertexai.gemini.timeout}")
Optional<Duration> timeout();
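// The default expression above falls back to the provider-level
// quarkus.langchain4j.vertexai.gemini.timeout property, so setting that one key
// covers this model unless overridden. Quarkus duration converters accept
// simplified values such as "10s" or "2m" as well as ISO-8601 (e.g. "PT10S"):
//
//   quarkus.langchain4j.vertexai.gemini.timeout=30s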
}