
io.quarkiverse.langchain4j.watsonx.runtime.config.ChatModelConfig

package io.quarkiverse.langchain4j.watsonx.runtime.config;

import java.util.List;
import java.util.Optional;

import io.quarkus.runtime.annotations.ConfigDocDefault;
import io.quarkus.runtime.annotations.ConfigGroup;
import io.smallrye.config.WithDefault;

@ConfigGroup
public interface ChatModelConfig {

    /**
     * Represents the strategy used for picking the tokens during generation of the output text. During text generation when
     * parameter value is set to greedy, each successive token corresponds to the highest probability token given the text
     * that has already been generated. This strategy can lead to repetitive results especially for longer output sequences.
     * The alternative sample strategy generates text by picking subsequent tokens based on the probability distribution of
     * possible next tokens defined by (i.e., conditioned on) the already-generated text and the top_k and top_p parameters.
     * <p>
     * Allowable values: [sample,greedy]
     */
    @WithDefault("greedy")
    String decodingMethod();

    /**
     * It can be used to exponentially increase the likelihood of the text generation terminating once a specified number of
     * tokens have been generated.
     */
    LengthPenaltyConfig lengthPenalty();

    /**
     * The maximum number of new tokens to be generated. The maximum supported value for this field depends on the model being
     * used. How the "token" is defined depends on the tokenizer and vocabulary size, which in turn depends on the model. Often
     * the tokens are a mix of full words and sub-words. Depending on the user's plan, and on the model being used, there may
     * be an enforced maximum number of new tokens.
     * <p>
     * Possible values: ≥ 0
     */
    @WithDefault("200")
    Integer maxNewTokens();

    /**
     * If stop sequences are given, they are ignored until minimum tokens are generated.
     * <p>
     * Possible values: ≥ 0
     */
    @WithDefault("0")
    Integer minNewTokens();

    /**
     * Random number generator seed to use in sampling mode for experimental repeatability.
     * <p>
     * Possible values: ≥ 1
     */
    Optional<Integer> randomSeed();

    /**
     * Stop sequences are one or more strings which will cause the text generation to stop if/when they are produced as part
     * of the output. Stop sequences encountered prior to the minimum number of tokens being generated will be ignored.
     * <p>
     * Possible values: 0 ≤ number of items ≤ 6, contains only unique items
     */
    Optional<List<String>> stopSequences();

    /**
     * A value used to modify the next-token probabilities in sampling mode. Values less than 1.0 sharpen the probability
     * distribution, resulting in "less random" output. Values greater than 1.0 flatten the probability distribution,
     * resulting in "more random" output. A value of 1.0 has no effect.
     * <p>
     * Possible values: 0 ≤ value ≤ 2
     */
    @WithDefault("1.0")
    Double temperature();

    /**
     * The number of highest probability vocabulary tokens to keep for top-k-filtering. Only applies for sampling mode. When
     * decoding_strategy is set to sample, only the top_k most likely tokens are considered as candidates for the next
     * generated token.
     * <p>
     * Possible values: 1 ≤ value ≤ 100
     */
    Optional<Integer> topK();

    /**
     * Similar to top_k except the candidates to generate the next token are the most likely tokens with probabilities that
     * add up to at least top_p. Also known as nucleus sampling. A value of 1.0 is equivalent to disabled.
     * <p>
     * Possible values: 0 < value ≤ 1
     */
    Optional<Double> topP();

    /**
     * Represents the penalty for penalizing tokens that have already been generated or belong to the context. The value 1.0
     * means that there is no penalty.
     * <p>
     * Possible values: 1 ≤ value ≤ 2
     */
    Optional<Double> repetitionPenalty();

    /**
     * Represents the maximum number of input tokens accepted. This can be used to avoid requests failing due to input being
     * longer than configured limits. If the text is truncated, then it truncates the start of the input (on the left), so the
     * end of the input will remain the same. If this value exceeds the maximum sequence length (refer to the documentation to
     * find this value for the model) then the call will fail if the total number of tokens exceeds the maximum sequence
     * length. Zero means don't truncate.
     * <p>
     * Possible values: ≥ 0
     */
    Optional<Integer> truncateInputTokens();

    /**
     * Pass false to omit matched stop sequences from the end of the output text. The default is true, meaning that the output
     * will end with the stop sequence text when matched.
     */
    Optional<Boolean> includeStopSequence();

    /**
     * Whether chat model requests should be logged.
     */
    @ConfigDocDefault("false")
    @WithDefault("${quarkus.langchain4j.watsonx.log-requests}")
    Optional<Boolean> logRequests();

    /**
     * Whether chat model responses should be logged.
     */
    @ConfigDocDefault("false")
    @WithDefault("${quarkus.langchain4j.watsonx.log-responses}")
    Optional<Boolean> logResponses();

    /**
     * Delimiter used to concatenate the ChatMessage elements into a single string. By setting this property, you can define
     * your preferred way of concatenating messages to ensure that the prompt is structured in the correct way.
     */
    @WithDefault("\n")
    String promptJoiner();

    @ConfigGroup
    public interface LengthPenaltyConfig {

        /**
         * Represents the factor of exponential decay. Larger values correspond to more aggressive decay.
         * <p>
         * Possible values: > 1
         */
        Optional<Double> decayFactor();

        /**
         * A number of generated tokens after which this should take effect.
         * <p>
         * Possible values: ≥ 0
         */
        Optional<Integer> startIndex();
    }
}
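Since this interface is a Quarkus @ConfigGroup, each accessor maps to a configuration property. As a minimal sketch, assuming the standard Quarkus kebab-case name mapping and that this group is nested under the quarkus.langchain4j.watsonx.chat-model prefix (the parent prefix is visible in the log-requests default above; verify the exact keys against the extension's generated config reference), an application.properties that switches to sampling might look like:

# Assumed property keys, derived from the kebab-case mapping of the accessors above.
quarkus.langchain4j.watsonx.chat-model.decoding-method=sample
quarkus.langchain4j.watsonx.chat-model.temperature=0.7
quarkus.langchain4j.watsonx.chat-model.top-k=50
quarkus.langchain4j.watsonx.chat-model.top-p=0.9
quarkus.langchain4j.watsonx.chat-model.random-seed=42
quarkus.langchain4j.watsonx.chat-model.max-new-tokens=512
quarkus.langchain4j.watsonx.chat-model.length-penalty.decay-factor=1.5
quarkus.langchain4j.watsonx.chat-model.length-penalty.start-index=100

Per the Javadoc, random-seed, top-k, and top-p only take effect when decoding-method is sample; under the default greedy strategy they are ignored.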


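The topK() and topP() Javadoc describes two filters applied to the next-token probability distribution in sampling mode. The following self-contained toy sketch (not watsonx code; the class, method names, and toy distribution are invented for illustration) shows what each filter keeps:

import java.util.ArrayList;
import java.util.Comparator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

public class SamplingFilterSketch {

    // Top-k filtering: keep only the k most probable tokens.
    static Map<String, Double> topK(Map<String, Double> probs, int k) {
        Map<String, Double> kept = new LinkedHashMap<>();
        probs.entrySet().stream()
                .sorted(Map.Entry.<String, Double> comparingByValue(Comparator.reverseOrder()))
                .limit(k)
                .forEach(e -> kept.put(e.getKey(), e.getValue()));
        return kept;
    }

    // Top-p (nucleus) filtering: keep the smallest set of most probable
    // tokens whose cumulative probability reaches at least p.
    static Map<String, Double> topP(Map<String, Double> probs, double p) {
        Map<String, Double> kept = new LinkedHashMap<>();
        List<Map.Entry<String, Double>> sorted = new ArrayList<>(probs.entrySet());
        sorted.sort(Map.Entry.<String, Double> comparingByValue(Comparator.reverseOrder()));
        double cumulative = 0.0;
        for (Map.Entry<String, Double> e : sorted) {
            kept.put(e.getKey(), e.getValue());
            cumulative += e.getValue();
            if (cumulative >= p) {
                break;
            }
        }
        return kept;
    }

    public static void main(String[] args) {
        // Toy next-token distribution, conditioned on the already-generated text.
        Map<String, Double> nextToken = Map.of(
                "cat", 0.50, "dog", 0.30, "fish", 0.15, "rock", 0.05);

        System.out.println(topK(nextToken, 2));   // {cat=0.5, dog=0.3}
        System.out.println(topP(nextToken, 0.9)); // {cat=0.5, dog=0.3, fish=0.15}
    }
}

Note how top-k takes a fixed number of candidates regardless of their mass, while top-p adapts: a sharply peaked distribution may pass only one token, a flat one many.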


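promptJoiner() controls how the list of ChatMessage elements is flattened into the single prompt string sent to the model. A minimal illustration of the default "\n" joiner, using plain strings in place of langchain4j's ChatMessage type (which also carries role metadata):

import java.util.List;

public class PromptJoinerExample {
    public static void main(String[] args) {
        // Hypothetical flattened chat messages; stand-ins for ChatMessage elements.
        List<String> messages = List.of(
                "You are a helpful assistant.",
                "What is IBM watsonx.ai?");

        // With the default prompt-joiner "\n", the prompt is simply the
        // messages concatenated with newlines.
        String prompt = String.join("\n", messages);
        System.out.println(prompt);
    }
}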
