package io.quarkiverse.langchain4j.watsonx.runtime.config;

import java.util.List;
import java.util.Optional;

import io.quarkus.runtime.annotations.ConfigDocDefault;
import io.quarkus.runtime.annotations.ConfigGroup;
import io.smallrye.config.WithDefault;

@ConfigGroup
public interface ChatModelConfig {
    /**
     * Represents the strategy used for picking the tokens during generation of the output text. During text generation, when
     * the parameter value is set to <code>greedy</code>, each successive token corresponds to the highest probability token
     * given the text that has already been generated. This strategy can lead to repetitive results, especially for longer
     * output sequences. The alternative <code>sample</code> strategy generates text by picking subsequent tokens based on the
     * probability distribution of possible next tokens defined by (i.e., conditioned on) the already-generated text and the
     * <code>top_k</code> and <code>top_p</code> parameters.
     *
     * Allowable values: [sample,greedy]
     */
    @WithDefault("greedy")
    String decodingMethod();
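
    // Illustrative usage sketch (not part of this file): assuming this group is bound under the
    // conventional "quarkus.langchain4j.watsonx.chat-model." prefix and SmallRye Config's kebab-case
    // mapping of method names, sampling could be enabled from application.properties like so:
    //
    //   quarkus.langchain4j.watsonx.chat-model.decoding-method=sample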

    /**
     * It can be used to exponentially increase the likelihood of the text generation terminating once a specified number of
     * tokens have been generated.
     */
    LengthPenaltyConfig lengthPenalty();
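
    // Illustrative sketch, assuming the same "quarkus.langchain4j.watsonx.chat-model." prefix as above
    // and that this nested group maps to a "length-penalty." sub-path (see LengthPenaltyConfig below);
    // the values are hypothetical and only need to satisfy the documented ranges:
    //
    //   quarkus.langchain4j.watsonx.chat-model.length-penalty.decay-factor=1.5
    //   quarkus.langchain4j.watsonx.chat-model.length-penalty.start-index=50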

    /**
     * The maximum number of new tokens to be generated. The maximum supported value for this field depends on the model being
     * used. How the "token" is defined depends on the tokenizer and vocabulary size, which in turn depends on the model. Often
     * the tokens are a mix of full words and sub-words. Depending on the user's plan, and on the model being used, there may
     * be an enforced maximum number of new tokens.
     *
     * Possible values: ≥ 0
     */
    @WithDefault("200")
    Integer maxNewTokens();

    /**
     * The minimum number of new tokens to be generated. If stop sequences are given, they are ignored until the minimum
     * number of tokens has been generated.
     *
     * Possible values: ≥ 0
     */
    @WithDefault("0")
    Integer minNewTokens();
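
    // Illustrative sketch (same assumed prefix as above): bounding the generated length from both sides
    // with hypothetical values, where min-new-tokens should not exceed max-new-tokens:
    //
    //   quarkus.langchain4j.watsonx.chat-model.min-new-tokens=10
    //   quarkus.langchain4j.watsonx.chat-model.max-new-tokens=512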

    /**
     * Random number generator seed to use in sampling mode for experimental repeatability.
     *
     * Possible values: ≥ 1
     */
    Optional<Integer> randomSeed();

    /**
     * Stop sequences are one or more strings which will cause the text generation to stop if/when they are produced as part
     * of the output. Stop sequences encountered prior to the minimum number of tokens being generated will be ignored.
     *
     * Possible values: 0 ≤ number of items ≤ 6, contains only unique items
     */
    Optional<List<String>> stopSequences();
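
    // Illustrative sketch (same assumed prefix as above): SmallRye Config reads List values from a
    // comma-separated string, so up to six unique stop sequences could be configured as, for example:
    //
    //   quarkus.langchain4j.watsonx.chat-model.stop-sequences=Human:,Observation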

    /**
     * A value used to modify the next-token probabilities in <code>sampling</code> mode. Values less than 1.0 sharpen the
     * probability distribution, resulting in "less random" output. Values greater than 1.0 flatten the probability
     * distribution, resulting in "more random" output. A value of 1.0 has no effect.
     *
     * Possible values: 0 ≤ value ≤ 2
     */
    @WithDefault("1.0")
    Double temperature();

    /**
     * The number of highest probability vocabulary tokens to keep for top-k-filtering. Only applies for <code>sampling</code>
     * mode. When decoding_strategy is set to <code>sample</code>, only the <code>top_k</code> most likely tokens are
     * considered as candidates for the next generated token.
     *
     * Possible values: 1 ≤ value ≤ 100
     */
    Optional<Integer> topK();

    /**
     * Similar to <code>top_k</code> except the candidates to generate the next token are the most likely tokens with
     * probabilities that add up to at least <code>top_p</code>. Also known as nucleus sampling. A value of 1.0 is equivalent
     * to disabled.
     *
     * Possible values: 0 < value ≤ 1
     */
    Optional<Double> topP();
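
    // Illustrative sketch (same assumed prefix as above): temperature, top-k, top-p and random-seed only
    // influence generation when decoding-method is set to sample; the values below are hypothetical but
    // fall within the documented ranges:
    //
    //   quarkus.langchain4j.watsonx.chat-model.decoding-method=sample
    //   quarkus.langchain4j.watsonx.chat-model.temperature=0.7
    //   quarkus.langchain4j.watsonx.chat-model.top-k=50
    //   quarkus.langchain4j.watsonx.chat-model.top-p=0.9
    //   quarkus.langchain4j.watsonx.chat-model.random-seed=42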

    /**
     * Represents the penalty for penalizing tokens that have already been generated or belong to the context. The value 1.0
     * means that there is no penalty.
     *
     * Possible values: 1 ≤ value ≤ 2
     */
    Optional<Double> repetitionPenalty();

    /**
     * Represents the maximum number of input tokens accepted. This can be used to avoid requests failing due to input being
     * longer than configured limits. If the text is truncated, then it truncates the start of the input (on the left), so the
     * end of the input will remain the same. If this value exceeds the maximum sequence length (refer to the documentation to
     * find this value for the model) then the call will fail if the total number of tokens exceeds the maximum sequence
     * length. Zero means don't truncate.
     *
     * Possible values: ≥ 0
     */
    Optional<Integer> truncateInputTokens();
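
    // Illustrative sketch (same assumed prefix as above): cap the prompt at a hypothetical 2048 tokens,
    // truncating from the start (left) of the input whenever it is longer:
    //
    //   quarkus.langchain4j.watsonx.chat-model.truncate-input-tokens=2048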

    /**
     * Pass <code>false</code> to omit matched stop sequences from the end of the output text. The default is
     * <code>true</code>, meaning that the output will end with the stop sequence text when matched.
     */
    Optional<Boolean> includeStopSequence();

    /**
     * Whether chat model requests should be logged.
     */
    @ConfigDocDefault("false")
    @WithDefault("${quarkus.langchain4j.watsonx.log-requests}")
    Optional<Boolean> logRequests();

    /**
     * Whether chat model responses should be logged.
     */
    @ConfigDocDefault("false")
    @WithDefault("${quarkus.langchain4j.watsonx.log-responses}")
    Optional<Boolean> logResponses();
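
    // Illustrative sketch (same assumed prefix as above): the defaults interpolate the global
    // quarkus.langchain4j.watsonx.log-requests / log-responses properties, so logging can be enabled
    // globally and then adjusted for the chat model specifically, e.g.:
    //
    //   quarkus.langchain4j.watsonx.log-requests=true
    //   quarkus.langchain4j.watsonx.chat-model.log-responses=true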

    /**
     * Delimiter used to concatenate the ChatMessage elements into a single string. By setting this property, you can define
     * your preferred way of concatenating messages to ensure that the prompt is structured in the correct way.
     */
    @WithDefault("\n")
    String promptJoiner();
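
    // Illustrative sketch (same assumed prefix as above): a hypothetical joiner that separates the
    // concatenated ChatMessage elements with a blank line instead of the default single newline:
    //
    //   quarkus.langchain4j.watsonx.chat-model.prompt-joiner=\n\n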

    @ConfigGroup
    public interface LengthPenaltyConfig {

        /**
         * Represents the factor of exponential decay. Larger values correspond to more aggressive decay.
         *
         * Possible values: > 1
         */
        Optional<Double> decayFactor();

        /**
         * A number of generated tokens after which this should take effect.
         *
         * Possible values: ≥ 0
         */
        Optional<Integer> startIndex();
    }
}