
io.quarkiverse.langchain4j.watsonx.runtime.config.ChatModelConfig

package io.quarkiverse.langchain4j.watsonx.runtime.config;

import java.util.List;
import java.util.Optional;

import io.quarkus.runtime.annotations.ConfigDocDefault;
import io.quarkus.runtime.annotations.ConfigGroup;
import io.smallrye.config.WithDefault;

@ConfigGroup
public interface ChatModelConfig {

    /**
     * Represents the strategy used for picking the tokens during generation of the output text. During text generation when
     * parameter value is set to greedy, each successive token corresponds to the highest probability token given the text
     * that has already been generated. This strategy can lead to repetitive results especially for longer output sequences.
     * The alternative sample strategy generates text by picking subsequent tokens based on the probability distribution of
     * possible next tokens defined by (i.e., conditioned on) the already-generated text and the top_k and top_p parameters.
     * <p>
     * Allowable values: [sample,greedy]
     */
    @WithDefault("greedy")
    String decodingMethod();

    /**
     * It can be used to exponentially increase the likelihood of the text generation terminating once a specified number of
     * tokens have been generated.
     */
    LengthPenaltyConfig lengthPenalty();

    /**
     * The maximum number of new tokens to be generated. The maximum supported value for this field depends on the model being
     * used. How the "token" is defined depends on the tokenizer and vocabulary size, which in turn depends on the model. Often
     * the tokens are a mix of full words and sub-words. Depending on the user's plan, and on the model being used, there may
     * be an enforced maximum number of new tokens.
     * <p>
     * Possible values: ≥ 0
     */
    @WithDefault("200")
    Integer maxNewTokens();

    /**
     * If stop sequences are given, they are ignored until minimum tokens are generated.
     * <p>
     * Possible values: ≥ 0
     */
    @WithDefault("0")
    Integer minNewTokens();

    /**
     * Random number generator seed to use in sampling mode for experimental repeatability.
     * <p>
     * Possible values: ≥ 1
     */
    Optional<Integer> randomSeed();

    /**
     * Stop sequences are one or more strings which will cause the text generation to stop if/when they are produced as part
     * of the output. Stop sequences encountered prior to the minimum number of tokens being generated will be ignored.
     * <p>
     * Possible values: 0 ≤ number of items ≤ 6, contains only unique items
     */
    Optional<List<String>> stopSequences();

    /**
     * A value used to modify the next-token probabilities in sampling mode. Values less than 1.0 sharpen the probability
     * distribution, resulting in "less random" output. Values greater than 1.0 flatten the probability distribution,
     * resulting in "more random" output. A value of 1.0 has no effect.
     * <p>
     * Possible values: 0 ≤ value ≤ 2
     */
    @WithDefault("1.0")
    Double temperature();

    /**
     * The number of highest probability vocabulary tokens to keep for top-k-filtering. Only applies for sampling mode. When
     * decoding_strategy is set to sample, only the top_k most likely tokens are considered as candidates for the next
     * generated token.
     * <p>
     * Possible values: 1 ≤ value ≤ 100
     */
    Optional<Integer> topK();

    /**
     * Similar to top_k except the candidates to generate the next token are the most likely tokens with probabilities that
     * add up to at least top_p. Also known as nucleus sampling. A value of 1.0 is equivalent to disabled.
     * <p>
     * Possible values: 0 < value ≤ 1
     */
    Optional<Double> topP();

    /**
     * Represents the penalty for penalizing tokens that have already been generated or belong to the context. The value 1.0
     * means that there is no penalty.
     * <p>
     * Possible values: 1 ≤ value ≤ 2
     */
    Optional<Double> repetitionPenalty();

    /**
     * Represents the maximum number of input tokens accepted. This can be used to avoid requests failing due to input being
     * longer than configured limits. If the text is truncated, then it truncates the start of the input (on the left), so the
     * end of the input will remain the same. If this value exceeds the maximum sequence length (refer to the documentation to
     * find this value for the model) then the call will fail if the total number of tokens exceeds the maximum sequence
     * length. Zero means don't truncate.
     * <p>
     * Possible values: ≥ 0
     */
    Optional<Integer> truncateInputTokens();

    /**
     * Pass false to omit matched stop sequences from the end of the output text. The default is true, meaning that the output
     * will end with the stop sequence text when matched.
     */
    Optional<Boolean> includeStopSequence();

    /**
     * Whether chat model requests should be logged.
     */
    @ConfigDocDefault("false")
    @WithDefault("${quarkus.langchain4j.watsonx.log-requests}")
    Optional<Boolean> logRequests();

    /**
     * Whether chat model responses should be logged.
     */
    @ConfigDocDefault("false")
    @WithDefault("${quarkus.langchain4j.watsonx.log-responses}")
    Optional<Boolean> logResponses();

    /**
     * Delimiter used to concatenate the ChatMessage elements into a single string. By setting this property, you can define
     * your preferred way of concatenating messages to ensure that the prompt is structured in the correct way.
     */
    @WithDefault("\n")
    String promptJoiner();

    @ConfigGroup
    public interface LengthPenaltyConfig {

        /**
         * Represents the factor of exponential decay. Larger values correspond to more aggressive decay.
         * <p>
         * Possible values: > 1
         */
        Optional<Double> decayFactor();

        /**
         * A number of generated tokens after which this should take effect.
         * <p>
         * Possible values: ≥ 0
         */
        Optional<Integer> startIndex();
    }
}
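Since this interface is a Quarkus @ConfigGroup, each accessor maps to a configuration property. As a minimal sketch, assuming the standard Quarkus kebab-case name mapping and that this group is nested under the quarkus.langchain4j.watsonx.chat-model prefix (the parent prefix is visible in the log-requests default above; verify the exact keys against the extension's generated config reference), an application.properties that switches to sampling might look like:

# Assumed property keys, derived from the kebab-case mapping of the accessors above.
quarkus.langchain4j.watsonx.chat-model.decoding-method=sample
quarkus.langchain4j.watsonx.chat-model.temperature=0.7
quarkus.langchain4j.watsonx.chat-model.top-k=50
quarkus.langchain4j.watsonx.chat-model.top-p=0.9
quarkus.langchain4j.watsonx.chat-model.random-seed=42
quarkus.langchain4j.watsonx.chat-model.max-new-tokens=512
quarkus.langchain4j.watsonx.chat-model.length-penalty.decay-factor=1.5
quarkus.langchain4j.watsonx.chat-model.length-penalty.start-index=100

Per the Javadoc, random-seed, top-k, and top-p only take effect when decoding-method is sample; under the default greedy strategy they are ignored.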


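The topK() and topP() Javadoc describes two filters applied to the next-token probability distribution in sampling mode. The following self-contained toy sketch (not watsonx code; the class, method names, and toy distribution are invented for illustration) shows what each filter keeps:

import java.util.ArrayList;
import java.util.Comparator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

public class SamplingFilterSketch {

    // Top-k filtering: keep only the k most probable tokens.
    static Map<String, Double> topK(Map<String, Double> probs, int k) {
        Map<String, Double> kept = new LinkedHashMap<>();
        probs.entrySet().stream()
                .sorted(Map.Entry.<String, Double> comparingByValue(Comparator.reverseOrder()))
                .limit(k)
                .forEach(e -> kept.put(e.getKey(), e.getValue()));
        return kept;
    }

    // Top-p (nucleus) filtering: keep the smallest set of most probable
    // tokens whose cumulative probability reaches at least p.
    static Map<String, Double> topP(Map<String, Double> probs, double p) {
        Map<String, Double> kept = new LinkedHashMap<>();
        List<Map.Entry<String, Double>> sorted = new ArrayList<>(probs.entrySet());
        sorted.sort(Map.Entry.<String, Double> comparingByValue(Comparator.reverseOrder()));
        double cumulative = 0.0;
        for (Map.Entry<String, Double> e : sorted) {
            kept.put(e.getKey(), e.getValue());
            cumulative += e.getValue();
            if (cumulative >= p) {
                break;
            }
        }
        return kept;
    }

    public static void main(String[] args) {
        // Toy next-token distribution, conditioned on the already-generated text.
        Map<String, Double> nextToken = Map.of(
                "cat", 0.50, "dog", 0.30, "fish", 0.15, "rock", 0.05);

        System.out.println(topK(nextToken, 2));   // {cat=0.5, dog=0.3}
        System.out.println(topP(nextToken, 0.9)); // {cat=0.5, dog=0.3, fish=0.15}
    }
}

Note how top-k takes a fixed number of candidates regardless of their mass, while top-p adapts: a sharply peaked distribution may pass only one token, a flat one many.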


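promptJoiner() controls how the list of ChatMessage elements is flattened into the single prompt string sent to the model. A minimal illustration of the default "\n" joiner, using plain strings in place of langchain4j's ChatMessage type (which also carries role metadata):

import java.util.List;

public class PromptJoinerExample {
    public static void main(String[] args) {
        // Hypothetical flattened chat messages; stand-ins for ChatMessage elements.
        List<String> messages = List.of(
                "You are a helpful assistant.",
                "What is IBM watsonx.ai?");

        // With the default prompt-joiner "\n", the prompt is simply the
        // messages concatenated with newlines.
        String prompt = String.join("\n", messages);
        System.out.println(prompt);
    }
}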
