
io.codemodder.plugins.llm.Tokens

package io.codemodder.plugins.llm;

import com.knuddels.jtokkit.Encodings;
import com.knuddels.jtokkit.api.Encoding;
import com.knuddels.jtokkit.api.EncodingRegistry;
import com.knuddels.jtokkit.api.EncodingType;
import java.util.List;

/** A set of utilities around LLM tokens. */
public final class Tokens {

  private Tokens() {}

  /**
   * Estimates the number of tokens the messages will consume.
   *
   * <p>This does not yet support estimating the number of tokens the functions will consume, since
   * the unofficial solutions are brittle.
   *
   * <p>We should be able to replace this with {@code TikTokensUtil.tokens} when the feature is
   * released.
   *
   * @param messages The messages.
   * @param tokensPerMessage The number of tokens consumed per message by the given model.
   * @param encodingType The encoding type used by the model.
   * @return The number of tokens.
   * @see "How to count tokens with tiktoken"
   */
  public static int countTokens(
      final List<String> messages, final int tokensPerMessage, final EncodingType encodingType) {
    EncodingRegistry registry = Encodings.newDefaultEncodingRegistry();
    Encoding encoding = registry.getEncoding(encodingType);

    int count = 0;
    for (var message : messages) {
      count += tokensPerMessage;
      count += encoding.countTokens(message);
    }
    count += tokensPerMessage; // Every reply is primed with <|start|>assistant<|message|>.

    return count;
  }
}
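For context, here is a hypothetical usage sketch (not part of the library source) showing how a caller might estimate the prompt size of a small chat request. The class name TokensExample is invented for illustration; the per-message overhead of 3 tokens is an assumption based on OpenAI's published guidance for gpt-3.5-turbo/gpt-4-class models, and CL100K_BASE is the jtokkit encoding those models use. Check the target model's documentation before relying on these values.

import com.knuddels.jtokkit.api.EncodingType;
import io.codemodder.plugins.llm.Tokens;
import java.util.List;

/** Hypothetical example class; not part of io.codemodder.plugins.llm. */
public class TokensExample {

  public static void main(String[] args) {
    // Two chat messages whose token cost we want to estimate before calling the model.
    List<String> messages =
        List.of(
            "You are a helpful assistant.",
            "Summarize the following stack trace in one sentence.");

    // 3 tokens of assumed per-message overhead; CL100K_BASE is the encoding used by
    // gpt-3.5-turbo/gpt-4-style models.
    int estimate = Tokens.countTokens(messages, 3, EncodingType.CL100K_BASE);
    System.out.println("Estimated prompt tokens: " + estimate);
  }
}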




