// fi.evolver.ai.spring.JtokkitTokenizer (Maven / Gradle / Ivy)
package fi.evolver.ai.spring;
import com.knuddels.jtokkit.Encodings;
import com.knuddels.jtokkit.api.Encoding;
import com.knuddels.jtokkit.api.EncodingRegistry;
import com.knuddels.jtokkit.api.EncodingType;
import java.util.Objects;
/**
 * {@link Tokenizer} implementation that delegates all work to a jtokkit {@link Encoding}.
 *
 * @param encoding the jtokkit encoding used for tokenizing and counting; must not be {@code null}
 */
public record JtokkitTokenizer(Encoding encoding) implements Tokenizer {

    /** Registry used by {@link #of(EncodingType)} to look up encodings; created once per class load. */
    private static final EncodingRegistry REGISTRY = Encodings.newDefaultEncodingRegistry();

    /**
     * Validates the record component.
     *
     * @throws NullPointerException if {@code encoding} is {@code null}
     */
    public JtokkitTokenizer {
        // Fail at construction time instead of on the first tokenize/countTokens/toString call.
        Objects.requireNonNull(encoding, "encoding");
    }

    /**
     * Encodes the given text with the wrapped encoding.
     *
     * @param text the text to encode
     * @return the encoded tokens as an {@code int} array
     */
    @Override
    public int[] tokenize(String text) {
        return encoding.encode(text).toArray();
    }

    /**
     * Counts the tokens of the given text using the wrapped encoding.
     *
     * @param text the text whose tokens are counted
     * @return the token count reported by the encoding
     */
    @Override
    public int countTokens(String text) {
        return encoding.countTokens(text);
    }

    /**
     * Creates a tokenizer for the given encoding type, resolved through the shared registry.
     *
     * @param encodingType the jtokkit encoding type to look up
     * @return a tokenizer wrapping the encoding registered for {@code encodingType}
     */
    public static JtokkitTokenizer of(EncodingType encodingType) {
        return new JtokkitTokenizer(REGISTRY.getEncoding(encodingType));
    }

    /**
     * Returns the name of the wrapped encoding rather than the default record representation.
     *
     * @return the encoding's name
     */
    @Override
    public String toString() {
        return encoding.getName();
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy