All downloads are free. Search and download functionality uses the official Maven repository.

fi.evolver.ai.spring.JtokkitTokenizer Maven / Gradle / Ivy

package fi.evolver.ai.spring;

import com.knuddels.jtokkit.Encodings;
import com.knuddels.jtokkit.api.Encoding;
import com.knuddels.jtokkit.api.EncodingRegistry;
import com.knuddels.jtokkit.api.EncodingType;

import java.util.Objects;

/**
 * {@link Tokenizer} implementation that delegates all work to a JTokkit {@link Encoding}.
 *
 * @param encoding the JTokkit encoding used for tokenizing and counting; must not be {@code null}
 */
public record JtokkitTokenizer(Encoding encoding) implements Tokenizer {
	/** Registry created once at class initialization; {@link #of(EncodingType)} resolves encodings from it. */
	private static final EncodingRegistry REGISTRY = Encodings.newDefaultEncodingRegistry();

	/**
	 * Validates the record component.
	 *
	 * <p>Every method of this record dereferences {@code encoding}, so a {@code null} value is
	 * rejected here instead of failing later at the first call.
	 *
	 * @throws NullPointerException if {@code encoding} is {@code null}
	 */
	public JtokkitTokenizer {
		Objects.requireNonNull(encoding, "encoding");
	}

	/**
	 * Encodes the given text with the wrapped encoding.
	 *
	 * @param text the text to encode
	 * @return the result of {@code encoding.encode(text)} converted to an {@code int} array
	 */
	@Override
	public int[] tokenize(String text) {
		return encoding.encode(text).toArray();
	}

	/**
	 * Counts the tokens of the given text using the wrapped encoding.
	 *
	 * @param text the text to measure
	 * @return the value reported by {@code encoding.countTokens(text)}
	 */
	@Override
	public int countTokens(String text) {
		return encoding.countTokens(text);
	}


	/**
	 * Creates a tokenizer for the encoding that the shared registry returns for the given type.
	 *
	 * @param encodingType the JTokkit encoding type to look up; must not be {@code null}
	 * @return a tokenizer wrapping the looked-up encoding
	 * @throws NullPointerException if {@code encodingType} is {@code null}
	 */
	public static JtokkitTokenizer of(EncodingType encodingType) {
		Objects.requireNonNull(encodingType, "encodingType");
		return new JtokkitTokenizer(REGISTRY.getEncoding(encodingType));
	}


	/**
	 * Returns the name of the wrapped encoding instead of the default record representation.
	 *
	 * @return {@code encoding.getName()}
	 */
	@Override
	public final String toString() {
		return encoding.getName();
	}

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy