// commonMain.com.xebia.functional.tokenizer.TokenEncoder.kt Maven / Gradle / Ivy
// Go to download
// Show more of this group Show more artifacts with this name
// Show all versions of xef-tokenizer-jvm Show documentation
// Show all versions of xef-tokenizer-jvm Show documentation
// Building applications with LLMs through composability in Kotlin
// The newest version!
package com.xebia.functional.tokenizer
/**
 * Builds a [TokenEncoder] from [input], whose keys are the decoded tokens and
 * whose values are the corresponding encoded tokens.
 *
 * Every key of [input] is passed through [keyMapper] first; the mapped key is
 * what the resulting encoder stores as the decoded token. Both lookup
 * directions (decoded to encoded, and encoded to decoded) are filled in a
 * single pass over [input].
 *
 * If two entries produce the same mapped key, or carry the same encoded value,
 * the entry iterated later overwrites the earlier one in the affected direction.
 *
 * @param T the key type of [input] before mapping
 * @param K the decoded token type produced by [keyMapper]
 * @param V the encoded token type
 * @param input the decoded-to-encoded pairs to load
 * @param keyMapper converts each key of [input] into the decoded token to store
 * @return an encoder backed by the two newly built maps
 */
internal fun <T, K, V> TokenEncoder(
    input: Map<T, V>,
    keyMapper: (T) -> K
): TokenEncoder<K, V> {
    val decodedToEncoded = mutableMapOf<K, V>()
    val encodedToDecoded = mutableMapOf<V, K>()
    for ((rawKey, encoded) in input) {
        val decoded: K = keyMapper(rawKey)
        // Keep both directions in step so encode and decode stay inverse lookups.
        decodedToEncoded[decoded] = encoded
        encodedToDecoded[encoded] = decoded
    }
    return TokenEncoder(decodedToEncoded, encodedToDecoded)
}
/**
 * Builds a [TokenEncoder] from [input], whose keys are the decoded tokens and
 * whose values are the corresponding encoded tokens. The keys are stored as
 * they are, without any mapping.
 *
 * @param K the decoded token type
 * @param V the encoded token type
 * @param input the decoded-to-encoded pairs to load
 * @return an encoder containing every entry of [input]
 */
internal fun <K, V> TokenEncoder(
    input: Map<K, V>,
): TokenEncoder<K, V> =
    // Delegates to the key-mapping factory with an identity mapper.
    TokenEncoder(input) { it }
/**
 * Two-way lookup between decoded tokens and their encoded form.
 *
 * The encoder is given one map per direction: [decodedToEncoded] answers
 * "what is the encoded form of this token?", and [encodedToDecoded] answers the
 * reverse question. The class only reads from these maps; it never modifies them.
 *
 * @param K the type of the decoded tokens
 * @param V the type of the encoded tokens
 * @param decodedToEncoded lookup from decoded token to encoded token
 * @param encodedToDecoded lookup from encoded token to decoded token
 */
internal class TokenEncoder<K, V>(
    private val decodedToEncoded: MutableMap<K, V>,
    private val encodedToDecoded: MutableMap<V, K>
) {
    /**
     * Checks whether the given decoded token is known to this encoder.
     *
     * @param decodedToken the decoded token
     * @return `true` if the decoded token is a key of the decoded-to-encoded map, `false` otherwise
     */
    fun containsDecodedToken(decodedToken: K): Boolean =
        decodedToEncoded.containsKey(decodedToken)

    /**
     * Encodes the given decoded token.
     *
     * @param decodedToken the decoded token
     * @return the encoded token
     * @throws IllegalArgumentException if no encoded value is stored for the decoded token
     */
    fun encode(decodedToken: K): V =
        requireNotNull(decodedToEncoded[decodedToken]) { "Unknown token for encoding: $decodedToken" }

    /**
     * Encodes the given decoded token if it is known to this encoder.
     *
     * @param decodedToken the decoded token
     * @return the encoded token, or `null` if the decoded token is unknown
     */
    fun encodeIfPresent(decodedToken: K): V? =
        decodedToEncoded[decodedToken]

    /**
     * Decodes the given encoded token if it is known to this encoder.
     *
     * @param encodedToken the encoded token
     * @return the decoded token, or `null` if the encoded token is unknown
     */
    fun decodeIfPresent(encodedToken: V): K? =
        encodedToDecoded[encodedToken]

    /** The decoded tokens known to this encoder: the key set of the decoded-to-encoded map. */
    val decodedTokens: Set<K> = decodedToEncoded.keys
}