All Downloads are FREE. Search and download functionalities are using the official Maven repository.

commonMain.aws.sdk.kotlin.services.bedrockagent.model.SemanticChunkingConfiguration.kt Maven / Gradle / Ivy

// Code generated by smithy-kotlin-codegen. DO NOT EDIT!

package aws.sdk.kotlin.services.bedrockagent.model

import aws.smithy.kotlin.runtime.SdkDsl

/**
 * Settings for semantic document chunking for a data source. Semantic chunking splits a document into smaller
 * documents based on groups of similar content derived from the text with natural language processing.
 *
 * With semantic chunking, each sentence is compared to the next to determine how similar they are. The threshold
 * is expressed as a percentile: adjacent sentences that are less similar than that percentage of sentence pairs
 * are divided into separate chunks. For example, with a threshold of 90, the 10 percent of sentence pairs that
 * are least similar are split. So for 101 sentences, 100 sentence pairs are compared and the 10 with the least
 * similarity are split, creating 11 chunks. These chunks are further split if they exceed the max token size.
 *
 * A buffer size must also be specified. It determines whether sentences are compared in isolation, or within a
 * moving context window that includes the previous and following sentence. For example, with a buffer size of
 * `1`, the embedding for sentence 10 is derived from sentences 9, 10, and 11 combined.
 *
 * All three properties are required: constructing an instance from a [Builder] in which any of them is still
 * `null` throws [IllegalArgumentException].
 */
public class SemanticChunkingConfiguration private constructor(builder: Builder) {
    /**
     * The dissimilarity threshold for splitting chunks.
     */
    public val breakpointPercentileThreshold: kotlin.Int =
        requireNotNull(builder.breakpointPercentileThreshold) {
            "A non-null value must be provided for breakpointPercentileThreshold"
        }

    /**
     * The buffer size.
     */
    public val bufferSize: kotlin.Int =
        requireNotNull(builder.bufferSize) {
            "A non-null value must be provided for bufferSize"
        }

    /**
     * The maximum number of tokens that a chunk can contain.
     */
    public val maxTokens: kotlin.Int =
        requireNotNull(builder.maxTokens) {
            "A non-null value must be provided for maxTokens"
        }

    /**
     * Renders the type name followed by every property as a comma-separated `name=value` list in parentheses.
     */
    override fun toString(): kotlin.String =
        listOf(
            "breakpointPercentileThreshold=$breakpointPercentileThreshold",
            "bufferSize=$bufferSize",
            "maxTokens=$maxTokens",
        ).joinToString(separator = ",", prefix = "SemanticChunkingConfiguration(", postfix = ")")

    /**
     * Combines the three properties, in declaration order, using a multiplier of 31.
     */
    override fun hashCode(): kotlin.Int =
        31 * (31 * breakpointPercentileThreshold + bufferSize) + maxTokens

    /**
     * Two instances are equal when they are of exactly the same class and all three properties match.
     */
    override fun equals(other: kotlin.Any?): kotlin.Boolean {
        if (this === other) return true
        if (other == null || this::class != other::class) return false

        other as SemanticChunkingConfiguration

        return breakpointPercentileThreshold == other.breakpointPercentileThreshold &&
            bufferSize == other.bufferSize &&
            maxTokens == other.maxTokens
    }

    /**
     * Returns a new instance built from a [Builder] that is pre-populated with this instance's values and then
     * modified by [block]. With the default empty [block] the result is an equal copy.
     */
    public inline fun copy(block: Builder.() -> kotlin.Unit = {}): SemanticChunkingConfiguration {
        val seeded = Builder(this)
        seeded.block()
        return seeded.build()
    }

    /**
     * Mutable holder for the values of a [SemanticChunkingConfiguration]. Every property starts out as `null`.
     */
    @SdkDsl
    public class Builder {
        /**
         * The dissimilarity threshold for splitting chunks.
         */
        public var breakpointPercentileThreshold: kotlin.Int? = null

        /**
         * The buffer size.
         */
        public var bufferSize: kotlin.Int? = null

        /**
         * The maximum number of tokens that a chunk can contain.
         */
        public var maxTokens: kotlin.Int? = null

        /**
         * Creates a builder with every property unset.
         */
        @PublishedApi
        internal constructor()

        /**
         * Creates a builder whose properties are copied from [x].
         */
        @PublishedApi
        internal constructor(x: SemanticChunkingConfiguration) : this() {
            breakpointPercentileThreshold = x.breakpointPercentileThreshold
            bufferSize = x.bufferSize
            maxTokens = x.maxTokens
        }

        /**
         * Produces the immutable configuration; fails with [IllegalArgumentException] if a property is `null`.
         */
        @PublishedApi
        internal fun build(): SemanticChunkingConfiguration = SemanticChunkingConfiguration(this)

        /**
         * Replaces every property that is still `null` with `0` and returns this builder.
         */
        internal fun correctErrors(): Builder = apply {
            breakpointPercentileThreshold = breakpointPercentileThreshold ?: 0
            bufferSize = bufferSize ?: 0
            maxTokens = maxTokens ?: 0
        }
    }

    public companion object {
        /**
         * DSL-style factory: runs [block] against a fresh [Builder] and builds the result.
         */
        public operator fun invoke(block: Builder.() -> kotlin.Unit): SemanticChunkingConfiguration {
            val fresh = Builder()
            fresh.block()
            return fresh.build()
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy