
// com.amazonaws.services.bedrockagent.model.SemanticChunkingConfiguration Maven / Gradle / Ivy
/*
* Copyright 2019-2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance with
* the License. A copy of the License is located at
*
* http://aws.amazon.com/apache2.0
*
* or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
* CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions
* and limitations under the License.
*/
package com.amazonaws.services.bedrockagent.model;
import java.io.Serializable;
import javax.annotation.Generated;
import com.amazonaws.protocol.StructuredPojo;
import com.amazonaws.protocol.ProtocolMarshaller;
/**
 * <p>
 * Settings for semantic document chunking for a data source. Semantic chunking splits a document into smaller
 * documents based on groups of similar content derived from the text with natural language processing.
 * </p>
 * <p>
 * With semantic chunking, each sentence is compared to the next to determine how similar they are. You specify a
 * threshold in the form of a percentile, where adjacent sentences that are less similar than that percentage of
 * sentence pairs are divided into separate chunks. For example, if you set the threshold to 90, then the 10 percent of
 * sentence pairs that are least similar are split. So if you have 101 sentences, 100 sentence pairs are compared, and
 * the 10 with the least similarity are split, creating 11 chunks. These chunks are further split if they exceed the max
 * token size.
 * </p>
 * <p>
 * You must also specify a buffer size, which determines whether sentences are compared in isolation, or within a moving
 * context window that includes the previous and following sentence. For example, if you set the buffer size to
 * {@code 1}, the embedding for sentence 10 is derived from sentences 9, 10, and 11 combined.
 * </p>
 * <p>
 * All three properties are held as boxed {@code Integer} values that may be {@code null}; the setters in this class
 * store whatever they are given without validation.
 * </p>
 *
 * @see "AWS API Documentation"
 */
@Generated("com.amazonaws:aws-java-sdk-code-generator")
public class SemanticChunkingConfiguration implements Serializable, Cloneable, StructuredPojo {

    /**
     * <p>
     * The dissimilarity threshold for splitting chunks.
     * </p>
     */
    private Integer breakpointPercentileThreshold;

    /**
     * <p>
     * The buffer size.
     * </p>
     */
    private Integer bufferSize;

    /**
     * <p>
     * The maximum number of tokens that a chunk can contain.
     * </p>
     */
    private Integer maxTokens;

    /**
     * <p>
     * The dissimilarity threshold for splitting chunks.
     * </p>
     *
     * @param breakpointPercentileThreshold
     *        The dissimilarity threshold for splitting chunks.
     */
    public void setBreakpointPercentileThreshold(Integer breakpointPercentileThreshold) {
        this.breakpointPercentileThreshold = breakpointPercentileThreshold;
    }

    /**
     * <p>
     * The dissimilarity threshold for splitting chunks.
     * </p>
     *
     * @return The dissimilarity threshold for splitting chunks.
     */
    public Integer getBreakpointPercentileThreshold() {
        return this.breakpointPercentileThreshold;
    }

    /**
     * <p>
     * The dissimilarity threshold for splitting chunks.
     * </p>
     *
     * @param breakpointPercentileThreshold
     *        The dissimilarity threshold for splitting chunks.
     * @return Returns a reference to this object so that method calls can be chained together.
     */
    public SemanticChunkingConfiguration withBreakpointPercentileThreshold(Integer breakpointPercentileThreshold) {
        setBreakpointPercentileThreshold(breakpointPercentileThreshold);
        return this;
    }

    /**
     * <p>
     * The buffer size.
     * </p>
     *
     * @param bufferSize
     *        The buffer size.
     */
    public void setBufferSize(Integer bufferSize) {
        this.bufferSize = bufferSize;
    }

    /**
     * <p>
     * The buffer size.
     * </p>
     *
     * @return The buffer size.
     */
    public Integer getBufferSize() {
        return this.bufferSize;
    }

    /**
     * <p>
     * The buffer size.
     * </p>
     *
     * @param bufferSize
     *        The buffer size.
     * @return Returns a reference to this object so that method calls can be chained together.
     */
    public SemanticChunkingConfiguration withBufferSize(Integer bufferSize) {
        setBufferSize(bufferSize);
        return this;
    }

    /**
     * <p>
     * The maximum number of tokens that a chunk can contain.
     * </p>
     *
     * @param maxTokens
     *        The maximum number of tokens that a chunk can contain.
     */
    public void setMaxTokens(Integer maxTokens) {
        this.maxTokens = maxTokens;
    }

    /**
     * <p>
     * The maximum number of tokens that a chunk can contain.
     * </p>
     *
     * @return The maximum number of tokens that a chunk can contain.
     */
    public Integer getMaxTokens() {
        return this.maxTokens;
    }

    /**
     * <p>
     * The maximum number of tokens that a chunk can contain.
     * </p>
     *
     * @param maxTokens
     *        The maximum number of tokens that a chunk can contain.
     * @return Returns a reference to this object so that method calls can be chained together.
     */
    public SemanticChunkingConfiguration withMaxTokens(Integer maxTokens) {
        setMaxTokens(maxTokens);
        return this;
    }

    /**
     * Returns a string representation of this object. This is useful for testing and debugging.
     * <p>
     * The result is a brace-enclosed, comma-separated list of {@code Name: value} entries. Properties whose getter
     * returns {@code null} are omitted, and no separator is left dangling after the last emitted entry.
     * </p>
     *
     * @return A string representation of this object.
     *
     * @see java.lang.Object#toString()
     */
    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder();
        sb.append("{");
        if (getBreakpointPercentileThreshold() != null) {
            sb.append("BreakpointPercentileThreshold: ").append(getBreakpointPercentileThreshold()).append(",");
        }
        if (getBufferSize() != null) {
            sb.append("BufferSize: ").append(getBufferSize()).append(",");
        }
        if (getMaxTokens() != null) {
            sb.append("MaxTokens: ").append(getMaxTokens()).append(",");
        }
        // Every entry is written with a trailing separator; drop the final one so the output never ends in ",}".
        // The builder always holds at least the opening brace, so the index below is never negative.
        int lastIndex = sb.length() - 1;
        if (sb.charAt(lastIndex) == ',') {
            sb.setLength(lastIndex);
        }
        sb.append("}");
        return sb.toString();
    }

    /**
     * Compares this object with another for equality. Two instances are equal when the other object is an instance of
     * {@code SemanticChunkingConfiguration} and all three properties are either both {@code null} or equal according
     * to {@code equals}.
     *
     * @param obj
     *        The object to compare with; may be {@code null}.
     * @return {@code true} if the objects are equal, {@code false} otherwise.
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        // instanceof is false for null, so this also covers the null case.
        if (!(obj instanceof SemanticChunkingConfiguration)) {
            return false;
        }
        SemanticChunkingConfiguration other = (SemanticChunkingConfiguration) obj;

        // Each property is a mismatch when exactly one side is null or when both are set to different values.
        Integer otherThreshold = other.getBreakpointPercentileThreshold();
        Integer thisThreshold = this.getBreakpointPercentileThreshold();
        if (otherThreshold == null ? thisThreshold != null : !otherThreshold.equals(thisThreshold)) {
            return false;
        }

        Integer otherBufferSize = other.getBufferSize();
        Integer thisBufferSize = this.getBufferSize();
        if (otherBufferSize == null ? thisBufferSize != null : !otherBufferSize.equals(thisBufferSize)) {
            return false;
        }

        Integer otherMaxTokens = other.getMaxTokens();
        Integer thisMaxTokens = this.getMaxTokens();
        if (otherMaxTokens == null ? thisMaxTokens != null : !otherMaxTokens.equals(thisMaxTokens)) {
            return false;
        }
        return true;
    }

    /**
     * Computes a hash code from the three properties, using the usual 31-multiplier accumulation with {@code 0}
     * standing in for a {@code null} property.
     *
     * @return The hash code of this object.
     */
    @Override
    public int hashCode() {
        final int prime = 31;
        int hashCode = 1;

        Integer threshold = getBreakpointPercentileThreshold();
        hashCode = prime * hashCode + (threshold == null ? 0 : threshold.hashCode());

        Integer buffer = getBufferSize();
        hashCode = prime * hashCode + (buffer == null ? 0 : buffer.hashCode());

        Integer tokens = getMaxTokens();
        hashCode = prime * hashCode + (tokens == null ? 0 : tokens.hashCode());

        return hashCode;
    }

    /**
     * Creates a copy of this object by delegating to {@code Object.clone()}.
     *
     * @return A copy of this object.
     * @throws IllegalStateException
     *         If {@code Object.clone()} reports that cloning is not supported, which is not expected because this
     *         class implements {@code Cloneable}.
     */
    @Override
    public SemanticChunkingConfiguration clone() {
        try {
            return (SemanticChunkingConfiguration) super.clone();
        } catch (CloneNotSupportedException e) {
            throw new IllegalStateException("Got a CloneNotSupportedException from Object.clone() " + "even though we're Cloneable!", e);
        }
    }

    /**
     * Marshalls this object by handing it, together with the supplied protocol marshaller, to the singleton
     * {@code SemanticChunkingConfigurationMarshaller}.
     *
     * @param protocolMarshaller
     *        The protocol marshaller passed through to the model marshaller.
     */
    @com.amazonaws.annotation.SdkInternalApi
    @Override
    public void marshall(ProtocolMarshaller protocolMarshaller) {
        com.amazonaws.services.bedrockagent.model.transform.SemanticChunkingConfigurationMarshaller.getInstance().marshall(this, protocolMarshaller);
    }
}