// io.github.cdklabs.generative_ai_cdk_constructs.bedrock.WebCrawlerDataSource (Maven / Gradle / Ivy)
package io.github.cdklabs.generative_ai_cdk_constructs.bedrock;
/**
 * (experimental) Sets up a web crawler data source to be added to a knowledge base.
 * <p>
 * Instances are created either through the public constructor or through the fluent
 * {@link Builder}. Property getters read their values through the jsii kernel.
 */
@javax.annotation.Generated(value = "jsii-pacmak/1.103.1 (build bef2dea)", date = "2024-10-07T16:14:00.755Z")
@software.amazon.jsii.Stability(software.amazon.jsii.Stability.Level.Experimental)
@software.amazon.jsii.Jsii(module = io.github.cdklabs.generative_ai_cdk_constructs.$Module.class, fqn = "@cdklabs/generative-ai-cdk-constructs.bedrock.WebCrawlerDataSource")
public class WebCrawlerDataSource extends io.github.cdklabs.generative_ai_cdk_constructs.bedrock.DataSourceNew {

    /**
     * Forwards an existing jsii object reference to the superclass constructor.
     *
     * @param objRef the jsii object reference handed to the superclass.
     */
    protected WebCrawlerDataSource(final software.amazon.jsii.JsiiObjectRef objRef) {
        super(objRef);
    }

    /**
     * Forwards the requested jsii initialization mode to the superclass constructor.
     *
     * @param initializationMode the initialization mode handed to the superclass.
     */
    protected WebCrawlerDataSource(final software.amazon.jsii.JsiiObject.InitializationMode initializationMode) {
        super(initializationMode);
    }

    /**
     * Creates a new web crawler data source and registers it with the jsii engine.
     *
     * @param scope This parameter is required.
     * @param id This parameter is required.
     * @param props This parameter is required.
     * @throws NullPointerException if {@code scope}, {@code id} or {@code props} is {@code null}.
     */
    @software.amazon.jsii.Stability(software.amazon.jsii.Stability.Level.Experimental)
    public WebCrawlerDataSource(final @org.jetbrains.annotations.NotNull software.constructs.Construct scope, final @org.jetbrains.annotations.NotNull java.lang.String id, final @org.jetbrains.annotations.NotNull io.github.cdklabs.generative_ai_cdk_constructs.bedrock.WebCrawlerDataSourceProps props) {
        super(software.amazon.jsii.JsiiObject.InitializationMode.JSII);
        // Each argument is null-checked before the constructor arguments are handed to the jsii engine.
        software.amazon.jsii.JsiiEngine.getInstance().createNewObject(this, new Object[] { java.util.Objects.requireNonNull(scope, "scope is required"), java.util.Objects.requireNonNull(id, "id is required"), java.util.Objects.requireNonNull(props, "props is required") });
    }

    /**
     * (experimental) The max rate at which pages are crawled.
     */
    @software.amazon.jsii.Stability(software.amazon.jsii.Stability.Level.Experimental)
    public @org.jetbrains.annotations.NotNull java.lang.Number getCrawlingRate() {
        return software.amazon.jsii.Kernel.get(this, "crawlingRate", software.amazon.jsii.NativeType.forClass(java.lang.Number.class));
    }

    /**
     * (experimental) The unique identifier of the data source.
     * <p>
     * Example: {@code 'JHUEVXUZMU'}
     */
    @Override
    @software.amazon.jsii.Stability(software.amazon.jsii.Stability.Level.Experimental)
    public @org.jetbrains.annotations.NotNull java.lang.String getDataSourceId() {
        return software.amazon.jsii.Kernel.get(this, "dataSourceId", software.amazon.jsii.NativeType.forClass(java.lang.String.class));
    }

    /**
     * (experimental) The name of the data source.
     */
    @Override
    @software.amazon.jsii.Stability(software.amazon.jsii.Stability.Level.Experimental)
    public @org.jetbrains.annotations.NotNull java.lang.String getDataSourceName() {
        return software.amazon.jsii.Kernel.get(this, "dataSourceName", software.amazon.jsii.NativeType.forClass(java.lang.String.class));
    }

    /**
     * (experimental) The type of data source.
     */
    @Override
    @software.amazon.jsii.Stability(software.amazon.jsii.Stability.Level.Experimental)
    public @org.jetbrains.annotations.NotNull io.github.cdklabs.generative_ai_cdk_constructs.bedrock.DataSourceType getDataSourceType() {
        return software.amazon.jsii.Kernel.get(this, "dataSourceType", software.amazon.jsii.NativeType.forClass(io.github.cdklabs.generative_ai_cdk_constructs.bedrock.DataSourceType.class));
    }

    /**
     * (experimental) The knowledge base associated with the data source.
     */
    @Override
    @software.amazon.jsii.Stability(software.amazon.jsii.Stability.Level.Experimental)
    public @org.jetbrains.annotations.NotNull io.github.cdklabs.generative_ai_cdk_constructs.bedrock.IKnowledgeBase getKnowledgeBase() {
        return software.amazon.jsii.Kernel.get(this, "knowledgeBase", software.amazon.jsii.NativeType.forClass(io.github.cdklabs.generative_ai_cdk_constructs.bedrock.IKnowledgeBase.class));
    }

    /**
     * (experimental) The site URLs of this web crawler data source.
     *
     * @return an unmodifiable list of the URL strings held in the {@code siteUrls} property.
     */
    @software.amazon.jsii.Stability(software.amazon.jsii.Stability.Level.Experimental)
    public @org.jetbrains.annotations.NotNull java.util.List<java.lang.String> getSiteUrls() {
        // NOTE(review): presumably these are the URLs supplied through Builder#sourceUrls - confirm upstream.
        return java.util.Collections.unmodifiableList(software.amazon.jsii.Kernel.get(this, "siteUrls", software.amazon.jsii.NativeType.listOf(software.amazon.jsii.NativeType.forClass(java.lang.String.class))));
    }

    /**
     * (experimental) The KMS key to use to encrypt the data source.
     */
    @Override
    @software.amazon.jsii.Stability(software.amazon.jsii.Stability.Level.Experimental)
    public @org.jetbrains.annotations.Nullable software.amazon.awscdk.services.kms.IKey getKmsKey() {
        return software.amazon.jsii.Kernel.get(this, "kmsKey", software.amazon.jsii.NativeType.forClass(software.amazon.awscdk.services.kms.IKey.class));
    }

    /**
     * (experimental) A fluent builder for {@link io.github.cdklabs.generative_ai_cdk_constructs.bedrock.WebCrawlerDataSource}.
     * <p>
     * Every setter forwards its argument to an internal
     * {@link io.github.cdklabs.generative_ai_cdk_constructs.bedrock.WebCrawlerDataSourceProps.Builder}
     * and returns {@code this}; {@link #build()} passes the built props, together with the
     * scope and id given to {@link #create}, to the public constructor.
     */
    @software.amazon.jsii.Stability(software.amazon.jsii.Stability.Level.Experimental)
    public static final class Builder implements software.amazon.jsii.Builder<io.github.cdklabs.generative_ai_cdk_constructs.bedrock.WebCrawlerDataSource> {

        /**
         * @param scope This parameter is required.
         * @param id This parameter is required.
         * @return a new instance of {@link Builder}.
         */
        @software.amazon.jsii.Stability(software.amazon.jsii.Stability.Level.Experimental)
        public static Builder create(final software.constructs.Construct scope, final java.lang.String id) {
            return new Builder(scope, id);
        }

        private final software.constructs.Construct scope;
        private final java.lang.String id;
        private final io.github.cdklabs.generative_ai_cdk_constructs.bedrock.WebCrawlerDataSourceProps.Builder props;

        /**
         * Stores the scope and id for {@link #build()} and starts an empty props builder.
         */
        private Builder(final software.constructs.Construct scope, final java.lang.String id) {
            this.scope = scope;
            this.id = id;
            this.props = new io.github.cdklabs.generative_ai_cdk_constructs.bedrock.WebCrawlerDataSourceProps.Builder();
        }

        /**
         * (experimental) The chunking strategy to use for splitting your documents or content.
         * <p>
         * The chunks are then converted to embeddings and written to the vector
         * index allowing for similarity search and retrieval of the content.
         * <p>
         * Default: ChunkingStrategy.DEFAULT
         *
         * @param chunkingStrategy The chunking strategy to use for splitting your documents or content. This parameter is required.
         * @return {@code this}
         */
        @software.amazon.jsii.Stability(software.amazon.jsii.Stability.Level.Experimental)
        public Builder chunkingStrategy(final io.github.cdklabs.generative_ai_cdk_constructs.bedrock.ChunkingStrategy chunkingStrategy) {
            this.props.chunkingStrategy(chunkingStrategy);
            return this;
        }

        /**
         * (experimental) The custom transformation strategy to use.
         * <p>
         * Default: - No custom transformation is used.
         *
         * @param customTransformation The custom transformation strategy to use. This parameter is required.
         * @return {@code this}
         */
        @software.amazon.jsii.Stability(software.amazon.jsii.Stability.Level.Experimental)
        public Builder customTransformation(final io.github.cdklabs.generative_ai_cdk_constructs.bedrock.CustomTransformation customTransformation) {
            this.props.customTransformation(customTransformation);
            return this;
        }

        /**
         * (experimental) The data deletion policy to apply to the data source.
         * <p>
         * Default: - Sets the data deletion policy to the default of the data source type.
         *
         * @param dataDeletionPolicy The data deletion policy to apply to the data source. This parameter is required.
         * @return {@code this}
         */
        @software.amazon.jsii.Stability(software.amazon.jsii.Stability.Level.Experimental)
        public Builder dataDeletionPolicy(final io.github.cdklabs.generative_ai_cdk_constructs.bedrock.DataDeletionPolicy dataDeletionPolicy) {
            this.props.dataDeletionPolicy(dataDeletionPolicy);
            return this;
        }

        /**
         * (experimental) The name of the data source.
         * <p>
         * Default: - A new name will be generated.
         *
         * @param dataSourceName The name of the data source. This parameter is required.
         * @return {@code this}
         */
        @software.amazon.jsii.Stability(software.amazon.jsii.Stability.Level.Experimental)
        public Builder dataSourceName(final java.lang.String dataSourceName) {
            this.props.dataSourceName(dataSourceName);
            return this;
        }

        /**
         * (experimental) A description of the data source.
         * <p>
         * Default: - No description is provided.
         *
         * @param description A description of the data source. This parameter is required.
         * @return {@code this}
         */
        @software.amazon.jsii.Stability(software.amazon.jsii.Stability.Level.Experimental)
        public Builder description(final java.lang.String description) {
            this.props.description(description);
            return this;
        }

        /**
         * (experimental) The KMS key to use to encrypt the data source.
         * <p>
         * Default: - Service owned and managed key.
         *
         * @param kmsKey The KMS key to use to encrypt the data source. This parameter is required.
         * @return {@code this}
         */
        @software.amazon.jsii.Stability(software.amazon.jsii.Stability.Level.Experimental)
        public Builder kmsKey(final software.amazon.awscdk.services.kms.IKey kmsKey) {
            this.props.kmsKey(kmsKey);
            return this;
        }

        /**
         * (experimental) The parsing strategy to use.
         * <p>
         * Default: - No parsing strategy is used.
         *
         * @param parsingStrategy The parsing strategy to use. This parameter is required.
         * @return {@code this}
         */
        @software.amazon.jsii.Stability(software.amazon.jsii.Stability.Level.Experimental)
        public Builder parsingStrategy(final io.github.cdklabs.generative_ai_cdk_constructs.bedrock.ParsingStategy parsingStrategy) {
            this.props.parsingStrategy(parsingStrategy);
            return this;
        }

        /**
         * (experimental) The source urls in the format {@code https://www.sitename.com}. Maximum of 100 URLs.
         *
         * @param sourceUrls The source urls in the format {@code https://www.sitename.com}. Maximum of 100 URLs. This parameter is required.
         * @return {@code this}
         */
        @software.amazon.jsii.Stability(software.amazon.jsii.Stability.Level.Experimental)
        public Builder sourceUrls(final java.util.List<java.lang.String> sourceUrls) {
            this.props.sourceUrls(sourceUrls);
            return this;
        }

        /**
         * (experimental) The max rate at which pages are crawled, up to 300 per minute per host.
         * <p>
         * Higher values will decrease sync time but increase the load on the host.
         * <p>
         * Default: 300
         *
         * @param crawlingRate The max rate at which pages are crawled, up to 300 per minute per host. This parameter is required.
         * @return {@code this}
         */
        @software.amazon.jsii.Stability(software.amazon.jsii.Stability.Level.Experimental)
        public Builder crawlingRate(final java.lang.Number crawlingRate) {
            this.props.crawlingRate(crawlingRate);
            return this;
        }

        /**
         * (experimental) The scope of the crawling.
         * <p>
         * Default: - CrawlingScope.DEFAULT
         *
         * @param crawlingScope The scope of the crawling. This parameter is required.
         * @return {@code this}
         */
        @software.amazon.jsii.Stability(software.amazon.jsii.Stability.Level.Experimental)
        public Builder crawlingScope(final io.github.cdklabs.generative_ai_cdk_constructs.bedrock.CrawlingScope crawlingScope) {
            this.props.crawlingScope(crawlingScope);
            return this;
        }

        /**
         * (experimental) The filters (regular expression patterns) for the crawling.
         * <p>
         * If there's a conflict, the exclude pattern takes precedence.
         * <p>
         * Default: None
         *
         * @param filters The filters (regular expression patterns) for the crawling. This parameter is required.
         * @return {@code this}
         */
        @software.amazon.jsii.Stability(software.amazon.jsii.Stability.Level.Experimental)
        public Builder filters(final io.github.cdklabs.generative_ai_cdk_constructs.bedrock.CrawlingFilters filters) {
            this.props.filters(filters);
            return this;
        }

        /**
         * (experimental) The knowledge base to associate with the data source.
         *
         * @param knowledgeBase The knowledge base to associate with the data source. This parameter is required.
         * @return {@code this}
         */
        @software.amazon.jsii.Stability(software.amazon.jsii.Stability.Level.Experimental)
        public Builder knowledgeBase(final io.github.cdklabs.generative_ai_cdk_constructs.bedrock.IKnowledgeBase knowledgeBase) {
            this.props.knowledgeBase(knowledgeBase);
            return this;
        }

        /**
         * @return a newly built instance of {@link io.github.cdklabs.generative_ai_cdk_constructs.bedrock.WebCrawlerDataSource}.
         */
        @software.amazon.jsii.Stability(software.amazon.jsii.Stability.Level.Experimental)
        @Override
        public io.github.cdklabs.generative_ai_cdk_constructs.bedrock.WebCrawlerDataSource build() {
            return new io.github.cdklabs.generative_ai_cdk_constructs.bedrock.WebCrawlerDataSource(
                this.scope,
                this.id,
                this.props.build()
            );
        }
    }
}