// com.amazonaws.services.kendra.model.SeedUrlConfiguration (Maven / Gradle / Ivy)
// Artifact: aws-java-sdk-kendra
/*
* Copyright 2019-2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance with
* the License. A copy of the License is located at
*
* http://aws.amazon.com/apache2.0
*
* or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
* CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions
* and limitations under the License.
*/
package com.amazonaws.services.kendra.model;
import java.io.Serializable;
import javax.annotation.Generated;
import com.amazonaws.protocol.StructuredPojo;
import com.amazonaws.protocol.ProtocolMarshaller;
/**
 * <p>
 * Provides the configuration information for the seed or starting point URLs to crawl.
 * </p>
 * <p>
 * <i>When selecting websites to index, you must adhere to the Amazon Acceptable Use Policy and all other Amazon
 * terms. Remember that you must only use Amazon Kendra Web Crawler to index your own web pages, or web pages that
 * you have authorization to index.</i>
 * </p>
 *
 * @see "AWS API Documentation for SeedUrlConfiguration"
 */
@Generated("com.amazonaws:aws-java-sdk-code-generator")
public class SeedUrlConfiguration implements Serializable, Cloneable, StructuredPojo {

    /**
     * <p>
     * The list of seed or starting point URLs of the websites you want to crawl.
     * </p>
     * <p>
     * The list can include a maximum of 100 seed URLs.
     * </p>
     */
    private java.util.List<String> seedUrls;

    /**
     * <p>
     * You can choose one of the following modes:
     * </p>
     * <ul>
     * <li>{@code HOST_ONLY}&mdash;crawl only the website host names. For example, if the seed URL is
     * "abc.example.com", then only URLs with host name "abc.example.com" are crawled.</li>
     * <li>{@code SUBDOMAINS}&mdash;crawl the website host names with subdomains. For example, if the seed URL is
     * "abc.example.com", then "a.abc.example.com" and "b.abc.example.com" are also crawled.</li>
     * <li>{@code EVERYTHING}&mdash;crawl the website host names with subdomains and other domains that the web
     * pages link to.</li>
     * </ul>
     * <p>
     * The default mode is set to {@code HOST_ONLY}.
     * </p>
     */
    private String webCrawlerMode;

    /**
     * <p>
     * The list of seed or starting point URLs of the websites you want to crawl.
     * </p>
     * <p>
     * The list can include a maximum of 100 seed URLs.
     * </p>
     *
     * @return The list held by this object (not a copy), or {@code null} if no seed URLs have been set.
     */
    public java.util.List<String> getSeedUrls() {
        return seedUrls;
    }

    /**
     * <p>
     * Replaces the list of seed or starting point URLs of the websites you want to crawl.
     * </p>
     * <p>
     * The list can include a maximum of 100 seed URLs.
     * </p>
     *
     * @param seedUrls
     *        The seed URLs to store. The elements are copied into a new list, so later changes to the supplied
     *        collection are not reflected here. Passing {@code null} clears the stored list.
     */
    public void setSeedUrls(java.util.Collection<String> seedUrls) {
        if (seedUrls == null) {
            this.seedUrls = null;
            return;
        }
        this.seedUrls = new java.util.ArrayList<String>(seedUrls);
    }

    /**
     * <p>
     * Appends to the list of seed or starting point URLs of the websites you want to crawl.
     * </p>
     * <p>
     * The list can include a maximum of 100 seed URLs.
     * </p>
     * <p>
     * <b>NOTE:</b> This method appends the values to the existing list (if any). Use
     * {@link #setSeedUrls(java.util.Collection)} or {@link #withSeedUrls(java.util.Collection)} if you want to
     * override the existing values.
     * </p>
     *
     * @param seedUrls
     *        The seed URLs to append, in order.
     * @return Returns a reference to this object so that method calls can be chained together.
     */
    public SeedUrlConfiguration withSeedUrls(String... seedUrls) {
        if (this.seedUrls == null) {
            setSeedUrls(new java.util.ArrayList<String>(seedUrls.length));
        }
        java.util.Collections.addAll(this.seedUrls, seedUrls);
        return this;
    }

    /**
     * <p>
     * Replaces the list of seed or starting point URLs of the websites you want to crawl.
     * </p>
     * <p>
     * The list can include a maximum of 100 seed URLs.
     * </p>
     *
     * @param seedUrls
     *        The seed URLs to store; handled exactly as in {@link #setSeedUrls(java.util.Collection)}.
     * @return Returns a reference to this object so that method calls can be chained together.
     */
    public SeedUrlConfiguration withSeedUrls(java.util.Collection<String> seedUrls) {
        setSeedUrls(seedUrls);
        return this;
    }

    /**
     * <p>
     * Sets the crawl mode. You can choose one of the following modes:
     * </p>
     * <ul>
     * <li>{@code HOST_ONLY}&mdash;crawl only the website host names. For example, if the seed URL is
     * "abc.example.com", then only URLs with host name "abc.example.com" are crawled.</li>
     * <li>{@code SUBDOMAINS}&mdash;crawl the website host names with subdomains. For example, if the seed URL is
     * "abc.example.com", then "a.abc.example.com" and "b.abc.example.com" are also crawled.</li>
     * <li>{@code EVERYTHING}&mdash;crawl the website host names with subdomains and other domains that the web
     * pages link to.</li>
     * </ul>
     * <p>
     * The default mode is set to {@code HOST_ONLY}.
     * </p>
     *
     * @param webCrawlerMode
     *        The mode name to store. The value is stored as given; it is not validated by this method.
     * @see WebCrawlerMode
     */
    public void setWebCrawlerMode(String webCrawlerMode) {
        this.webCrawlerMode = webCrawlerMode;
    }

    /**
     * <p>
     * Returns the crawl mode. The mode is one of the following:
     * </p>
     * <ul>
     * <li>{@code HOST_ONLY}&mdash;crawl only the website host names. For example, if the seed URL is
     * "abc.example.com", then only URLs with host name "abc.example.com" are crawled.</li>
     * <li>{@code SUBDOMAINS}&mdash;crawl the website host names with subdomains. For example, if the seed URL is
     * "abc.example.com", then "a.abc.example.com" and "b.abc.example.com" are also crawled.</li>
     * <li>{@code EVERYTHING}&mdash;crawl the website host names with subdomains and other domains that the web
     * pages link to.</li>
     * </ul>
     * <p>
     * The default mode is set to {@code HOST_ONLY}.
     * </p>
     *
     * @return The stored mode name, or {@code null} if none has been set on this object.
     * @see WebCrawlerMode
     */
    public String getWebCrawlerMode() {
        return this.webCrawlerMode;
    }

    /**
     * <p>
     * Sets the crawl mode and returns this object. You can choose one of the following modes:
     * </p>
     * <ul>
     * <li>{@code HOST_ONLY}&mdash;crawl only the website host names. For example, if the seed URL is
     * "abc.example.com", then only URLs with host name "abc.example.com" are crawled.</li>
     * <li>{@code SUBDOMAINS}&mdash;crawl the website host names with subdomains. For example, if the seed URL is
     * "abc.example.com", then "a.abc.example.com" and "b.abc.example.com" are also crawled.</li>
     * <li>{@code EVERYTHING}&mdash;crawl the website host names with subdomains and other domains that the web
     * pages link to.</li>
     * </ul>
     * <p>
     * The default mode is set to {@code HOST_ONLY}.
     * </p>
     *
     * @param webCrawlerMode
     *        The mode name to store; handled exactly as in {@link #setWebCrawlerMode(String)}.
     * @return Returns a reference to this object so that method calls can be chained together.
     * @see WebCrawlerMode
     */
    public SeedUrlConfiguration withWebCrawlerMode(String webCrawlerMode) {
        setWebCrawlerMode(webCrawlerMode);
        return this;
    }

    /**
     * <p>
     * Sets the crawl mode from the enum and returns this object. You can choose one of the following modes:
     * </p>
     * <ul>
     * <li>{@code HOST_ONLY}&mdash;crawl only the website host names. For example, if the seed URL is
     * "abc.example.com", then only URLs with host name "abc.example.com" are crawled.</li>
     * <li>{@code SUBDOMAINS}&mdash;crawl the website host names with subdomains. For example, if the seed URL is
     * "abc.example.com", then "a.abc.example.com" and "b.abc.example.com" are also crawled.</li>
     * <li>{@code EVERYTHING}&mdash;crawl the website host names with subdomains and other domains that the web
     * pages link to.</li>
     * </ul>
     * <p>
     * The default mode is set to {@code HOST_ONLY}.
     * </p>
     *
     * @param webCrawlerMode
     *        The mode to store, recorded via its {@code toString()} value. Passing {@code null} clears the stored
     *        mode, matching what {@link #withWebCrawlerMode(String)} does for a {@code null} argument.
     * @return Returns a reference to this object so that method calls can be chained together.
     * @see WebCrawlerMode
     */
    public SeedUrlConfiguration withWebCrawlerMode(WebCrawlerMode webCrawlerMode) {
        // Null-tolerant so both overloads agree on how to clear the value instead of one of them throwing.
        this.webCrawlerMode = (webCrawlerMode == null) ? null : webCrawlerMode.toString();
        return this;
    }

    /**
     * Returns a string representation of this object. This is useful for testing and debugging. Members that are
     * {@code null} are left out of the output.
     *
     * @return A string representation of this object.
     *
     * @see java.lang.Object#toString()
     */
    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder();
        sb.append("{");
        if (getSeedUrls() != null) {
            sb.append("SeedUrls: ").append(getSeedUrls()).append(",");
        }
        if (getWebCrawlerMode() != null) {
            sb.append("WebCrawlerMode: ").append(getWebCrawlerMode());
        }
        sb.append("}");
        return sb.toString();
    }

    /**
     * Compares this object with another for equality. Two instances are equal when their seed URL lists are equal
     * (or both {@code null}) and their crawl modes are equal (or both {@code null}).
     *
     * @param obj
     *        The object to compare with; may be {@code null}.
     * @return {@code true} if {@code obj} is a {@code SeedUrlConfiguration} with equal members.
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        // instanceof is false for null, so this also covers the null argument.
        if (!(obj instanceof SeedUrlConfiguration)) {
            return false;
        }
        SeedUrlConfiguration other = (SeedUrlConfiguration) obj;
        return sameValue(other.getSeedUrls(), this.getSeedUrls())
                && sameValue(other.getWebCrawlerMode(), this.getWebCrawlerMode());
    }

    /** Null-safe equality: two nulls match, a null never matches a non-null, otherwise defers to equals. */
    private static boolean sameValue(Object left, Object right) {
        return (left == null) ? (right == null) : left.equals(right);
    }

    /**
     * Computes a hash code from the seed URL list and the crawl mode, consistent with {@link #equals(Object)}.
     *
     * @return The hash code; a {@code null} member contributes 0.
     */
    @Override
    public int hashCode() {
        final int prime = 31;
        int hashCode = 1;
        hashCode = prime * hashCode + ((getSeedUrls() == null) ? 0 : getSeedUrls().hashCode());
        hashCode = prime * hashCode + ((getWebCrawlerMode() == null) ? 0 : getWebCrawlerMode().hashCode());
        return hashCode;
    }

    /**
     * Creates a copy of this object. The seed URL list is copied into a new list so that appending to the clone
     * through {@link #withSeedUrls(String...)} does not also modify this object's list.
     *
     * @return A copy of this object that is equal to it.
     */
    @Override
    public SeedUrlConfiguration clone() {
        try {
            SeedUrlConfiguration copy = (SeedUrlConfiguration) super.clone();
            // Object.clone() copies only the list reference; give the copy its own list.
            if (this.seedUrls != null) {
                copy.seedUrls = new java.util.ArrayList<String>(this.seedUrls);
            }
            return copy;
        } catch (CloneNotSupportedException e) {
            throw new IllegalStateException("Got a CloneNotSupportedException from Object.clone() " + "even though we're Cloneable!", e);
        }
    }

    /**
     * Marshalls this object by delegating to the shared {@code SeedUrlConfigurationMarshaller} instance.
     *
     * @param protocolMarshaller
     *        The marshaller handed on to {@code SeedUrlConfigurationMarshaller}.
     */
    @com.amazonaws.annotation.SdkInternalApi
    @Override
    public void marshall(ProtocolMarshaller protocolMarshaller) {
        com.amazonaws.services.kendra.model.transform.SeedUrlConfigurationMarshaller.getInstance().marshall(this, protocolMarshaller);
    }
}