com.amazonaws.services.kendra.model.WebCrawlerConfiguration Maven / Gradle / Ivy
Show all versions of aws-java-sdk-kendra Show documentation
/*
* Copyright 2019-2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance with
* the License. A copy of the License is located at
*
* http://aws.amazon.com/apache2.0
*
* or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
* CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions
* and limitations under the License.
*/
package com.amazonaws.services.kendra.model;
import java.io.Serializable;
import javax.annotation.Generated;
import com.amazonaws.protocol.StructuredPojo;
import com.amazonaws.protocol.ProtocolMarshaller;
/**
*
* Provides the configuration information required for Amazon Kendra Web Crawler.
*
*
* @see AWS API
* Documentation
*/
@Generated("com.amazonaws:aws-java-sdk-code-generator")
public class WebCrawlerConfiguration implements Serializable, Cloneable, StructuredPojo {
/**
*
* Specifies the seed or starting point URLs of the websites or the sitemap URLs of the websites you want to crawl.
*
*
* You can include website subdomains. You can list up to 100 seed URLs and up to three sitemap URLs.
*
*
* You can only crawl websites that use the secure communication protocol, Hypertext Transfer Protocol Secure
* (HTTPS). If you receive an error when crawling a website, it could be that the website is blocked from crawling.
*
*
* When selecting websites to index, you must adhere to the Amazon
* Acceptable Use Policy and all other Amazon terms. Remember that you must only use Amazon Kendra Web Crawler
* to index your own web pages, or web pages that you have authorization to index.
*
*/
private Urls urls;
/**
*
* The 'depth' or number of levels from the seed level to crawl. For example, the seed URL page is depth 1 and any
* hyperlinks on this page that are also crawled are depth 2.
*
*/
private Integer crawlDepth;
/**
*
* The maximum number of URLs on a web page to include when crawling a website. This number is per web page.
*
*
* As a website’s web pages are crawled, any URLs the web pages link to are also crawled. URLs on a web page are
* crawled in order of appearance.
*
*
* The default maximum links per page is 100.
*
*/
private Integer maxLinksPerPage;
/**
*
* The maximum size (in MB) of a web page or attachment to crawl.
*
*
* Files larger than this size (in MB) are skipped/not crawled.
*
*
* The default maximum size of a web page or attachment is set to 50 MB.
*
*/
private Float maxContentSizePerPageInMegaBytes;
/**
*
* The maximum number of URLs crawled per website host per minute.
*
*
* A minimum of one URL is required.
*
*
* The default maximum number of URLs crawled per website host per minute is 300.
*
*/
private Integer maxUrlsPerMinuteCrawlRate;
/**
*
* A list of regular expression patterns to include certain URLs to crawl. URLs that match the patterns are included
* in the index. URLs that don't match the patterns are excluded from the index. If a URL matches both an inclusion
* and exclusion pattern, the exclusion pattern takes precedence and the URL file isn't included in the index.
*
*/
private java.util.List urlInclusionPatterns;
/**
*
* A list of regular expression patterns to exclude certain URLs to crawl. URLs that match the patterns are excluded
* from the index. URLs that don't match the patterns are included in the index. If a URL matches both an inclusion
* and exclusion pattern, the exclusion pattern takes precedence and the URL file isn't included in the index.
*
*/
private java.util.List urlExclusionPatterns;
/**
*
* Configuration information required to connect to your internal websites via a web proxy.
*
*
* You must provide the website host name and port number. For example, the host name of
* https://a.example.com/page1.html is "a.example.com" and the port is 443, the standard port for HTTPS.
*
*
* Web proxy credentials are optional and you can use them to connect to a web proxy server that requires basic
* authentication. To store web proxy credentials, you use a secret in Secrets Manager.
*
*/
private ProxyConfiguration proxyConfiguration;
/**
*
* Configuration information required to connect to websites using authentication.
*
*
* You can connect to websites using basic authentication of user name and password. You use a secret in Secrets Manager to store your
* authentication credentials.
*
*
* You must provide the website host name and port number. For example, the host name of
* https://a.example.com/page1.html is "a.example.com" and the port is 443, the standard port for HTTPS.
*
*/
private AuthenticationConfiguration authenticationConfiguration;
/**
*
* Specifies the seed or starting point URLs of the websites or the sitemap URLs of the websites you want to crawl.
*
*
* You can include website subdomains. You can list up to 100 seed URLs and up to three sitemap URLs.
*
*
* You can only crawl websites that use the secure communication protocol, Hypertext Transfer Protocol Secure
* (HTTPS). If you receive an error when crawling a website, it could be that the website is blocked from crawling.
*
*
* When selecting websites to index, you must adhere to the Amazon
* Acceptable Use Policy and all other Amazon terms. Remember that you must only use Amazon Kendra Web Crawler
* to index your own web pages, or web pages that you have authorization to index.
*
*
* @param urls
* Specifies the seed or starting point URLs of the websites or the sitemap URLs of the websites you want to
* crawl.
*
* You can include website subdomains. You can list up to 100 seed URLs and up to three sitemap URLs.
*
*
* You can only crawl websites that use the secure communication protocol, Hypertext Transfer Protocol Secure
* (HTTPS). If you receive an error when crawling a website, it could be that the website is blocked from
* crawling.
*
*
* When selecting websites to index, you must adhere to the Amazon
* Acceptable Use Policy and all other Amazon terms. Remember that you must only use Amazon Kendra Web
* Crawler to index your own web pages, or web pages that you have authorization to index.
*/
public void setUrls(Urls urls) {
this.urls = urls;
}
/**
*
* Specifies the seed or starting point URLs of the websites or the sitemap URLs of the websites you want to crawl.
*
*
* You can include website subdomains. You can list up to 100 seed URLs and up to three sitemap URLs.
*
*
* You can only crawl websites that use the secure communication protocol, Hypertext Transfer Protocol Secure
* (HTTPS). If you receive an error when crawling a website, it could be that the website is blocked from crawling.
*
*
* When selecting websites to index, you must adhere to the Amazon
* Acceptable Use Policy and all other Amazon terms. Remember that you must only use Amazon Kendra Web Crawler
* to index your own web pages, or web pages that you have authorization to index.
*
*
* @return Specifies the seed or starting point URLs of the websites or the sitemap URLs of the websites you want to
* crawl.
*
* You can include website subdomains. You can list up to 100 seed URLs and up to three sitemap URLs.
*
*
* You can only crawl websites that use the secure communication protocol, Hypertext Transfer Protocol
* Secure (HTTPS). If you receive an error when crawling a website, it could be that the website is blocked
* from crawling.
*
*
* When selecting websites to index, you must adhere to the Amazon
* Acceptable Use Policy and all other Amazon terms. Remember that you must only use Amazon Kendra Web
* Crawler to index your own web pages, or web pages that you have authorization to index.
*/
public Urls getUrls() {
return this.urls;
}
/**
*
* Specifies the seed or starting point URLs of the websites or the sitemap URLs of the websites you want to crawl.
*
*
* You can include website subdomains. You can list up to 100 seed URLs and up to three sitemap URLs.
*
*
* You can only crawl websites that use the secure communication protocol, Hypertext Transfer Protocol Secure
* (HTTPS). If you receive an error when crawling a website, it could be that the website is blocked from crawling.
*
*
* When selecting websites to index, you must adhere to the Amazon
* Acceptable Use Policy and all other Amazon terms. Remember that you must only use Amazon Kendra Web Crawler
* to index your own web pages, or web pages that you have authorization to index.
*
*
* @param urls
* Specifies the seed or starting point URLs of the websites or the sitemap URLs of the websites you want to
* crawl.
*
* You can include website subdomains. You can list up to 100 seed URLs and up to three sitemap URLs.
*
*
* You can only crawl websites that use the secure communication protocol, Hypertext Transfer Protocol Secure
* (HTTPS). If you receive an error when crawling a website, it could be that the website is blocked from
* crawling.
*
*
* When selecting websites to index, you must adhere to the Amazon
* Acceptable Use Policy and all other Amazon terms. Remember that you must only use Amazon Kendra Web
* Crawler to index your own web pages, or web pages that you have authorization to index.
* @return Returns a reference to this object so that method calls can be chained together.
*/
public WebCrawlerConfiguration withUrls(Urls urls) {
setUrls(urls);
return this;
}
/**
*
* The 'depth' or number of levels from the seed level to crawl. For example, the seed URL page is depth 1 and any
* hyperlinks on this page that are also crawled are depth 2.
*
*
* @param crawlDepth
* The 'depth' or number of levels from the seed level to crawl. For example, the seed URL page is depth 1
* and any hyperlinks on this page that are also crawled are depth 2.
*/
public void setCrawlDepth(Integer crawlDepth) {
this.crawlDepth = crawlDepth;
}
/**
*
* The 'depth' or number of levels from the seed level to crawl. For example, the seed URL page is depth 1 and any
* hyperlinks on this page that are also crawled are depth 2.
*
*
* @return The 'depth' or number of levels from the seed level to crawl. For example, the seed URL page is depth 1
* and any hyperlinks on this page that are also crawled are depth 2.
*/
public Integer getCrawlDepth() {
return this.crawlDepth;
}
/**
*
* The 'depth' or number of levels from the seed level to crawl. For example, the seed URL page is depth 1 and any
* hyperlinks on this page that are also crawled are depth 2.
*
*
* @param crawlDepth
* The 'depth' or number of levels from the seed level to crawl. For example, the seed URL page is depth 1
* and any hyperlinks on this page that are also crawled are depth 2.
* @return Returns a reference to this object so that method calls can be chained together.
*/
public WebCrawlerConfiguration withCrawlDepth(Integer crawlDepth) {
setCrawlDepth(crawlDepth);
return this;
}
/**
*
* The maximum number of URLs on a web page to include when crawling a website. This number is per web page.
*
*
* As a website’s web pages are crawled, any URLs the web pages link to are also crawled. URLs on a web page are
* crawled in order of appearance.
*
*
* The default maximum links per page is 100.
*
*
* @param maxLinksPerPage
* The maximum number of URLs on a web page to include when crawling a website. This number is per web
* page.
*
* As a website’s web pages are crawled, any URLs the web pages link to are also crawled. URLs on a web page
* are crawled in order of appearance.
*
*
* The default maximum links per page is 100.
*/
public void setMaxLinksPerPage(Integer maxLinksPerPage) {
this.maxLinksPerPage = maxLinksPerPage;
}
/**
*
* The maximum number of URLs on a web page to include when crawling a website. This number is per web page.
*
*
* As a website’s web pages are crawled, any URLs the web pages link to are also crawled. URLs on a web page are
* crawled in order of appearance.
*
*
* The default maximum links per page is 100.
*
*
* @return The maximum number of URLs on a web page to include when crawling a website. This number is per web
* page.
*
* As a website’s web pages are crawled, any URLs the web pages link to are also crawled. URLs on a web page
* are crawled in order of appearance.
*
*
* The default maximum links per page is 100.
*/
public Integer getMaxLinksPerPage() {
return this.maxLinksPerPage;
}
/**
*
* The maximum number of URLs on a web page to include when crawling a website. This number is per web page.
*
*
* As a website’s web pages are crawled, any URLs the web pages link to are also crawled. URLs on a web page are
* crawled in order of appearance.
*
*
* The default maximum links per page is 100.
*
*
* @param maxLinksPerPage
* The maximum number of URLs on a web page to include when crawling a website. This number is per web
* page.
*
* As a website’s web pages are crawled, any URLs the web pages link to are also crawled. URLs on a web page
* are crawled in order of appearance.
*
*
* The default maximum links per page is 100.
* @return Returns a reference to this object so that method calls can be chained together.
*/
public WebCrawlerConfiguration withMaxLinksPerPage(Integer maxLinksPerPage) {
setMaxLinksPerPage(maxLinksPerPage);
return this;
}
/**
*
* The maximum size (in MB) of a web page or attachment to crawl.
*
*
* Files larger than this size (in MB) are skipped/not crawled.
*
*
* The default maximum size of a web page or attachment is set to 50 MB.
*
*
* @param maxContentSizePerPageInMegaBytes
* The maximum size (in MB) of a web page or attachment to crawl.
*
* Files larger than this size (in MB) are skipped/not crawled.
*
*
* The default maximum size of a web page or attachment is set to 50 MB.
*/
public void setMaxContentSizePerPageInMegaBytes(Float maxContentSizePerPageInMegaBytes) {
this.maxContentSizePerPageInMegaBytes = maxContentSizePerPageInMegaBytes;
}
/**
*
* The maximum size (in MB) of a web page or attachment to crawl.
*
*
* Files larger than this size (in MB) are skipped/not crawled.
*
*
* The default maximum size of a web page or attachment is set to 50 MB.
*
*
* @return The maximum size (in MB) of a web page or attachment to crawl.
*
* Files larger than this size (in MB) are skipped/not crawled.
*
*
* The default maximum size of a web page or attachment is set to 50 MB.
*/
public Float getMaxContentSizePerPageInMegaBytes() {
return this.maxContentSizePerPageInMegaBytes;
}
/**
*
* The maximum size (in MB) of a web page or attachment to crawl.
*
*
* Files larger than this size (in MB) are skipped/not crawled.
*
*
* The default maximum size of a web page or attachment is set to 50 MB.
*
*
* @param maxContentSizePerPageInMegaBytes
* The maximum size (in MB) of a web page or attachment to crawl.
*
* Files larger than this size (in MB) are skipped/not crawled.
*
*
* The default maximum size of a web page or attachment is set to 50 MB.
* @return Returns a reference to this object so that method calls can be chained together.
*/
public WebCrawlerConfiguration withMaxContentSizePerPageInMegaBytes(Float maxContentSizePerPageInMegaBytes) {
setMaxContentSizePerPageInMegaBytes(maxContentSizePerPageInMegaBytes);
return this;
}
/**
*
* The maximum number of URLs crawled per website host per minute.
*
*
* A minimum of one URL is required.
*
*
* The default maximum number of URLs crawled per website host per minute is 300.
*
*
* @param maxUrlsPerMinuteCrawlRate
* The maximum number of URLs crawled per website host per minute.
*
* A minimum of one URL is required.
*
*
* The default maximum number of URLs crawled per website host per minute is 300.
*/
public void setMaxUrlsPerMinuteCrawlRate(Integer maxUrlsPerMinuteCrawlRate) {
this.maxUrlsPerMinuteCrawlRate = maxUrlsPerMinuteCrawlRate;
}
/**
*
* The maximum number of URLs crawled per website host per minute.
*
*
* A minimum of one URL is required.
*
*
* The default maximum number of URLs crawled per website host per minute is 300.
*
*
* @return The maximum number of URLs crawled per website host per minute.
*
* A minimum of one URL is required.
*
*
* The default maximum number of URLs crawled per website host per minute is 300.
*/
public Integer getMaxUrlsPerMinuteCrawlRate() {
return this.maxUrlsPerMinuteCrawlRate;
}
/**
*
* The maximum number of URLs crawled per website host per minute.
*
*
* A minimum of one URL is required.
*
*
* The default maximum number of URLs crawled per website host per minute is 300.
*
*
* @param maxUrlsPerMinuteCrawlRate
* The maximum number of URLs crawled per website host per minute.
*
* A minimum of one URL is required.
*
*
* The default maximum number of URLs crawled per website host per minute is 300.
* @return Returns a reference to this object so that method calls can be chained together.
*/
public WebCrawlerConfiguration withMaxUrlsPerMinuteCrawlRate(Integer maxUrlsPerMinuteCrawlRate) {
setMaxUrlsPerMinuteCrawlRate(maxUrlsPerMinuteCrawlRate);
return this;
}
/**
*
* A list of regular expression patterns to include certain URLs to crawl. URLs that match the patterns are included
* in the index. URLs that don't match the patterns are excluded from the index. If a URL matches both an inclusion
* and exclusion pattern, the exclusion pattern takes precedence and the URL file isn't included in the index.
*
*
* @return A list of regular expression patterns to include certain URLs to crawl. URLs that match the patterns are
* included in the index. URLs that don't match the patterns are excluded from the index. If a URL matches
* both an inclusion and exclusion pattern, the exclusion pattern takes precedence and the URL file isn't
* included in the index.
*/
public java.util.List getUrlInclusionPatterns() {
return urlInclusionPatterns;
}
/**
*
* A list of regular expression patterns to include certain URLs to crawl. URLs that match the patterns are included
* in the index. URLs that don't match the patterns are excluded from the index. If a URL matches both an inclusion
* and exclusion pattern, the exclusion pattern takes precedence and the URL file isn't included in the index.
*
*
* @param urlInclusionPatterns
* A list of regular expression patterns to include certain URLs to crawl. URLs that match the patterns are
* included in the index. URLs that don't match the patterns are excluded from the index. If a URL matches
* both an inclusion and exclusion pattern, the exclusion pattern takes precedence and the URL file isn't
* included in the index.
*/
public void setUrlInclusionPatterns(java.util.Collection urlInclusionPatterns) {
if (urlInclusionPatterns == null) {
this.urlInclusionPatterns = null;
return;
}
this.urlInclusionPatterns = new java.util.ArrayList(urlInclusionPatterns);
}
/**
*
* A list of regular expression patterns to include certain URLs to crawl. URLs that match the patterns are included
* in the index. URLs that don't match the patterns are excluded from the index. If a URL matches both an inclusion
* and exclusion pattern, the exclusion pattern takes precedence and the URL file isn't included in the index.
*
*
* NOTE: This method appends the values to the existing list (if any). Use
* {@link #setUrlInclusionPatterns(java.util.Collection)} or {@link #withUrlInclusionPatterns(java.util.Collection)}
* if you want to override the existing values.
*
*
* @param urlInclusionPatterns
* A list of regular expression patterns to include certain URLs to crawl. URLs that match the patterns are
* included in the index. URLs that don't match the patterns are excluded from the index. If a URL matches
* both an inclusion and exclusion pattern, the exclusion pattern takes precedence and the URL file isn't
* included in the index.
* @return Returns a reference to this object so that method calls can be chained together.
*/
public WebCrawlerConfiguration withUrlInclusionPatterns(String... urlInclusionPatterns) {
if (this.urlInclusionPatterns == null) {
setUrlInclusionPatterns(new java.util.ArrayList(urlInclusionPatterns.length));
}
for (String ele : urlInclusionPatterns) {
this.urlInclusionPatterns.add(ele);
}
return this;
}
/**
*
* A list of regular expression patterns to include certain URLs to crawl. URLs that match the patterns are included
* in the index. URLs that don't match the patterns are excluded from the index. If a URL matches both an inclusion
* and exclusion pattern, the exclusion pattern takes precedence and the URL file isn't included in the index.
*
*
* @param urlInclusionPatterns
* A list of regular expression patterns to include certain URLs to crawl. URLs that match the patterns are
* included in the index. URLs that don't match the patterns are excluded from the index. If a URL matches
* both an inclusion and exclusion pattern, the exclusion pattern takes precedence and the URL file isn't
* included in the index.
* @return Returns a reference to this object so that method calls can be chained together.
*/
public WebCrawlerConfiguration withUrlInclusionPatterns(java.util.Collection urlInclusionPatterns) {
setUrlInclusionPatterns(urlInclusionPatterns);
return this;
}
/**
*
* A list of regular expression patterns to exclude certain URLs to crawl. URLs that match the patterns are excluded
* from the index. URLs that don't match the patterns are included in the index. If a URL matches both an inclusion
* and exclusion pattern, the exclusion pattern takes precedence and the URL file isn't included in the index.
*
*
* @return A list of regular expression patterns to exclude certain URLs to crawl. URLs that match the patterns are
* excluded from the index. URLs that don't match the patterns are included in the index. If a URL matches
* both an inclusion and exclusion pattern, the exclusion pattern takes precedence and the URL file isn't
* included in the index.
*/
public java.util.List getUrlExclusionPatterns() {
return urlExclusionPatterns;
}
/**
*
* A list of regular expression patterns to exclude certain URLs to crawl. URLs that match the patterns are excluded
* from the index. URLs that don't match the patterns are included in the index. If a URL matches both an inclusion
* and exclusion pattern, the exclusion pattern takes precedence and the URL file isn't included in the index.
*
*
* @param urlExclusionPatterns
* A list of regular expression patterns to exclude certain URLs to crawl. URLs that match the patterns are
* excluded from the index. URLs that don't match the patterns are included in the index. If a URL matches
* both an inclusion and exclusion pattern, the exclusion pattern takes precedence and the URL file isn't
* included in the index.
*/
public void setUrlExclusionPatterns(java.util.Collection urlExclusionPatterns) {
if (urlExclusionPatterns == null) {
this.urlExclusionPatterns = null;
return;
}
this.urlExclusionPatterns = new java.util.ArrayList(urlExclusionPatterns);
}
/**
*
* A list of regular expression patterns to exclude certain URLs to crawl. URLs that match the patterns are excluded
* from the index. URLs that don't match the patterns are included in the index. If a URL matches both an inclusion
* and exclusion pattern, the exclusion pattern takes precedence and the URL file isn't included in the index.
*
*
* NOTE: This method appends the values to the existing list (if any). Use
* {@link #setUrlExclusionPatterns(java.util.Collection)} or {@link #withUrlExclusionPatterns(java.util.Collection)}
* if you want to override the existing values.
*
*
* @param urlExclusionPatterns
* A list of regular expression patterns to exclude certain URLs to crawl. URLs that match the patterns are
* excluded from the index. URLs that don't match the patterns are included in the index. If a URL matches
* both an inclusion and exclusion pattern, the exclusion pattern takes precedence and the URL file isn't
* included in the index.
* @return Returns a reference to this object so that method calls can be chained together.
*/
public WebCrawlerConfiguration withUrlExclusionPatterns(String... urlExclusionPatterns) {
if (this.urlExclusionPatterns == null) {
setUrlExclusionPatterns(new java.util.ArrayList(urlExclusionPatterns.length));
}
for (String ele : urlExclusionPatterns) {
this.urlExclusionPatterns.add(ele);
}
return this;
}
/**
*
* A list of regular expression patterns to exclude certain URLs to crawl. URLs that match the patterns are excluded
* from the index. URLs that don't match the patterns are included in the index. If a URL matches both an inclusion
* and exclusion pattern, the exclusion pattern takes precedence and the URL file isn't included in the index.
*
*
* @param urlExclusionPatterns
* A list of regular expression patterns to exclude certain URLs to crawl. URLs that match the patterns are
* excluded from the index. URLs that don't match the patterns are included in the index. If a URL matches
* both an inclusion and exclusion pattern, the exclusion pattern takes precedence and the URL file isn't
* included in the index.
* @return Returns a reference to this object so that method calls can be chained together.
*/
public WebCrawlerConfiguration withUrlExclusionPatterns(java.util.Collection urlExclusionPatterns) {
setUrlExclusionPatterns(urlExclusionPatterns);
return this;
}
/**
*
* Configuration information required to connect to your internal websites via a web proxy.
*
*
* You must provide the website host name and port number. For example, the host name of
* https://a.example.com/page1.html is "a.example.com" and the port is 443, the standard port for HTTPS.
*
*
* Web proxy credentials are optional and you can use them to connect to a web proxy server that requires basic
* authentication. To store web proxy credentials, you use a secret in Secrets Manager.
*
*
* @param proxyConfiguration
* Configuration information required to connect to your internal websites via a web proxy.
*
* You must provide the website host name and port number. For example, the host name of
* https://a.example.com/page1.html is "a.example.com" and the port is 443, the standard port for HTTPS.
*
*
* Web proxy credentials are optional and you can use them to connect to a web proxy server that requires
* basic authentication. To store web proxy credentials, you use a secret in Secrets Manager.
*/
public void setProxyConfiguration(ProxyConfiguration proxyConfiguration) {
this.proxyConfiguration = proxyConfiguration;
}
/**
*
* Configuration information required to connect to your internal websites via a web proxy.
*
*
* You must provide the website host name and port number. For example, the host name of
* https://a.example.com/page1.html is "a.example.com" and the port is 443, the standard port for HTTPS.
*
*
* Web proxy credentials are optional and you can use them to connect to a web proxy server that requires basic
* authentication. To store web proxy credentials, you use a secret in Secrets Manager.
*
*
* @return Configuration information required to connect to your internal websites via a web proxy.
*
* You must provide the website host name and port number. For example, the host name of
* https://a.example.com/page1.html is "a.example.com" and the port is 443, the standard port for HTTPS.
*
*
* Web proxy credentials are optional and you can use them to connect to a web proxy server that requires
* basic authentication. To store web proxy credentials, you use a secret in Secrets Manager.
*/
public ProxyConfiguration getProxyConfiguration() {
return this.proxyConfiguration;
}
/**
*
* Configuration information required to connect to your internal websites via a web proxy.
*
*
* You must provide the website host name and port number. For example, the host name of
* https://a.example.com/page1.html is "a.example.com" and the port is 443, the standard port for HTTPS.
*
*
* Web proxy credentials are optional and you can use them to connect to a web proxy server that requires basic
* authentication. To store web proxy credentials, you use a secret in Secrets Manager.
*
*
* @param proxyConfiguration
* Configuration information required to connect to your internal websites via a web proxy.
*
* You must provide the website host name and port number. For example, the host name of
* https://a.example.com/page1.html is "a.example.com" and the port is 443, the standard port for HTTPS.
*
*
* Web proxy credentials are optional and you can use them to connect to a web proxy server that requires
* basic authentication. To store web proxy credentials, you use a secret in Secrets Manager.
* @return Returns a reference to this object so that method calls can be chained together.
*/
public WebCrawlerConfiguration withProxyConfiguration(ProxyConfiguration proxyConfiguration) {
setProxyConfiguration(proxyConfiguration);
return this;
}
/**
*
* Configuration information required to connect to websites using authentication.
*
*
* You can connect to websites using basic authentication of user name and password. You use a secret in Secrets Manager to store your
* authentication credentials.
*
*
* You must provide the website host name and port number. For example, the host name of
* https://a.example.com/page1.html is "a.example.com" and the port is 443, the standard port for HTTPS.
*
*
* @param authenticationConfiguration
* Configuration information required to connect to websites using authentication.
*
* You can connect to websites using basic authentication of user name and password. You use a secret in Secrets Manager to store
* your authentication credentials.
*
*
* You must provide the website host name and port number. For example, the host name of
* https://a.example.com/page1.html is "a.example.com" and the port is 443, the standard port for HTTPS.
*/
public void setAuthenticationConfiguration(AuthenticationConfiguration authenticationConfiguration) {
this.authenticationConfiguration = authenticationConfiguration;
}
/**
*
* Configuration information required to connect to websites using authentication.
*
*
* You can connect to websites using basic authentication of user name and password. You use a secret in Secrets Manager to store your
* authentication credentials.
*
*
* You must provide the website host name and port number. For example, the host name of
* https://a.example.com/page1.html is "a.example.com" and the port is 443, the standard port for HTTPS.
*
*
* @return Configuration information required to connect to websites using authentication.
*
* You can connect to websites using basic authentication of user name and password. You use a secret in Secrets Manager to
* store your authentication credentials.
*
*
* You must provide the website host name and port number. For example, the host name of
* https://a.example.com/page1.html is "a.example.com" and the port is 443, the standard port for HTTPS.
*/
public AuthenticationConfiguration getAuthenticationConfiguration() {
return this.authenticationConfiguration;
}
/**
*
* Configuration information required to connect to websites using authentication.
*
*
* You can connect to websites using basic authentication of user name and password. You use a secret in Secrets Manager to store your
* authentication credentials.
*
*
* You must provide the website host name and port number. For example, the host name of
* https://a.example.com/page1.html is "a.example.com" and the port is 443, the standard port for HTTPS.
*
*
* @param authenticationConfiguration
* Configuration information required to connect to websites using authentication.
*
* You can connect to websites using basic authentication of user name and password. You use a secret in Secrets Manager to store
* your authentication credentials.
*
*
* You must provide the website host name and port number. For example, the host name of
* https://a.example.com/page1.html is "a.example.com" and the port is 443, the standard port for HTTPS.
* @return Returns a reference to this object so that method calls can be chained together.
*/
public WebCrawlerConfiguration withAuthenticationConfiguration(AuthenticationConfiguration authenticationConfiguration) {
setAuthenticationConfiguration(authenticationConfiguration);
return this;
}
/**
* Returns a string representation of this object. This is useful for testing and debugging. Sensitive data will be
* redacted from this string using a placeholder value.
*
* @return A string representation of this object.
*
* @see java.lang.Object#toString()
*/
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
sb.append("{");
if (getUrls() != null)
sb.append("Urls: ").append(getUrls()).append(",");
if (getCrawlDepth() != null)
sb.append("CrawlDepth: ").append(getCrawlDepth()).append(",");
if (getMaxLinksPerPage() != null)
sb.append("MaxLinksPerPage: ").append(getMaxLinksPerPage()).append(",");
if (getMaxContentSizePerPageInMegaBytes() != null)
sb.append("MaxContentSizePerPageInMegaBytes: ").append(getMaxContentSizePerPageInMegaBytes()).append(",");
if (getMaxUrlsPerMinuteCrawlRate() != null)
sb.append("MaxUrlsPerMinuteCrawlRate: ").append(getMaxUrlsPerMinuteCrawlRate()).append(",");
if (getUrlInclusionPatterns() != null)
sb.append("UrlInclusionPatterns: ").append(getUrlInclusionPatterns()).append(",");
if (getUrlExclusionPatterns() != null)
sb.append("UrlExclusionPatterns: ").append(getUrlExclusionPatterns()).append(",");
if (getProxyConfiguration() != null)
sb.append("ProxyConfiguration: ").append(getProxyConfiguration()).append(",");
if (getAuthenticationConfiguration() != null)
sb.append("AuthenticationConfiguration: ").append(getAuthenticationConfiguration());
sb.append("}");
return sb.toString();
}
@Override
public boolean equals(Object obj) {
if (this == obj)
return true;
if (obj == null)
return false;
if (obj instanceof WebCrawlerConfiguration == false)
return false;
WebCrawlerConfiguration other = (WebCrawlerConfiguration) obj;
if (other.getUrls() == null ^ this.getUrls() == null)
return false;
if (other.getUrls() != null && other.getUrls().equals(this.getUrls()) == false)
return false;
if (other.getCrawlDepth() == null ^ this.getCrawlDepth() == null)
return false;
if (other.getCrawlDepth() != null && other.getCrawlDepth().equals(this.getCrawlDepth()) == false)
return false;
if (other.getMaxLinksPerPage() == null ^ this.getMaxLinksPerPage() == null)
return false;
if (other.getMaxLinksPerPage() != null && other.getMaxLinksPerPage().equals(this.getMaxLinksPerPage()) == false)
return false;
if (other.getMaxContentSizePerPageInMegaBytes() == null ^ this.getMaxContentSizePerPageInMegaBytes() == null)
return false;
if (other.getMaxContentSizePerPageInMegaBytes() != null
&& other.getMaxContentSizePerPageInMegaBytes().equals(this.getMaxContentSizePerPageInMegaBytes()) == false)
return false;
if (other.getMaxUrlsPerMinuteCrawlRate() == null ^ this.getMaxUrlsPerMinuteCrawlRate() == null)
return false;
if (other.getMaxUrlsPerMinuteCrawlRate() != null && other.getMaxUrlsPerMinuteCrawlRate().equals(this.getMaxUrlsPerMinuteCrawlRate()) == false)
return false;
if (other.getUrlInclusionPatterns() == null ^ this.getUrlInclusionPatterns() == null)
return false;
if (other.getUrlInclusionPatterns() != null && other.getUrlInclusionPatterns().equals(this.getUrlInclusionPatterns()) == false)
return false;
if (other.getUrlExclusionPatterns() == null ^ this.getUrlExclusionPatterns() == null)
return false;
if (other.getUrlExclusionPatterns() != null && other.getUrlExclusionPatterns().equals(this.getUrlExclusionPatterns()) == false)
return false;
if (other.getProxyConfiguration() == null ^ this.getProxyConfiguration() == null)
return false;
if (other.getProxyConfiguration() != null && other.getProxyConfiguration().equals(this.getProxyConfiguration()) == false)
return false;
if (other.getAuthenticationConfiguration() == null ^ this.getAuthenticationConfiguration() == null)
return false;
if (other.getAuthenticationConfiguration() != null && other.getAuthenticationConfiguration().equals(this.getAuthenticationConfiguration()) == false)
return false;
return true;
}
@Override
public int hashCode() {
final int prime = 31;
int hashCode = 1;
hashCode = prime * hashCode + ((getUrls() == null) ? 0 : getUrls().hashCode());
hashCode = prime * hashCode + ((getCrawlDepth() == null) ? 0 : getCrawlDepth().hashCode());
hashCode = prime * hashCode + ((getMaxLinksPerPage() == null) ? 0 : getMaxLinksPerPage().hashCode());
hashCode = prime * hashCode + ((getMaxContentSizePerPageInMegaBytes() == null) ? 0 : getMaxContentSizePerPageInMegaBytes().hashCode());
hashCode = prime * hashCode + ((getMaxUrlsPerMinuteCrawlRate() == null) ? 0 : getMaxUrlsPerMinuteCrawlRate().hashCode());
hashCode = prime * hashCode + ((getUrlInclusionPatterns() == null) ? 0 : getUrlInclusionPatterns().hashCode());
hashCode = prime * hashCode + ((getUrlExclusionPatterns() == null) ? 0 : getUrlExclusionPatterns().hashCode());
hashCode = prime * hashCode + ((getProxyConfiguration() == null) ? 0 : getProxyConfiguration().hashCode());
hashCode = prime * hashCode + ((getAuthenticationConfiguration() == null) ? 0 : getAuthenticationConfiguration().hashCode());
return hashCode;
}
@Override
public WebCrawlerConfiguration clone() {
try {
return (WebCrawlerConfiguration) super.clone();
} catch (CloneNotSupportedException e) {
throw new IllegalStateException("Got a CloneNotSupportedException from Object.clone() " + "even though we're Cloneable!", e);
}
}
@com.amazonaws.annotation.SdkInternalApi
@Override
public void marshall(ProtocolMarshaller protocolMarshaller) {
com.amazonaws.services.kendra.model.transform.WebCrawlerConfigurationMarshaller.getInstance().marshall(this, protocolMarshaller);
}
}