com.amazonaws.services.kendra.model.WebCrawlerConfiguration

The AWS Java SDK for AWSKendraFrontend module holds the client classes used for communicating with the AWSKendraFrontend service.

/*
 * Copyright 2019-2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 * 
 * Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance with
 * the License. A copy of the License is located at
 * 
 * http://aws.amazon.com/apache2.0
 * 
 * or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
 * CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions
 * and limitations under the License.
 */
package com.amazonaws.services.kendra.model;

import java.io.Serializable;
import javax.annotation.Generated;
import com.amazonaws.protocol.StructuredPojo;
import com.amazonaws.protocol.ProtocolMarshaller;

/**
 * Provides the configuration information required for Amazon Kendra Web Crawler.
 *
 * @see AWS API Documentation
 */
@Generated("com.amazonaws:aws-java-sdk-code-generator")
public class WebCrawlerConfiguration implements Serializable, Cloneable, StructuredPojo {

    /**
     * Specifies the seed or starting point URLs of the websites, or the sitemap URLs of the websites, you want to
     * crawl. You can include website subdomains. You can list up to 100 seed URLs and up to three sitemap URLs.
     * <p>
     * You can only crawl websites that use the secure communication protocol, Hypertext Transfer Protocol Secure
     * (HTTPS). If you receive an error when crawling a website, it could be that the website is blocked from crawling.
     * <p>
     * When selecting websites to index, you must adhere to the Amazon Acceptable Use Policy and all other Amazon
     * terms. Remember that you must only use Amazon Kendra Web Crawler to index your own web pages, or web pages that
     * you have authorization to index.
     */
    private Urls urls;

    /**
     * The 'depth' or number of levels from the seed level to crawl. For example, the seed URL page is depth 1 and any
     * hyperlinks on this page that are also crawled are depth 2.
     */
    private Integer crawlDepth;

    /**
     * The maximum number of URLs on a web page to include when crawling a website. This number is per web page.
     * <p>
     * As a website's web pages are crawled, any URLs the web pages link to are also crawled. URLs on a web page are
     * crawled in order of appearance. The default maximum links per page is 100.
     */
    private Integer maxLinksPerPage;

    /**
     * The maximum size (in MB) of a web page or attachment to crawl. Files larger than this size are skipped and not
     * crawled. The default maximum size of a web page or attachment is 50 MB.
     */
    private Float maxContentSizePerPageInMegaBytes;

    /**
     * The maximum number of URLs crawled per website host per minute. A minimum of one URL is required. The default
     * maximum number of URLs crawled per website host per minute is 300.
     */
    private Integer maxUrlsPerMinuteCrawlRate;

    /**
     * A list of regular expression patterns to include certain URLs to crawl. URLs that match the patterns are
     * included in the index; URLs that don't match the patterns are excluded from the index. If a URL matches both an
     * inclusion and an exclusion pattern, the exclusion pattern takes precedence and the URL isn't included in the
     * index.
     */
    private java.util.List<String> urlInclusionPatterns;

    /**
     * A list of regular expression patterns to exclude certain URLs from crawling. URLs that match the patterns are
     * excluded from the index; URLs that don't match the patterns are included in the index. If a URL matches both an
     * inclusion and an exclusion pattern, the exclusion pattern takes precedence and the URL isn't included in the
     * index.
     */
    private java.util.List<String> urlExclusionPatterns;

    /**
     * Configuration information required to connect to your internal websites via a web proxy. You must provide the
     * website host name and port number. For example, the host name of https://a.example.com/page1.html is
     * "a.example.com" and the port is 443, the standard port for HTTPS.
     * <p>
     * Web proxy credentials are optional; you can use them to connect to a web proxy server that requires basic
     * authentication. To store web proxy credentials, you use a secret in Secrets Manager.
     */
    private ProxyConfiguration proxyConfiguration;

    /**
     * Configuration information required to connect to websites using authentication. You can connect to websites
     * using basic authentication of user name and password; you use a secret in Secrets Manager to store your
     * authentication credentials. You must provide the website host name and port number. For example, the host name
     * of https://a.example.com/page1.html is "a.example.com" and the port is 443, the standard port for HTTPS.
     */
    private AuthenticationConfiguration authenticationConfiguration;

    /**
     * @param urls
     *        The seed or starting point URLs, or the sitemap URLs, of the websites you want to crawl.
     */
    public void setUrls(Urls urls) {
        this.urls = urls;
    }

    /**
     * @return The seed or starting point URLs, or the sitemap URLs, of the websites you want to crawl.
     */
    public Urls getUrls() {
        return this.urls;
    }

    /**
     * @param urls
     *        The seed or starting point URLs, or the sitemap URLs, of the websites you want to crawl.
     * @return Returns a reference to this object so that method calls can be chained together.
     */
    public WebCrawlerConfiguration withUrls(Urls urls) {
        setUrls(urls);
        return this;
    }

    /**
     * @param crawlDepth
     *        The 'depth' or number of levels from the seed level to crawl.
     */
    public void setCrawlDepth(Integer crawlDepth) {
        this.crawlDepth = crawlDepth;
    }

    /**
     * @return The 'depth' or number of levels from the seed level to crawl.
     */
    public Integer getCrawlDepth() {
        return this.crawlDepth;
    }

    /**
     * @param crawlDepth
     *        The 'depth' or number of levels from the seed level to crawl.
     * @return Returns a reference to this object so that method calls can be chained together.
     */
    public WebCrawlerConfiguration withCrawlDepth(Integer crawlDepth) {
        setCrawlDepth(crawlDepth);
        return this;
    }

    /**
     * @param maxLinksPerPage
     *        The maximum number of URLs on a web page to include when crawling a website (per web page). The default
     *        is 100.
     */
    public void setMaxLinksPerPage(Integer maxLinksPerPage) {
        this.maxLinksPerPage = maxLinksPerPage;
    }

    /**
     * @return The maximum number of URLs on a web page to include when crawling a website (per web page).
     */
    public Integer getMaxLinksPerPage() {
        return this.maxLinksPerPage;
    }

    /**
     * @param maxLinksPerPage
     *        The maximum number of URLs on a web page to include when crawling a website (per web page). The default
     *        is 100.
     * @return Returns a reference to this object so that method calls can be chained together.
     */
    public WebCrawlerConfiguration withMaxLinksPerPage(Integer maxLinksPerPage) {
        setMaxLinksPerPage(maxLinksPerPage);
        return this;
    }

    /**
     * @param maxContentSizePerPageInMegaBytes
     *        The maximum size (in MB) of a web page or attachment to crawl. Larger files are skipped. The default is
     *        50 MB.
     */
    public void setMaxContentSizePerPageInMegaBytes(Float maxContentSizePerPageInMegaBytes) {
        this.maxContentSizePerPageInMegaBytes = maxContentSizePerPageInMegaBytes;
    }

    /**
     * @return The maximum size (in MB) of a web page or attachment to crawl.
     */
    public Float getMaxContentSizePerPageInMegaBytes() {
        return this.maxContentSizePerPageInMegaBytes;
    }

    /**
     * @param maxContentSizePerPageInMegaBytes
     *        The maximum size (in MB) of a web page or attachment to crawl. Larger files are skipped. The default is
     *        50 MB.
     * @return Returns a reference to this object so that method calls can be chained together.
     */
    public WebCrawlerConfiguration withMaxContentSizePerPageInMegaBytes(Float maxContentSizePerPageInMegaBytes) {
        setMaxContentSizePerPageInMegaBytes(maxContentSizePerPageInMegaBytes);
        return this;
    }

    /**
     * @param maxUrlsPerMinuteCrawlRate
     *        The maximum number of URLs crawled per website host per minute. A minimum of one URL is required; the
     *        default is 300.
     */
    public void setMaxUrlsPerMinuteCrawlRate(Integer maxUrlsPerMinuteCrawlRate) {
        this.maxUrlsPerMinuteCrawlRate = maxUrlsPerMinuteCrawlRate;
    }

    /**
     * @return The maximum number of URLs crawled per website host per minute.
     */
    public Integer getMaxUrlsPerMinuteCrawlRate() {
        return this.maxUrlsPerMinuteCrawlRate;
    }

    /**
     * @param maxUrlsPerMinuteCrawlRate
     *        The maximum number of URLs crawled per website host per minute. A minimum of one URL is required; the
     *        default is 300.
     * @return Returns a reference to this object so that method calls can be chained together.
     */
    public WebCrawlerConfiguration withMaxUrlsPerMinuteCrawlRate(Integer maxUrlsPerMinuteCrawlRate) {
        setMaxUrlsPerMinuteCrawlRate(maxUrlsPerMinuteCrawlRate);
        return this;
    }

    /**
     * @return A list of regular expression patterns to include certain URLs to crawl.
     */
    public java.util.List<String> getUrlInclusionPatterns() {
        return urlInclusionPatterns;
    }

    /**
     * @param urlInclusionPatterns
     *        A list of regular expression patterns to include certain URLs to crawl. If a URL matches both an
     *        inclusion and an exclusion pattern, the exclusion pattern takes precedence.
     */
    public void setUrlInclusionPatterns(java.util.Collection<String> urlInclusionPatterns) {
        if (urlInclusionPatterns == null) {
            this.urlInclusionPatterns = null;
            return;
        }
        this.urlInclusionPatterns = new java.util.ArrayList<String>(urlInclusionPatterns);
    }

    /**
     * <b>NOTE:</b> This method appends the values to the existing list (if any). Use
     * {@link #setUrlInclusionPatterns(java.util.Collection)} or {@link #withUrlInclusionPatterns(java.util.Collection)}
     * if you want to override the existing values.
     *
     * @param urlInclusionPatterns
     *        A list of regular expression patterns to include certain URLs to crawl.
     * @return Returns a reference to this object so that method calls can be chained together.
     */
    public WebCrawlerConfiguration withUrlInclusionPatterns(String... urlInclusionPatterns) {
        if (this.urlInclusionPatterns == null) {
            setUrlInclusionPatterns(new java.util.ArrayList<String>(urlInclusionPatterns.length));
        }
        for (String ele : urlInclusionPatterns) {
            this.urlInclusionPatterns.add(ele);
        }
        return this;
    }

    /**
     * @param urlInclusionPatterns
     *        A list of regular expression patterns to include certain URLs to crawl.
     * @return Returns a reference to this object so that method calls can be chained together.
     */
    public WebCrawlerConfiguration withUrlInclusionPatterns(java.util.Collection<String> urlInclusionPatterns) {
        setUrlInclusionPatterns(urlInclusionPatterns);
        return this;
    }

    /**
     * @return A list of regular expression patterns to exclude certain URLs from crawling.
     */
    public java.util.List<String> getUrlExclusionPatterns() {
        return urlExclusionPatterns;
    }

    /**
     * @param urlExclusionPatterns
     *        A list of regular expression patterns to exclude certain URLs from crawling. If a URL matches both an
     *        inclusion and an exclusion pattern, the exclusion pattern takes precedence.
     */
    public void setUrlExclusionPatterns(java.util.Collection<String> urlExclusionPatterns) {
        if (urlExclusionPatterns == null) {
            this.urlExclusionPatterns = null;
            return;
        }
        this.urlExclusionPatterns = new java.util.ArrayList<String>(urlExclusionPatterns);
    }

    /**
     * <b>NOTE:</b> This method appends the values to the existing list (if any). Use
     * {@link #setUrlExclusionPatterns(java.util.Collection)} or {@link #withUrlExclusionPatterns(java.util.Collection)}
     * if you want to override the existing values.
     *
     * @param urlExclusionPatterns
     *        A list of regular expression patterns to exclude certain URLs from crawling.
     * @return Returns a reference to this object so that method calls can be chained together.
     */
    public WebCrawlerConfiguration withUrlExclusionPatterns(String... urlExclusionPatterns) {
        if (this.urlExclusionPatterns == null) {
            setUrlExclusionPatterns(new java.util.ArrayList<String>(urlExclusionPatterns.length));
        }
        for (String ele : urlExclusionPatterns) {
            this.urlExclusionPatterns.add(ele);
        }
        return this;
    }

    /**
     * @param urlExclusionPatterns
     *        A list of regular expression patterns to exclude certain URLs from crawling.
     * @return Returns a reference to this object so that method calls can be chained together.
     */
    public WebCrawlerConfiguration withUrlExclusionPatterns(java.util.Collection<String> urlExclusionPatterns) {
        setUrlExclusionPatterns(urlExclusionPatterns);
        return this;
    }

    /**
     * @param proxyConfiguration
     *        Configuration information required to connect to your internal websites via a web proxy.
     */
    public void setProxyConfiguration(ProxyConfiguration proxyConfiguration) {
        this.proxyConfiguration = proxyConfiguration;
    }

    /**
     * @return Configuration information required to connect to your internal websites via a web proxy.
     */
    public ProxyConfiguration getProxyConfiguration() {
        return this.proxyConfiguration;
    }

    /**
     * @param proxyConfiguration
     *        Configuration information required to connect to your internal websites via a web proxy.
     * @return Returns a reference to this object so that method calls can be chained together.
     */
    public WebCrawlerConfiguration withProxyConfiguration(ProxyConfiguration proxyConfiguration) {
        setProxyConfiguration(proxyConfiguration);
        return this;
    }

    /**
     * @param authenticationConfiguration
     *        Configuration information required to connect to websites using authentication.
     */
    public void setAuthenticationConfiguration(AuthenticationConfiguration authenticationConfiguration) {
        this.authenticationConfiguration = authenticationConfiguration;
    }

    /**
     * @return Configuration information required to connect to websites using authentication.
     */
    public AuthenticationConfiguration getAuthenticationConfiguration() {
        return this.authenticationConfiguration;
    }

    /**
     * @param authenticationConfiguration
     *        Configuration information required to connect to websites using authentication.
     * @return Returns a reference to this object so that method calls can be chained together.
     */
    public WebCrawlerConfiguration withAuthenticationConfiguration(AuthenticationConfiguration authenticationConfiguration) {
        setAuthenticationConfiguration(authenticationConfiguration);
        return this;
    }

    /**
     * Returns a string representation of this object. This is useful for testing and debugging. Sensitive data will be
     * redacted from this string using a placeholder value.
     *
     * @return A string representation of this object.
     *
     * @see java.lang.Object#toString()
     */
    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder();
        sb.append("{");
        if (getUrls() != null)
            sb.append("Urls: ").append(getUrls()).append(",");
        if (getCrawlDepth() != null)
            sb.append("CrawlDepth: ").append(getCrawlDepth()).append(",");
        if (getMaxLinksPerPage() != null)
            sb.append("MaxLinksPerPage: ").append(getMaxLinksPerPage()).append(",");
        if (getMaxContentSizePerPageInMegaBytes() != null)
            sb.append("MaxContentSizePerPageInMegaBytes: ").append(getMaxContentSizePerPageInMegaBytes()).append(",");
        if (getMaxUrlsPerMinuteCrawlRate() != null)
            sb.append("MaxUrlsPerMinuteCrawlRate: ").append(getMaxUrlsPerMinuteCrawlRate()).append(",");
        if (getUrlInclusionPatterns() != null)
            sb.append("UrlInclusionPatterns: ").append(getUrlInclusionPatterns()).append(",");
        if (getUrlExclusionPatterns() != null)
            sb.append("UrlExclusionPatterns: ").append(getUrlExclusionPatterns()).append(",");
        if (getProxyConfiguration() != null)
            sb.append("ProxyConfiguration: ").append(getProxyConfiguration()).append(",");
        if (getAuthenticationConfiguration() != null)
            sb.append("AuthenticationConfiguration: ").append(getAuthenticationConfiguration());
        sb.append("}");
        return sb.toString();
    }

    @Override
    public boolean equals(Object obj) {
        if (this == obj)
            return true;
        if (obj == null)
            return false;

        if (obj instanceof WebCrawlerConfiguration == false)
            return false;
        WebCrawlerConfiguration other = (WebCrawlerConfiguration) obj;
        if (other.getUrls() == null ^ this.getUrls() == null)
            return false;
        if (other.getUrls() != null && other.getUrls().equals(this.getUrls()) == false)
            return false;
        if (other.getCrawlDepth() == null ^ this.getCrawlDepth() == null)
            return false;
        if (other.getCrawlDepth() != null && other.getCrawlDepth().equals(this.getCrawlDepth()) == false)
            return false;
        if (other.getMaxLinksPerPage() == null ^ this.getMaxLinksPerPage() == null)
            return false;
        if (other.getMaxLinksPerPage() != null && other.getMaxLinksPerPage().equals(this.getMaxLinksPerPage()) == false)
            return false;
        if (other.getMaxContentSizePerPageInMegaBytes() == null ^ this.getMaxContentSizePerPageInMegaBytes() == null)
            return false;
        if (other.getMaxContentSizePerPageInMegaBytes() != null
                && other.getMaxContentSizePerPageInMegaBytes().equals(this.getMaxContentSizePerPageInMegaBytes()) == false)
            return false;
        if (other.getMaxUrlsPerMinuteCrawlRate() == null ^ this.getMaxUrlsPerMinuteCrawlRate() == null)
            return false;
        if (other.getMaxUrlsPerMinuteCrawlRate() != null && other.getMaxUrlsPerMinuteCrawlRate().equals(this.getMaxUrlsPerMinuteCrawlRate()) == false)
            return false;
        if (other.getUrlInclusionPatterns() == null ^ this.getUrlInclusionPatterns() == null)
            return false;
        if (other.getUrlInclusionPatterns() != null && other.getUrlInclusionPatterns().equals(this.getUrlInclusionPatterns()) == false)
            return false;
        if (other.getUrlExclusionPatterns() == null ^ this.getUrlExclusionPatterns() == null)
            return false;
        if (other.getUrlExclusionPatterns() != null && other.getUrlExclusionPatterns().equals(this.getUrlExclusionPatterns()) == false)
            return false;
        if (other.getProxyConfiguration() == null ^ this.getProxyConfiguration() == null)
            return false;
        if (other.getProxyConfiguration() != null && other.getProxyConfiguration().equals(this.getProxyConfiguration()) == false)
            return false;
        if (other.getAuthenticationConfiguration() == null ^ this.getAuthenticationConfiguration() == null)
            return false;
        if (other.getAuthenticationConfiguration() != null && other.getAuthenticationConfiguration().equals(this.getAuthenticationConfiguration()) == false)
            return false;
        return true;
    }

    @Override
    public int hashCode() {
        final int prime = 31;
        int hashCode = 1;

        hashCode = prime * hashCode + ((getUrls() == null) ? 0 : getUrls().hashCode());
        hashCode = prime * hashCode + ((getCrawlDepth() == null) ? 0 : getCrawlDepth().hashCode());
        hashCode = prime * hashCode + ((getMaxLinksPerPage() == null) ? 0 : getMaxLinksPerPage().hashCode());
        hashCode = prime * hashCode + ((getMaxContentSizePerPageInMegaBytes() == null) ? 0 : getMaxContentSizePerPageInMegaBytes().hashCode());
        hashCode = prime * hashCode + ((getMaxUrlsPerMinuteCrawlRate() == null) ? 0 : getMaxUrlsPerMinuteCrawlRate().hashCode());
        hashCode = prime * hashCode + ((getUrlInclusionPatterns() == null) ? 0 : getUrlInclusionPatterns().hashCode());
        hashCode = prime * hashCode + ((getUrlExclusionPatterns() == null) ? 0 : getUrlExclusionPatterns().hashCode());
        hashCode = prime * hashCode + ((getProxyConfiguration() == null) ? 0 : getProxyConfiguration().hashCode());
        hashCode = prime * hashCode + ((getAuthenticationConfiguration() == null) ? 0 : getAuthenticationConfiguration().hashCode());
        return hashCode;
    }

    @Override
    public WebCrawlerConfiguration clone() {
        try {
            return (WebCrawlerConfiguration) super.clone();
        } catch (CloneNotSupportedException e) {
            throw new IllegalStateException("Got a CloneNotSupportedException from Object.clone() " + "even though we're Cloneable!", e);
        }
    }

    @com.amazonaws.annotation.SdkInternalApi
    @Override
    public void marshall(ProtocolMarshaller protocolMarshaller) {
        com.amazonaws.services.kendra.model.transform.WebCrawlerConfigurationMarshaller.getInstance().marshall(this, protocolMarshaller);
    }
}
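For orientation, here is a minimal, hypothetical usage sketch (not part of the generated source above) showing how the fluent with* methods chain together. It assumes Urls and SeedUrlConfiguration from the same com.amazonaws.services.kendra.model package; the seed URL, crawler mode string, limits, and exclusion pattern are illustrative values only, not defaults or recommendations from this file.

import com.amazonaws.services.kendra.model.SeedUrlConfiguration;
import com.amazonaws.services.kendra.model.Urls;
import com.amazonaws.services.kendra.model.WebCrawlerConfiguration;

public class WebCrawlerConfigurationExample {
    public static void main(String[] args) {
        // Hypothetical seed URLs to start crawling from; assumes the SeedUrlConfiguration
        // model class and its withSeedUrls / withWebCrawlerMode accessors.
        Urls urls = new Urls()
                .withSeedUrlConfiguration(new SeedUrlConfiguration()
                        .withSeedUrls("https://docs.example.com")   // illustrative seed URL
                        .withWebCrawlerMode("SUBDOMAINS"));         // assumed mode value

        // Chain the fluent with* methods defined above to assemble the configuration.
        WebCrawlerConfiguration config = new WebCrawlerConfiguration()
                .withUrls(urls)
                .withCrawlDepth(2)                          // seed page is depth 1, its links are depth 2
                .withMaxLinksPerPage(100)                   // matches the documented default
                .withMaxContentSizePerPageInMegaBytes(50f)  // matches the documented default
                .withMaxUrlsPerMinuteCrawlRate(300)         // matches the documented default
                .withUrlExclusionPatterns(".*\\.pdf$");     // example exclusion pattern

        // toString() prints only the fields that were set.
        System.out.println(config);
    }
}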




