com.amazonaws.services.kendra.model.WebCrawlerConfiguration

The AWS Java SDK for AWSKendraFrontend module holds the client classes used for communicating with the AWSKendraFrontend service.

/*
 * Copyright 2019-2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 * 
 * Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance with
 * the License. A copy of the License is located at
 * 
 * http://aws.amazon.com/apache2.0
 * 
 * or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
 * CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions
 * and limitations under the License.
 */
package com.amazonaws.services.kendra.model;

import java.io.Serializable;
import javax.annotation.Generated;
import com.amazonaws.protocol.StructuredPojo;
import com.amazonaws.protocol.ProtocolMarshaller;

/**
 * Provides the configuration information required for Amazon Kendra Web Crawler.
 *
 * @see AWS API Documentation
 */
@Generated("com.amazonaws:aws-java-sdk-code-generator")
public class WebCrawlerConfiguration implements Serializable, Cloneable, StructuredPojo {

    /**
     * Specifies the seed or starting point URLs of the websites, or the sitemap URLs of the websites, you want to
     * crawl. You can include website subdomains. You can list up to 100 seed URLs and up to three sitemap URLs.
     * <p>
     * You can only crawl websites that use the secure communication protocol, Hypertext Transfer Protocol Secure
     * (HTTPS). If you receive an error when crawling a website, it could be that the website is blocked from crawling.
     * <p>
     * When selecting websites to index, you must adhere to the Amazon Acceptable Use Policy and all other Amazon
     * terms. Remember that you must only use Amazon Kendra Web Crawler to index your own web pages, or web pages that
     * you have authorization to index.
     */
    private Urls urls;

    /**
     * The 'depth' or number of levels from the seed level to crawl. For example, the seed URL page is depth 1 and any
     * hyperlinks on this page that are also crawled are depth 2.
     */
    private Integer crawlDepth;

    /**
     * The maximum number of URLs on a web page to include when crawling a website. This number is per web page.
     * <p>
     * As a website's web pages are crawled, any URLs the web pages link to are also crawled. URLs on a web page are
     * crawled in order of appearance. The default maximum links per page is 100.
     */
    private Integer maxLinksPerPage;

    /**
     * The maximum size (in MB) of a web page or attachment to crawl. Files larger than this size are skipped and not
     * crawled. The default maximum size of a web page or attachment is 50 MB.
     */
    private Float maxContentSizePerPageInMegaBytes;

    /**
     * The maximum number of URLs crawled per website host per minute. A minimum of one URL is required. The default
     * maximum number of URLs crawled per website host per minute is 300.
     */
    private Integer maxUrlsPerMinuteCrawlRate;

    /**
     * A list of regular expression patterns to include certain URLs to crawl. URLs that match the patterns are
     * included in the index; URLs that don't match the patterns are excluded from the index. If a URL matches both an
     * inclusion and an exclusion pattern, the exclusion pattern takes precedence and the URL isn't included in the
     * index.
     */
    private java.util.List<String> urlInclusionPatterns;

    /**
     * A list of regular expression patterns to exclude certain URLs from crawling. URLs that match the patterns are
     * excluded from the index; URLs that don't match the patterns are included in the index. If a URL matches both an
     * inclusion and an exclusion pattern, the exclusion pattern takes precedence and the URL isn't included in the
     * index.
     */
    private java.util.List<String> urlExclusionPatterns;

    /**
     * Configuration information required to connect to your internal websites via a web proxy. You must provide the
     * website host name and port number. For example, the host name of https://a.example.com/page1.html is
     * "a.example.com" and the port is 443, the standard port for HTTPS.
     * <p>
     * Web proxy credentials are optional; you can use them to connect to a web proxy server that requires basic
     * authentication. To store web proxy credentials, you use a secret in Secrets Manager.
     */
    private ProxyConfiguration proxyConfiguration;

    /**
     * Configuration information required to connect to websites using authentication. You can connect to websites
     * using basic authentication of user name and password; you use a secret in Secrets Manager to store your
     * authentication credentials. You must provide the website host name and port number. For example, the host name
     * of https://a.example.com/page1.html is "a.example.com" and the port is 443, the standard port for HTTPS.
     */
    private AuthenticationConfiguration authenticationConfiguration;

    /**
     * @param urls
     *        The seed or starting point URLs, or the sitemap URLs, of the websites you want to crawl.
     */
    public void setUrls(Urls urls) {
        this.urls = urls;
    }

    /**
     * @return The seed or starting point URLs, or the sitemap URLs, of the websites you want to crawl.
     */
    public Urls getUrls() {
        return this.urls;
    }

    /**
     * @param urls
     *        The seed or starting point URLs, or the sitemap URLs, of the websites you want to crawl.
     * @return Returns a reference to this object so that method calls can be chained together.
     */
    public WebCrawlerConfiguration withUrls(Urls urls) {
        setUrls(urls);
        return this;
    }

    /**
     * @param crawlDepth
     *        The 'depth' or number of levels from the seed level to crawl.
     */
    public void setCrawlDepth(Integer crawlDepth) {
        this.crawlDepth = crawlDepth;
    }

    /**
     * @return The 'depth' or number of levels from the seed level to crawl.
     */
    public Integer getCrawlDepth() {
        return this.crawlDepth;
    }

    /**
     * @param crawlDepth
     *        The 'depth' or number of levels from the seed level to crawl.
     * @return Returns a reference to this object so that method calls can be chained together.
     */
    public WebCrawlerConfiguration withCrawlDepth(Integer crawlDepth) {
        setCrawlDepth(crawlDepth);
        return this;
    }

    /**
     * @param maxLinksPerPage
     *        The maximum number of URLs on a web page to include when crawling a website (per web page). The default
     *        is 100.
     */
    public void setMaxLinksPerPage(Integer maxLinksPerPage) {
        this.maxLinksPerPage = maxLinksPerPage;
    }

    /**
     * @return The maximum number of URLs on a web page to include when crawling a website (per web page).
     */
    public Integer getMaxLinksPerPage() {
        return this.maxLinksPerPage;
    }

    /**
     * @param maxLinksPerPage
     *        The maximum number of URLs on a web page to include when crawling a website (per web page). The default
     *        is 100.
     * @return Returns a reference to this object so that method calls can be chained together.
     */
    public WebCrawlerConfiguration withMaxLinksPerPage(Integer maxLinksPerPage) {
        setMaxLinksPerPage(maxLinksPerPage);
        return this;
    }

    /**
     * @param maxContentSizePerPageInMegaBytes
     *        The maximum size (in MB) of a web page or attachment to crawl. Larger files are skipped. The default is
     *        50 MB.
     */
    public void setMaxContentSizePerPageInMegaBytes(Float maxContentSizePerPageInMegaBytes) {
        this.maxContentSizePerPageInMegaBytes = maxContentSizePerPageInMegaBytes;
    }

    /**
     * @return The maximum size (in MB) of a web page or attachment to crawl.
     */
    public Float getMaxContentSizePerPageInMegaBytes() {
        return this.maxContentSizePerPageInMegaBytes;
    }

    /**
     * @param maxContentSizePerPageInMegaBytes
     *        The maximum size (in MB) of a web page or attachment to crawl. Larger files are skipped. The default is
     *        50 MB.
     * @return Returns a reference to this object so that method calls can be chained together.
     */
    public WebCrawlerConfiguration withMaxContentSizePerPageInMegaBytes(Float maxContentSizePerPageInMegaBytes) {
        setMaxContentSizePerPageInMegaBytes(maxContentSizePerPageInMegaBytes);
        return this;
    }

    /**
     * @param maxUrlsPerMinuteCrawlRate
     *        The maximum number of URLs crawled per website host per minute. A minimum of one URL is required; the
     *        default is 300.
     */
    public void setMaxUrlsPerMinuteCrawlRate(Integer maxUrlsPerMinuteCrawlRate) {
        this.maxUrlsPerMinuteCrawlRate = maxUrlsPerMinuteCrawlRate;
    }

    /**
     * @return The maximum number of URLs crawled per website host per minute.
     */
    public Integer getMaxUrlsPerMinuteCrawlRate() {
        return this.maxUrlsPerMinuteCrawlRate;
    }

    /**
     * @param maxUrlsPerMinuteCrawlRate
     *        The maximum number of URLs crawled per website host per minute. A minimum of one URL is required; the
     *        default is 300.
     * @return Returns a reference to this object so that method calls can be chained together.
     */
    public WebCrawlerConfiguration withMaxUrlsPerMinuteCrawlRate(Integer maxUrlsPerMinuteCrawlRate) {
        setMaxUrlsPerMinuteCrawlRate(maxUrlsPerMinuteCrawlRate);
        return this;
    }

    /**
     * @return A list of regular expression patterns to include certain URLs to crawl.
     */
    public java.util.List<String> getUrlInclusionPatterns() {
        return urlInclusionPatterns;
    }

    /**
     * @param urlInclusionPatterns
     *        A list of regular expression patterns to include certain URLs to crawl. If a URL matches both an
     *        inclusion and an exclusion pattern, the exclusion pattern takes precedence.
     */
    public void setUrlInclusionPatterns(java.util.Collection<String> urlInclusionPatterns) {
        if (urlInclusionPatterns == null) {
            this.urlInclusionPatterns = null;
            return;
        }
        this.urlInclusionPatterns = new java.util.ArrayList<String>(urlInclusionPatterns);
    }

    /**
     * <b>NOTE:</b> This method appends the values to the existing list (if any). Use
     * {@link #setUrlInclusionPatterns(java.util.Collection)} or {@link #withUrlInclusionPatterns(java.util.Collection)}
     * if you want to override the existing values.
     *
     * @param urlInclusionPatterns
     *        A list of regular expression patterns to include certain URLs to crawl.
     * @return Returns a reference to this object so that method calls can be chained together.
     */
    public WebCrawlerConfiguration withUrlInclusionPatterns(String... urlInclusionPatterns) {
        if (this.urlInclusionPatterns == null) {
            setUrlInclusionPatterns(new java.util.ArrayList<String>(urlInclusionPatterns.length));
        }
        for (String ele : urlInclusionPatterns) {
            this.urlInclusionPatterns.add(ele);
        }
        return this;
    }

    /**
     * @param urlInclusionPatterns
     *        A list of regular expression patterns to include certain URLs to crawl.
     * @return Returns a reference to this object so that method calls can be chained together.
     */
    public WebCrawlerConfiguration withUrlInclusionPatterns(java.util.Collection<String> urlInclusionPatterns) {
        setUrlInclusionPatterns(urlInclusionPatterns);
        return this;
    }

    /**
     * @return A list of regular expression patterns to exclude certain URLs from crawling.
     */
    public java.util.List<String> getUrlExclusionPatterns() {
        return urlExclusionPatterns;
    }

    /**
     * @param urlExclusionPatterns
     *        A list of regular expression patterns to exclude certain URLs from crawling. If a URL matches both an
     *        inclusion and an exclusion pattern, the exclusion pattern takes precedence.
     */
    public void setUrlExclusionPatterns(java.util.Collection<String> urlExclusionPatterns) {
        if (urlExclusionPatterns == null) {
            this.urlExclusionPatterns = null;
            return;
        }
        this.urlExclusionPatterns = new java.util.ArrayList<String>(urlExclusionPatterns);
    }

    /**
     * <b>NOTE:</b> This method appends the values to the existing list (if any). Use
     * {@link #setUrlExclusionPatterns(java.util.Collection)} or {@link #withUrlExclusionPatterns(java.util.Collection)}
     * if you want to override the existing values.
     *
     * @param urlExclusionPatterns
     *        A list of regular expression patterns to exclude certain URLs from crawling.
     * @return Returns a reference to this object so that method calls can be chained together.
     */
    public WebCrawlerConfiguration withUrlExclusionPatterns(String... urlExclusionPatterns) {
        if (this.urlExclusionPatterns == null) {
            setUrlExclusionPatterns(new java.util.ArrayList<String>(urlExclusionPatterns.length));
        }
        for (String ele : urlExclusionPatterns) {
            this.urlExclusionPatterns.add(ele);
        }
        return this;
    }

    /**
     * @param urlExclusionPatterns
     *        A list of regular expression patterns to exclude certain URLs from crawling.
     * @return Returns a reference to this object so that method calls can be chained together.
     */
    public WebCrawlerConfiguration withUrlExclusionPatterns(java.util.Collection<String> urlExclusionPatterns) {
        setUrlExclusionPatterns(urlExclusionPatterns);
        return this;
    }

    /**
     * @param proxyConfiguration
     *        Configuration information required to connect to your internal websites via a web proxy.
     */
    public void setProxyConfiguration(ProxyConfiguration proxyConfiguration) {
        this.proxyConfiguration = proxyConfiguration;
    }

    /**
     * @return Configuration information required to connect to your internal websites via a web proxy.
     */
    public ProxyConfiguration getProxyConfiguration() {
        return this.proxyConfiguration;
    }

    /**
     * @param proxyConfiguration
     *        Configuration information required to connect to your internal websites via a web proxy.
     * @return Returns a reference to this object so that method calls can be chained together.
     */
    public WebCrawlerConfiguration withProxyConfiguration(ProxyConfiguration proxyConfiguration) {
        setProxyConfiguration(proxyConfiguration);
        return this;
    }

    /**
     * @param authenticationConfiguration
     *        Configuration information required to connect to websites using authentication.
     */
    public void setAuthenticationConfiguration(AuthenticationConfiguration authenticationConfiguration) {
        this.authenticationConfiguration = authenticationConfiguration;
    }

    /**
     * @return Configuration information required to connect to websites using authentication.
     */
    public AuthenticationConfiguration getAuthenticationConfiguration() {
        return this.authenticationConfiguration;
    }

    /**
     * @param authenticationConfiguration
     *        Configuration information required to connect to websites using authentication.
     * @return Returns a reference to this object so that method calls can be chained together.
     */
    public WebCrawlerConfiguration withAuthenticationConfiguration(AuthenticationConfiguration authenticationConfiguration) {
        setAuthenticationConfiguration(authenticationConfiguration);
        return this;
    }

    /**
     * Returns a string representation of this object. This is useful for testing and debugging. Sensitive data will be
     * redacted from this string using a placeholder value.
     *
     * @return A string representation of this object.
     *
     * @see java.lang.Object#toString()
     */
    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder();
        sb.append("{");
        if (getUrls() != null)
            sb.append("Urls: ").append(getUrls()).append(",");
        if (getCrawlDepth() != null)
            sb.append("CrawlDepth: ").append(getCrawlDepth()).append(",");
        if (getMaxLinksPerPage() != null)
            sb.append("MaxLinksPerPage: ").append(getMaxLinksPerPage()).append(",");
        if (getMaxContentSizePerPageInMegaBytes() != null)
            sb.append("MaxContentSizePerPageInMegaBytes: ").append(getMaxContentSizePerPageInMegaBytes()).append(",");
        if (getMaxUrlsPerMinuteCrawlRate() != null)
            sb.append("MaxUrlsPerMinuteCrawlRate: ").append(getMaxUrlsPerMinuteCrawlRate()).append(",");
        if (getUrlInclusionPatterns() != null)
            sb.append("UrlInclusionPatterns: ").append(getUrlInclusionPatterns()).append(",");
        if (getUrlExclusionPatterns() != null)
            sb.append("UrlExclusionPatterns: ").append(getUrlExclusionPatterns()).append(",");
        if (getProxyConfiguration() != null)
            sb.append("ProxyConfiguration: ").append(getProxyConfiguration()).append(",");
        if (getAuthenticationConfiguration() != null)
            sb.append("AuthenticationConfiguration: ").append(getAuthenticationConfiguration());
        sb.append("}");
        return sb.toString();
    }

    @Override
    public boolean equals(Object obj) {
        if (this == obj)
            return true;
        if (obj == null)
            return false;

        if (obj instanceof WebCrawlerConfiguration == false)
            return false;
        WebCrawlerConfiguration other = (WebCrawlerConfiguration) obj;
        if (other.getUrls() == null ^ this.getUrls() == null)
            return false;
        if (other.getUrls() != null && other.getUrls().equals(this.getUrls()) == false)
            return false;
        if (other.getCrawlDepth() == null ^ this.getCrawlDepth() == null)
            return false;
        if (other.getCrawlDepth() != null && other.getCrawlDepth().equals(this.getCrawlDepth()) == false)
            return false;
        if (other.getMaxLinksPerPage() == null ^ this.getMaxLinksPerPage() == null)
            return false;
        if (other.getMaxLinksPerPage() != null && other.getMaxLinksPerPage().equals(this.getMaxLinksPerPage()) == false)
            return false;
        if (other.getMaxContentSizePerPageInMegaBytes() == null ^ this.getMaxContentSizePerPageInMegaBytes() == null)
            return false;
        if (other.getMaxContentSizePerPageInMegaBytes() != null
                && other.getMaxContentSizePerPageInMegaBytes().equals(this.getMaxContentSizePerPageInMegaBytes()) == false)
            return false;
        if (other.getMaxUrlsPerMinuteCrawlRate() == null ^ this.getMaxUrlsPerMinuteCrawlRate() == null)
            return false;
        if (other.getMaxUrlsPerMinuteCrawlRate() != null && other.getMaxUrlsPerMinuteCrawlRate().equals(this.getMaxUrlsPerMinuteCrawlRate()) == false)
            return false;
        if (other.getUrlInclusionPatterns() == null ^ this.getUrlInclusionPatterns() == null)
            return false;
        if (other.getUrlInclusionPatterns() != null && other.getUrlInclusionPatterns().equals(this.getUrlInclusionPatterns()) == false)
            return false;
        if (other.getUrlExclusionPatterns() == null ^ this.getUrlExclusionPatterns() == null)
            return false;
        if (other.getUrlExclusionPatterns() != null && other.getUrlExclusionPatterns().equals(this.getUrlExclusionPatterns()) == false)
            return false;
        if (other.getProxyConfiguration() == null ^ this.getProxyConfiguration() == null)
            return false;
        if (other.getProxyConfiguration() != null && other.getProxyConfiguration().equals(this.getProxyConfiguration()) == false)
            return false;
        if (other.getAuthenticationConfiguration() == null ^ this.getAuthenticationConfiguration() == null)
            return false;
        if (other.getAuthenticationConfiguration() != null && other.getAuthenticationConfiguration().equals(this.getAuthenticationConfiguration()) == false)
            return false;
        return true;
    }

    @Override
    public int hashCode() {
        final int prime = 31;
        int hashCode = 1;

        hashCode = prime * hashCode + ((getUrls() == null) ? 0 : getUrls().hashCode());
        hashCode = prime * hashCode + ((getCrawlDepth() == null) ? 0 : getCrawlDepth().hashCode());
        hashCode = prime * hashCode + ((getMaxLinksPerPage() == null) ? 0 : getMaxLinksPerPage().hashCode());
        hashCode = prime * hashCode + ((getMaxContentSizePerPageInMegaBytes() == null) ? 0 : getMaxContentSizePerPageInMegaBytes().hashCode());
        hashCode = prime * hashCode + ((getMaxUrlsPerMinuteCrawlRate() == null) ? 0 : getMaxUrlsPerMinuteCrawlRate().hashCode());
        hashCode = prime * hashCode + ((getUrlInclusionPatterns() == null) ? 0 : getUrlInclusionPatterns().hashCode());
        hashCode = prime * hashCode + ((getUrlExclusionPatterns() == null) ? 0 : getUrlExclusionPatterns().hashCode());
        hashCode = prime * hashCode + ((getProxyConfiguration() == null) ? 0 : getProxyConfiguration().hashCode());
        hashCode = prime * hashCode + ((getAuthenticationConfiguration() == null) ? 0 : getAuthenticationConfiguration().hashCode());
        return hashCode;
    }

    @Override
    public WebCrawlerConfiguration clone() {
        try {
            return (WebCrawlerConfiguration) super.clone();
        } catch (CloneNotSupportedException e) {
            throw new IllegalStateException("Got a CloneNotSupportedException from Object.clone() " + "even though we're Cloneable!", e);
        }
    }

    @com.amazonaws.annotation.SdkInternalApi
    @Override
    public void marshall(ProtocolMarshaller protocolMarshaller) {
        com.amazonaws.services.kendra.model.transform.WebCrawlerConfigurationMarshaller.getInstance().marshall(this, protocolMarshaller);
    }
}
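For orientation, here is a minimal, hypothetical usage sketch (not part of the generated source above) showing how the fluent with* methods chain together. It assumes Urls and SeedUrlConfiguration from the same com.amazonaws.services.kendra.model package; the seed URL, crawler mode string, limits, and exclusion pattern are illustrative values only, not defaults or recommendations from this file.

import com.amazonaws.services.kendra.model.SeedUrlConfiguration;
import com.amazonaws.services.kendra.model.Urls;
import com.amazonaws.services.kendra.model.WebCrawlerConfiguration;

public class WebCrawlerConfigurationExample {
    public static void main(String[] args) {
        // Hypothetical seed URLs to start crawling from; assumes the SeedUrlConfiguration
        // model class and its withSeedUrls / withWebCrawlerMode accessors.
        Urls urls = new Urls()
                .withSeedUrlConfiguration(new SeedUrlConfiguration()
                        .withSeedUrls("https://docs.example.com")   // illustrative seed URL
                        .withWebCrawlerMode("SUBDOMAINS"));         // assumed mode value

        // Chain the fluent with* methods defined above to assemble the configuration.
        WebCrawlerConfiguration config = new WebCrawlerConfiguration()
                .withUrls(urls)
                .withCrawlDepth(2)                          // seed page is depth 1, its links are depth 2
                .withMaxLinksPerPage(100)                   // matches the documented default
                .withMaxContentSizePerPageInMegaBytes(50f)  // matches the documented default
                .withMaxUrlsPerMinuteCrawlRate(300)         // matches the documented default
                .withUrlExclusionPatterns(".*\\.pdf$");     // example exclusion pattern

        // toString() prints only the fields that were set.
        System.out.println(config);
    }
}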




