// com.amazonaws.services.sagemaker.model.S3DataSource Maven / Gradle / Ivy
//
// Go to download
/*
 * Copyright 2015-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 * 
 * Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance with
 * the License. A copy of the License is located at
 * 
 * http://aws.amazon.com/apache2.0
 * 
 * or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
 * CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions
 * and limitations under the License.
 */
package com.amazonaws.services.sagemaker.model;

import java.io.Serializable;
import javax.annotation.Generated;
import com.amazonaws.protocol.StructuredPojo;
import com.amazonaws.protocol.ProtocolMarshaller;

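/**
 * <p>
 * Describes the S3 data source.
 * </p>
 * 
 * @see <a href="http://docs.aws.amazon.com/goto/WebAPI/sagemaker-2017-07-24/S3DataSource" target="_top">AWS API
 *      Documentation</a>
 */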
@Generated("com.amazonaws:aws-java-sdk-code-generator")
public class S3DataSource implements Serializable, Cloneable, StructuredPojo {

    /**
     * 
     * If you choose S3Prefix, S3Uri identifies a key name prefix. Amazon SageMaker uses all
     * objects that match the specified key name prefix for model training.
     * 
     * 
     * If you choose ManifestFile, S3Uri identifies an object that is a manifest file
     * containing a list of object keys that you want Amazon SageMaker to use for model training.
     * 
     * 
     * If you choose AugmentedManifestFile, S3Uri identifies an object that is an augmented manifest file
     * in JSON lines format. This file contains the data you want to use for model training.
     * AugmentedManifestFile can only be used if the Channel's input mode is Pipe.
     * 
     */
    private String s3DataType; // string form; withS3DataType(S3DataType) stores the enum's toString() here
    /**
     * 
     * Depending on the value specified for the S3DataType, identifies either a key name prefix or a
     * manifest. For example:
     * 
     * 
     * 
     * 
     * A key name prefix might look like this: s3://bucketname/exampleprefix
     * 
     * 
     * 
     * 
     * A manifest might look like this: s3://bucketname/example.manifest
     * 
     * 
     * A manifest is an S3 object which is a JSON file consisting of an array of elements. The first element is a prefix
     * which is followed by one or more suffixes. SageMaker appends the suffix elements to the prefix to get a full set
     * of S3Uri. Note that the prefix must be a valid non-empty S3Uri that precludes users
     * from specifying a manifest whose individual S3Uri is sourced from different S3 buckets.
     * 
     * 
     * The following code example shows a valid manifest format:
     * 
     * 
     * [ {"prefix": "s3://customer_bucket/some/prefix/"},
     * 
     * 
     *  "relative/path/to/custdata-1",
     * 
     * 
     *  "relative/path/custdata-2",
     * 
     * 
     *  ...
     * 
     * 
     *  "relative/path/custdata-N"
     * 
     * 
     * ]
     * 
     * 
     * This JSON is equivalent to the following S3Uri list:
     * 
     * 
     * s3://customer_bucket/some/prefix/relative/path/to/custdata-1
     * 
     * 
     * s3://customer_bucket/some/prefix/relative/path/custdata-2
     * 
     * 
     * ...
     * 
     * 
     * s3://customer_bucket/some/prefix/relative/path/custdata-N
     * 
     * 
     * The complete set of S3Uri in this manifest is the input data for the channel for this data source.
     * The object that each S3Uri points to must be readable by the IAM role that Amazon SageMaker uses to
     * perform tasks on your behalf.
     * 
     * 
     * 
     */
    private String s3Uri; // setS3Uri assigns the caller's string here without parsing or validating it
    /**
     * 
     * If you want Amazon SageMaker to replicate the entire dataset on each ML compute instance that is launched for
     * model training, specify FullyReplicated.
     * 
     * 
     * If you want Amazon SageMaker to replicate a subset of data on each ML compute instance that is launched for model
     * training, specify ShardedByS3Key. If there are n ML compute instances launched for a training
     * job, each instance gets approximately 1/n of the number of S3 objects. In this case, model training on
     * each machine uses only the subset of training data.
     * 
     * 
     * Don't choose more ML compute instances for training than available S3 objects. If you do, some nodes won't get
     * any data and you will pay for nodes that aren't getting any training data. This applies in both File and Pipe
     * modes. Keep this in mind when developing algorithms.
     * 
     * 
     * In distributed training, where you use multiple ML compute EC2 instances, you might choose
     * ShardedByS3Key. If the algorithm requires copying training data to the ML storage volume (when
     * TrainingInputMode is set to File), this copies 1/n of the number of objects.
     * 
     */
    private String s3DataDistributionType; // plain String; setS3DataDistributionType assigns it unchecked
    /**
     * 
     * A list of one or more attribute names to use that are found in a specified augmented manifest file.
     * 
     */
    // Element type restored: the list holds attribute names, so it is parameterized with String
    // instead of being left as a raw java.util.List (raw types disable compile-time element checks).
    private java.util.List<String> attributeNames;

    /**
     * 
     * If you choose S3Prefix, S3Uri identifies a key name prefix. Amazon SageMaker uses all
     * objects that match the specified key name prefix for model training.
     * 
     * 
     * If you choose ManifestFile, S3Uri identifies an object that is a manifest file
     * containing a list of object keys that you want Amazon SageMaker to use for model training.
     * 
     * 
     * If you choose AugmentedManifestFile, S3Uri identifies an object that is an augmented manifest file
     * in JSON lines format. This file contains the data you want to use for model training.
     * AugmentedManifestFile can only be used if the Channel's input mode is Pipe.
     * 
     * 
     * @param s3DataType
     *        If you choose S3Prefix, S3Uri identifies a key name prefix. Amazon SageMaker
     *        uses all objects that match the specified key name prefix for model training. 
     *        
     *        If you choose ManifestFile, S3Uri identifies an object that is a manifest file
     *        containing a list of object keys that you want Amazon SageMaker to use for model training.
     *        
     *        
     *        If you choose AugmentedManifestFile, S3Uri identifies an object that is an augmented manifest
     *        file in JSON lines format. This file contains the data you want to use for model training.
     *        AugmentedManifestFile can only be used if the Channel's input mode is Pipe.
     * @see S3DataType
     */

    public void setS3DataType(String s3DataType) {
        // Stores the caller's string as-is; this method does not check it against the S3DataType enum.
        this.s3DataType = s3DataType;
    }

    /**
     * 

     * If you choose S3Prefix, S3Uri identifies a key name prefix. Amazon SageMaker uses all
     * objects that match the specified key name prefix for model training.
     * 
     * 
     * If you choose ManifestFile, S3Uri identifies an object that is a manifest file
     * containing a list of object keys that you want Amazon SageMaker to use for model training.
     * 
     * 
     * If you choose AugmentedManifestFile, S3Uri identifies an object that is an augmented manifest file
     * in JSON lines format. This file contains the data you want to use for model training.
     * AugmentedManifestFile can only be used if the Channel's input mode is Pipe.
     * 
     * 
     * @return If you choose S3Prefix, S3Uri identifies a key name prefix. Amazon SageMaker
     *         uses all objects that match the specified key name prefix for model training. 
     *         
     *         If you choose ManifestFile, S3Uri identifies an object that is a manifest file
     *         containing a list of object keys that you want Amazon SageMaker to use for model training.
     *         
     *         
     *         If you choose AugmentedManifestFile, S3Uri identifies an object that is an augmented
     *         manifest file in JSON lines format. This file contains the data you want to use for model training.
     *         AugmentedManifestFile can only be used if the Channel's input mode is Pipe.
     * @see S3DataType
     */

    public String getS3DataType() {
        // Hands back the string currently held in the field; no conversion to S3DataType happens here.
        return s3DataType;
    }

    /**
     * 

     * If you choose S3Prefix, S3Uri identifies a key name prefix. Amazon SageMaker uses all
     * objects that match the specified key name prefix for model training.
     * 
     * 
     * If you choose ManifestFile, S3Uri identifies an object that is a manifest file
     * containing a list of object keys that you want Amazon SageMaker to use for model training.
     * 
     * 
     * If you choose AugmentedManifestFile, S3Uri identifies an object that is an augmented manifest file
     * in JSON lines format. This file contains the data you want to use for model training.
     * AugmentedManifestFile can only be used if the Channel's input mode is Pipe.
     * 
     * 
     * @param s3DataType
     *        If you choose S3Prefix, S3Uri identifies a key name prefix. Amazon SageMaker
     *        uses all objects that match the specified key name prefix for model training. 
     *        
     *        If you choose ManifestFile, S3Uri identifies an object that is a manifest file
     *        containing a list of object keys that you want Amazon SageMaker to use for model training.
     *        
     *        
     *        If you choose AugmentedManifestFile, S3Uri identifies an object that is an augmented manifest
     *        file in JSON lines format. This file contains the data you want to use for model training.
     *        AugmentedManifestFile can only be used if the Channel's input mode is Pipe.
     * @return Returns a reference to this object so that method calls can be chained together.
     * @see S3DataType
     */

    public S3DataSource withS3DataType(String s3DataType) {
        // Delegates to the setter; because that method is overridable, a subclass override
        // also takes effect when this fluent form is used.
        setS3DataType(s3DataType);
        // Returning this instance is what allows calls to be chained.
        return this;
    }

    /**
     * 

     * If you choose S3Prefix, S3Uri identifies a key name prefix. Amazon SageMaker uses all
     * objects that match the specified key name prefix for model training.
     * 
     * 
     * If you choose ManifestFile, S3Uri identifies an object that is a manifest file
     * containing a list of object keys that you want Amazon SageMaker to use for model training.
     * 
     * 
     * If you choose AugmentedManifestFile, S3Uri identifies an object that is an augmented manifest file
     * in JSON lines format. This file contains the data you want to use for model training.
     * AugmentedManifestFile can only be used if the Channel's input mode is Pipe.
     * 
     * 
     * @param s3DataType
     *        If you choose S3Prefix, S3Uri identifies a key name prefix. Amazon SageMaker
     *        uses all objects that match the specified key name prefix for model training. 
     *        
     *        If you choose ManifestFile, S3Uri identifies an object that is a manifest file
     *        containing a list of object keys that you want Amazon SageMaker to use for model training.
     *        
     *        
     *        If you choose AugmentedManifestFile, S3Uri identifies an object that is an augmented manifest
     *        file in JSON lines format. This file contains the data you want to use for model training.
     *        AugmentedManifestFile can only be used if the Channel's input mode is Pipe.
     * @return Returns a reference to this object so that method calls can be chained together.
     * @see S3DataType
     */

    public S3DataSource withS3DataType(S3DataType s3DataType) {
        // The field holds the string form of the enum constant. A null argument now clears the
        // field, matching what withS3DataType((String) null) does, instead of failing with a
        // NullPointerException on the toString() call. Non-null behavior is unchanged.
        this.s3DataType = (s3DataType == null) ? null : s3DataType.toString();
        // Returning this instance is what allows calls to be chained.
        return this;
    }

    /**
     * 

     * Depending on the value specified for the S3DataType, identifies either a key name prefix or a
     * manifest. For example:
     * 
     * 
     * 
     * 
     * A key name prefix might look like this: s3://bucketname/exampleprefix
     * 
     * 
     * 
     * 
     * A manifest might look like this: s3://bucketname/example.manifest
     * 
     * 
     * A manifest is an S3 object which is a JSON file consisting of an array of elements. The first element is a prefix
     * which is followed by one or more suffixes. SageMaker appends the suffix elements to the prefix to get a full set
     * of S3Uri. Note that the prefix must be a valid non-empty S3Uri that precludes users
     * from specifying a manifest whose individual S3Uri is sourced from different S3 buckets.
     * 
     * 
     * The following code example shows a valid manifest format:
     * 
     * 
     * [ {"prefix": "s3://customer_bucket/some/prefix/"},
     * 
     * 
     *  "relative/path/to/custdata-1",
     * 
     * 
     *  "relative/path/custdata-2",
     * 
     * 
     *  ...
     * 
     * 
     *  "relative/path/custdata-N"
     * 
     * 
     * ]
     * 
     * 
     * This JSON is equivalent to the following S3Uri list:
     * 
     * 
     * s3://customer_bucket/some/prefix/relative/path/to/custdata-1
     * 
     * 
     * s3://customer_bucket/some/prefix/relative/path/custdata-2
     * 
     * 
     * ...
     * 
     * 
     * s3://customer_bucket/some/prefix/relative/path/custdata-N
     * 
     * 
     * The complete set of S3Uri in this manifest is the input data for the channel for this data source.
     * The object that each S3Uri points to must be readable by the IAM role that Amazon SageMaker uses to
     * perform tasks on your behalf.
     * 
     * 
     * 
     * 
     * @param s3Uri
     *        Depending on the value specified for the S3DataType, identifies either a key name prefix or a
     *        manifest. For example: 
     *        
     *        
     *        
     *        A key name prefix might look like this: s3://bucketname/exampleprefix
     *        
     *        
     *        
     *        
     *        A manifest might look like this: s3://bucketname/example.manifest
     *        
     *        
     *        A manifest is an S3 object which is a JSON file consisting of an array of elements. The first element is a
     *        prefix which is followed by one or more suffixes. SageMaker appends the suffix elements to the prefix to
     *        get a full set of S3Uri. Note that the prefix must be a valid non-empty S3Uri
     *        that precludes users from specifying a manifest whose individual S3Uri is sourced from
     *        different S3 buckets.
     *        
     *        
     *        The following code example shows a valid manifest format:
     *        
     *        
     *        [ {"prefix": "s3://customer_bucket/some/prefix/"},
     *        
     *        
     *         "relative/path/to/custdata-1",
     *        
     *        
     *         "relative/path/custdata-2",
     *        
     *        
     *         ...
     *        
     *        
     *         "relative/path/custdata-N"
     *        
     *        
     *        ]
     *        
     *        
     *        This JSON is equivalent to the following S3Uri list:
     *        
     *        
     *        s3://customer_bucket/some/prefix/relative/path/to/custdata-1
     *        
     *        
     *        s3://customer_bucket/some/prefix/relative/path/custdata-2
     *        
     *        
     *        ...
     *        
     *        
     *        s3://customer_bucket/some/prefix/relative/path/custdata-N
     *        
     *        
     *        The complete set of S3Uri in this manifest is the input data for the channel for this data
     *        source. The object that each S3Uri points to must be readable by the IAM role that Amazon
     *        SageMaker uses to perform tasks on your behalf.
     *        
     *        
     */

    public void setS3Uri(String s3Uri) {
        // Plain assignment: the URI string is kept exactly as given, with no parsing or null check here.
        this.s3Uri = s3Uri;
    }

    /**
     * 
     * Depending on the value specified for the S3DataType, identifies either a key name prefix or a
     * manifest. For example:
     * 
     * 
     * 
     * 
     * A key name prefix might look like this: s3://bucketname/exampleprefix
     * 
     * 
     * 
     * 
     * A manifest might look like this: s3://bucketname/example.manifest
     * 
     * 
     * A manifest is an S3 object which is a JSON file consisting of an array of elements. The first element is a prefix
     * which is followed by one or more suffixes. SageMaker appends the suffix elements to the prefix to get a full set
     * of S3Uri. Note that the prefix must be a valid non-empty S3Uri that precludes users
     * from specifying a manifest whose individual S3Uri is sourced from different S3 buckets.
     * 
     * 
     * The following code example shows a valid manifest format:
     * 
     * 
     * [ {"prefix": "s3://customer_bucket/some/prefix/"},
     * 
     * 
     *  "relative/path/to/custdata-1",
     * 
     * 
     *  "relative/path/custdata-2",
     * 
     * 
     *  ...
     * 
     * 
     *  "relative/path/custdata-N"
     * 
     * 
     * ]
     * 
     * 
     * This JSON is equivalent to the following S3Uri list:
     * 
     * 
     * s3://customer_bucket/some/prefix/relative/path/to/custdata-1
     * 
     * 
     * s3://customer_bucket/some/prefix/relative/path/custdata-2
     * 
     * 
     * ...
     * 
     * 
     * s3://customer_bucket/some/prefix/relative/path/custdata-N
     * 
     * 
     * The complete set of S3Uri in this manifest is the input data for the channel for this data source.
     * The object that each S3Uri points to must be readable by the IAM role that Amazon SageMaker uses to
     * perform tasks on your behalf.
     * 
     * 
     * 
     * 
     * @return Depending on the value specified for the S3DataType, identifies either a key name prefix or
     *         a manifest. For example: 
     *         
     *         
     *         
     *         A key name prefix might look like this: s3://bucketname/exampleprefix
     *         
     *         
     *         
     *         
     *         A manifest might look like this: s3://bucketname/example.manifest
     *         
     *         
     *         A manifest is an S3 object which is a JSON file consisting of an array of elements. The first element is
     *         a prefix which is followed by one or more suffixes. SageMaker appends the suffix elements to the prefix
     *         to get a full set of S3Uri. Note that the prefix must be a valid non-empty
     *         S3Uri that precludes users from specifying a manifest whose individual S3Uri is
     *         sourced from different S3 buckets.
     *         
     *         
     *         The following code example shows a valid manifest format:
     *         
     *         
     *         [ {"prefix": "s3://customer_bucket/some/prefix/"},
     *         
     *         
     *          "relative/path/to/custdata-1",
     *         
     *         
     *          "relative/path/custdata-2",
     *         
     *         
     *          ...
     *         
     *         
     *          "relative/path/custdata-N"
     *         
     *         
     *         ]
     *         
     *         
     *         This JSON is equivalent to the following S3Uri list:
     *         
     *         
     *         s3://customer_bucket/some/prefix/relative/path/to/custdata-1
     *         
     *         
     *         s3://customer_bucket/some/prefix/relative/path/custdata-2
     *         
     *         
     *         ...
     *         
     *         
     *         s3://customer_bucket/some/prefix/relative/path/custdata-N
     *         
     *         
     *         The complete set of S3Uri in this manifest is the input data for the channel for this data
     *         source. The object that each S3Uri points to must be readable by the IAM role that Amazon
     *         SageMaker uses to perform tasks on your behalf.
     *         
     *         
     */

    public String getS3Uri() {
        // Returns the stored URI string unchanged (null if nothing has been assigned to the field).
        return s3Uri;
    }

    /**
     * 
     * Depending on the value specified for the S3DataType, identifies either a key name prefix or a
     * manifest. For example:
     * 
     * 
     * 
     * 
     * A key name prefix might look like this: s3://bucketname/exampleprefix
     * 
     * 
     * 
     * 
     * A manifest might look like this: s3://bucketname/example.manifest
     * 
     * 
     * A manifest is an S3 object which is a JSON file consisting of an array of elements. The first element is a prefix
     * which is followed by one or more suffixes. SageMaker appends the suffix elements to the prefix to get a full set
     * of S3Uri. Note that the prefix must be a valid non-empty S3Uri that precludes users
     * from specifying a manifest whose individual S3Uri is sourced from different S3 buckets.
     * 
     * 
     * The following code example shows a valid manifest format:
     * 
     * 
     * [ {"prefix": "s3://customer_bucket/some/prefix/"},
     * 
     * 
     *  "relative/path/to/custdata-1",
     * 
     * 
     *  "relative/path/custdata-2",
     * 
     * 
     *  ...
     * 
     * 
     *  "relative/path/custdata-N"
     * 
     * 
     * ]
     * 
     * 
     * This JSON is equivalent to the following S3Uri list:
     * 
     * 
     * s3://customer_bucket/some/prefix/relative/path/to/custdata-1
     * 
     * 
     * s3://customer_bucket/some/prefix/relative/path/custdata-2
     * 
     * 
     * ...
     * 
     * 
     * s3://customer_bucket/some/prefix/relative/path/custdata-N
     * 
     * 
     * The complete set of S3Uri in this manifest is the input data for the channel for this data source.
     * The object that each S3Uri points to must be readable by the IAM role that Amazon SageMaker uses to
     * perform tasks on your behalf.
     * 
     * 
     * 
     * 
     * @param s3Uri
     *        Depending on the value specified for the S3DataType, identifies either a key name prefix or a
     *        manifest. For example: 
     *        
     *        
     *        
     *        A key name prefix might look like this: s3://bucketname/exampleprefix
     *        
     *        
     *        
     *        
     *        A manifest might look like this: s3://bucketname/example.manifest
     *        
     *        
     *        A manifest is an S3 object which is a JSON file consisting of an array of elements. The first element is a
     *        prefix which is followed by one or more suffixes. SageMaker appends the suffix elements to the prefix to
     *        get a full set of S3Uri. Note that the prefix must be a valid non-empty S3Uri
     *        that precludes users from specifying a manifest whose individual S3Uri is sourced from
     *        different S3 buckets.
     *        
     *        
     *        The following code example shows a valid manifest format:
     *        
     *        
     *        [ {"prefix": "s3://customer_bucket/some/prefix/"},
     *        
     *        
     *         "relative/path/to/custdata-1",
     *        
     *        
     *         "relative/path/custdata-2",
     *        
     *        
     *         ...
     *        
     *        
     *         "relative/path/custdata-N"
     *        
     *        
     *        ]
     *        
     *        
     *        This JSON is equivalent to the following S3Uri list:
     *        
     *        
     *        s3://customer_bucket/some/prefix/relative/path/to/custdata-1
     *        
     *        
     *        s3://customer_bucket/some/prefix/relative/path/custdata-2
     *        
     *        
     *        ...
     *        
     *        
     *        s3://customer_bucket/some/prefix/relative/path/custdata-N
     *        
     *        
     *        The complete set of S3Uri in this manifest is the input data for the channel for this data
     *        source. The object that each S3Uri points to must be readable by the IAM role that Amazon
     *        SageMaker uses to perform tasks on your behalf.
     *        
     *        
     * @return Returns a reference to this object so that method calls can be chained together.
     */

    public S3DataSource withS3Uri(String s3Uri) {
        // Fluent variant of setS3Uri: the assignment itself is done by the (overridable) setter.
        setS3Uri(s3Uri);
        // Same instance is returned so further with...() calls can follow.
        return this;
    }

    /**
     * 
     * If you want Amazon SageMaker to replicate the entire dataset on each ML compute instance that is launched for
     * model training, specify FullyReplicated.
     * 
     * 
     * If you want Amazon SageMaker to replicate a subset of data on each ML compute instance that is launched for model
     * training, specify ShardedByS3Key. If there are n ML compute instances launched for a training
     * job, each instance gets approximately 1/n of the number of S3 objects. In this case, model training on
     * each machine uses only the subset of training data.
     * 
     * 
     * Don't choose more ML compute instances for training than available S3 objects. If you do, some nodes won't get
     * any data and you will pay for nodes that aren't getting any training data. This applies in both File and Pipe
     * modes. Keep this in mind when developing algorithms.
     * 
     * 
     * In distributed training, where you use multiple ML compute EC2 instances, you might choose
     * ShardedByS3Key. If the algorithm requires copying training data to the ML storage volume (when
     * TrainingInputMode is set to File), this copies 1/n of the number of objects.
     * 
     * 
     * @param s3DataDistributionType
     *        If you want Amazon SageMaker to replicate the entire dataset on each ML compute instance that is launched
     *        for model training, specify FullyReplicated. 
     *        
     *        If you want Amazon SageMaker to replicate a subset of data on each ML compute instance that is launched
     *        for model training, specify ShardedByS3Key. If there are n ML compute instances
     *        launched for a training job, each instance gets approximately 1/n of the number of S3 objects. In
     *        this case, model training on each machine uses only the subset of training data.
     *        
     *        
     *        Don't choose more ML compute instances for training than available S3 objects. If you do, some nodes won't
     *        get any data and you will pay for nodes that aren't getting any training data. This applies in both File
     *        and Pipe modes. Keep this in mind when developing algorithms.
     *        
     *        
     *        In distributed training, where you use multiple ML compute EC2 instances, you might choose
     *        ShardedByS3Key. If the algorithm requires copying training data to the ML storage volume
     *        (when TrainingInputMode is set to File), this copies 1/n of the number of
     *        objects.
     * @see S3DataDistribution
     */

    public void setS3DataDistributionType(String s3DataDistributionType) {
        // The string is stored verbatim; nothing in this method restricts it to the documented values.
        this.s3DataDistributionType = s3DataDistributionType;
    }

    /**
     * 

     * If you want Amazon SageMaker to replicate the entire dataset on each ML compute instance that is launched for
     * model training, specify FullyReplicated.
     * 
     * 
     * If you want Amazon SageMaker to replicate a subset of data on each ML compute instance that is launched for model
     * training, specify ShardedByS3Key. If there are n ML compute instances launched for a training
     * job, each instance gets approximately 1/n of the number of S3 objects. In this case, model training on
     * each machine uses only the subset of training data.
     * 
     * 
     * Don't choose more ML compute instances for training than available S3 objects. If you do, some nodes won't get
     * any data and you will pay for nodes that aren't getting any training data. This applies in both File and Pipe
     * modes. Keep this in mind when developing algorithms.
     * 
     * 
     * In distributed training, where you use multiple ML compute EC2 instances, you might choose
     * ShardedByS3Key. If the algorithm requires copying training data to the ML storage volume (when
     * TrainingInputMode is set to File), this copies 1/n of the number of objects.
     * 
     * 
     * @return If you want Amazon SageMaker to replicate the entire dataset on each ML compute instance that is launched
     *         for model training, specify FullyReplicated. 
     *         
     *         If you want Amazon SageMaker to replicate a subset of data on each ML compute instance that is launched
     *         for model training, specify ShardedByS3Key. If there are n ML compute instances
     *         launched for a training job, each instance gets approximately 1/n of the number of S3 objects. In
     *         this case, model training on each machine uses only the subset of training data.
     *         
     *         
     *         Don't choose more ML compute instances for training than available S3 objects. If you do, some nodes
     *         won't get any data and you will pay for nodes that aren't getting any training data. This applies in both
     *         File and Pipe modes. Keep this in mind when developing algorithms.
     *         
     *         
     *         In distributed training, where you use multiple ML compute EC2 instances, you might choose
     *         ShardedByS3Key. If the algorithm requires copying training data to the ML storage volume
     *         (when TrainingInputMode is set to File), this copies 1/n of the number
     *         of objects.
     * @see S3DataDistribution
     */

    public String getS3DataDistributionType() {
        // Reads the field directly and returns it as a String, exactly as it was stored.
        return s3DataDistributionType;
    }

    /**
     * 

     * If you want Amazon SageMaker to replicate the entire dataset on each ML compute instance that is launched for
     * model training, specify FullyReplicated.
     * 
     * 
     * If you want Amazon SageMaker to replicate a subset of data on each ML compute instance that is launched for model
     * training, specify ShardedByS3Key. If there are n ML compute instances launched for a training
     * job, each instance gets approximately 1/n of the number of S3 objects. In this case, model training on
     * each machine uses only the subset of training data.
     * 
     * 
     * Don't choose more ML compute instances for training than available S3 objects. If you do, some nodes won't get
     * any data and you will pay for nodes that aren't getting any training data. This applies in both File and Pipe
     * modes. Keep this in mind when developing algorithms.
     * 
     * 
     * In distributed training, where you use multiple ML compute EC2 instances, you might choose
     * ShardedByS3Key. If the algorithm requires copying training data to the ML storage volume (when
     * TrainingInputMode is set to File), this copies 1/n of the number of objects.
     * 
     * 
     * @param s3DataDistributionType
     *        If you want Amazon SageMaker to replicate the entire dataset on each ML compute instance that is launched
     *        for model training, specify FullyReplicated. 
     *        
     *        If you want Amazon SageMaker to replicate a subset of data on each ML compute instance that is launched
     *        for model training, specify ShardedByS3Key. If there are n ML compute instances
     *        launched for a training job, each instance gets approximately 1/n of the number of S3 objects. In
     *        this case, model training on each machine uses only the subset of training data.
     *        
     *        
     *        Don't choose more ML compute instances for training than available S3 objects. If you do, some nodes won't
     *        get any data and you will pay for nodes that aren't getting any training data. This applies in both File
     *        and Pipe modes. Keep this in mind when developing algorithms.
     *        
     *        
     *        In distributed training, where you use multiple ML compute EC2 instances, you might choose
     *        ShardedByS3Key. If the algorithm requires copying training data to the ML storage volume
     *        (when TrainingInputMode is set to File), this copies 1/n of the number of
     *        objects.
     * @return Returns a reference to this object so that method calls can be chained together.
     * @see S3DataDistribution
     */

    public S3DataSource withS3DataDistributionType(String s3DataDistributionType) {
        // Fluent variant: delegate to the setter, then return this instance so calls can be chained.
        this.setS3DataDistributionType(s3DataDistributionType);
        return this;
    }

    /**
     * 
     * If you want Amazon SageMaker to replicate the entire dataset on each ML compute instance that is launched for
     * model training, specify FullyReplicated.
     * 
     * 
     * If you want Amazon SageMaker to replicate a subset of data on each ML compute instance that is launched for model
     * training, specify ShardedByS3Key. If there are n ML compute instances launched for a training
     * job, each instance gets approximately 1/n of the number of S3 objects. In this case, model training on
     * each machine uses only the subset of training data.
     * 
     * 
     * Don't choose more ML compute instances for training than available S3 objects. If you do, some nodes won't get
     * any data and you will pay for nodes that aren't getting any training data. This applies in both File and Pipe
     * modes. Keep this in mind when developing algorithms.
     * 
     * 
     * In distributed training, where you use multiple ML compute EC2 instances, you might choose
     * ShardedByS3Key. If the algorithm requires copying training data to the ML storage volume (when
     * TrainingInputMode is set to File), this copies 1/n of the number of objects.
     * 
     * 
     * @param s3DataDistributionType
     *        If you want Amazon SageMaker to replicate the entire dataset on each ML compute instance that is launched
     *        for model training, specify FullyReplicated. 
     *        
     *        If you want Amazon SageMaker to replicate a subset of data on each ML compute instance that is launched
     *        for model training, specify ShardedByS3Key. If there are n ML compute instances
     *        launched for a training job, each instance gets approximately 1/n of the number of S3 objects. In
     *        this case, model training on each machine uses only the subset of training data.
     *        
     *        
     *        Don't choose more ML compute instances for training than available S3 objects. If you do, some nodes won't
     *        get any data and you will pay for nodes that aren't getting any training data. This applies in both File
     *        and Pipe modes. Keep this in mind when developing algorithms.
     *        
     *        
     *        In distributed training, where you use multiple ML compute EC2 instances, you might choose
     *        ShardedByS3Key. If the algorithm requires copying training data to the ML storage volume
     *        (when TrainingInputMode is set to File), this copies 1/n of the number of
     *        objects.
     * @return Returns a reference to this object so that method calls can be chained together.
     * @see S3DataDistribution
     */

    public S3DataSource withS3DataDistributionType(S3DataDistribution s3DataDistributionType) {
        // Store the enum's string form. A null enum clears the stored value rather than throwing a
        // NullPointerException from the toString() call.
        this.s3DataDistributionType = (s3DataDistributionType == null) ? null : s3DataDistributionType.toString();
        return this;
    }

    /**
     * 
     * A list of one or more attribute names to use that are found in a specified augmented manifest file.
     * 
     * 
     * @return A list of one or more attribute names to use that are found in a specified augmented manifest file.
     */

    public java.util.List getAttributeNames() {
        // Hands back the stored list itself (no copy is made here); may be null when nothing was set.
        return this.attributeNames;
    }

    /**
     * 
     * A list of one or more attribute names to use that are found in a specified augmented manifest file.
     * 
     * 
     * @param attributeNames
     *        A list of one or more attribute names to use that are found in a specified augmented manifest file.
     */

    public void setAttributeNames(java.util.Collection attributeNames) {
        // Keep a private ArrayList copy of the supplied values; a null argument clears the field.
        this.attributeNames = (attributeNames != null) ? new java.util.ArrayList(attributeNames) : null;
    }

    /**
     * 
     * A list of one or more attribute names to use that are found in a specified augmented manifest file.
     * 
     * 
     * NOTE: This method appends the values to the existing list (if any). Use
     * {@link #setAttributeNames(java.util.Collection)} or {@link #withAttributeNames(java.util.Collection)} if you want
     * to override the existing values.
     * 
     * 
     * @param attributeNames
     *        A list of one or more attribute names to use that are found in a specified augmented manifest file.
     * @return Returns a reference to this object so that method calls can be chained together.
     */

    public S3DataSource withAttributeNames(String... attributeNames) {
        // Lazily create the backing list the first time values are appended.
        if (this.attributeNames == null) {
            setAttributeNames(new java.util.ArrayList(attributeNames.length));
        }
        // Append every supplied name, in order, to whatever is already stored.
        java.util.Collections.addAll(this.attributeNames, attributeNames);
        return this;
    }

    /**
     * 
     * A list of one or more attribute names to use that are found in a specified augmented manifest file.
     * 
     * 
     * @param attributeNames
     *        A list of one or more attribute names to use that are found in a specified augmented manifest file.
     * @return Returns a reference to this object so that method calls can be chained together.
     */

    public S3DataSource withAttributeNames(java.util.Collection attributeNames) {
        // Fluent variant: replace the stored values through the setter, then return this instance.
        this.setAttributeNames(attributeNames);
        return this;
    }

    /**
     * Returns a string representation of this object. This is useful for testing and debugging. Sensitive data will be
     * redacted from this string using a placeholder value.
     *
     * @return A string representation of this object.
     *
     * @see java.lang.Object#toString()
     */
    @Override
    public String toString() {
        final String dataType = getS3DataType();
        final String uri = getS3Uri();
        final String distributionType = getS3DataDistributionType();
        final java.util.List names = getAttributeNames();

        // Only non-null properties are printed. Every property except the last is followed by a comma,
        // so a trailing comma remains when AttributeNames is absent.
        final StringBuilder text = new StringBuilder("{");
        if (dataType != null) {
            text.append("S3DataType: ").append(dataType).append(",");
        }
        if (uri != null) {
            text.append("S3Uri: ").append(uri).append(",");
        }
        if (distributionType != null) {
            text.append("S3DataDistributionType: ").append(distributionType).append(",");
        }
        if (names != null) {
            text.append("AttributeNames: ").append(names);
        }
        return text.append("}").toString();
    }

    @Override
    public boolean equals(Object obj) {
        if (obj == this) {
            return true;
        }
        // instanceof is false for null, so this single test also rejects a null argument.
        if (!(obj instanceof S3DataSource)) {
            return false;
        }
        final S3DataSource that = (S3DataSource) obj;

        // A property matches when both sides are null, or when the other side's value equals ours.
        final String thatDataType = that.getS3DataType();
        final String thisDataType = this.getS3DataType();
        if (thatDataType == null ? thisDataType != null : !thatDataType.equals(thisDataType)) {
            return false;
        }

        final String thatUri = that.getS3Uri();
        final String thisUri = this.getS3Uri();
        if (thatUri == null ? thisUri != null : !thatUri.equals(thisUri)) {
            return false;
        }

        final String thatDistribution = that.getS3DataDistributionType();
        final String thisDistribution = this.getS3DataDistributionType();
        if (thatDistribution == null ? thisDistribution != null : !thatDistribution.equals(thisDistribution)) {
            return false;
        }

        final java.util.List thatNames = that.getAttributeNames();
        final java.util.List thisNames = this.getAttributeNames();
        return thatNames == null ? thisNames == null : thatNames.equals(thisNames);
    }

    @Override
    public int hashCode() {
        // Combines the four properties, in declaration order, using the 31-based recurrence that
        // Arrays.hashCode implements (seed 1, a null element contributes 0).
        return java.util.Arrays.hashCode(new Object[] {
                getS3DataType(),
                getS3Uri(),
                getS3DataDistributionType(),
                getAttributeNames()
        });
    }

    @Override
    public S3DataSource clone() {
        try {
            final S3DataSource copy = (S3DataSource) super.clone();
            // Object.clone() copies field references only. Without a fresh list the copy and the original
            // would share one mutable attribute-name list, so appending through either would alter both.
            if (this.attributeNames != null) {
                copy.attributeNames = new java.util.ArrayList(this.attributeNames);
            }
            return copy;
        } catch (CloneNotSupportedException e) {
            // Not expected in practice; the message below states the class is Cloneable.
            throw new IllegalStateException("Got a CloneNotSupportedException from Object.clone() " + "even though we're Cloneable!", e);
        }
    }

    @com.amazonaws.annotation.SdkInternalApi
    @Override
    public void marshall(ProtocolMarshaller protocolMarshaller) {
        // Hand this object and the supplied protocol marshaller to the S3DataSourceMarshaller instance.
        com.amazonaws.services.sagemaker.model.transform.S3DataSourceMarshaller
                .getInstance()
                .marshall(this, protocolMarshaller);
    }
}