
// com.amazonaws.services.sagemaker.model.S3DataSource (Maven / Gradle / Ivy)
/*
* Copyright 2015-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance with
* the License. A copy of the License is located at
*
* http://aws.amazon.com/apache2.0
*
* or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
* CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions
* and limitations under the License.
*/
package com.amazonaws.services.sagemaker.model;
import java.io.Serializable;
import javax.annotation.Generated;
import com.amazonaws.protocol.StructuredPojo;
import com.amazonaws.protocol.ProtocolMarshaller;
/**
 * <p>
 * Describes the S3 data source.
 * </p>
 *
 * @see "AWS API Documentation"
 */
@Generated("com.amazonaws:aws-java-sdk-code-generator")
public class S3DataSource implements Serializable, Cloneable, StructuredPojo {
/**
 * <p>
 * If you choose <code>S3Prefix</code>, <code>S3Uri</code> identifies a key name prefix. Amazon SageMaker uses all
 * objects that match the specified key name prefix for model training.
 * </p>
 * <p>
 * If you choose <code>ManifestFile</code>, <code>S3Uri</code> identifies an object that is a manifest file
 * containing a list of object keys that you want Amazon SageMaker to use for model training.
 * </p>
 * <p>
 * If you choose <code>AugmentedManifestFile</code>, S3Uri identifies an object that is an augmented manifest file
 * in JSON lines format. This file contains the data you want to use for model training.
 * <code>AugmentedManifestFile</code> can only be used if the Channel's input mode is <code>Pipe</code>.
 * </p>
 */
private String s3DataType;
/**
 * <p>
 * Depending on the value specified for the <code>S3DataType</code>, identifies either a key name prefix or a
 * manifest. For example:
 * </p>
 * <ul>
 * <li>
 * <p>
 * A key name prefix might look like this: <code>s3://bucketname/exampleprefix</code>
 * </p>
 * </li>
 * <li>
 * <p>
 * A manifest might look like this: <code>s3://bucketname/example.manifest</code>
 * </p>
 * <p>
 * A manifest is an S3 object which is a JSON file consisting of an array of elements. The first element is a prefix
 * which is followed by one or more suffixes. SageMaker appends the suffix elements to the prefix to get a full set
 * of <code>S3Uri</code>. Note that the prefix must be a valid non-empty <code>S3Uri</code> that precludes users
 * from specifying a manifest whose individual <code>S3Uri</code> is sourced from different S3 buckets.
 * </p>
 * <p>
 * The following code example shows a valid manifest format:
 * </p>
 * <p>
 * <code>[ {"prefix": "s3://customer_bucket/some/prefix/"},</code>
 * </p>
 * <p>
 * <code> "relative/path/to/custdata-1",</code>
 * </p>
 * <p>
 * <code> "relative/path/custdata-2",</code>
 * </p>
 * <p>
 * <code> ...</code>
 * </p>
 * <p>
 * <code> "relative/path/custdata-N"</code>
 * </p>
 * <p>
 * <code>]</code>
 * </p>
 * <p>
 * This JSON is equivalent to the following <code>S3Uri</code> list:
 * </p>
 * <p>
 * <code>s3://customer_bucket/some/prefix/relative/path/to/custdata-1</code>
 * </p>
 * <p>
 * <code>s3://customer_bucket/some/prefix/relative/path/custdata-2</code>
 * </p>
 * <p>
 * <code>...</code>
 * </p>
 * <p>
 * <code>s3://customer_bucket/some/prefix/relative/path/custdata-N</code>
 * </p>
 * <p>
 * The complete set of <code>S3Uri</code> in this manifest is the input data for the channel for this data source.
 * The object that each <code>S3Uri</code> points to must be readable by the IAM role that Amazon SageMaker uses to
 * perform tasks on your behalf.
 * </p>
 * </li>
 * </ul>
 */
private String s3Uri;
/**
 * <p>
 * If you want Amazon SageMaker to replicate the entire dataset on each ML compute instance that is launched for
 * model training, specify <code>FullyReplicated</code>.
 * </p>
 * <p>
 * If you want Amazon SageMaker to replicate a subset of data on each ML compute instance that is launched for model
 * training, specify <code>ShardedByS3Key</code>. If there are <i>n</i> ML compute instances launched for a training
 * job, each instance gets approximately 1/<i>n</i> of the number of S3 objects. In this case, model training on
 * each machine uses only the subset of training data.
 * </p>
 * <p>
 * Don't choose more ML compute instances for training than available S3 objects. If you do, some nodes won't get
 * any data and you will pay for nodes that aren't getting any training data. This applies in both File and Pipe
 * modes. Keep this in mind when developing algorithms.
 * </p>
 * <p>
 * In distributed training, where you use multiple ML compute EC2 instances, you might choose
 * <code>ShardedByS3Key</code>. If the algorithm requires copying training data to the ML storage volume (when
 * <code>TrainingInputMode</code> is set to <code>File</code>), this copies 1/<i>n</i> of the number of objects.
 * </p>
 */
private String s3DataDistributionType;
/**
 * <p>
 * A list of one or more attribute names to use that are found in a specified augmented manifest file.
 * </p>
 * <p>
 * The element type is {@code String}; it is declared explicitly so the compiler checks what is stored here
 * instead of treating the list as a raw type. The field has no initializer, so it is {@code null} until a
 * setter or {@code with*} method assigns it.
 * </p>
 */
private java.util.List<String> attributeNames;
/**
*
* If you choose S3Prefix
, S3Uri
identifies a key name prefix. Amazon SageMaker uses all
* objects that match the specified key name prefix for model training.
*
*
* If you choose ManifestFile
, S3Uri
identifies an object that is a manifest file
* containing a list of object keys that you want Amazon SageMaker to use for model training.
*
*
* If you choose AugmentedManifestFile
, S3Uri identifies an object that is an augmented manifest file
* in JSON lines format. This file contains the data you want to use for model training.
* AugmentedManifestFile
can only be used if the Channel's input mode is Pipe
.
*
*
* @param s3DataType
* If you choose S3Prefix
, S3Uri
identifies a key name prefix. Amazon SageMaker
* uses all objects that match the specified key name prefix for model training.
*
* If you choose ManifestFile
, S3Uri
identifies an object that is a manifest file
* containing a list of object keys that you want Amazon SageMaker to use for model training.
*
*
* If you choose AugmentedManifestFile
, S3Uri identifies an object that is an augmented manifest
* file in JSON lines format. This file contains the data you want to use for model training.
* AugmentedManifestFile
can only be used if the Channel's input mode is Pipe
.
* @see S3DataType
*/
public void setS3DataType(String s3DataType) {
    // Plain assignment: the value is stored unchanged, with no validation and no null check.
    this.s3DataType = s3DataType;
}
/**
*
* If you choose S3Prefix
, S3Uri
identifies a key name prefix. Amazon SageMaker uses all
* objects that match the specified key name prefix for model training.
*
*
* If you choose ManifestFile
, S3Uri
identifies an object that is a manifest file
* containing a list of object keys that you want Amazon SageMaker to use for model training.
*
*
* If you choose AugmentedManifestFile
, S3Uri identifies an object that is an augmented manifest file
* in JSON lines format. This file contains the data you want to use for model training.
* AugmentedManifestFile
can only be used if the Channel's input mode is Pipe
.
*
*
* @return If you choose S3Prefix
, S3Uri
identifies a key name prefix. Amazon SageMaker
* uses all objects that match the specified key name prefix for model training.
*
* If you choose ManifestFile
, S3Uri
identifies an object that is a manifest file
* containing a list of object keys that you want Amazon SageMaker to use for model training.
*
*
* If you choose AugmentedManifestFile
, S3Uri identifies an object that is an augmented
* manifest file in JSON lines format. This file contains the data you want to use for model training.
* AugmentedManifestFile
can only be used if the Channel's input mode is Pipe
.
* @see S3DataType
*/
public String getS3DataType() {
    // Hands back whatever was last stored; null if nothing has been assigned yet.
    return s3DataType;
}
/**
*
* If you choose S3Prefix
, S3Uri
identifies a key name prefix. Amazon SageMaker uses all
* objects that match the specified key name prefix for model training.
*
*
* If you choose ManifestFile
, S3Uri
identifies an object that is a manifest file
* containing a list of object keys that you want Amazon SageMaker to use for model training.
*
*
* If you choose AugmentedManifestFile
, S3Uri identifies an object that is an augmented manifest file
* in JSON lines format. This file contains the data you want to use for model training.
* AugmentedManifestFile
can only be used if the Channel's input mode is Pipe
.
*
*
* @param s3DataType
* If you choose S3Prefix
, S3Uri
identifies a key name prefix. Amazon SageMaker
* uses all objects that match the specified key name prefix for model training.
*
* If you choose ManifestFile
, S3Uri
identifies an object that is a manifest file
* containing a list of object keys that you want Amazon SageMaker to use for model training.
*
*
* If you choose AugmentedManifestFile
, S3Uri identifies an object that is an augmented manifest
* file in JSON lines format. This file contains the data you want to use for model training.
* AugmentedManifestFile
can only be used if the Channel's input mode is Pipe
.
* @return Returns a reference to this object so that method calls can be chained together.
* @see S3DataType
*/
public S3DataSource withS3DataType(String s3DataType) {
    // Fluent variant of the setter: delegate the store, then return this instance for chaining.
    this.setS3DataType(s3DataType);
    return this;
}
/**
*
* If you choose S3Prefix
, S3Uri
identifies a key name prefix. Amazon SageMaker uses all
* objects that match the specified key name prefix for model training.
*
*
* If you choose ManifestFile
, S3Uri
identifies an object that is a manifest file
* containing a list of object keys that you want Amazon SageMaker to use for model training.
*
*
* If you choose AugmentedManifestFile
, S3Uri identifies an object that is an augmented manifest file
* in JSON lines format. This file contains the data you want to use for model training.
* AugmentedManifestFile
can only be used if the Channel's input mode is Pipe
.
*
*
* @param s3DataType
* If you choose S3Prefix
, S3Uri
identifies a key name prefix. Amazon SageMaker
* uses all objects that match the specified key name prefix for model training.
*
* If you choose ManifestFile
, S3Uri
identifies an object that is a manifest file
* containing a list of object keys that you want Amazon SageMaker to use for model training.
*
*
* If you choose AugmentedManifestFile
, S3Uri identifies an object that is an augmented manifest
* file in JSON lines format. This file contains the data you want to use for model training.
* AugmentedManifestFile
can only be used if the Channel's input mode is Pipe
.
* @return Returns a reference to this object so that method calls can be chained together.
* @see S3DataType
*/
public S3DataSource withS3DataType(S3DataType s3DataType) {
    // The field holds the enum's toString() form. There is no null guard, so a null
    // argument fails with a NullPointerException on the toString() call.
    final String typeAsString = s3DataType.toString();
    this.s3DataType = typeAsString;
    return this;
}
/**
*
* Depending on the value specified for the S3DataType
, identifies either a key name prefix or a
* manifest. For example:
*
*
* -
*
* A key name prefix might look like this: s3://bucketname/exampleprefix
*
*
* -
*
* A manifest might look like this: s3://bucketname/example.manifest
*
*
* A manifest is an S3 object which is a JSON file consisting of an array of elements. The first element is a prefix
* which is followed by one or more suffixes. SageMaker appends the suffix elements to the prefix to get a full set
* of S3Uri
. Note that the prefix must be a valid non-empty S3Uri
that precludes users
* from specifying a manifest whose individual S3Uri
is sourced from different S3 buckets.
*
*
* The following code example shows a valid manifest format:
*
*
* [ {"prefix": "s3://customer_bucket/some/prefix/"},
*
*
* "relative/path/to/custdata-1",
*
*
* "relative/path/custdata-2",
*
*
* ...
*
*
* "relative/path/custdata-N"
*
*
* ]
*
*
* This JSON is equivalent to the following S3Uri
list:
*
*
* s3://customer_bucket/some/prefix/relative/path/to/custdata-1
*
*
* s3://customer_bucket/some/prefix/relative/path/custdata-2
*
*
* ...
*
*
* s3://customer_bucket/some/prefix/relative/path/custdata-N
*
*
* The complete set of S3Uri
in this manifest is the input data for the channel for this data source.
* The object that each S3Uri
points to must be readable by the IAM role that Amazon SageMaker uses to
* perform tasks on your behalf.
*
*
*
*
* @param s3Uri
* Depending on the value specified for the S3DataType
, identifies either a key name prefix or a
* manifest. For example:
*
* -
*
* A key name prefix might look like this: s3://bucketname/exampleprefix
*
*
* -
*
* A manifest might look like this: s3://bucketname/example.manifest
*
*
* A manifest is an S3 object which is a JSON file consisting of an array of elements. The first element is a
* prefix which is followed by one or more suffixes. SageMaker appends the suffix elements to the prefix to
* get a full set of S3Uri
. Note that the prefix must be a valid non-empty S3Uri
* that precludes users from specifying a manifest whose individual S3Uri
is sourced from
* different S3 buckets.
*
*
* The following code example shows a valid manifest format:
*
*
* [ {"prefix": "s3://customer_bucket/some/prefix/"},
*
*
* "relative/path/to/custdata-1",
*
*
* "relative/path/custdata-2",
*
*
* ...
*
*
* "relative/path/custdata-N"
*
*
* ]
*
*
* This JSON is equivalent to the following S3Uri
list:
*
*
* s3://customer_bucket/some/prefix/relative/path/to/custdata-1
*
*
* s3://customer_bucket/some/prefix/relative/path/custdata-2
*
*
* ...
*
*
* s3://customer_bucket/some/prefix/relative/path/custdata-N
*
*
* The complete set of S3Uri
in this manifest is the input data for the channel for this data
* source. The object that each S3Uri
points to must be readable by the IAM role that Amazon
* SageMaker uses to perform tasks on your behalf.
*
*
*/
public void setS3Uri(String s3Uri) {
    // Stored as given; this method does not parse or validate the URI.
    this.s3Uri = s3Uri;
}
/**
*
* Depending on the value specified for the S3DataType
, identifies either a key name prefix or a
* manifest. For example:
*
*
* -
*
* A key name prefix might look like this: s3://bucketname/exampleprefix
*
*
* -
*
* A manifest might look like this: s3://bucketname/example.manifest
*
*
* A manifest is an S3 object which is a JSON file consisting of an array of elements. The first element is a prefix
* which is followed by one or more suffixes. SageMaker appends the suffix elements to the prefix to get a full set
* of S3Uri
. Note that the prefix must be a valid non-empty S3Uri
that precludes users
* from specifying a manifest whose individual S3Uri
is sourced from different S3 buckets.
*
*
* The following code example shows a valid manifest format:
*
*
* [ {"prefix": "s3://customer_bucket/some/prefix/"},
*
*
* "relative/path/to/custdata-1",
*
*
* "relative/path/custdata-2",
*
*
* ...
*
*
* "relative/path/custdata-N"
*
*
* ]
*
*
* This JSON is equivalent to the following S3Uri
list:
*
*
* s3://customer_bucket/some/prefix/relative/path/to/custdata-1
*
*
* s3://customer_bucket/some/prefix/relative/path/custdata-2
*
*
* ...
*
*
* s3://customer_bucket/some/prefix/relative/path/custdata-N
*
*
* The complete set of S3Uri
in this manifest is the input data for the channel for this data source.
* The object that each S3Uri
points to must be readable by the IAM role that Amazon SageMaker uses to
* perform tasks on your behalf.
*
*
*
*
* @return Depending on the value specified for the S3DataType
, identifies either a key name prefix or
* a manifest. For example:
*
* -
*
* A key name prefix might look like this: s3://bucketname/exampleprefix
*
*
* -
*
* A manifest might look like this: s3://bucketname/example.manifest
*
*
* A manifest is an S3 object which is a JSON file consisting of an array of elements. The first element is
* a prefix which is followed by one or more suffixes. SageMaker appends the suffix elements to the prefix
* to get a full set of S3Uri
. Note that the prefix must be a valid non-empty
* S3Uri
that precludes users from specifying a manifest whose individual S3Uri
is
* sourced from different S3 buckets.
*
*
* The following code example shows a valid manifest format:
*
*
* [ {"prefix": "s3://customer_bucket/some/prefix/"},
*
*
* "relative/path/to/custdata-1",
*
*
* "relative/path/custdata-2",
*
*
* ...
*
*
* "relative/path/custdata-N"
*
*
* ]
*
*
* This JSON is equivalent to the following S3Uri
list:
*
*
* s3://customer_bucket/some/prefix/relative/path/to/custdata-1
*
*
* s3://customer_bucket/some/prefix/relative/path/custdata-2
*
*
* ...
*
*
* s3://customer_bucket/some/prefix/relative/path/custdata-N
*
*
* The complete set of S3Uri
in this manifest is the input data for the channel for this data
* source. The object that each S3Uri
points to must be readable by the IAM role that Amazon
* SageMaker uses to perform tasks on your behalf.
*
*
*/
public String getS3Uri() {
    // Hands back whatever was last stored; null if nothing has been assigned yet.
    return s3Uri;
}
/**
*
* Depending on the value specified for the S3DataType
, identifies either a key name prefix or a
* manifest. For example:
*
*
* -
*
* A key name prefix might look like this: s3://bucketname/exampleprefix
*
*
* -
*
* A manifest might look like this: s3://bucketname/example.manifest
*
*
* A manifest is an S3 object which is a JSON file consisting of an array of elements. The first element is a prefix
* which is followed by one or more suffixes. SageMaker appends the suffix elements to the prefix to get a full set
* of S3Uri
. Note that the prefix must be a valid non-empty S3Uri
that precludes users
* from specifying a manifest whose individual S3Uri
is sourced from different S3 buckets.
*
*
* The following code example shows a valid manifest format:
*
*
* [ {"prefix": "s3://customer_bucket/some/prefix/"},
*
*
* "relative/path/to/custdata-1",
*
*
* "relative/path/custdata-2",
*
*
* ...
*
*
* "relative/path/custdata-N"
*
*
* ]
*
*
* This JSON is equivalent to the following S3Uri
list:
*
*
* s3://customer_bucket/some/prefix/relative/path/to/custdata-1
*
*
* s3://customer_bucket/some/prefix/relative/path/custdata-2
*
*
* ...
*
*
* s3://customer_bucket/some/prefix/relative/path/custdata-N
*
*
* The complete set of S3Uri
in this manifest is the input data for the channel for this data source.
* The object that each S3Uri
points to must be readable by the IAM role that Amazon SageMaker uses to
* perform tasks on your behalf.
*
*
*
*
* @param s3Uri
* Depending on the value specified for the S3DataType
, identifies either a key name prefix or a
* manifest. For example:
*
* -
*
* A key name prefix might look like this: s3://bucketname/exampleprefix
*
*
* -
*
* A manifest might look like this: s3://bucketname/example.manifest
*
*
* A manifest is an S3 object which is a JSON file consisting of an array of elements. The first element is a
* prefix which is followed by one or more suffixes. SageMaker appends the suffix elements to the prefix to
* get a full set of S3Uri
. Note that the prefix must be a valid non-empty S3Uri
* that precludes users from specifying a manifest whose individual S3Uri
is sourced from
* different S3 buckets.
*
*
* The following code example shows a valid manifest format:
*
*
* [ {"prefix": "s3://customer_bucket/some/prefix/"},
*
*
* "relative/path/to/custdata-1",
*
*
* "relative/path/custdata-2",
*
*
* ...
*
*
* "relative/path/custdata-N"
*
*
* ]
*
*
* This JSON is equivalent to the following S3Uri
list:
*
*
* s3://customer_bucket/some/prefix/relative/path/to/custdata-1
*
*
* s3://customer_bucket/some/prefix/relative/path/custdata-2
*
*
* ...
*
*
* s3://customer_bucket/some/prefix/relative/path/custdata-N
*
*
* The complete set of S3Uri
in this manifest is the input data for the channel for this data
* source. The object that each S3Uri
points to must be readable by the IAM role that Amazon
* SageMaker uses to perform tasks on your behalf.
*
*
* @return Returns a reference to this object so that method calls can be chained together.
*/
public S3DataSource withS3Uri(String s3Uri) {
    // Fluent variant of the setter: delegate the store, then return this instance for chaining.
    this.setS3Uri(s3Uri);
    return this;
}
/**
*
* If you want Amazon SageMaker to replicate the entire dataset on each ML compute instance that is launched for
* model training, specify FullyReplicated
.
*
*
* If you want Amazon SageMaker to replicate a subset of data on each ML compute instance that is launched for model
* training, specify ShardedByS3Key
. If there are n ML compute instances launched for a training
* job, each instance gets approximately 1/n of the number of S3 objects. In this case, model training on
* each machine uses only the subset of training data.
*
*
* Don't choose more ML compute instances for training than available S3 objects. If you do, some nodes won't get
* any data and you will pay for nodes that aren't getting any training data. This applies in both File and Pipe
* modes. Keep this in mind when developing algorithms.
*
*
* In distributed training, where you use multiple ML compute EC2 instances, you might choose
* ShardedByS3Key
. If the algorithm requires copying training data to the ML storage volume (when
* TrainingInputMode
is set to File
), this copies 1/n of the number of objects.
*
*
* @param s3DataDistributionType
* If you want Amazon SageMaker to replicate the entire dataset on each ML compute instance that is launched
* for model training, specify FullyReplicated
.
*
* If you want Amazon SageMaker to replicate a subset of data on each ML compute instance that is launched
* for model training, specify ShardedByS3Key
. If there are n ML compute instances
* launched for a training job, each instance gets approximately 1/n of the number of S3 objects. In
* this case, model training on each machine uses only the subset of training data.
*
*
* Don't choose more ML compute instances for training than available S3 objects. If you do, some nodes won't
* get any data and you will pay for nodes that aren't getting any training data. This applies in both File
* and Pipe modes. Keep this in mind when developing algorithms.
*
*
* In distributed training, where you use multiple ML compute EC2 instances, you might choose
* ShardedByS3Key
. If the algorithm requires copying training data to the ML storage volume
* (when TrainingInputMode
is set to File
), this copies 1/n of the number of
* objects.
* @see S3DataDistribution
*/
public void setS3DataDistributionType(String s3DataDistributionType) {
    // Plain assignment: the value is stored unchanged, with no validation and no null check.
    this.s3DataDistributionType = s3DataDistributionType;
}
/**
*
* If you want Amazon SageMaker to replicate the entire dataset on each ML compute instance that is launched for
* model training, specify FullyReplicated
.
*
*
* If you want Amazon SageMaker to replicate a subset of data on each ML compute instance that is launched for model
* training, specify ShardedByS3Key
. If there are n ML compute instances launched for a training
* job, each instance gets approximately 1/n of the number of S3 objects. In this case, model training on
* each machine uses only the subset of training data.
*
*
* Don't choose more ML compute instances for training than available S3 objects. If you do, some nodes won't get
* any data and you will pay for nodes that aren't getting any training data. This applies in both File and Pipe
* modes. Keep this in mind when developing algorithms.
*
*
* In distributed training, where you use multiple ML compute EC2 instances, you might choose
* ShardedByS3Key
. If the algorithm requires copying training data to the ML storage volume (when
* TrainingInputMode
is set to File
), this copies 1/n of the number of objects.
*
*
* @return If you want Amazon SageMaker to replicate the entire dataset on each ML compute instance that is launched
* for model training, specify FullyReplicated
.
*
* If you want Amazon SageMaker to replicate a subset of data on each ML compute instance that is launched
* for model training, specify ShardedByS3Key
. If there are n ML compute instances
* launched for a training job, each instance gets approximately 1/n of the number of S3 objects. In
* this case, model training on each machine uses only the subset of training data.
*
*
* Don't choose more ML compute instances for training than available S3 objects. If you do, some nodes
* won't get any data and you will pay for nodes that aren't getting any training data. This applies in both
* File and Pipe modes. Keep this in mind when developing algorithms.
*
*
* In distributed training, where you use multiple ML compute EC2 instances, you might choose
* ShardedByS3Key
. If the algorithm requires copying training data to the ML storage volume
* (when TrainingInputMode
is set to File
), this copies 1/n of the number
* of objects.
* @see S3DataDistribution
*/
public String getS3DataDistributionType() {
    // Hands back whatever was last stored; null if nothing has been assigned yet.
    return s3DataDistributionType;
}
/**
*
* If you want Amazon SageMaker to replicate the entire dataset on each ML compute instance that is launched for
* model training, specify FullyReplicated
.
*
*
* If you want Amazon SageMaker to replicate a subset of data on each ML compute instance that is launched for model
* training, specify ShardedByS3Key
. If there are n ML compute instances launched for a training
* job, each instance gets approximately 1/n of the number of S3 objects. In this case, model training on
* each machine uses only the subset of training data.
*
*
* Don't choose more ML compute instances for training than available S3 objects. If you do, some nodes won't get
* any data and you will pay for nodes that aren't getting any training data. This applies in both File and Pipe
* modes. Keep this in mind when developing algorithms.
*
*
* In distributed training, where you use multiple ML compute EC2 instances, you might choose
* ShardedByS3Key
. If the algorithm requires copying training data to the ML storage volume (when
* TrainingInputMode
is set to File
), this copies 1/n of the number of objects.
*
*
* @param s3DataDistributionType
* If you want Amazon SageMaker to replicate the entire dataset on each ML compute instance that is launched
* for model training, specify FullyReplicated
.
*
* If you want Amazon SageMaker to replicate a subset of data on each ML compute instance that is launched
* for model training, specify ShardedByS3Key
. If there are n ML compute instances
* launched for a training job, each instance gets approximately 1/n of the number of S3 objects. In
* this case, model training on each machine uses only the subset of training data.
*
*
* Don't choose more ML compute instances for training than available S3 objects. If you do, some nodes won't
* get any data and you will pay for nodes that aren't getting any training data. This applies in both File
* and Pipe modes. Keep this in mind when developing algorithms.
*
*
* In distributed training, where you use multiple ML compute EC2 instances, you might choose
* ShardedByS3Key
. If the algorithm requires copying training data to the ML storage volume
* (when TrainingInputMode
is set to File
), this copies 1/n of the number of
* objects.
* @return Returns a reference to this object so that method calls can be chained together.
* @see S3DataDistribution
*/
public S3DataSource withS3DataDistributionType(String s3DataDistributionType) {
    // Fluent variant of the setter: delegate the store, then return this instance for chaining.
    this.setS3DataDistributionType(s3DataDistributionType);
    return this;
}
/**
*
* If you want Amazon SageMaker to replicate the entire dataset on each ML compute instance that is launched for
* model training, specify FullyReplicated
.
*
*
* If you want Amazon SageMaker to replicate a subset of data on each ML compute instance that is launched for model
* training, specify ShardedByS3Key
. If there are n ML compute instances launched for a training
* job, each instance gets approximately 1/n of the number of S3 objects. In this case, model training on
* each machine uses only the subset of training data.
*
*
* Don't choose more ML compute instances for training than available S3 objects. If you do, some nodes won't get
* any data and you will pay for nodes that aren't getting any training data. This applies in both File and Pipe
* modes. Keep this in mind when developing algorithms.
*
*
* In distributed training, where you use multiple ML compute EC2 instances, you might choose
* ShardedByS3Key
. If the algorithm requires copying training data to the ML storage volume (when
* TrainingInputMode
is set to File
), this copies 1/n of the number of objects.
*
*
* @param s3DataDistributionType
* If you want Amazon SageMaker to replicate the entire dataset on each ML compute instance that is launched
* for model training, specify FullyReplicated
.
*
* If you want Amazon SageMaker to replicate a subset of data on each ML compute instance that is launched
* for model training, specify ShardedByS3Key
. If there are n ML compute instances
* launched for a training job, each instance gets approximately 1/n of the number of S3 objects. In
* this case, model training on each machine uses only the subset of training data.
*
*
* Don't choose more ML compute instances for training than available S3 objects. If you do, some nodes won't
* get any data and you will pay for nodes that aren't getting any training data. This applies in both File
* and Pipe modes. Keep this in mind when developing algorithms.
*
*
* In distributed training, where you use multiple ML compute EC2 instances, you might choose
* ShardedByS3Key
. If the algorithm requires copying training data to the ML storage volume
* (when TrainingInputMode
is set to File
), this copies 1/n of the number of
* objects.
* @return Returns a reference to this object so that method calls can be chained together.
* @see S3DataDistribution
*/
public S3DataSource withS3DataDistributionType(S3DataDistribution s3DataDistributionType) {
    // The field holds the enum's toString() form. There is no null guard, so a null
    // argument fails with a NullPointerException on the toString() call.
    final String distributionAsString = s3DataDistributionType.toString();
    this.s3DataDistributionType = distributionAsString;
    return this;
}
/**
 * <p>
 * A list of one or more attribute names to use that are found in a specified augmented manifest file.
 * </p>
 *
 * @return A list of one or more attribute names to use that are found in a specified augmented manifest file.
 *         This is the list held by this object (not a copy), or {@code null} if none has been set.
 */
public java.util.List<String> getAttributeNames() {
    return attributeNames;
}
/**
 * <p>
 * A list of one or more attribute names to use that are found in a specified augmented manifest file.
 * </p>
 * <p>
 * The supplied collection is copied into a new list, so later changes to the caller's collection do not
 * affect this object. Passing {@code null} clears the stored list.
 * </p>
 *
 * @param attributeNames
 *        A list of one or more attribute names to use that are found in a specified augmented manifest file.
 */
public void setAttributeNames(java.util.Collection<String> attributeNames) {
    if (attributeNames == null) {
        this.attributeNames = null;
        return;
    }
    // Defensive copy with an explicit element type rather than a raw ArrayList.
    this.attributeNames = new java.util.ArrayList<String>(attributeNames);
}
/**
 * <p>
 * A list of one or more attribute names to use that are found in a specified augmented manifest file.
 * </p>
 * <p>
 * <b>NOTE:</b> This method appends the values to the existing list (if any). Use
 * {@link #setAttributeNames(java.util.Collection)} or {@link #withAttributeNames(java.util.Collection)} if you want
 * to override the existing values.
 * </p>
 *
 * @param attributeNames
 *        A list of one or more attribute names to use that are found in a specified augmented manifest file.
 * @return Returns a reference to this object so that method calls can be chained together.
 */
public S3DataSource withAttributeNames(String... attributeNames) {
    if (this.attributeNames == null) {
        // No list yet: install an empty, typed list pre-sized for the incoming values.
        setAttributeNames(new java.util.ArrayList<String>(attributeNames.length));
    }
    for (String name : attributeNames) {
        this.attributeNames.add(name);
    }
    return this;
}
/**
 * <p>
 * A list of one or more attribute names to use that are found in a specified augmented manifest file.
 * </p>
 * <p>
 * Replaces any existing values by delegating to {@link #setAttributeNames(java.util.Collection)}.
 * </p>
 *
 * @param attributeNames
 *        A list of one or more attribute names to use that are found in a specified augmented manifest file.
 * @return Returns a reference to this object so that method calls can be chained together.
 */
public S3DataSource withAttributeNames(java.util.Collection<String> attributeNames) {
    setAttributeNames(attributeNames);
    return this;
}
/**
 * Returns a string representation of this object, intended for testing and debugging.
 * <p>
 * The result is wrapped in braces and lists, in order, {@code S3DataType}, {@code S3Uri},
 * {@code S3DataDistributionType} and {@code AttributeNames}. A property whose getter returns {@code null} is
 * left out. Each of the first three properties is followed by a comma, so the text ends with {@code ",}"} when
 * one of them is present but {@code AttributeNames} is not. Values are appended as returned by the getters; this
 * method does not mask any of them.
 * </p>
 *
 * @return A string representation of this object.
 *
 * @see java.lang.Object#toString()
 */
@Override
public String toString() {
    final StringBuilder text = new StringBuilder("{");
    if (getS3DataType() != null) {
        text.append("S3DataType: ").append(getS3DataType()).append(",");
    }
    if (getS3Uri() != null) {
        text.append("S3Uri: ").append(getS3Uri()).append(",");
    }
    if (getS3DataDistributionType() != null) {
        text.append("S3DataDistributionType: ").append(getS3DataDistributionType()).append(",");
    }
    if (getAttributeNames() != null) {
        text.append("AttributeNames: ").append(getAttributeNames());
    }
    return text.append("}").toString();
}
/**
 * Compares this data source with another object for value equality.
 * <p>
 * Two instances are equal when {@code S3DataType}, {@code S3Uri}, {@code S3DataDistributionType} and
 * {@code AttributeNames} all match, where two {@code null} values count as a match.
 * </p>
 *
 * @param obj
 *        the object to compare with; may be {@code null}
 * @return {@code true} if {@code obj} is an {@code S3DataSource} with the same property values
 */
@Override
public boolean equals(Object obj) {
    if (this == obj) {
        return true;
    }
    // instanceof is false for null, so this single check also rejects a null argument.
    if (!(obj instanceof S3DataSource)) {
        return false;
    }
    final S3DataSource that = (S3DataSource) obj;
    return sameValue(that.getS3DataType(), this.getS3DataType())
            && sameValue(that.getS3Uri(), this.getS3Uri())
            && sameValue(that.getS3DataDistributionType(), this.getS3DataDistributionType())
            && sameValue(that.getAttributeNames(), this.getAttributeNames());
}

// Null-tolerant comparison of one property: both null is a match, exactly one null is not,
// otherwise the decision is left to theirs.equals(ours).
private static boolean sameValue(Object theirs, Object ours) {
    if (theirs == null || ours == null) {
        return theirs == ours;
    }
    return theirs.equals(ours);
}
/**
 * Computes a hash code from the same four properties that {@link #equals(Object)} compares.
 * <p>
 * The value is the usual 31-based fold starting at 1, with a {@code null} property contributing 0. That is
 * the computation {@link java.util.Arrays#hashCode(Object[])} performs over the property values in
 * declaration order.
 * </p>
 *
 * @return the hash code of this object
 */
@Override
public int hashCode() {
    return java.util.Arrays.hashCode(new Object[] {
            getS3DataType(),
            getS3Uri(),
            getS3DataDistributionType(),
            getAttributeNames() });
}
/**
 * Creates a copy of this object using {@link Object#clone()}.
 * <p>
 * The copy is field-for-field, so the attribute-name list reference is shared between this object and the
 * returned one rather than duplicated.
 * </p>
 *
 * @return a copy of this {@code S3DataSource}
 * @throws IllegalStateException
 *         if {@code Object.clone()} reports {@code CloneNotSupportedException} even though this class is
 *         {@code Cloneable}
 */
@Override
public S3DataSource clone() {
    final Object copy;
    try {
        copy = super.clone();
    } catch (CloneNotSupportedException e) {
        throw new IllegalStateException("Got a CloneNotSupportedException from Object.clone() " + "even though we're Cloneable!", e);
    }
    return (S3DataSource) copy;
}
/**
 * Writes this object through the supplied protocol marshaller.
 * <p>
 * The work is delegated entirely to the instance returned by {@code S3DataSourceMarshaller.getInstance()},
 * which receives this object and {@code protocolMarshaller}. The method is annotated
 * {@code @SdkInternalApi}; presumably it is meant to be called by the SDK itself rather than by application
 * code (confirm against the annotation's documentation).
 * </p>
 *
 * @param protocolMarshaller
 *        the marshaller handed on to {@code S3DataSourceMarshaller}
 */
@com.amazonaws.annotation.SdkInternalApi
@Override
public void marshall(ProtocolMarshaller protocolMarshaller) {
com.amazonaws.services.sagemaker.model.transform.S3DataSourceMarshaller.getInstance().marshall(this, protocolMarshaller);
}
}