// software.amazon.awssdk.services.machinelearning.model.RedshiftDataSpec Maven / Gradle / Ivy
/*
* Copyright 2014-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance with
* the License. A copy of the License is located at
*
* http://aws.amazon.com/apache2.0
*
* or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
* CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions
* and limitations under the License.
*/
package software.amazon.awssdk.services.machinelearning.model;
import java.io.Serializable;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.function.BiConsumer;
import java.util.function.Consumer;
import java.util.function.Function;
import software.amazon.awssdk.annotations.Generated;
import software.amazon.awssdk.core.SdkField;
import software.amazon.awssdk.core.SdkPojo;
import software.amazon.awssdk.core.protocol.MarshallLocation;
import software.amazon.awssdk.core.protocol.MarshallingType;
import software.amazon.awssdk.core.traits.LocationTrait;
import software.amazon.awssdk.utils.ToString;
import software.amazon.awssdk.utils.builder.CopyableBuilder;
import software.amazon.awssdk.utils.builder.ToCopyableBuilder;
/**
 * <p>
 * Describes the data specification of an Amazon Redshift {@code DataSource}.
 * </p>
 */
@Generated("software.amazon.awssdk:codegen")
public final class RedshiftDataSpec implements SdkPojo, Serializable,
ToCopyableBuilder {
private static final SdkField DATABASE_INFORMATION_FIELD = SdkField
. builder(MarshallingType.SDK_POJO).getter(getter(RedshiftDataSpec::databaseInformation))
.setter(setter(Builder::databaseInformation)).constructor(RedshiftDatabase::builder)
.traits(LocationTrait.builder().location(MarshallLocation.PAYLOAD).locationName("DatabaseInformation").build())
.build();
private static final SdkField SELECT_SQL_QUERY_FIELD = SdkField. builder(MarshallingType.STRING)
.getter(getter(RedshiftDataSpec::selectSqlQuery)).setter(setter(Builder::selectSqlQuery))
.traits(LocationTrait.builder().location(MarshallLocation.PAYLOAD).locationName("SelectSqlQuery").build()).build();
private static final SdkField DATABASE_CREDENTIALS_FIELD = SdkField
. builder(MarshallingType.SDK_POJO)
.getter(getter(RedshiftDataSpec::databaseCredentials)).setter(setter(Builder::databaseCredentials))
.constructor(RedshiftDatabaseCredentials::builder)
.traits(LocationTrait.builder().location(MarshallLocation.PAYLOAD).locationName("DatabaseCredentials").build())
.build();
private static final SdkField S3_STAGING_LOCATION_FIELD = SdkField. builder(MarshallingType.STRING)
.getter(getter(RedshiftDataSpec::s3StagingLocation)).setter(setter(Builder::s3StagingLocation))
.traits(LocationTrait.builder().location(MarshallLocation.PAYLOAD).locationName("S3StagingLocation").build()).build();
private static final SdkField DATA_REARRANGEMENT_FIELD = SdkField. builder(MarshallingType.STRING)
.getter(getter(RedshiftDataSpec::dataRearrangement)).setter(setter(Builder::dataRearrangement))
.traits(LocationTrait.builder().location(MarshallLocation.PAYLOAD).locationName("DataRearrangement").build()).build();
private static final SdkField DATA_SCHEMA_FIELD = SdkField. builder(MarshallingType.STRING)
.getter(getter(RedshiftDataSpec::dataSchema)).setter(setter(Builder::dataSchema))
.traits(LocationTrait.builder().location(MarshallLocation.PAYLOAD).locationName("DataSchema").build()).build();
private static final SdkField DATA_SCHEMA_URI_FIELD = SdkField. builder(MarshallingType.STRING)
.getter(getter(RedshiftDataSpec::dataSchemaUri)).setter(setter(Builder::dataSchemaUri))
.traits(LocationTrait.builder().location(MarshallLocation.PAYLOAD).locationName("DataSchemaUri").build()).build();
private static final List> SDK_FIELDS = Collections.unmodifiableList(Arrays.asList(DATABASE_INFORMATION_FIELD,
SELECT_SQL_QUERY_FIELD, DATABASE_CREDENTIALS_FIELD, S3_STAGING_LOCATION_FIELD, DATA_REARRANGEMENT_FIELD,
DATA_SCHEMA_FIELD, DATA_SCHEMA_URI_FIELD));
// Serialization version marker; the class is declared Serializable.
private static final long serialVersionUID = 1L;
// Nested Redshift database descriptor (marshalled under the name "DatabaseInformation").
private final RedshiftDatabase databaseInformation;
// SQL query text (marshalled under the name "SelectSqlQuery").
private final String selectSqlQuery;
// Nested credentials structure (marshalled under the name "DatabaseCredentials").
private final RedshiftDatabaseCredentials databaseCredentials;
// Amazon S3 staging location (marshalled under the name "S3StagingLocation").
private final String s3StagingLocation;
// JSON rearrangement/splitting instructions (marshalled under the name "DataRearrangement").
private final String dataRearrangement;
// Inline JSON schema (marshalled under the name "DataSchema").
private final String dataSchema;
// Location of the schema (marshalled under the name "DataSchemaUri").
private final String dataSchemaUri;
/**
 * Creates an instance by copying every member out of the given builder implementation. All target fields are
 * {@code final}, so the resulting object never changes afterwards. The constructor is private and therefore only
 * reachable from inside this class and its nested types.
 *
 * @param builder the builder implementation whose current member values are copied
 */
private RedshiftDataSpec(BuilderImpl builder) {
    // Nested structures.
    this.databaseInformation = builder.databaseInformation;
    this.databaseCredentials = builder.databaseCredentials;

    // Plain string members.
    this.selectSqlQuery = builder.selectSqlQuery;
    this.s3StagingLocation = builder.s3StagingLocation;
    this.dataRearrangement = builder.dataRearrangement;
    this.dataSchema = builder.dataSchema;
    this.dataSchemaUri = builder.dataSchemaUri;
}
/**
 * <p>
 * Describes the {@code DatabaseName} and {@code ClusterIdentifier} for an Amazon Redshift {@code DataSource}.
 * </p>
 *
 * @return Describes the {@code DatabaseName} and {@code ClusterIdentifier} for an Amazon Redshift
 *         {@code DataSource}.
 */
public RedshiftDatabase databaseInformation() {
    return this.databaseInformation;
}
/**
 * <p>
 * Describes the SQL Query to execute on an Amazon Redshift database for an Amazon Redshift {@code DataSource}.
 * </p>
 *
 * @return Describes the SQL Query to execute on an Amazon Redshift database for an Amazon Redshift
 *         {@code DataSource}.
 */
public String selectSqlQuery() {
    return this.selectSqlQuery;
}
/**
 * <p>
 * Describes AWS Identity and Access Management (IAM) credentials that are used to connect to the Amazon Redshift
 * database.
 * </p>
 *
 * @return Describes AWS Identity and Access Management (IAM) credentials that are used to connect to the Amazon
 *         Redshift database.
 */
public RedshiftDatabaseCredentials databaseCredentials() {
    return this.databaseCredentials;
}
/**
 * <p>
 * Describes an Amazon S3 location to store the result set of the {@code SelectSqlQuery} query.
 * </p>
 *
 * @return Describes an Amazon S3 location to store the result set of the {@code SelectSqlQuery} query.
 */
public String s3StagingLocation() {
    return this.s3StagingLocation;
}
/**
 * <p>
 * A JSON string that represents the splitting and rearrangement processing to be applied to a
 * {@code DataSource}. If the {@code DataRearrangement} parameter is not provided, all of the input data is used
 * to create the {@code Datasource}.
 * </p>
 * <p>
 * There are multiple parameters that control what data is used to create a datasource:
 * </p>
 * <ul>
 * <li>
 * <p>
 * <b>{@code percentBegin}</b>
 * </p>
 * <p>
 * Use {@code percentBegin} to indicate the beginning of the range of the data used to create the Datasource. If
 * you do not include {@code percentBegin} and {@code percentEnd}, Amazon ML includes all of the data when
 * creating the datasource.
 * </p>
 * </li>
 * <li>
 * <p>
 * <b>{@code percentEnd}</b>
 * </p>
 * <p>
 * Use {@code percentEnd} to indicate the end of the range of the data used to create the Datasource. If you do
 * not include {@code percentBegin} and {@code percentEnd}, Amazon ML includes all of the data when creating the
 * datasource.
 * </p>
 * </li>
 * <li>
 * <p>
 * <b>{@code complement}</b>
 * </p>
 * <p>
 * The {@code complement} parameter instructs Amazon ML to use the data that is not included in the range of
 * {@code percentBegin} to {@code percentEnd} to create a datasource. The {@code complement} parameter is useful
 * if you need to create complementary datasources for training and evaluation. To create a complementary
 * datasource, use the same values for {@code percentBegin} and {@code percentEnd}, along with the
 * {@code complement} parameter.
 * </p>
 * <p>
 * For example, the following two datasources do not share any data, and can be used to train and evaluate a
 * model. The first datasource has 25 percent of the data, and the second one has 75 percent of the data.
 * </p>
 * <p>
 * Datasource for evaluation: <code>{"splitting":{"percentBegin":0, "percentEnd":25}}</code>
 * </p>
 * <p>
 * Datasource for training: <code>{"splitting":{"percentBegin":0, "percentEnd":25, "complement":"true"}}</code>
 * </p>
 * </li>
 * <li>
 * <p>
 * <b>{@code strategy}</b>
 * </p>
 * <p>
 * To change how Amazon ML splits the data for a datasource, use the {@code strategy} parameter.
 * </p>
 * <p>
 * The default value for the {@code strategy} parameter is {@code sequential}, meaning that Amazon ML takes all
 * of the data records between the {@code percentBegin} and {@code percentEnd} parameters for the datasource, in
 * the order that the records appear in the input data.
 * </p>
 * <p>
 * The following two {@code DataRearrangement} lines are examples of sequentially ordered training and
 * evaluation datasources:
 * </p>
 * <p>
 * Datasource for evaluation:
 * <code>{"splitting":{"percentBegin":70, "percentEnd":100, "strategy":"sequential"}}</code>
 * </p>
 * <p>
 * Datasource for training:
 * <code>{"splitting":{"percentBegin":70, "percentEnd":100, "strategy":"sequential", "complement":"true"}}</code>
 * </p>
 * <p>
 * To randomly split the input data into the proportions indicated by the percentBegin and percentEnd
 * parameters, set the {@code strategy} parameter to {@code random} and provide a string that is used as the seed
 * value for the random data splitting (for example, you can use the S3 path to your data as the random seed
 * string). If you choose the random split strategy, Amazon ML assigns each row of data a pseudo-random number
 * between 0 and 100, and then selects the rows that have an assigned number between {@code percentBegin} and
 * {@code percentEnd}. Pseudo-random numbers are assigned using both the input seed string value and the byte
 * offset as a seed, so changing the data results in a different split. Any existing ordering is preserved. The
 * random splitting strategy ensures that variables in the training and evaluation data are distributed
 * similarly. It is useful in the cases where the input data may have an implicit sort order, which would
 * otherwise result in training and evaluation datasources containing non-similar data records.
 * </p>
 * <p>
 * The following two {@code DataRearrangement} lines are examples of non-sequentially ordered training and
 * evaluation datasources:
 * </p>
 * <p>
 * Datasource for evaluation:
 * <code>{"splitting":{"percentBegin":70, "percentEnd":100, "strategy":"random", "randomSeed"="s3://my_s3_path/bucket/file.csv"}}</code>
 * </p>
 * <p>
 * Datasource for training:
 * <code>{"splitting":{"percentBegin":70, "percentEnd":100, "strategy":"random", "randomSeed"="s3://my_s3_path/bucket/file.csv", "complement":"true"}}</code>
 * </p>
 * <p>
 * NOTE(review): the two random-strategy examples are reproduced as published; {@code "randomSeed"=} is not valid
 * JSON syntax, so confirm the accepted form against the service documentation before copying them.
 * </p>
 * </li>
 * </ul>
 *
 * @return The {@code DataRearrangement} JSON string held by this object. Its format, the {@code percentBegin},
 *         {@code percentEnd}, {@code complement} and {@code strategy} parameters, and the worked examples are
 *         all described in full above.
 */
public String dataRearrangement() {
    return this.dataRearrangement;
}
/**
 * <p>
 * A JSON string that represents the schema for an Amazon Redshift {@code DataSource}. The {@code DataSchema}
 * defines the structure of the observation data in the data file(s) referenced in the {@code DataSource}.
 * </p>
 * <p>
 * A {@code DataSchema} is not required if you specify a {@code DataSchemaUri}.
 * </p>
 * <p>
 * Define your {@code DataSchema} as a series of key-value pairs. {@code attributes} and
 * {@code excludedVariableNames} have an array of key-value pairs for their value. Use the following format to
 * define your {@code DataSchema}.
 * </p>
 *
 * <pre>
 * { "version": "1.0",
 *
 * "recordAnnotationFieldName": "F1",
 *
 * "recordWeightFieldName": "F2",
 *
 * "targetFieldName": "F3",
 *
 * "dataFormat": "CSV",
 *
 * "dataFileContainsHeader": true,
 *
 * "attributes": [
 *
 * { "fieldName": "F1", "fieldType": "TEXT" }, { "fieldName": "F2", "fieldType": "NUMERIC" },
 * { "fieldName": "F3", "fieldType": "CATEGORICAL" }, { "fieldName": "F4", "fieldType": "NUMERIC" },
 * { "fieldName": "F5", "fieldType": "CATEGORICAL" }, { "fieldName": "F6", "fieldType": "TEXT" },
 * { "fieldName": "F7", "fieldType": "WEIGHTED_INT_SEQUENCE" },
 * { "fieldName": "F8", "fieldType": "WEIGHTED_STRING_SEQUENCE" } ],
 *
 * "excludedVariableNames": [ "F6" ] }
 * </pre>
 *
 * @return The {@code DataSchema} JSON string held by this object. Its purpose, its relationship to
 *         {@code DataSchemaUri} and the expected key-value format are described in full above.
 */
public String dataSchema() {
    return this.dataSchema;
}
/**
 * <p>
 * Describes the schema location for an Amazon Redshift {@code DataSource}.
 * </p>
 *
 * @return Describes the schema location for an Amazon Redshift {@code DataSource}.
 */
public String dataSchemaUri() {
    return this.dataSchemaUri;
}
/**
 * Creates a builder implementation constructed from this instance.
 *
 * @return a new {@code BuilderImpl} that was handed this object at construction
 */
@Override
public Builder toBuilder() {
    final Builder seeded = new BuilderImpl(this);
    return seeded;
}
/**
 * Creates a builder implementation using its no-argument constructor.
 *
 * @return a new {@code BuilderImpl}
 */
public static Builder builder() {
    final Builder fresh = new BuilderImpl();
    return fresh;
}
public static Class extends Builder> serializableBuilderClass() {
return BuilderImpl.class;
}
/**
 * Computes a hash over all seven members, read through their accessors in declaration order.
 *
 * <p>
 * {@link Objects#hash(Object...)} starts at 1 and folds each value in as {@code 31 * hash + valueHash}, using 0
 * for a {@code null} value.
 * </p>
 *
 * @return the combined hash of all members
 */
@Override
public int hashCode() {
    return Objects.hash(
            databaseInformation(),
            selectSqlQuery(),
            databaseCredentials(),
            s3StagingLocation(),
            dataRearrangement(),
            dataSchema(),
            dataSchemaUri());
}
/**
 * Compares this object with another by delegating to {@link #equalsBySdkFields(Object)}.
 *
 * @param obj the object to compare with; may be {@code null}
 * @return whatever {@code equalsBySdkFields(obj)} returns
 */
@Override
public boolean equals(Object obj) {
    return this.equalsBySdkFields(obj);
}
/**
 * Member-wise equality: two instances are equal when all seven members are equal according to
 * {@link Objects#equals(Object, Object)}.
 *
 * @param obj the object to compare with; may be {@code null}
 * @return {@code true} if {@code obj} is this same instance, or is a {@code RedshiftDataSpec} whose members all
 *         equal this object's members; {@code false} otherwise (including for {@code null})
 */
@Override
public boolean equalsBySdkFields(Object obj) {
    if (obj == this) {
        return true;
    }
    // instanceof evaluates to false for null, so this guard also rejects a null argument.
    if (!(obj instanceof RedshiftDataSpec)) {
        return false;
    }
    final RedshiftDataSpec that = (RedshiftDataSpec) obj;

    // Compare member by member in declaration order, stopping at the first mismatch.
    if (!Objects.equals(databaseInformation(), that.databaseInformation())) {
        return false;
    }
    if (!Objects.equals(selectSqlQuery(), that.selectSqlQuery())) {
        return false;
    }
    if (!Objects.equals(databaseCredentials(), that.databaseCredentials())) {
        return false;
    }
    if (!Objects.equals(s3StagingLocation(), that.s3StagingLocation())) {
        return false;
    }
    if (!Objects.equals(dataRearrangement(), that.dataRearrangement())) {
        return false;
    }
    if (!Objects.equals(dataSchema(), that.dataSchema())) {
        return false;
    }
    return Objects.equals(dataSchemaUri(), that.dataSchemaUri());
}
/**
 * Returns a string representation of this object, intended for testing and debugging. The text is assembled by
 * the SDK's {@code ToString} helper from the label {@code "RedshiftDataSpec"} followed by each member's name and
 * current value.
 *
 * <p>
 * NOTE(review): no member is masked in this method itself; how the nested credentials object is rendered
 * presumably depends on its own string form — confirm before logging.
 * </p>
 *
 * @return the debug representation produced by the {@code ToString} helper
 */
@Override
public String toString() {
    return ToString.builder("RedshiftDataSpec")
            .add("DatabaseInformation", databaseInformation())
            .add("SelectSqlQuery", selectSqlQuery())
            .add("DatabaseCredentials", databaseCredentials())
            .add("S3StagingLocation", s3StagingLocation())
            .add("DataRearrangement", dataRearrangement())
            .add("DataSchema", dataSchema())
            .add("DataSchemaUri", dataSchemaUri())
            .build();
}
/**
 * Looks up a member by its marshalled name and returns its value cast to the requested type.
 *
 * <p>
 * The type parameter links the returned {@link Optional} to {@code clazz}, so callers get a typed result
 * instead of a raw {@code Optional}.
 * </p>
 *
 * @param fieldName the member name to look up, for example {@code "SelectSqlQuery"}; matching is exact and
 *        case-sensitive
 * @param clazz the type the member value is cast to through {@link Class#cast(Object)}
 * @param <T> the type of value the caller expects
 * @return the member value wrapped in an {@code Optional}; empty when the member is {@code null} or when
 *         {@code fieldName} does not name a member of this class
 * @throws ClassCastException if the member is non-null and is not an instance of {@code clazz}
 * @throws NullPointerException if {@code fieldName} is {@code null}, or if {@code clazz} is {@code null} and
 *         {@code fieldName} names a member
 */
public <T> Optional<T> getValueForField(String fieldName, Class<T> clazz) {
    switch (fieldName) {
        case "DatabaseInformation":
            return Optional.ofNullable(clazz.cast(databaseInformation()));
        case "SelectSqlQuery":
            return Optional.ofNullable(clazz.cast(selectSqlQuery()));
        case "DatabaseCredentials":
            return Optional.ofNullable(clazz.cast(databaseCredentials()));
        case "S3StagingLocation":
            return Optional.ofNullable(clazz.cast(s3StagingLocation()));
        case "DataRearrangement":
            return Optional.ofNullable(clazz.cast(dataRearrangement()));
        case "DataSchema":
            return Optional.ofNullable(clazz.cast(dataSchema()));
        case "DataSchemaUri":
            return Optional.ofNullable(clazz.cast(dataSchemaUri()));
        default:
            // Unknown member name: report absence rather than failing.
            return Optional.empty();
    }
}
@Override
public List> sdkFields() {
return SDK_FIELDS;
}
private static Function