All Downloads are FREE. Search and download functionalities are using the official Maven repository.

software.amazon.awssdk.services.machinelearning.model.S3DataSpec Maven / Gradle / Ivy

Go to download

The AWS Java SDK for Amazon Machine Learning module holds the client classes that are used for communicating with Amazon Machine Learning Service

There is a newer version: 2.29.39
Show newest version
/*
 * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
 * 
 * Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance with
 * the License. A copy of the License is located at
 * 
 * http://aws.amazon.com/apache2.0
 * 
 * or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
 * CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions
 * and limitations under the License.
 */

package software.amazon.awssdk.services.machinelearning.model;

import java.io.Serializable;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.function.BiConsumer;
import java.util.function.Function;
import software.amazon.awssdk.annotations.Generated;
import software.amazon.awssdk.core.SdkField;
import software.amazon.awssdk.core.SdkPojo;
import software.amazon.awssdk.core.protocol.MarshallLocation;
import software.amazon.awssdk.core.protocol.MarshallingType;
import software.amazon.awssdk.core.traits.LocationTrait;
import software.amazon.awssdk.utils.ToString;
import software.amazon.awssdk.utils.builder.CopyableBuilder;
import software.amazon.awssdk.utils.builder.ToCopyableBuilder;

/**
 * 

* Describes the data specification of a DataSource. *

*/ @Generated("software.amazon.awssdk:codegen") public final class S3DataSpec implements SdkPojo, Serializable, ToCopyableBuilder { private static final SdkField DATA_LOCATION_S3_FIELD = SdkField. builder(MarshallingType.STRING) .memberName("DataLocationS3").getter(getter(S3DataSpec::dataLocationS3)).setter(setter(Builder::dataLocationS3)) .traits(LocationTrait.builder().location(MarshallLocation.PAYLOAD).locationName("DataLocationS3").build()).build(); private static final SdkField DATA_REARRANGEMENT_FIELD = SdkField. builder(MarshallingType.STRING) .memberName("DataRearrangement").getter(getter(S3DataSpec::dataRearrangement)) .setter(setter(Builder::dataRearrangement)) .traits(LocationTrait.builder().location(MarshallLocation.PAYLOAD).locationName("DataRearrangement").build()).build(); private static final SdkField DATA_SCHEMA_FIELD = SdkField. builder(MarshallingType.STRING) .memberName("DataSchema").getter(getter(S3DataSpec::dataSchema)).setter(setter(Builder::dataSchema)) .traits(LocationTrait.builder().location(MarshallLocation.PAYLOAD).locationName("DataSchema").build()).build(); private static final SdkField DATA_SCHEMA_LOCATION_S3_FIELD = SdkField. 
builder(MarshallingType.STRING) .memberName("DataSchemaLocationS3").getter(getter(S3DataSpec::dataSchemaLocationS3)) .setter(setter(Builder::dataSchemaLocationS3)) .traits(LocationTrait.builder().location(MarshallLocation.PAYLOAD).locationName("DataSchemaLocationS3").build()) .build(); private static final List> SDK_FIELDS = Collections.unmodifiableList(Arrays.asList(DATA_LOCATION_S3_FIELD, DATA_REARRANGEMENT_FIELD, DATA_SCHEMA_FIELD, DATA_SCHEMA_LOCATION_S3_FIELD)); private static final long serialVersionUID = 1L; private final String dataLocationS3; private final String dataRearrangement; private final String dataSchema; private final String dataSchemaLocationS3; private S3DataSpec(BuilderImpl builder) { this.dataLocationS3 = builder.dataLocationS3; this.dataRearrangement = builder.dataRearrangement; this.dataSchema = builder.dataSchema; this.dataSchemaLocationS3 = builder.dataSchemaLocationS3; } /** *

* The location of the data file(s) used by a DataSource. The URI specifies a data file or an Amazon * Simple Storage Service (Amazon S3) directory or bucket containing data files. *

* * @return The location of the data file(s) used by a DataSource. The URI specifies a data file or an * Amazon Simple Storage Service (Amazon S3) directory or bucket containing data files. */ public final String dataLocationS3() { return dataLocationS3; } /** *

* A JSON string that represents the splitting and rearrangement processing to be applied to a * DataSource. If the DataRearrangement parameter is not provided, all of the input data * is used to create the Datasource. *

*

* There are multiple parameters that control what data is used to create a datasource: *

*
    *
  • *

    * percentBegin *

    *

    * Use percentBegin to indicate the beginning of the range of the data used to create the Datasource. * If you do not include percentBegin and percentEnd, Amazon ML includes all of the data * when creating the datasource. *

    *
  • *
  • *

    * percentEnd *

    *

    * Use percentEnd to indicate the end of the range of the data used to create the Datasource. If you do * not include percentBegin and percentEnd, Amazon ML includes all of the data when * creating the datasource. *

    *
  • *
  • *

    * complement *

    *

    * The complement parameter instructs Amazon ML to use the data that is not included in the range of * percentBegin to percentEnd to create a datasource. The complement * parameter is useful if you need to create complementary datasources for training and evaluation. To create a * complementary datasource, use the same values for percentBegin and percentEnd, along * with the complement parameter. *

    *

    * For example, the following two datasources do not share any data, and can be used to train and evaluate a model. * The first datasource has 25 percent of the data, and the second one has 75 percent of the data. *

    *

    * Datasource for evaluation: {"splitting":{"percentBegin":0, "percentEnd":25}} *

    *

    * Datasource for training: {"splitting":{"percentBegin":0, "percentEnd":25, "complement":"true"}} *

    *
  • *
  • *

    * strategy *

    *

    * To change how Amazon ML splits the data for a datasource, use the strategy parameter. *

    *

    * The default value for the strategy parameter is sequential, meaning that Amazon ML * takes all of the data records between the percentBegin and percentEnd parameters for * the datasource, in the order that the records appear in the input data. *

    *

    * The following two DataRearrangement lines are examples of sequentially ordered training and * evaluation datasources: *

    *

    * Datasource for evaluation: * {"splitting":{"percentBegin":70, "percentEnd":100, "strategy":"sequential"}} *

    *

    * Datasource for training: * {"splitting":{"percentBegin":70, "percentEnd":100, "strategy":"sequential", "complement":"true"}} *

    *

    * To randomly split the input data into the proportions indicated by the percentBegin and percentEnd parameters, * set the strategy parameter to random and provide a string that is used as the seed * value for the random data splitting (for example, you can use the S3 path to your data as the random seed * string). If you choose the random split strategy, Amazon ML assigns each row of data a pseudo-random number * between 0 and 100, and then selects the rows that have an assigned number between percentBegin and * percentEnd. Pseudo-random numbers are assigned using both the input seed string value and the byte * offset as a seed, so changing the data results in a different split. Any existing ordering is preserved. The * random splitting strategy ensures that variables in the training and evaluation data are distributed similarly. * It is useful in the cases where the input data may have an implicit sort order, which would otherwise result in * training and evaluation datasources containing non-similar data records. *

    *

    * The following two DataRearrangement lines are examples of non-sequentially ordered training and * evaluation datasources: *

    *

    * Datasource for evaluation: * {"splitting":{"percentBegin":70, "percentEnd":100, "strategy":"random", "randomSeed"="s3://my_s3_path/bucket/file.csv"}} *

    *

    * Datasource for training: * {"splitting":{"percentBegin":70, "percentEnd":100, "strategy":"random", "randomSeed"="s3://my_s3_path/bucket/file.csv", "complement":"true"}} *

    *
  • *
* * @return A JSON string that represents the splitting and rearrangement processing to be applied to a * DataSource. If the DataRearrangement parameter is not provided, all of the * input data is used to create the Datasource.

*

* There are multiple parameters that control what data is used to create a datasource: *

*
    *
  • *

    * percentBegin *

    *

    * Use percentBegin to indicate the beginning of the range of the data used to create the * Datasource. If you do not include percentBegin and percentEnd, Amazon ML * includes all of the data when creating the datasource. *

    *
  • *
  • *

    * percentEnd *

    *

    * Use percentEnd to indicate the end of the range of the data used to create the Datasource. * If you do not include percentBegin and percentEnd, Amazon ML includes all of * the data when creating the datasource. *

    *
  • *
  • *

    * complement *

    *

    * The complement parameter instructs Amazon ML to use the data that is not included in the * range of percentBegin to percentEnd to create a datasource. The * complement parameter is useful if you need to create complementary datasources for training * and evaluation. To create a complementary datasource, use the same values for percentBegin * and percentEnd, along with the complement parameter. *

    *

    * For example, the following two datasources do not share any data, and can be used to train and evaluate a * model. The first datasource has 25 percent of the data, and the second one has 75 percent of the data. *

    *

    * Datasource for evaluation: {"splitting":{"percentBegin":0, "percentEnd":25}} *

    *

    * Datasource for training: * {"splitting":{"percentBegin":0, "percentEnd":25, "complement":"true"}} *

    *
  • *
  • *

    * strategy *

    *

    * To change how Amazon ML splits the data for a datasource, use the strategy parameter. *

    *

    * The default value for the strategy parameter is sequential, meaning that Amazon * ML takes all of the data records between the percentBegin and percentEnd * parameters for the datasource, in the order that the records appear in the input data. *

    *

    * The following two DataRearrangement lines are examples of sequentially ordered training and * evaluation datasources: *

    *

    * Datasource for evaluation: * {"splitting":{"percentBegin":70, "percentEnd":100, "strategy":"sequential"}} *

    *

    * Datasource for training: * {"splitting":{"percentBegin":70, "percentEnd":100, "strategy":"sequential", "complement":"true"}} *

    *

    * To randomly split the input data into the proportions indicated by the percentBegin and percentEnd * parameters, set the strategy parameter to random and provide a string that is * used as the seed value for the random data splitting (for example, you can use the S3 path to your data * as the random seed string). If you choose the random split strategy, Amazon ML assigns each row of data a * pseudo-random number between 0 and 100, and then selects the rows that have an assigned number between * percentBegin and percentEnd. Pseudo-random numbers are assigned using both the * input seed string value and the byte offset as a seed, so changing the data results in a different split. * Any existing ordering is preserved. The random splitting strategy ensures that variables in the training * and evaluation data are distributed similarly. It is useful in the cases where the input data may have an * implicit sort order, which would otherwise result in training and evaluation datasources containing * non-similar data records. *

    *

    * The following two DataRearrangement lines are examples of non-sequentially ordered training * and evaluation datasources: *

    *

    * Datasource for evaluation: * {"splitting":{"percentBegin":70, "percentEnd":100, "strategy":"random", "randomSeed"="s3://my_s3_path/bucket/file.csv"}} *

    *

    * Datasource for training: * {"splitting":{"percentBegin":70, "percentEnd":100, "strategy":"random", "randomSeed"="s3://my_s3_path/bucket/file.csv", "complement":"true"}} *

    *
  • */ public final String dataRearrangement() { return dataRearrangement; } /** *

    * A JSON string that represents the schema for an Amazon S3 DataSource. The DataSchema * defines the structure of the observation data in the data file(s) referenced in the DataSource. *

    *

    * You must provide either the DataSchema or the DataSchemaLocationS3. *

    *

    * Define your DataSchema as a series of key-value pairs. attributes and * excludedVariableNames have an array of key-value pairs for their value. Use the following format to * define your DataSchema. *

    *

    * { "version": "1.0", *

    *

    * "recordAnnotationFieldName": "F1", *

    *

    * "recordWeightFieldName": "F2", *

    *

    * "targetFieldName": "F3", *

    *

    * "dataFormat": "CSV", *

    *

    * "dataFileContainsHeader": true, *

    *

    * "attributes": [ *

    *

    * { "fieldName": "F1", "fieldType": "TEXT" }, { "fieldName": "F2", "fieldType": "NUMERIC" }, { "fieldName": "F3", * "fieldType": "CATEGORICAL" }, { "fieldName": "F4", "fieldType": "NUMERIC" }, { "fieldName": "F5", "fieldType": * "CATEGORICAL" }, { "fieldName": "F6", "fieldType": "TEXT" }, { "fieldName": "F7", "fieldType": * "WEIGHTED_INT_SEQUENCE" }, { "fieldName": "F8", "fieldType": "WEIGHTED_STRING_SEQUENCE" } ], *

    *

    * "excludedVariableNames": [ "F6" ] } *

    * * @return A JSON string that represents the schema for an Amazon S3 DataSource. The * DataSchema defines the structure of the observation data in the data file(s) referenced in * the DataSource.

    *

    * You must provide either the DataSchema or the DataSchemaLocationS3. *

    *

    * Define your DataSchema as a series of key-value pairs. attributes and * excludedVariableNames have an array of key-value pairs for their value. Use the following * format to define your DataSchema. *

    *

    * { "version": "1.0", *

    *

    * "recordAnnotationFieldName": "F1", *

    *

    * "recordWeightFieldName": "F2", *

    *

    * "targetFieldName": "F3", *

    *

    * "dataFormat": "CSV", *

    *

    * "dataFileContainsHeader": true, *

    *

    * "attributes": [ *

    *

    * { "fieldName": "F1", "fieldType": "TEXT" }, { "fieldName": "F2", "fieldType": "NUMERIC" }, { "fieldName": * "F3", "fieldType": "CATEGORICAL" }, { "fieldName": "F4", "fieldType": "NUMERIC" }, { "fieldName": "F5", * "fieldType": "CATEGORICAL" }, { "fieldName": "F6", "fieldType": "TEXT" }, { "fieldName": "F7", * "fieldType": "WEIGHTED_INT_SEQUENCE" }, { "fieldName": "F8", "fieldType": "WEIGHTED_STRING_SEQUENCE" } ], *

    *

    * "excludedVariableNames": [ "F6" ] } */ public final String dataSchema() { return dataSchema; } /** *

    * Describes the schema location in Amazon S3. You must provide either the DataSchema or the * DataSchemaLocationS3. *

    * * @return Describes the schema location in Amazon S3. You must provide either the DataSchema or the * DataSchemaLocationS3. */ public final String dataSchemaLocationS3() { return dataSchemaLocationS3; } @Override public Builder toBuilder() { return new BuilderImpl(this); } public static Builder builder() { return new BuilderImpl(); } public static Class serializableBuilderClass() { return BuilderImpl.class; } @Override public final int hashCode() { int hashCode = 1; hashCode = 31 * hashCode + Objects.hashCode(dataLocationS3()); hashCode = 31 * hashCode + Objects.hashCode(dataRearrangement()); hashCode = 31 * hashCode + Objects.hashCode(dataSchema()); hashCode = 31 * hashCode + Objects.hashCode(dataSchemaLocationS3()); return hashCode; } @Override public final boolean equals(Object obj) { return equalsBySdkFields(obj); } @Override public final boolean equalsBySdkFields(Object obj) { if (this == obj) { return true; } if (obj == null) { return false; } if (!(obj instanceof S3DataSpec)) { return false; } S3DataSpec other = (S3DataSpec) obj; return Objects.equals(dataLocationS3(), other.dataLocationS3()) && Objects.equals(dataRearrangement(), other.dataRearrangement()) && Objects.equals(dataSchema(), other.dataSchema()) && Objects.equals(dataSchemaLocationS3(), other.dataSchemaLocationS3()); } /** * Returns a string representation of this object. This is useful for testing and debugging. Sensitive data will be * redacted from this string using a placeholder value. 
*/ @Override public final String toString() { return ToString.builder("S3DataSpec").add("DataLocationS3", dataLocationS3()) .add("DataRearrangement", dataRearrangement()).add("DataSchema", dataSchema()) .add("DataSchemaLocationS3", dataSchemaLocationS3()).build(); } public final Optional getValueForField(String fieldName, Class clazz) { switch (fieldName) { case "DataLocationS3": return Optional.ofNullable(clazz.cast(dataLocationS3())); case "DataRearrangement": return Optional.ofNullable(clazz.cast(dataRearrangement())); case "DataSchema": return Optional.ofNullable(clazz.cast(dataSchema())); case "DataSchemaLocationS3": return Optional.ofNullable(clazz.cast(dataSchemaLocationS3())); default: return Optional.empty(); } } @Override public final List> sdkFields() { return SDK_FIELDS; } private static Function getter(Function g) { return obj -> g.apply((S3DataSpec) obj); } private static BiConsumer setter(BiConsumer s) { return (obj, val) -> s.accept((Builder) obj, val); } public interface Builder extends SdkPojo, CopyableBuilder { /** *

    * The location of the data file(s) used by a DataSource. The URI specifies a data file or an * Amazon Simple Storage Service (Amazon S3) directory or bucket containing data files. *

    * * @param dataLocationS3 * The location of the data file(s) used by a DataSource. The URI specifies a data file or * an Amazon Simple Storage Service (Amazon S3) directory or bucket containing data files. * @return Returns a reference to this object so that method calls can be chained together. */ Builder dataLocationS3(String dataLocationS3); /** *

    * A JSON string that represents the splitting and rearrangement processing to be applied to a * DataSource. If the DataRearrangement parameter is not provided, all of the input * data is used to create the Datasource. *

    *

    * There are multiple parameters that control what data is used to create a datasource: *

    *
      *
    • *

      * percentBegin *

      *

      * Use percentBegin to indicate the beginning of the range of the data used to create the * Datasource. If you do not include percentBegin and percentEnd, Amazon ML includes * all of the data when creating the datasource. *

      *
    • *
    • *

      * percentEnd *

      *

      * Use percentEnd to indicate the end of the range of the data used to create the Datasource. If * you do not include percentBegin and percentEnd, Amazon ML includes all of the data * when creating the datasource. *

      *
    • *
    • *

      * complement *

      *

      * The complement parameter instructs Amazon ML to use the data that is not included in the range * of percentBegin to percentEnd to create a datasource. The complement * parameter is useful if you need to create complementary datasources for training and evaluation. To create a * complementary datasource, use the same values for percentBegin and percentEnd, * along with the complement parameter. *

      *

      * For example, the following two datasources do not share any data, and can be used to train and evaluate a * model. The first datasource has 25 percent of the data, and the second one has 75 percent of the data. *

      *

      * Datasource for evaluation: {"splitting":{"percentBegin":0, "percentEnd":25}} *

      *

      * Datasource for training: {"splitting":{"percentBegin":0, "percentEnd":25, "complement":"true"}} *

      *
    • *
    • *

      * strategy *

      *

      * To change how Amazon ML splits the data for a datasource, use the strategy parameter. *

      *

      * The default value for the strategy parameter is sequential, meaning that Amazon ML * takes all of the data records between the percentBegin and percentEnd parameters * for the datasource, in the order that the records appear in the input data. *

      *

      * The following two DataRearrangement lines are examples of sequentially ordered training and * evaluation datasources: *

      *

      * Datasource for evaluation: * {"splitting":{"percentBegin":70, "percentEnd":100, "strategy":"sequential"}} *

      *

      * Datasource for training: * {"splitting":{"percentBegin":70, "percentEnd":100, "strategy":"sequential", "complement":"true"}} *

      *

      * To randomly split the input data into the proportions indicated by the percentBegin and percentEnd * parameters, set the strategy parameter to random and provide a string that is used * as the seed value for the random data splitting (for example, you can use the S3 path to your data as the * random seed string). If you choose the random split strategy, Amazon ML assigns each row of data a * pseudo-random number between 0 and 100, and then selects the rows that have an assigned number between * percentBegin and percentEnd. Pseudo-random numbers are assigned using both the * input seed string value and the byte offset as a seed, so changing the data results in a different split. Any * existing ordering is preserved. The random splitting strategy ensures that variables in the training and * evaluation data are distributed similarly. It is useful in the cases where the input data may have an * implicit sort order, which would otherwise result in training and evaluation datasources containing * non-similar data records. *

      *

      * The following two DataRearrangement lines are examples of non-sequentially ordered training and * evaluation datasources: *

      *

      * Datasource for evaluation: * {"splitting":{"percentBegin":70, "percentEnd":100, "strategy":"random", "randomSeed"="s3://my_s3_path/bucket/file.csv"}} *

      *

      * Datasource for training: * {"splitting":{"percentBegin":70, "percentEnd":100, "strategy":"random", "randomSeed"="s3://my_s3_path/bucket/file.csv", "complement":"true"}} *

      *
    • *
    * * @param dataRearrangement * A JSON string that represents the splitting and rearrangement processing to be applied to a * DataSource. If the DataRearrangement parameter is not provided, all of the * input data is used to create the Datasource.

    *

    * There are multiple parameters that control what data is used to create a datasource: *

    *
      *
    • *

      * percentBegin *

      *

      * Use percentBegin to indicate the beginning of the range of the data used to create the * Datasource. If you do not include percentBegin and percentEnd, Amazon ML * includes all of the data when creating the datasource. *

      *
    • *
    • *

      * percentEnd *

      *

      * Use percentEnd to indicate the end of the range of the data used to create the * Datasource. If you do not include percentBegin and percentEnd, Amazon ML * includes all of the data when creating the datasource. *

      *
    • *
    • *

      * complement *

      *

      * The complement parameter instructs Amazon ML to use the data that is not included in the * range of percentBegin to percentEnd to create a datasource. The * complement parameter is useful if you need to create complementary datasources for * training and evaluation. To create a complementary datasource, use the same values for * percentBegin and percentEnd, along with the complement * parameter. *

      *

      * For example, the following two datasources do not share any data, and can be used to train and * evaluate a model. The first datasource has 25 percent of the data, and the second one has 75 percent * of the data. *

      *

      * Datasource for evaluation: {"splitting":{"percentBegin":0, "percentEnd":25}} *

      *

      * Datasource for training: * {"splitting":{"percentBegin":0, "percentEnd":25, "complement":"true"}} *

      *
    • *
    • *

      * strategy *

      *

      * To change how Amazon ML splits the data for a datasource, use the strategy parameter. *

      *

      * The default value for the strategy parameter is sequential, meaning that * Amazon ML takes all of the data records between the percentBegin and * percentEnd parameters for the datasource, in the order that the records appear in the * input data. *

      *

      * The following two DataRearrangement lines are examples of sequentially ordered training * and evaluation datasources: *

      *

      * Datasource for evaluation: * {"splitting":{"percentBegin":70, "percentEnd":100, "strategy":"sequential"}} *

      *

      * Datasource for training: * {"splitting":{"percentBegin":70, "percentEnd":100, "strategy":"sequential", "complement":"true"}} *

      *

      * To randomly split the input data into the proportions indicated by the percentBegin and percentEnd * parameters, set the strategy parameter to random and provide a string that * is used as the seed value for the random data splitting (for example, you can use the S3 path to your * data as the random seed string). If you choose the random split strategy, Amazon ML assigns each row * of data a pseudo-random number between 0 and 100, and then selects the rows that have an assigned * number between percentBegin and percentEnd. Pseudo-random numbers are * assigned using both the input seed string value and the byte offset as a seed, so changing the data * results in a different split. Any existing ordering is preserved. The random splitting strategy * ensures that variables in the training and evaluation data are distributed similarly. It is useful in * the cases where the input data may have an implicit sort order, which would otherwise result in * training and evaluation datasources containing non-similar data records. *

      *

      * The following two DataRearrangement lines are examples of non-sequentially ordered * training and evaluation datasources: *

      *

      * Datasource for evaluation: * {"splitting":{"percentBegin":70, "percentEnd":100, "strategy":"random", "randomSeed"="s3://my_s3_path/bucket/file.csv"}} *

      *

      * Datasource for training: * {"splitting":{"percentBegin":70, "percentEnd":100, "strategy":"random", "randomSeed"="s3://my_s3_path/bucket/file.csv", "complement":"true"}} *

      *
    • * @return Returns a reference to this object so that method calls can be chained together. */ Builder dataRearrangement(String dataRearrangement); /** *

      * A JSON string that represents the schema for an Amazon S3 DataSource. The * DataSchema defines the structure of the observation data in the data file(s) referenced in the * DataSource. *

      *

      * You must provide either the DataSchema or the DataSchemaLocationS3. *

      *

      * Define your DataSchema as a series of key-value pairs. attributes and * excludedVariableNames have an array of key-value pairs for their value. Use the following format * to define your DataSchema. *

      *

      * { "version": "1.0", *

      *

      * "recordAnnotationFieldName": "F1", *

      *

      * "recordWeightFieldName": "F2", *

      *

      * "targetFieldName": "F3", *

      *

      * "dataFormat": "CSV", *

      *

      * "dataFileContainsHeader": true, *

      *

      * "attributes": [ *

      *

      * { "fieldName": "F1", "fieldType": "TEXT" }, { "fieldName": "F2", "fieldType": "NUMERIC" }, { "fieldName": * "F3", "fieldType": "CATEGORICAL" }, { "fieldName": "F4", "fieldType": "NUMERIC" }, { "fieldName": "F5", * "fieldType": "CATEGORICAL" }, { "fieldName": "F6", "fieldType": "TEXT" }, { "fieldName": "F7", "fieldType": * "WEIGHTED_INT_SEQUENCE" }, { "fieldName": "F8", "fieldType": "WEIGHTED_STRING_SEQUENCE" } ], *

      *

      * "excludedVariableNames": [ "F6" ] } *

      * * @param dataSchema * A JSON string that represents the schema for an Amazon S3 DataSource. The * DataSchema defines the structure of the observation data in the data file(s) referenced * in the DataSource.

      *

      * You must provide either the DataSchema or the DataSchemaLocationS3. *

      *

      * Define your DataSchema as a series of key-value pairs. attributes and * excludedVariableNames have an array of key-value pairs for their value. Use the following * format to define your DataSchema. *

      *

      * { "version": "1.0", *

      *

      * "recordAnnotationFieldName": "F1", *

      *

      * "recordWeightFieldName": "F2", *

      *

      * "targetFieldName": "F3", *

      *

      * "dataFormat": "CSV", *

      *

      * "dataFileContainsHeader": true, *

      *

      * "attributes": [ *

      *

      * { "fieldName": "F1", "fieldType": "TEXT" }, { "fieldName": "F2", "fieldType": "NUMERIC" }, { * "fieldName": "F3", "fieldType": "CATEGORICAL" }, { "fieldName": "F4", "fieldType": "NUMERIC" }, { * "fieldName": "F5", "fieldType": "CATEGORICAL" }, { "fieldName": "F6", "fieldType": "TEXT" }, { * "fieldName": "F7", "fieldType": "WEIGHTED_INT_SEQUENCE" }, { "fieldName": "F8", "fieldType": * "WEIGHTED_STRING_SEQUENCE" } ], *

      *

      * "excludedVariableNames": [ "F6" ] } * @return Returns a reference to this object so that method calls can be chained together. */ Builder dataSchema(String dataSchema); /** *

      * Describes the schema location in Amazon S3. You must provide either the DataSchema or the * DataSchemaLocationS3. *

      * * @param dataSchemaLocationS3 * Describes the schema location in Amazon S3. You must provide either the DataSchema or the * DataSchemaLocationS3. * @return Returns a reference to this object so that method calls can be chained together. */ Builder dataSchemaLocationS3(String dataSchemaLocationS3); } static final class BuilderImpl implements Builder { private String dataLocationS3; private String dataRearrangement; private String dataSchema; private String dataSchemaLocationS3; private BuilderImpl() { } private BuilderImpl(S3DataSpec model) { dataLocationS3(model.dataLocationS3); dataRearrangement(model.dataRearrangement); dataSchema(model.dataSchema); dataSchemaLocationS3(model.dataSchemaLocationS3); } public final String getDataLocationS3() { return dataLocationS3; } public final void setDataLocationS3(String dataLocationS3) { this.dataLocationS3 = dataLocationS3; } @Override public final Builder dataLocationS3(String dataLocationS3) { this.dataLocationS3 = dataLocationS3; return this; } public final String getDataRearrangement() { return dataRearrangement; } public final void setDataRearrangement(String dataRearrangement) { this.dataRearrangement = dataRearrangement; } @Override public final Builder dataRearrangement(String dataRearrangement) { this.dataRearrangement = dataRearrangement; return this; } public final String getDataSchema() { return dataSchema; } public final void setDataSchema(String dataSchema) { this.dataSchema = dataSchema; } @Override public final Builder dataSchema(String dataSchema) { this.dataSchema = dataSchema; return this; } public final String getDataSchemaLocationS3() { return dataSchemaLocationS3; } public final void setDataSchemaLocationS3(String dataSchemaLocationS3) { this.dataSchemaLocationS3 = dataSchemaLocationS3; } @Override public final Builder dataSchemaLocationS3(String dataSchemaLocationS3) { this.dataSchemaLocationS3 = dataSchemaLocationS3; return this; } @Override public S3DataSpec build() { return new 
S3DataSpec(this); } @Override public List> sdkFields() { return SDK_FIELDS; } } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy