All downloads are free. The search and download features use the official Maven repository.

software.amazon.awssdk.services.sagemaker.model.TabularJobConfig Maven / Gradle / Ivy

Go to download

The AWS Java SDK for Amazon SageMaker module holds the client classes that are used for communicating with the Amazon SageMaker service.

There is a newer version: 2.29.39
Show newest version
/*
 * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
 * 
 * Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance with
 * the License. A copy of the License is located at
 * 
 * http://aws.amazon.com/apache2.0
 * 
 * or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
 * CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions
 * and limitations under the License.
 */

package software.amazon.awssdk.services.sagemaker.model;

import java.io.Serializable;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.function.BiConsumer;
import java.util.function.Consumer;
import java.util.function.Function;
import software.amazon.awssdk.annotations.Generated;
import software.amazon.awssdk.core.SdkField;
import software.amazon.awssdk.core.SdkPojo;
import software.amazon.awssdk.core.protocol.MarshallLocation;
import software.amazon.awssdk.core.protocol.MarshallingType;
import software.amazon.awssdk.core.traits.LocationTrait;
import software.amazon.awssdk.utils.ToString;
import software.amazon.awssdk.utils.builder.CopyableBuilder;
import software.amazon.awssdk.utils.builder.ToCopyableBuilder;

/**
 * <p>
 * The collection of settings used by an AutoML job V2 for the tabular problem type.
 * </p>
 */
@Generated("software.amazon.awssdk:codegen")
public final class TabularJobConfig implements SdkPojo, Serializable,
        ToCopyableBuilder<TabularJobConfig.Builder, TabularJobConfig> {
    // One SdkField descriptor per member. Each descriptor ties the member name to this class's getter and to the
    // Builder's setter, and marks the member as marshalled at the PAYLOAD location.
    private static final SdkField<CandidateGenerationConfig> CANDIDATE_GENERATION_CONFIG_FIELD = SdkField
            .<CandidateGenerationConfig> builder(MarshallingType.SDK_POJO)
            .memberName("CandidateGenerationConfig")
            .getter(getter(TabularJobConfig::candidateGenerationConfig))
            .setter(setter(Builder::candidateGenerationConfig))
            .constructor(CandidateGenerationConfig::builder)
            .traits(LocationTrait.builder().location(MarshallLocation.PAYLOAD)
                    .locationName("CandidateGenerationConfig").build())
            .build();

    private static final SdkField<AutoMLJobCompletionCriteria> COMPLETION_CRITERIA_FIELD = SdkField
            .<AutoMLJobCompletionCriteria> builder(MarshallingType.SDK_POJO)
            .memberName("CompletionCriteria")
            .getter(getter(TabularJobConfig::completionCriteria))
            .setter(setter(Builder::completionCriteria))
            .constructor(AutoMLJobCompletionCriteria::builder)
            .traits(LocationTrait.builder().location(MarshallLocation.PAYLOAD)
                    .locationName("CompletionCriteria").build())
            .build();

    private static final SdkField<String> FEATURE_SPECIFICATION_S3_URI_FIELD = SdkField
            .<String> builder(MarshallingType.STRING)
            .memberName("FeatureSpecificationS3Uri")
            .getter(getter(TabularJobConfig::featureSpecificationS3Uri))
            .setter(setter(Builder::featureSpecificationS3Uri))
            .traits(LocationTrait.builder().location(MarshallLocation.PAYLOAD)
                    .locationName("FeatureSpecificationS3Uri").build())
            .build();

    // Enum-backed members are marshalled through their raw String form (modeAsString / problemTypeAsString).
    private static final SdkField<String> MODE_FIELD = SdkField
            .<String> builder(MarshallingType.STRING)
            .memberName("Mode")
            .getter(getter(TabularJobConfig::modeAsString))
            .setter(setter(Builder::mode))
            .traits(LocationTrait.builder().location(MarshallLocation.PAYLOAD).locationName("Mode").build())
            .build();

    private static final SdkField<Boolean> GENERATE_CANDIDATE_DEFINITIONS_ONLY_FIELD = SdkField
            .<Boolean> builder(MarshallingType.BOOLEAN)
            .memberName("GenerateCandidateDefinitionsOnly")
            .getter(getter(TabularJobConfig::generateCandidateDefinitionsOnly))
            .setter(setter(Builder::generateCandidateDefinitionsOnly))
            .traits(LocationTrait.builder().location(MarshallLocation.PAYLOAD)
                    .locationName("GenerateCandidateDefinitionsOnly").build())
            .build();

    private static final SdkField<String> PROBLEM_TYPE_FIELD = SdkField
            .<String> builder(MarshallingType.STRING)
            .memberName("ProblemType")
            .getter(getter(TabularJobConfig::problemTypeAsString))
            .setter(setter(Builder::problemType))
            .traits(LocationTrait.builder().location(MarshallLocation.PAYLOAD).locationName("ProblemType").build())
            .build();

    private static final SdkField<String> TARGET_ATTRIBUTE_NAME_FIELD = SdkField
            .<String> builder(MarshallingType.STRING)
            .memberName("TargetAttributeName")
            .getter(getter(TabularJobConfig::targetAttributeName))
            .setter(setter(Builder::targetAttributeName))
            .traits(LocationTrait.builder().location(MarshallLocation.PAYLOAD)
                    .locationName("TargetAttributeName").build())
            .build();

    private static final SdkField<String> SAMPLE_WEIGHT_ATTRIBUTE_NAME_FIELD = SdkField
            .<String> builder(MarshallingType.STRING)
            .memberName("SampleWeightAttributeName")
            .getter(getter(TabularJobConfig::sampleWeightAttributeName))
            .setter(setter(Builder::sampleWeightAttributeName))
            .traits(LocationTrait.builder().location(MarshallLocation.PAYLOAD)
                    .locationName("SampleWeightAttributeName").build())
            .build();

    // Unmodifiable list of all member descriptors, returned as-is by sdkFields().
    private static final List<SdkField<?>> SDK_FIELDS = Collections.unmodifiableList(Arrays.asList(
            CANDIDATE_GENERATION_CONFIG_FIELD, COMPLETION_CRITERIA_FIELD, FEATURE_SPECIFICATION_S3_URI_FIELD,
            MODE_FIELD, GENERATE_CANDIDATE_DEFINITIONS_ONLY_FIELD, PROBLEM_TYPE_FIELD,
            TARGET_ATTRIBUTE_NAME_FIELD, SAMPLE_WEIGHT_ATTRIBUTE_NAME_FIELD));

    private static final long serialVersionUID = 1L;

    private final CandidateGenerationConfig candidateGenerationConfig;

    private final AutoMLJobCompletionCriteria completionCriteria;

    private final String featureSpecificationS3Uri;

    // Stored as the raw service string; mode() converts it to AutoMLMode on demand.
    private final String mode;

    private final Boolean generateCandidateDefinitionsOnly;

    // Stored as the raw service string; problemType() converts it to ProblemType on demand.
    private final String problemType;

    private final String targetAttributeName;

    private final String sampleWeightAttributeName;

    /**
     * Creates an instance by copying every member from the given builder.
     *
     * @param builder
     *        the builder whose current member values are copied into this object
     */
    private TabularJobConfig(BuilderImpl builder) {
        this.candidateGenerationConfig = builder.candidateGenerationConfig;
        this.completionCriteria = builder.completionCriteria;
        this.featureSpecificationS3Uri = builder.featureSpecificationS3Uri;
        this.mode = builder.mode;
        this.generateCandidateDefinitionsOnly = builder.generateCandidateDefinitionsOnly;
        this.problemType = builder.problemType;
        this.targetAttributeName = builder.targetAttributeName;
        this.sampleWeightAttributeName = builder.sampleWeightAttributeName;
    }

    /**

* <p>
     * The configuration information of how model candidates are generated.
     * </p>
     *
     * @return The configuration information of how model candidates are generated.
     */
    public final CandidateGenerationConfig candidateGenerationConfig() {
        return this.candidateGenerationConfig;
    }

    /**
     * Returns the value of the CompletionCriteria property for this object.
     *
     * @return The value of the CompletionCriteria property for this object.
     */
    public final AutoMLJobCompletionCriteria completionCriteria() {
        return this.completionCriteria;
    }

    /**

* <p>
     * A URL to the Amazon S3 data source containing selected features from the input data source to run an
     * Autopilot job V2. You can input <code>FeatureAttributeNames</code> (optional) in JSON format as shown below:
     * </p>
     * <p>
     * <code>{ "FeatureAttributeNames":["col1", "col2", ...] }</code>.
     * </p>
     * <p>
     * You can also specify the data type of the feature (optional) in the format shown below:
     * </p>
     * <p>
     * <code>{ "FeatureDataTypes":{"col1":"numeric", "col2":"categorical" ... } }</code>
     * </p>
     * <p>
     * <b>Note:</b> These column keys may not include the target column.
     * </p>
     * <p>
     * In ensembling mode, Autopilot only supports the following data types: <code>numeric</code>,
     * <code>categorical</code>, <code>text</code>, and <code>datetime</code>. In HPO mode, Autopilot can support
     * <code>numeric</code>, <code>categorical</code>, <code>text</code>, <code>datetime</code>, and
     * <code>sequence</code>.
     * </p>
     * <p>
     * If only <code>FeatureDataTypes</code> is provided, the column keys (<code>col1</code>, <code>col2</code>,..)
     * should be a subset of the column names in the input data.
     * </p>
     * <p>
     * If both <code>FeatureDataTypes</code> and <code>FeatureAttributeNames</code> are provided, then the column
     * keys should be a subset of the column names provided in <code>FeatureAttributeNames</code>.
     * </p>
     * <p>
     * The key name <code>FeatureAttributeNames</code> is fixed. The values listed in
     * <code>["col1", "col2", ...]</code> are case sensitive and should be a list of strings containing unique
     * values that are a subset of the column names in the input data. The list of columns provided must not
     * include the target column.
     * </p>
     *
     * @return A URL to the Amazon S3 data source containing the feature specification for an Autopilot job V2,
     *         in the JSON formats and under the constraints described above.
     */
    public final String featureSpecificationS3Uri() {
        return this.featureSpecificationS3Uri;
    }

    /**

* The method that Autopilot uses to train the data. You can either specify the mode manually or let Autopilot * choose for you based on the dataset size by selecting AUTO. In AUTO mode, Autopilot * chooses ENSEMBLING for datasets smaller than 100 MB, and HYPERPARAMETER_TUNING for * larger ones. *

*

* The ENSEMBLING mode uses a multi-stack ensemble model to predict classification and regression tasks * directly from your dataset. This machine learning mode combines several base models to produce an optimal * predictive model. It then uses a stacking ensemble method to combine predictions from contributing members. A * multi-stack ensemble model can provide better performance over a single model by combining the predictive * capabilities of multiple models. See Autopilot algorithm support for a list of algorithms supported by ENSEMBLING mode. *

*

* The HYPERPARAMETER_TUNING (HPO) mode uses the best hyperparameters to train the best version of a * model. HPO automatically selects an algorithm for the type of problem you want to solve. Then HPO finds the best * hyperparameters according to your objective metric. See Autopilot algorithm support for a list of algorithms supported by HYPERPARAMETER_TUNING mode. *

*

* If the service returns an enum value that is not available in the current SDK version, {@link #mode} will return * {@link AutoMLMode#UNKNOWN_TO_SDK_VERSION}. The raw value returned by the service is available from * {@link #modeAsString}. *

* * @return The method that Autopilot uses to train the data. You can either specify the mode manually or let * Autopilot choose for you based on the dataset size by selecting AUTO. In AUTO * mode, Autopilot chooses ENSEMBLING for datasets smaller than 100 MB, and * HYPERPARAMETER_TUNING for larger ones.

*

* The ENSEMBLING mode uses a multi-stack ensemble model to predict classification and * regression tasks directly from your dataset. This machine learning mode combines several base models to * produce an optimal predictive model. It then uses a stacking ensemble method to combine predictions from * contributing members. A multi-stack ensemble model can provide better performance over a single model by * combining the predictive capabilities of multiple models. See Autopilot algorithm support for a list of algorithms supported by ENSEMBLING mode. *

*

* The HYPERPARAMETER_TUNING (HPO) mode uses the best hyperparameters to train the best version * of a model. HPO automatically selects an algorithm for the type of problem you want to solve. Then HPO * finds the best hyperparameters according to your objective metric. See Autopilot algorithm support for a list of algorithms supported by HYPERPARAMETER_TUNING * mode. * @see AutoMLMode */ public final AutoMLMode mode() { return AutoMLMode.fromValue(mode); } /** *

* The method that Autopilot uses to train the data. You can either specify the mode manually or let Autopilot * choose for you based on the dataset size by selecting AUTO. In AUTO mode, Autopilot * chooses ENSEMBLING for datasets smaller than 100 MB, and HYPERPARAMETER_TUNING for * larger ones. *

*

* The ENSEMBLING mode uses a multi-stack ensemble model to predict classification and regression tasks * directly from your dataset. This machine learning mode combines several base models to produce an optimal * predictive model. It then uses a stacking ensemble method to combine predictions from contributing members. A * multi-stack ensemble model can provide better performance over a single model by combining the predictive * capabilities of multiple models. See Autopilot algorithm support for a list of algorithms supported by ENSEMBLING mode. *

*

* The HYPERPARAMETER_TUNING (HPO) mode uses the best hyperparameters to train the best version of a * model. HPO automatically selects an algorithm for the type of problem you want to solve. Then HPO finds the best * hyperparameters according to your objective metric. See Autopilot algorithm support for a list of algorithms supported by HYPERPARAMETER_TUNING mode. *

*

* If the service returns an enum value that is not available in the current SDK version, {@link #mode} will return * {@link AutoMLMode#UNKNOWN_TO_SDK_VERSION}. The raw value returned by the service is available from * {@link #modeAsString}. *

* * @return The method that Autopilot uses to train the data. You can either specify the mode manually or let * Autopilot choose for you based on the dataset size by selecting AUTO. In AUTO * mode, Autopilot chooses ENSEMBLING for datasets smaller than 100 MB, and * HYPERPARAMETER_TUNING for larger ones.

*

* The ENSEMBLING mode uses a multi-stack ensemble model to predict classification and * regression tasks directly from your dataset. This machine learning mode combines several base models to * produce an optimal predictive model. It then uses a stacking ensemble method to combine predictions from * contributing members. A multi-stack ensemble model can provide better performance over a single model by * combining the predictive capabilities of multiple models. See Autopilot algorithm support for a list of algorithms supported by ENSEMBLING mode. *

*

* The HYPERPARAMETER_TUNING (HPO) mode uses the best hyperparameters to train the best version * of a model. HPO automatically selects an algorithm for the type of problem you want to solve. Then HPO * finds the best hyperparameters according to your objective metric. See Autopilot algorithm support for a list of algorithms supported by HYPERPARAMETER_TUNING * mode. * @see AutoMLMode */ public final String modeAsString() { return mode; } /** *

* <p>
     * Generates possible candidates without training the models. A model candidate is a combination of data
     * preprocessors, algorithms, and algorithm parameter settings.
     * </p>
     *
     * @return Generates possible candidates without training the models. A model candidate is a combination of
     *         data preprocessors, algorithms, and algorithm parameter settings.
     */
    public final Boolean generateCandidateDefinitionsOnly() {
        return this.generateCandidateDefinitionsOnly;
    }

    /**

* The type of supervised learning problem available for the model candidates of the AutoML job V2. For more * information, see * SageMaker Autopilot problem types. *

* *

* You must either specify the type of supervised learning problem in ProblemType and provide the AutoMLJobObjective metric, or none at all. *

*
*

* If the service returns an enum value that is not available in the current SDK version, {@link #problemType} will * return {@link ProblemType#UNKNOWN_TO_SDK_VERSION}. The raw value returned by the service is available from * {@link #problemTypeAsString}. *

* * @return The type of supervised learning problem available for the model candidates of the AutoML job V2. For more * information, see SageMaker Autopilot problem types.

*

* You must either specify the type of supervised learning problem in ProblemType and provide * the AutoMLJobObjective metric, or none at all. *

* @see ProblemType */ public final ProblemType problemType() { return ProblemType.fromValue(problemType); } /** *

* The type of supervised learning problem available for the model candidates of the AutoML job V2. For more * information, see * SageMaker Autopilot problem types. *

* *

* You must either specify the type of supervised learning problem in ProblemType and provide the AutoMLJobObjective metric, or none at all. *

*
*

* If the service returns an enum value that is not available in the current SDK version, {@link #problemType} will * return {@link ProblemType#UNKNOWN_TO_SDK_VERSION}. The raw value returned by the service is available from * {@link #problemTypeAsString}. *

* * @return The type of supervised learning problem available for the model candidates of the AutoML job V2. For more * information, see SageMaker Autopilot problem types.

*

* You must either specify the type of supervised learning problem in ProblemType and provide * the AutoMLJobObjective metric, or none at all. *

* @see ProblemType */ public final String problemTypeAsString() { return problemType; } /** *

* <p>
     * The name of the target variable in supervised learning, usually represented by 'y'.
     * </p>
     *
     * @return The name of the target variable in supervised learning, usually represented by 'y'.
     */
    public final String targetAttributeName() {
        return this.targetAttributeName;
    }

    /**

* If specified, this column name indicates which column of the dataset should be treated as sample weights for use * by the objective metric during the training, evaluation, and the selection of the best model. This column is not * considered as a predictive feature. For more information on Autopilot metrics, see Metrics and * validation. *

*

* Sample weights should be numeric, non-negative, with larger values indicating which rows are more important than * others. Data points that have invalid or no weight value are excluded. *

*

* Support for sample weights is available in Ensembling * mode only. *

* * @return If specified, this column name indicates which column of the dataset should be treated as sample weights * for use by the objective metric during the training, evaluation, and the selection of the best model. * This column is not considered as a predictive feature. For more information on Autopilot metrics, see Metrics and * validation.

*

* Sample weights should be numeric, non-negative, with larger values indicating which rows are more * important than others. Data points that have invalid or no weight value are excluded. *

*

* Support for sample weights is available in Ensembling mode only. */ public final String sampleWeightAttributeName() { return sampleWeightAttributeName; } @Override public Builder toBuilder() { return new BuilderImpl(this); } public static Builder builder() { return new BuilderImpl(); } public static Class serializableBuilderClass() { return BuilderImpl.class; } @Override public final int hashCode() { int hashCode = 1; hashCode = 31 * hashCode + Objects.hashCode(candidateGenerationConfig()); hashCode = 31 * hashCode + Objects.hashCode(completionCriteria()); hashCode = 31 * hashCode + Objects.hashCode(featureSpecificationS3Uri()); hashCode = 31 * hashCode + Objects.hashCode(modeAsString()); hashCode = 31 * hashCode + Objects.hashCode(generateCandidateDefinitionsOnly()); hashCode = 31 * hashCode + Objects.hashCode(problemTypeAsString()); hashCode = 31 * hashCode + Objects.hashCode(targetAttributeName()); hashCode = 31 * hashCode + Objects.hashCode(sampleWeightAttributeName()); return hashCode; } @Override public final boolean equals(Object obj) { return equalsBySdkFields(obj); } @Override public final boolean equalsBySdkFields(Object obj) { if (this == obj) { return true; } if (obj == null) { return false; } if (!(obj instanceof TabularJobConfig)) { return false; } TabularJobConfig other = (TabularJobConfig) obj; return Objects.equals(candidateGenerationConfig(), other.candidateGenerationConfig()) && Objects.equals(completionCriteria(), other.completionCriteria()) && Objects.equals(featureSpecificationS3Uri(), other.featureSpecificationS3Uri()) && Objects.equals(modeAsString(), other.modeAsString()) && Objects.equals(generateCandidateDefinitionsOnly(), other.generateCandidateDefinitionsOnly()) && Objects.equals(problemTypeAsString(), other.problemTypeAsString()) && Objects.equals(targetAttributeName(), other.targetAttributeName()) && Objects.equals(sampleWeightAttributeName(), other.sampleWeightAttributeName()); } /** * Returns a string representation of this 
object. This is useful for testing and debugging. Sensitive data will be * redacted from this string using a placeholder value. */ @Override public final String toString() { return ToString.builder("TabularJobConfig").add("CandidateGenerationConfig", candidateGenerationConfig()) .add("CompletionCriteria", completionCriteria()).add("FeatureSpecificationS3Uri", featureSpecificationS3Uri()) .add("Mode", modeAsString()).add("GenerateCandidateDefinitionsOnly", generateCandidateDefinitionsOnly()) .add("ProblemType", problemTypeAsString()).add("TargetAttributeName", targetAttributeName()) .add("SampleWeightAttributeName", sampleWeightAttributeName()).build(); } public final Optional getValueForField(String fieldName, Class clazz) { switch (fieldName) { case "CandidateGenerationConfig": return Optional.ofNullable(clazz.cast(candidateGenerationConfig())); case "CompletionCriteria": return Optional.ofNullable(clazz.cast(completionCriteria())); case "FeatureSpecificationS3Uri": return Optional.ofNullable(clazz.cast(featureSpecificationS3Uri())); case "Mode": return Optional.ofNullable(clazz.cast(modeAsString())); case "GenerateCandidateDefinitionsOnly": return Optional.ofNullable(clazz.cast(generateCandidateDefinitionsOnly())); case "ProblemType": return Optional.ofNullable(clazz.cast(problemTypeAsString())); case "TargetAttributeName": return Optional.ofNullable(clazz.cast(targetAttributeName())); case "SampleWeightAttributeName": return Optional.ofNullable(clazz.cast(sampleWeightAttributeName())); default: return Optional.empty(); } } @Override public final List> sdkFields() { return SDK_FIELDS; } private static Function getter(Function g) { return obj -> g.apply((TabularJobConfig) obj); } private static BiConsumer setter(BiConsumer s) { return (obj, val) -> s.accept((Builder) obj, val); } public interface Builder extends SdkPojo, CopyableBuilder { /** *

* <p>
         * The configuration information of how model candidates are generated.
         * </p>
         *
         * @param candidateGenerationConfig
         *        The configuration information of how model candidates are generated.
         * @return Returns a reference to this object so that method calls can be chained together.
         */
        Builder candidateGenerationConfig(CandidateGenerationConfig candidateGenerationConfig);

        /**
         * <p>
         * The configuration information of how model candidates are generated.
         * </p>
         * This is a convenience method that creates an instance of the {@link CandidateGenerationConfig.Builder}
         * avoiding the need to create one manually via {@link CandidateGenerationConfig#builder()}.
         *
         * <p>
         * When the {@link Consumer} completes, {@link CandidateGenerationConfig.Builder#build()} is called
         * immediately and its result is passed to {@link #candidateGenerationConfig(CandidateGenerationConfig)}.
         *
         * @param candidateGenerationConfig
         *        a consumer that will call methods on {@link CandidateGenerationConfig.Builder}
         * @return Returns a reference to this object so that method calls can be chained together.
         * @see #candidateGenerationConfig(CandidateGenerationConfig)
         */
        default Builder candidateGenerationConfig(Consumer<CandidateGenerationConfig.Builder> candidateGenerationConfig) {
            return candidateGenerationConfig(
                    CandidateGenerationConfig.builder().applyMutation(candidateGenerationConfig).build());
        }

        /**
         * Sets the value of the CompletionCriteria property for this object.
         *
         * @param completionCriteria
         *        The new value for the CompletionCriteria property for this object.
         * @return Returns a reference to this object so that method calls can be chained together.
         */
        Builder completionCriteria(AutoMLJobCompletionCriteria completionCriteria);

        /**
         * Sets the value of the CompletionCriteria property for this object.
         *
         * This is a convenience method that creates an instance of the {@link AutoMLJobCompletionCriteria.Builder}
         * avoiding the need to create one manually via {@link AutoMLJobCompletionCriteria#builder()}.
         *
         * <p>
         * When the {@link Consumer} completes, {@link AutoMLJobCompletionCriteria.Builder#build()} is called
         * immediately and its result is passed to {@link #completionCriteria(AutoMLJobCompletionCriteria)}.
         *
         * @param completionCriteria
         *        a consumer that will call methods on {@link AutoMLJobCompletionCriteria.Builder}
         * @return Returns a reference to this object so that method calls can be chained together.
         * @see #completionCriteria(AutoMLJobCompletionCriteria)
         */
        default Builder completionCriteria(Consumer<AutoMLJobCompletionCriteria.Builder> completionCriteria) {
            return completionCriteria(AutoMLJobCompletionCriteria.builder().applyMutation(completionCriteria).build());
        }

        /**

* <p>
         * A URL to the Amazon S3 data source containing selected features from the input data source to run an
         * Autopilot job V2. You can input <code>FeatureAttributeNames</code> (optional) in JSON format as shown
         * below:
         * </p>
         * <p>
         * <code>{ "FeatureAttributeNames":["col1", "col2", ...] }</code>.
         * </p>
         * <p>
         * You can also specify the data type of the feature (optional) in the format shown below:
         * </p>
         * <p>
         * <code>{ "FeatureDataTypes":{"col1":"numeric", "col2":"categorical" ... } }</code>
         * </p>
         * <p>
         * <b>Note:</b> These column keys may not include the target column.
         * </p>
         * <p>
         * In ensembling mode, Autopilot only supports the following data types: <code>numeric</code>,
         * <code>categorical</code>, <code>text</code>, and <code>datetime</code>. In HPO mode, Autopilot can
         * support <code>numeric</code>, <code>categorical</code>, <code>text</code>, <code>datetime</code>, and
         * <code>sequence</code>.
         * </p>
         * <p>
         * If only <code>FeatureDataTypes</code> is provided, the column keys (<code>col1</code>,
         * <code>col2</code>,..) should be a subset of the column names in the input data.
         * </p>
         * <p>
         * If both <code>FeatureDataTypes</code> and <code>FeatureAttributeNames</code> are provided, then the
         * column keys should be a subset of the column names provided in <code>FeatureAttributeNames</code>.
         * </p>
         * <p>
         * The key name <code>FeatureAttributeNames</code> is fixed. The values listed in
         * <code>["col1", "col2", ...]</code> are case sensitive and should be a list of strings containing unique
         * values that are a subset of the column names in the input data. The list of columns provided must not
         * include the target column.
         * </p>
         *
         * @param featureSpecificationS3Uri
         *        A URL to the Amazon S3 data source containing the feature specification for an Autopilot job V2,
         *        in the JSON formats and under the constraints described above.
         * @return Returns a reference to this object so that method calls can be chained together.
         */
        Builder featureSpecificationS3Uri(String featureSpecificationS3Uri);

        /**

* <p>
         * The method that Autopilot uses to train the data. You can either specify the mode manually or let
         * Autopilot choose for you based on the dataset size by selecting <code>AUTO</code>. In <code>AUTO</code>
         * mode, Autopilot chooses <code>ENSEMBLING</code> for datasets smaller than 100 MB, and
         * <code>HYPERPARAMETER_TUNING</code> for larger ones.
         * </p>
         * <p>
         * The <code>ENSEMBLING</code> mode uses a multi-stack ensemble model to predict classification and
         * regression tasks directly from your dataset. This machine learning mode combines several base models to
         * produce an optimal predictive model. It then uses a stacking ensemble method to combine predictions from
         * contributing members. A multi-stack ensemble model can provide better performance over a single model by
         * combining the predictive capabilities of multiple models. See Autopilot algorithm support for a list of
         * algorithms supported by <code>ENSEMBLING</code> mode.
         * </p>
         * <p>
         * The <code>HYPERPARAMETER_TUNING</code> (HPO) mode uses the best hyperparameters to train the best version
         * of a model. HPO automatically selects an algorithm for the type of problem you want to solve. Then HPO
         * finds the best hyperparameters according to your objective metric. See Autopilot algorithm support for a
         * list of algorithms supported by <code>HYPERPARAMETER_TUNING</code> mode.
         * </p>
         *
         * @param mode
         *        The method that Autopilot uses to train the data, given as its raw string value; see the
         *        description above for what each mode means.
         * @return Returns a reference to this object so that method calls can be chained together.
         * @see AutoMLMode
         */
        Builder mode(String mode);

        /**

* <p>
         * The method that Autopilot uses to train the data. You can either specify the mode manually or let
         * Autopilot choose for you based on the dataset size by selecting <code>AUTO</code>. In <code>AUTO</code>
         * mode, Autopilot chooses <code>ENSEMBLING</code> for datasets smaller than 100 MB, and
         * <code>HYPERPARAMETER_TUNING</code> for larger ones.
         * </p>
         * <p>
         * The <code>ENSEMBLING</code> mode uses a multi-stack ensemble model to predict classification and
         * regression tasks directly from your dataset. This machine learning mode combines several base models to
         * produce an optimal predictive model. It then uses a stacking ensemble method to combine predictions from
         * contributing members. A multi-stack ensemble model can provide better performance over a single model by
         * combining the predictive capabilities of multiple models. See Autopilot algorithm support for a list of
         * algorithms supported by <code>ENSEMBLING</code> mode.
         * </p>
         * <p>
         * The <code>HYPERPARAMETER_TUNING</code> (HPO) mode uses the best hyperparameters to train the best version
         * of a model. HPO automatically selects an algorithm for the type of problem you want to solve. Then HPO
         * finds the best hyperparameters according to your objective metric. See Autopilot algorithm support for a
         * list of algorithms supported by <code>HYPERPARAMETER_TUNING</code> mode.
         * </p>
         *
         * @param mode
         *        The method that Autopilot uses to train the data, given as an {@link AutoMLMode}; see the
         *        description above for what each mode means.
         * @return Returns a reference to this object so that method calls can be chained together.
         * @see AutoMLMode
         */
        Builder mode(AutoMLMode mode);

        /**

* Generates possible candidates without training the models. A model candidate is a combination of data * preprocessors, algorithms, and algorithm parameter settings. *

* * @param generateCandidateDefinitionsOnly * Generates possible candidates without training the models. A model candidate is a combination of data * preprocessors, algorithms, and algorithm parameter settings. * @return Returns a reference to this object so that method calls can be chained together. */ Builder generateCandidateDefinitionsOnly(Boolean generateCandidateDefinitionsOnly); /** *

* The type of supervised learning problem available for the model candidates of the AutoML job V2. For more * information, see SageMaker Autopilot problem types. *

* *

* You must either specify the type of supervised learning problem in ProblemType and provide the * AutoMLJobObjective metric, or none at all. *

*
* * @param problemType * The type of supervised learning problem available for the model candidates of the AutoML job V2. For * more information, see SageMaker Autopilot problem types.

*

* You must either specify the type of supervised learning problem in ProblemType and * provide the AutoMLJobObjective metric, or none at all. *

* @see ProblemType * @return Returns a reference to this object so that method calls can be chained together. * @see ProblemType */ Builder problemType(String problemType); /** *

* The type of supervised learning problem available for the model candidates of the AutoML job V2. For more * information, see SageMaker Autopilot problem types. *

* *

* You must either specify the type of supervised learning problem in ProblemType and provide the * AutoMLJobObjective metric, or none at all. *

*
* * @param problemType * The type of supervised learning problem available for the model candidates of the AutoML job V2. For * more information, see SageMaker Autopilot problem types.

*

* You must either specify the type of supervised learning problem in ProblemType and * provide the AutoMLJobObjective metric, or none at all. *

* @see ProblemType * @return Returns a reference to this object so that method calls can be chained together. * @see ProblemType */ Builder problemType(ProblemType problemType); /** *

* The name of the target variable in supervised learning, usually represented by 'y'. *

* * @param targetAttributeName * The name of the target variable in supervised learning, usually represented by 'y'. * @return Returns a reference to this object so that method calls can be chained together. */ Builder targetAttributeName(String targetAttributeName); /** *

* If specified, this column name indicates which column of the dataset should be treated as sample weights for * use by the objective metric during the training, evaluation, and the selection of the best model. This column * is not considered as a predictive feature. For more information on Autopilot metrics, see Metrics and * validation. *

*

* Sample weights should be numeric, non-negative, with larger values indicating which rows are more important * than others. Data points that have invalid or no weight value are excluded. *

*

* Support for sample weights is available in Ensembling mode only. *

* * @param sampleWeightAttributeName * If specified, this column name indicates which column of the dataset should be treated as sample * weights for use by the objective metric during the training, evaluation, and the selection of the best * model. This column is not considered as a predictive feature. For more information on Autopilot * metrics, see Metrics and * validation.

*

* Sample weights should be numeric, non-negative, with larger values indicating which rows are more * important than others. Data points that have invalid or no weight value are excluded. *

*

* Support for sample weights is available in Ensembling mode only.
         * @return Returns a reference to this object so that method calls can be chained together.
         */
        Builder sampleWeightAttributeName(String sampleWeightAttributeName);
    }

    /**
     * Mutable {@link Builder} implementation that accumulates the field values of a {@link TabularJobConfig}.
     *
     * <p>
     * Every fluent setter stores its argument as-is (no validation, {@code null} allowed) and returns this builder.
     * The enum-typed overloads ({@link #mode(AutoMLMode)}, {@link #problemType(ProblemType)}) store the enum's
     * {@code toString()} value, so the builder only ever holds the string form.
     * </p>
     *
     * <p>
     * The {@code getXxx}/{@code setXxx} pairs are bean-style accessors; NOTE(review): presumably they exist for
     * reflection-based tooling - confirm against the code generator before relying on that.
     * </p>
     */
    static final class BuilderImpl implements Builder {
        private CandidateGenerationConfig candidateGenerationConfig;

        private AutoMLJobCompletionCriteria completionCriteria;

        private String featureSpecificationS3Uri;

        // Held as a string; the AutoMLMode overload converts via toString().
        private String mode;

        private Boolean generateCandidateDefinitionsOnly;

        // Held as a string; the ProblemType overload converts via toString().
        private String problemType;

        private String targetAttributeName;

        private String sampleWeightAttributeName;

        /** Creates an empty builder; every field starts out {@code null}. */
        private BuilderImpl() {
        }

        /**
         * Creates a builder pre-populated from an existing instance by passing each of the model's fields through
         * the corresponding fluent setter.
         *
         * @param model
         *        the instance whose field values are copied into this builder
         */
        private BuilderImpl(TabularJobConfig model) {
            candidateGenerationConfig(model.candidateGenerationConfig);
            completionCriteria(model.completionCriteria);
            featureSpecificationS3Uri(model.featureSpecificationS3Uri);
            mode(model.mode);
            generateCandidateDefinitionsOnly(model.generateCandidateDefinitionsOnly);
            problemType(model.problemType);
            targetAttributeName(model.targetAttributeName);
            sampleWeightAttributeName(model.sampleWeightAttributeName);
        }

        /**
         * @return a builder view of the stored candidate generation config, or {@code null} when none is set
         */
        public final CandidateGenerationConfig.Builder getCandidateGenerationConfig() {
            return candidateGenerationConfig != null ? candidateGenerationConfig.toBuilder() : null;
        }

        /**
         * Stores the result of building the supplied builder, or {@code null} when the argument is {@code null}.
         */
        public final void setCandidateGenerationConfig(CandidateGenerationConfig.BuilderImpl candidateGenerationConfig) {
            this.candidateGenerationConfig = candidateGenerationConfig != null ? candidateGenerationConfig.build() : null;
        }

        @Override
        public final Builder candidateGenerationConfig(CandidateGenerationConfig candidateGenerationConfig) {
            this.candidateGenerationConfig = candidateGenerationConfig;
            return this;
        }

        /**
         * @return a builder view of the stored completion criteria, or {@code null} when none is set
         */
        public final AutoMLJobCompletionCriteria.Builder getCompletionCriteria() {
            return completionCriteria != null ? completionCriteria.toBuilder() : null;
        }

        /**
         * Stores the result of building the supplied builder, or {@code null} when the argument is {@code null}.
         */
        public final void setCompletionCriteria(AutoMLJobCompletionCriteria.BuilderImpl completionCriteria) {
            this.completionCriteria = completionCriteria != null ? completionCriteria.build() : null;
        }

        @Override
        public final Builder completionCriteria(AutoMLJobCompletionCriteria completionCriteria) {
            this.completionCriteria = completionCriteria;
            return this;
        }

        public final String getFeatureSpecificationS3Uri() {
            return featureSpecificationS3Uri;
        }

        public final void setFeatureSpecificationS3Uri(String featureSpecificationS3Uri) {
            this.featureSpecificationS3Uri = featureSpecificationS3Uri;
        }

        @Override
        public final Builder featureSpecificationS3Uri(String featureSpecificationS3Uri) {
            this.featureSpecificationS3Uri = featureSpecificationS3Uri;
            return this;
        }

        public final String getMode() {
            return mode;
        }

        public final void setMode(String mode) {
            this.mode = mode;
        }

        @Override
        public final Builder mode(String mode) {
            this.mode = mode;
            return this;
        }

        /**
         * Enum overload: delegates to {@link #mode(String)} with {@code mode.toString()}, or with {@code null} when
         * {@code mode} is {@code null}.
         */
        @Override
        public final Builder mode(AutoMLMode mode) {
            this.mode(mode == null ? null : mode.toString());
            return this;
        }

        public final Boolean getGenerateCandidateDefinitionsOnly() {
            return generateCandidateDefinitionsOnly;
        }

        public final void setGenerateCandidateDefinitionsOnly(Boolean generateCandidateDefinitionsOnly) {
            this.generateCandidateDefinitionsOnly = generateCandidateDefinitionsOnly;
        }

        @Override
        public final Builder generateCandidateDefinitionsOnly(Boolean generateCandidateDefinitionsOnly) {
            this.generateCandidateDefinitionsOnly = generateCandidateDefinitionsOnly;
            return this;
        }

        public final String getProblemType() {
            return problemType;
        }

        public final void setProblemType(String problemType) {
            this.problemType = problemType;
        }

        @Override
        public final Builder problemType(String problemType) {
            this.problemType = problemType;
            return this;
        }

        /**
         * Enum overload: delegates to {@link #problemType(String)} with {@code problemType.toString()}, or with
         * {@code null} when {@code problemType} is {@code null}.
         */
        @Override
        public final Builder problemType(ProblemType problemType) {
            this.problemType(problemType == null ? null : problemType.toString());
            return this;
        }

        public final String getTargetAttributeName() {
            return targetAttributeName;
        }

        public final void setTargetAttributeName(String targetAttributeName) {
            this.targetAttributeName = targetAttributeName;
        }

        @Override
        public final Builder targetAttributeName(String targetAttributeName) {
            this.targetAttributeName = targetAttributeName;
            return this;
        }

        public final String getSampleWeightAttributeName() {
            return sampleWeightAttributeName;
        }

        public final void setSampleWeightAttributeName(String sampleWeightAttributeName) {
            this.sampleWeightAttributeName = sampleWeightAttributeName;
        }

        @Override
        public final Builder sampleWeightAttributeName(String sampleWeightAttributeName) {
            this.sampleWeightAttributeName = sampleWeightAttributeName;
            return this;
        }

        /**
         * @return a new {@link TabularJobConfig} constructed from this builder
         */
        @Override
        public TabularJobConfig build() {
            return new TabularJobConfig(this);
        }

        /**
         * @return the enclosing class's {@code SDK_FIELDS} list
         */
        @Override
        public List<SdkField<?>> sdkFields() {
            return SDK_FIELDS;
        }
    }
}





© 2015 - 2025 Weber Informatics LLC | Privacy Policy