/*
* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance with
* the License. A copy of the License is located at
*
* http://aws.amazon.com/apache2.0
*
* or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
* CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions
* and limitations under the License.
*/
package software.amazon.awssdk.services.sagemaker.model;
import java.io.Serializable;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.function.BiConsumer;
import java.util.function.Function;
import software.amazon.awssdk.annotations.Generated;
import software.amazon.awssdk.core.SdkField;
import software.amazon.awssdk.core.SdkPojo;
import software.amazon.awssdk.core.protocol.MarshallLocation;
import software.amazon.awssdk.core.protocol.MarshallingType;
import software.amazon.awssdk.core.traits.LocationTrait;
import software.amazon.awssdk.utils.ToString;
import software.amazon.awssdk.utils.builder.CopyableBuilder;
import software.amazon.awssdk.utils.builder.ToCopyableBuilder;
/**
*
 * Specifies a metric to minimize or maximize as the objective of a job. V2 API jobs (for example jobs created by
 * calling CreateAutoMLJobV2) support Accuracy only.
*
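 *
 * Illustration only (not part of the service documentation): assuming the generated Builder also exposes an
 * enum-typed metricName(AutoMLMetricEnum) setter alongside the string setter, as SDK builders typically do,
 * an objective can be constructed as follows:
 *
 * AutoMLJobObjective objective = AutoMLJobObjective.builder()
 *         .metricName(AutoMLMetricEnum.F1)
 *         .build();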
*/
@Generated("software.amazon.awssdk:codegen")
public final class AutoMLJobObjective implements SdkPojo, Serializable,
        ToCopyableBuilder<AutoMLJobObjective.Builder, AutoMLJobObjective> {
    private static final SdkField<String> METRIC_NAME_FIELD = SdkField.<String> builder(MarshallingType.STRING)
.memberName("MetricName").getter(getter(AutoMLJobObjective::metricNameAsString)).setter(setter(Builder::metricName))
.traits(LocationTrait.builder().location(MarshallLocation.PAYLOAD).locationName("MetricName").build()).build();
    private static final List<SdkField<?>> SDK_FIELDS = Collections.unmodifiableList(Arrays.asList(METRIC_NAME_FIELD));
private static final long serialVersionUID = 1L;
private final String metricName;
private AutoMLJobObjective(BuilderImpl builder) {
this.metricName = builder.metricName;
}
/**
*
* The name of the objective metric used to measure the predictive quality of a machine learning system. This metric
* is optimized during training to provide the best estimate for model parameter values from data.
*
*
* Here are the options:
*
*
* - Accuracy
* -
*
* The ratio of the number of correctly classified items to the total number of (correctly and incorrectly)
* classified items. It is used for both binary and multiclass classification. Accuracy measures how close the
* predicted class values are to the actual values. Values for accuracy metrics vary between zero (0) and one (1). A
* value of 1 indicates perfect accuracy, and 0 indicates perfect inaccuracy.
*
*
* - AUC
* -
*
* The area under the curve (AUC) metric is used to compare and evaluate binary classification by algorithms that
* return probabilities, such as logistic regression. To map the probabilities into classifications, these are
* compared against a threshold value.
*
*
* The relevant curve is the receiver operating characteristic curve (ROC curve). The ROC curve plots the true
* positive rate (TPR) of predictions (or recall) against the false positive rate (FPR) as a function of the
* threshold value, above which a prediction is considered positive. Increasing the threshold results in fewer false
* positives, but more false negatives.
*
*
* AUC is the area under this ROC curve. Therefore, AUC provides an aggregated measure of the model performance
* across all possible classification thresholds. AUC scores vary between 0 and 1. A score of 1 indicates perfect
* accuracy, and a score of one half (0.5) indicates that the prediction is not better than a random classifier.
*
*
* - BalancedAccuracy
* -
*
 * BalancedAccuracy is a metric that measures the ratio of accurate predictions to all predictions. This ratio is
 * calculated after normalizing true positives (TP) and true negatives (TN) by the total number of positive (P) and
 * negative (N) values. It is used in both binary and multiclass classification and is defined as follows:
 * 0.5*((TP/P)+(TN/N)), with values ranging from 0 to 1. BalancedAccuracy gives a better measure of accuracy when the
 * numbers of positives and negatives differ greatly from each other in an imbalanced dataset. For example, when only
 * 1% of email is spam.
*
*
* - F1
* -
*
 * The F1 score is the harmonic mean of the precision and recall, defined as follows: F1 = 2 *
* (precision * recall) / (precision + recall). It is used for binary classification into classes traditionally
* referred to as positive and negative. Predictions are said to be true when they match their actual (correct)
* class, and false when they do not.
*
*
* Precision is the ratio of the true positive predictions to all positive predictions, and it includes the false
* positives in a dataset. Precision measures the quality of the prediction when it predicts the positive class.
*
*
* Recall (or sensitivity) is the ratio of the true positive predictions to all actual positive instances. Recall
* measures how completely a model predicts the actual class members in a dataset.
*
*
* F1 scores vary between 0 and 1. A score of 1 indicates the best possible performance, and 0 indicates the worst.
*
*
* - F1macro
* -
*
 * The F1macro score applies F1 scoring to multiclass classification problems. It does this by calculating the
 * precision and recall, and then taking their harmonic mean to calculate the F1 score for each class. Lastly, the
 * F1macro averages the individual scores to obtain the F1macro score. F1macro scores vary between 0 and 1. A score
 * of 1 indicates the best possible performance, and 0 indicates the worst.
*
*
* - MAE
* -
*
* The mean absolute error (MAE) is a measure of how different the predicted and actual values are, when they're
* averaged over all values. MAE is commonly used in regression analysis to understand model prediction error. If
* there is linear regression, MAE represents the average distance from a predicted line to the actual value. MAE is
* defined as the sum of absolute errors divided by the number of observations. Values range from 0 to infinity,
* with smaller numbers indicating a better model fit to the data.
*
*
* - MSE
* -
*
* The mean squared error (MSE) is the average of the squared differences between the predicted and actual values.
* It is used for regression. MSE values are always positive. The better a model is at predicting the actual values,
 * the smaller the MSE value is.
*
*
* - Precision
* -
*
* Precision measures how well an algorithm predicts the true positives (TP) out of all of the positives that it
* identifies. It is defined as follows: Precision = TP/(TP+FP), with values ranging from zero (0) to one (1), and
* is used in binary classification. Precision is an important metric when the cost of a false positive is high. For
* example, the cost of a false positive is very high if an airplane safety system is falsely deemed safe to fly. A
* false positive (FP) reflects a positive prediction that is actually negative in the data.
*
*
* - PrecisionMacro
* -
*
* The precision macro computes precision for multiclass classification problems. It does this by calculating
* precision for each class and averaging scores to obtain precision for several classes.
 * PrecisionMacro scores range from zero (0) to one (1). Higher scores reflect the model's ability to
* predict true positives (TP) out of all of the positives that it identifies, averaged across multiple classes.
*
*
* - R2
* -
*
* R2, also known as the coefficient of determination, is used in regression to quantify how much a model can
* explain the variance of a dependent variable. Values range from one (1) to negative one (-1). Higher numbers
 * indicate a higher fraction of explained variability. R2 values close to zero (0) indicate that very
* little of the dependent variable can be explained by the model. Negative values indicate a poor fit and that the
* model is outperformed by a constant function. For linear regression, this is a horizontal line.
*
*
* - Recall
* -
*
* Recall measures how well an algorithm correctly predicts all of the true positives (TP) in a dataset. A true
* positive is a positive prediction that is also an actual positive value in the data. Recall is defined as
* follows: Recall = TP/(TP+FN), with values ranging from 0 to 1. Higher scores reflect a better ability of the
 * model to predict true positives (TP) in the data. Recall is used in binary classification.
*
*
* Recall is important when testing for cancer because it's used to find all of the true positives. A false positive
* (FP) reflects a positive prediction that is actually negative in the data. It is often insufficient to measure
 * only recall, because predicting every output as a true positive yields a perfect recall score.
*
*
* - RecallMacro
* -
*
* The RecallMacro computes recall for multiclass classification problems by calculating recall for each class and
* averaging scores to obtain recall for several classes. RecallMacro scores range from 0 to 1. Higher scores
 * reflect the model's ability to predict true positives (TP) in a dataset, where a true positive reflects a
* positive prediction that is also an actual positive value in the data. It is often insufficient to measure only
* recall, because predicting every output as a true positive yields a perfect recall score.
*
*
* - RMSE
* -
*
* Root mean squared error (RMSE) measures the square root of the squared difference between predicted and actual
* values, and it's averaged over all values. It is used in regression analysis to understand model prediction
* error. It's an important metric to indicate the presence of large model errors and outliers. Values range from
* zero (0) to infinity, with smaller numbers indicating a better model fit to the data. RMSE is dependent on scale,
* and should not be used to compare datasets of different sizes.
*
*
*
*
* If you do not specify a metric explicitly, the default behavior is to automatically use:
*
*
* -
*
 * MSE: for regression.
 *
 *
 * -
 *
 * F1: for binary classification.
 *
 *
 * -
 *
 * Accuracy: for multiclass classification.
*
*
*
*
* If the service returns an enum value that is not available in the current SDK version, {@link #metricName} will
* return {@link AutoMLMetricEnum#UNKNOWN_TO_SDK_VERSION}. The raw value returned by the service is available from
* {@link #metricNameAsString}.
*
*
* @return The name of the objective metric used to measure the predictive quality of a machine learning system.
* This metric is optimized during training to provide the best estimate for model parameter values from
* data.
*
* Here are the options:
*
*
* - Accuracy
* -
*
* The ratio of the number of correctly classified items to the total number of (correctly and incorrectly)
* classified items. It is used for both binary and multiclass classification. Accuracy measures how close
* the predicted class values are to the actual values. Values for accuracy metrics vary between zero (0)
* and one (1). A value of 1 indicates perfect accuracy, and 0 indicates perfect inaccuracy.
*
*
* - AUC
* -
*
* The area under the curve (AUC) metric is used to compare and evaluate binary classification by algorithms
* that return probabilities, such as logistic regression. To map the probabilities into classifications,
* these are compared against a threshold value.
*
*
* The relevant curve is the receiver operating characteristic curve (ROC curve). The ROC curve plots the
* true positive rate (TPR) of predictions (or recall) against the false positive rate (FPR) as a function
* of the threshold value, above which a prediction is considered positive. Increasing the threshold results
* in fewer false positives, but more false negatives.
*
*
* AUC is the area under this ROC curve. Therefore, AUC provides an aggregated measure of the model
* performance across all possible classification thresholds. AUC scores vary between 0 and 1. A score of 1
* indicates perfect accuracy, and a score of one half (0.5) indicates that the prediction is not better
* than a random classifier.
*
*
* - BalancedAccuracy
* -
*
 * BalancedAccuracy is a metric that measures the ratio of accurate predictions to all predictions. This ratio
 * is calculated after normalizing true positives (TP) and true negatives (TN) by the total number of positive
 * (P) and negative (N) values. It is used in both binary and multiclass classification and is defined as
 * follows: 0.5*((TP/P)+(TN/N)), with values ranging from 0 to 1. BalancedAccuracy gives a better measure of
 * accuracy when the numbers of positives and negatives differ greatly from each other in an imbalanced
 * dataset. For example, when only 1% of email is spam.
*
*
* - F1
* -
*
 * The F1 score is the harmonic mean of the precision and recall, defined as follows: F1 = 2 *
* (precision * recall) / (precision + recall). It is used for binary classification into classes
* traditionally referred to as positive and negative. Predictions are said to be true when they match their
* actual (correct) class, and false when they do not.
*
*
* Precision is the ratio of the true positive predictions to all positive predictions, and it includes the
* false positives in a dataset. Precision measures the quality of the prediction when it predicts the
* positive class.
*
*
* Recall (or sensitivity) is the ratio of the true positive predictions to all actual positive instances.
* Recall measures how completely a model predicts the actual class members in a dataset.
*
*
* F1 scores vary between 0 and 1. A score of 1 indicates the best possible performance, and 0 indicates the
* worst.
*
*
* - F1macro
* -
*
 * The F1macro score applies F1 scoring to multiclass classification problems. It does this by calculating the
 * precision and recall, and then taking their harmonic mean to calculate the F1 score for each class. Lastly,
 * the F1macro averages the individual scores to obtain the F1macro score. F1macro scores vary between 0 and 1.
 * A score of 1 indicates the best possible performance, and 0 indicates the worst.
*
*
* - MAE
* -
*
* The mean absolute error (MAE) is a measure of how different the predicted and actual values are, when
* they're averaged over all values. MAE is commonly used in regression analysis to understand model
* prediction error. If there is linear regression, MAE represents the average distance from a predicted
* line to the actual value. MAE is defined as the sum of absolute errors divided by the number of
* observations. Values range from 0 to infinity, with smaller numbers indicating a better model fit to the
* data.
*
*
* - MSE
* -
*
* The mean squared error (MSE) is the average of the squared differences between the predicted and actual
* values. It is used for regression. MSE values are always positive. The better a model is at predicting
 * the actual values, the smaller the MSE value is.
*
*
* - Precision
* -
*
* Precision measures how well an algorithm predicts the true positives (TP) out of all of the positives
* that it identifies. It is defined as follows: Precision = TP/(TP+FP), with values ranging from zero (0)
* to one (1), and is used in binary classification. Precision is an important metric when the cost of a
* false positive is high. For example, the cost of a false positive is very high if an airplane safety
* system is falsely deemed safe to fly. A false positive (FP) reflects a positive prediction that is
* actually negative in the data.
*
*
* - PrecisionMacro
* -
*
* The precision macro computes precision for multiclass classification problems. It does this by
* calculating precision for each class and averaging scores to obtain precision for several classes.
 * PrecisionMacro scores range from zero (0) to one (1). Higher scores reflect the model's
* ability to predict true positives (TP) out of all of the positives that it identifies, averaged across
* multiple classes.
*
*
* - R2
* -
*
* R2, also known as the coefficient of determination, is used in regression to quantify how much a model
* can explain the variance of a dependent variable. Values range from one (1) to negative one (-1). Higher
 * numbers indicate a higher fraction of explained variability. R2 values close to zero (0)
* indicate that very little of the dependent variable can be explained by the model. Negative values
* indicate a poor fit and that the model is outperformed by a constant function. For linear regression,
* this is a horizontal line.
*
*
* - Recall
* -
*
* Recall measures how well an algorithm correctly predicts all of the true positives (TP) in a dataset. A
* true positive is a positive prediction that is also an actual positive value in the data. Recall is
* defined as follows: Recall = TP/(TP+FN), with values ranging from 0 to 1. Higher scores reflect a better
 * ability of the model to predict true positives (TP) in the data. Recall is used in binary classification.
*
*
* Recall is important when testing for cancer because it's used to find all of the true positives. A false
* positive (FP) reflects a positive prediction that is actually negative in the data. It is often
 * insufficient to measure only recall, because predicting every output as a true positive yields a perfect
* recall score.
*
*
* - RecallMacro
* -
*
* The RecallMacro computes recall for multiclass classification problems by calculating recall for each
* class and averaging scores to obtain recall for several classes. RecallMacro scores range from 0 to 1.
 * Higher scores reflect the model's ability to predict true positives (TP) in a dataset, where a true
* positive reflects a positive prediction that is also an actual positive value in the data. It is often
* insufficient to measure only recall, because predicting every output as a true positive yields a perfect
* recall score.
*
*
* - RMSE
* -
*
* Root mean squared error (RMSE) measures the square root of the squared difference between predicted and
* actual values, and it's averaged over all values. It is used in regression analysis to understand model
* prediction error. It's an important metric to indicate the presence of large model errors and outliers.
* Values range from zero (0) to infinity, with smaller numbers indicating a better model fit to the data.
* RMSE is dependent on scale, and should not be used to compare datasets of different sizes.
*
*
*
*
* If you do not specify a metric explicitly, the default behavior is to automatically use:
*
*
* -
*
 * MSE: for regression.
 *
 *
 * -
 *
 * F1: for binary classification.
 *
 *
 * -
 *
 * Accuracy: for multiclass classification.
*
*
* @see AutoMLMetricEnum
*/
public final AutoMLMetricEnum metricName() {
return AutoMLMetricEnum.fromValue(metricName);
}
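    // Illustration only (not generated code): a caller that must tolerate metric names introduced after this SDK
    // version was released can check for the UNKNOWN_TO_SDK_VERSION sentinel and fall back to the raw string, e.g.
    //
    //     AutoMLMetricEnum metric = objective.metricName();
    //     String effectiveName = (metric == AutoMLMetricEnum.UNKNOWN_TO_SDK_VERSION)
    //             ? objective.metricNameAsString()   // raw value as returned by the service
    //             : metric.toString();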
/**
*
* The name of the objective metric used to measure the predictive quality of a machine learning system. This metric
* is optimized during training to provide the best estimate for model parameter values from data.
*
*
* Here are the options:
*
*
* - Accuracy
* -
*
* The ratio of the number of correctly classified items to the total number of (correctly and incorrectly)
* classified items. It is used for both binary and multiclass classification. Accuracy measures how close the
* predicted class values are to the actual values. Values for accuracy metrics vary between zero (0) and one (1). A
* value of 1 indicates perfect accuracy, and 0 indicates perfect inaccuracy.
*
*
* - AUC
* -
*
* The area under the curve (AUC) metric is used to compare and evaluate binary classification by algorithms that
* return probabilities, such as logistic regression. To map the probabilities into classifications, these are
* compared against a threshold value.
*
*
* The relevant curve is the receiver operating characteristic curve (ROC curve). The ROC curve plots the true
* positive rate (TPR) of predictions (or recall) against the false positive rate (FPR) as a function of the
* threshold value, above which a prediction is considered positive. Increasing the threshold results in fewer false
* positives, but more false negatives.
*
*
* AUC is the area under this ROC curve. Therefore, AUC provides an aggregated measure of the model performance
* across all possible classification thresholds. AUC scores vary between 0 and 1. A score of 1 indicates perfect
* accuracy, and a score of one half (0.5) indicates that the prediction is not better than a random classifier.
*
*
* - BalancedAccuracy
* -
*
 * BalancedAccuracy is a metric that measures the ratio of accurate predictions to all predictions. This ratio is
 * calculated after normalizing true positives (TP) and true negatives (TN) by the total number of positive (P) and
 * negative (N) values. It is used in both binary and multiclass classification and is defined as follows:
 * 0.5*((TP/P)+(TN/N)), with values ranging from 0 to 1. BalancedAccuracy gives a better measure of accuracy when the
 * numbers of positives and negatives differ greatly from each other in an imbalanced dataset. For example, when only
 * 1% of email is spam.
*
*
* - F1
* -
*
 * The F1 score is the harmonic mean of the precision and recall, defined as follows: F1 = 2 *
* (precision * recall) / (precision + recall). It is used for binary classification into classes traditionally
* referred to as positive and negative. Predictions are said to be true when they match their actual (correct)
* class, and false when they do not.
*
*
* Precision is the ratio of the true positive predictions to all positive predictions, and it includes the false
* positives in a dataset. Precision measures the quality of the prediction when it predicts the positive class.
*
*
* Recall (or sensitivity) is the ratio of the true positive predictions to all actual positive instances. Recall
* measures how completely a model predicts the actual class members in a dataset.
*
*
* F1 scores vary between 0 and 1. A score of 1 indicates the best possible performance, and 0 indicates the worst.
*
*
* - F1macro
* -
*
 * The F1macro score applies F1 scoring to multiclass classification problems. It does this by calculating the
 * precision and recall, and then taking their harmonic mean to calculate the F1 score for each class. Lastly, the
 * F1macro averages the individual scores to obtain the F1macro score. F1macro scores vary between 0 and 1. A score
 * of 1 indicates the best possible performance, and 0 indicates the worst.
*
*
* - MAE
* -
*
* The mean absolute error (MAE) is a measure of how different the predicted and actual values are, when they're
* averaged over all values. MAE is commonly used in regression analysis to understand model prediction error. If
* there is linear regression, MAE represents the average distance from a predicted line to the actual value. MAE is
* defined as the sum of absolute errors divided by the number of observations. Values range from 0 to infinity,
* with smaller numbers indicating a better model fit to the data.
*
*
* - MSE
* -
*
* The mean squared error (MSE) is the average of the squared differences between the predicted and actual values.
* It is used for regression. MSE values are always positive. The better a model is at predicting the actual values,
 * the smaller the MSE value is.
*
*
* - Precision
* -
*
* Precision measures how well an algorithm predicts the true positives (TP) out of all of the positives that it
* identifies. It is defined as follows: Precision = TP/(TP+FP), with values ranging from zero (0) to one (1), and
* is used in binary classification. Precision is an important metric when the cost of a false positive is high. For
* example, the cost of a false positive is very high if an airplane safety system is falsely deemed safe to fly. A
* false positive (FP) reflects a positive prediction that is actually negative in the data.
*
*
* - PrecisionMacro
* -
*
* The precision macro computes precision for multiclass classification problems. It does this by calculating
* precision for each class and averaging scores to obtain precision for several classes.
 * PrecisionMacro scores range from zero (0) to one (1). Higher scores reflect the model's ability to
* predict true positives (TP) out of all of the positives that it identifies, averaged across multiple classes.
*
*
* - R2
* -
*
* R2, also known as the coefficient of determination, is used in regression to quantify how much a model can
* explain the variance of a dependent variable. Values range from one (1) to negative one (-1). Higher numbers
 * indicate a higher fraction of explained variability. R2 values close to zero (0) indicate that very
* little of the dependent variable can be explained by the model. Negative values indicate a poor fit and that the
* model is outperformed by a constant function. For linear regression, this is a horizontal line.
*
*
* - Recall
* -
*
* Recall measures how well an algorithm correctly predicts all of the true positives (TP) in a dataset. A true
* positive is a positive prediction that is also an actual positive value in the data. Recall is defined as
* follows: Recall = TP/(TP+FN), with values ranging from 0 to 1. Higher scores reflect a better ability of the
 * model to predict true positives (TP) in the data. Recall is used in binary classification.
*
*
* Recall is important when testing for cancer because it's used to find all of the true positives. A false positive
* (FP) reflects a positive prediction that is actually negative in the data. It is often insufficient to measure
 * only recall, because predicting every output as a true positive yields a perfect recall score.
*
*
* - RecallMacro
* -
*
* The RecallMacro computes recall for multiclass classification problems by calculating recall for each class and
* averaging scores to obtain recall for several classes. RecallMacro scores range from 0 to 1. Higher scores
 * reflect the model's ability to predict true positives (TP) in a dataset, where a true positive reflects a
* positive prediction that is also an actual positive value in the data. It is often insufficient to measure only
* recall, because predicting every output as a true positive yields a perfect recall score.
*
*
* - RMSE
* -
*
* Root mean squared error (RMSE) measures the square root of the squared difference between predicted and actual
* values, and it's averaged over all values. It is used in regression analysis to understand model prediction
* error. It's an important metric to indicate the presence of large model errors and outliers. Values range from
* zero (0) to infinity, with smaller numbers indicating a better model fit to the data. RMSE is dependent on scale,
* and should not be used to compare datasets of different sizes.
*
*
*
*
* If you do not specify a metric explicitly, the default behavior is to automatically use:
*
*
* -
*
 * MSE: for regression.
 *
 *
 * -
 *
 * F1: for binary classification.
 *
 *
 * -
 *
 * Accuracy: for multiclass classification.
*
*
*
*
* If the service returns an enum value that is not available in the current SDK version, {@link #metricName} will
* return {@link AutoMLMetricEnum#UNKNOWN_TO_SDK_VERSION}. The raw value returned by the service is available from
* {@link #metricNameAsString}.
*
*
* @return The name of the objective metric used to measure the predictive quality of a machine learning system.
* This metric is optimized during training to provide the best estimate for model parameter values from
* data.
*
* Here are the options:
*
*
* - Accuracy
* -
*
* The ratio of the number of correctly classified items to the total number of (correctly and incorrectly)
* classified items. It is used for both binary and multiclass classification. Accuracy measures how close
* the predicted class values are to the actual values. Values for accuracy metrics vary between zero (0)
* and one (1). A value of 1 indicates perfect accuracy, and 0 indicates perfect inaccuracy.
*
*
* - AUC
* -
*
* The area under the curve (AUC) metric is used to compare and evaluate binary classification by algorithms
* that return probabilities, such as logistic regression. To map the probabilities into classifications,
* these are compared against a threshold value.
*
*
* The relevant curve is the receiver operating characteristic curve (ROC curve). The ROC curve plots the
* true positive rate (TPR) of predictions (or recall) against the false positive rate (FPR) as a function
* of the threshold value, above which a prediction is considered positive. Increasing the threshold results
* in fewer false positives, but more false negatives.
*
*
* AUC is the area under this ROC curve. Therefore, AUC provides an aggregated measure of the model
* performance across all possible classification thresholds. AUC scores vary between 0 and 1. A score of 1
* indicates perfect accuracy, and a score of one half (0.5) indicates that the prediction is not better
* than a random classifier.
*
*
* - BalancedAccuracy
* -
*
 * BalancedAccuracy is a metric that measures the ratio of accurate predictions to all predictions. This ratio
 * is calculated after normalizing true positives (TP) and true negatives (TN) by the total number of positive
 * (P) and negative (N) values. It is used in both binary and multiclass classification and is defined as
 * follows: 0.5*((TP/P)+(TN/N)), with values ranging from 0 to 1. BalancedAccuracy gives a better measure of
 * accuracy when the numbers of positives and negatives differ greatly from each other in an imbalanced
 * dataset. For example, when only 1% of email is spam.
*
*
* - F1
* -
*
 * The F1 score is the harmonic mean of the precision and recall, defined as follows: F1 = 2 *
* (precision * recall) / (precision + recall). It is used for binary classification into classes
* traditionally referred to as positive and negative. Predictions are said to be true when they match their
* actual (correct) class, and false when they do not.
*
*
* Precision is the ratio of the true positive predictions to all positive predictions, and it includes the
* false positives in a dataset. Precision measures the quality of the prediction when it predicts the
* positive class.
*
*
* Recall (or sensitivity) is the ratio of the true positive predictions to all actual positive instances.
* Recall measures how completely a model predicts the actual class members in a dataset.
*
*
* F1 scores vary between 0 and 1. A score of 1 indicates the best possible performance, and 0 indicates the
* worst.
*
*
* - F1macro
* -
*
 * The F1macro score applies F1 scoring to multiclass classification problems. It does this by calculating the
 * precision and recall, and then taking their harmonic mean to calculate the F1 score for each class. Lastly,
 * the F1macro averages the individual scores to obtain the F1macro score. F1macro scores vary between 0 and 1.
 * A score of 1 indicates the best possible performance, and 0 indicates the worst.
*
*
* - MAE
* -
*
* The mean absolute error (MAE) is a measure of how different the predicted and actual values are, when
* they're averaged over all values. MAE is commonly used in regression analysis to understand model
* prediction error. If there is linear regression, MAE represents the average distance from a predicted
* line to the actual value. MAE is defined as the sum of absolute errors divided by the number of
* observations. Values range from 0 to infinity, with smaller numbers indicating a better model fit to the
* data.
*
*
* - MSE
* -
*
* The mean squared error (MSE) is the average of the squared differences between the predicted and actual
* values. It is used for regression. MSE values are always positive. The better a model is at predicting
 * the actual values, the smaller the MSE value is.
*
*
* - Precision
* -
*
* Precision measures how well an algorithm predicts the true positives (TP) out of all of the positives
* that it identifies. It is defined as follows: Precision = TP/(TP+FP), with values ranging from zero (0)
* to one (1), and is used in binary classification. Precision is an important metric when the cost of a
* false positive is high. For example, the cost of a false positive is very high if an airplane safety
* system is falsely deemed safe to fly. A false positive (FP) reflects a positive prediction that is
* actually negative in the data.
*
*
* - PrecisionMacro
* -
*
* The precision macro computes precision for multiclass classification problems. It does this by
* calculating precision for each class and averaging scores to obtain precision for several classes.
 * PrecisionMacro scores range from zero (0) to one (1). Higher scores reflect the model's
* ability to predict true positives (TP) out of all of the positives that it identifies, averaged across
* multiple classes.
*
*
* - R2
* -
*
* R2, also known as the coefficient of determination, is used in regression to quantify how much a model
* can explain the variance of a dependent variable. Values range from one (1) to negative one (-1). Higher
 * numbers indicate a higher fraction of explained variability. R2 values close to zero (0)
* indicate that very little of the dependent variable can be explained by the model. Negative values
* indicate a poor fit and that the model is outperformed by a constant function. For linear regression,
* this is a horizontal line.
*
*
* - Recall
* -
*
* Recall measures how well an algorithm correctly predicts all of the true positives (TP) in a dataset. A
* true positive is a positive prediction that is also an actual positive value in the data. Recall is
* defined as follows: Recall = TP/(TP+FN), with values ranging from 0 to 1. Higher scores reflect a better
 * ability of the model to predict true positives (TP) in the data. Recall is used in binary classification.
*
*
* Recall is important when testing for cancer because it's used to find all of the true positives. A false
* positive (FP) reflects a positive prediction that is actually negative in the data. It is often
 * insufficient to measure only recall, because predicting every output as a true positive yields a perfect
* recall score.
*
*
* - RecallMacro
* -
*
* The RecallMacro computes recall for multiclass classification problems by calculating recall for each
* class and averaging scores to obtain recall for several classes. RecallMacro scores range from 0 to 1.
 * Higher scores reflect the model's ability to predict true positives (TP) in a dataset, where a true
* positive reflects a positive prediction that is also an actual positive value in the data. It is often
* insufficient to measure only recall, because predicting every output as a true positive yields a perfect
* recall score.
*
*
* - RMSE
* -
*
* Root mean squared error (RMSE) measures the square root of the squared difference between predicted and
* actual values, and it's averaged over all values. It is used in regression analysis to understand model
* prediction error. It's an important metric to indicate the presence of large model errors and outliers.
* Values range from zero (0) to infinity, with smaller numbers indicating a better model fit to the data.
* RMSE is dependent on scale, and should not be used to compare datasets of different sizes.
*
*
*
*
* If you do not specify a metric explicitly, the default behavior is to automatically use:
*
*
* -
*
 * MSE: for regression.
 *
 *
 * -
 *
 * F1: for binary classification.
 *
 *
 * -
 *
 * Accuracy: for multiclass classification.
*
*
* @see AutoMLMetricEnum
*/
public final String metricNameAsString() {
return metricName;
}
@Override
public Builder toBuilder() {
return new BuilderImpl(this);
}
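    // Illustration only (not generated code): toBuilder() supports copy-and-modify usage, assuming the enum-typed
    // metricName setter is available, e.g.
    //     AutoMLJobObjective updated = objective.toBuilder().metricName(AutoMLMetricEnum.AUC).build();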
public static Builder builder() {
return new BuilderImpl();
}
    public static Class<? extends Builder> serializableBuilderClass() {
return BuilderImpl.class;
}
@Override
public final int hashCode() {
int hashCode = 1;
hashCode = 31 * hashCode + Objects.hashCode(metricNameAsString());
return hashCode;
}
@Override
public final boolean equals(Object obj) {
return equalsBySdkFields(obj);
}
@Override
public final boolean equalsBySdkFields(Object obj) {
if (this == obj) {
return true;
}
if (obj == null) {
return false;
}
if (!(obj instanceof AutoMLJobObjective)) {
return false;
}
AutoMLJobObjective other = (AutoMLJobObjective) obj;
return Objects.equals(metricNameAsString(), other.metricNameAsString());
}
/**
* Returns a string representation of this object. This is useful for testing and debugging. Sensitive data will be
* redacted from this string using a placeholder value.
*/
@Override
public final String toString() {
return ToString.builder("AutoMLJobObjective").add("MetricName", metricNameAsString()).build();
}
    public final <T> Optional<T> getValueForField(String fieldName, Class<T> clazz) {
switch (fieldName) {
case "MetricName":
return Optional.ofNullable(clazz.cast(metricNameAsString()));
default:
return Optional.empty();
}
}
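    // Illustration only (not generated code): fields can also be read generically by their marshalling name, e.g.
    //     Optional<String> metric = objective.getValueForField("MetricName", String.class);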
@Override
    public final List<SdkField<?>> sdkFields() {
return SDK_FIELDS;
}
    private static <T> Function<Object, T> getter(Function<AutoMLJobObjective, T> g) {