// tensorflow_metadata.proto.v0.problem_statement.proto Maven / Gradle / Ivy
// The newest version!
// Copyright 2017 The TensorFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =============================================================================
syntax = "proto3";
package tensorflow.metadata.v0;
import "google/protobuf/descriptor.proto";
import "tensorflow_metadata/proto/v0/metric.proto";
import "tensorflow_metadata/proto/v0/path.proto";
option cc_enable_arenas = true;
option java_package = "org.tensorflow.metadata.v0";
option java_multiple_files = true;
// Kinds of prediction task. A task message declares its kind through the
// `task_type` message option.
enum TaskType {
  // Zero value, i.e. the proto3 default when nothing else is set.
  UNKNOWN_TYPE = 0;
  // Set on BinaryClassification.
  BINARY_CLASSIFICATION = 1;
  // Set on MultiClassClassification.
  MULTI_CLASS_CLASSIFICATION = 2;
  // Set on TopKClassification.
  TOP_K_CLASSIFICATION = 3;
  // Set on OneDimensionalRegression.
  ONE_DIMENSIONAL_REGRESSION = 4;
  // Set on MultiLabelClassification.
  MULTI_LABEL_CLASSIFICATION = 5;
}
// Custom message option used to tag a message with the TaskType it
// configures, e.g. `option (task_type) = BINARY_CLASSIFICATION;`.
extend google.protobuf.MessageOptions {
  TaskType task_type = 241943395;
}
// Describes a binary classification task.
// The prediction is one of two possible class labels and is encoded with the
// same type as the label column.
// Equivalent to MultiClassClassification with n_classes = 2.
message BinaryClassification {
  option (task_type) = BINARY_CLASSIFICATION;

  // The label column, given either as a flat name or as a path.
  oneof label_id {
    // Label name; assumes the label is a flat, top-level field.
    string label = 1;
    // Path to the label, usable in place of a flat string when the label is
    // nested.
    Path label_path = 3;
  }

  // (optional) The weight column.
  string example_weight = 2;

  // Declares which label value is the positive and/or the negative class.
  message PositiveNegativeSpec {
    // A label value that can be used in a positive/negative class
    // specification.
    message LabelValue {
      oneof value_type {
        string string_value = 1;
      }
    }

    // The label value that is the positive class.
    LabelValue positive_class_value = 1;
    // The label value that is the negative class.
    LabelValue negative_class_value = 2;
  }

  // (optional) Specifies the positive and/or negative class value.
  PositiveNegativeSpec positive_negative_spec = 4;
}
// Describes a dynamic multiclass/multi-label problem, i.e. one where the
// number of label classes is inferred from the data.
message DynamicClassSpec {
  // Criteria for grouping labels into an "OOV" class.
  // Note: implementing support for OOV labels is up to the solution provider.
  // Note: frequency_threshold and top_k may both be set. A class that fails
  // to meet either criterion is grouped into the OOV class.
  message OovClassSpec {
    // If set, a label occurring fewer than frequency_threshold times in the
    // training dataset is grouped into the "OOV" class. If 0, labels that
    // appear in test / validation splits but not in training would still be
    // classified as the "OOV" class.
    int64 frequency_threshold = 1;
    // If set, only the top_k labels of the training set are used; all other
    // labels are grouped into an "OOV" class.
    int64 top_k = 2;
  }

  // Optional. When specified, an Out-Of-Vocabulary (OOV) class is created
  // and populated based on frequencies in the training set. When no OOV
  // class is specified, the model's label vocabulary should consist of all
  // labels that appear in the training set.
  OovClassSpec oov_class_spec = 1;
}
// Describes a multi-class classification task.
// There are n_classes possible label values and the model predicts exactly
// one of them, so the output is a single class label out of n_classes
// possible classes. The output type corresponds to the label column type.
message MultiClassClassification {
  option (task_type) = MULTI_CLASS_CLASSIFICATION;

  // The label column; each example carries only a single label.
  // If the label column is a BoolDomain, use the BinaryClassification type
  // instead.
  oneof label_id {
    // Label name; assumes the label is a flat, top-level field.
    string label = 1;
    // Path to the label, usable in place of a flat string when the label is
    // nested.
    Path label_path = 5;
  }

  // The weight column.
  string example_weight = 2;

  // How the set of label classes is determined: fixed count or inferred.
  oneof class_spec {
    // The exact number of label classes.
    uint64 n_classes = 3;
    // The number of label classes is to be inferred dynamically from the
    // data.
    DynamicClassSpec dynamic_class_spec = 4;
  }
}
// Describes a multi-label classification task.
// Overall there are n_classes unique possible label values, and a single
// example may carry anywhere from zero up to n_classes unique labels.
// The output is real-valued: the class probabilities associated with each
// class. Per example it has n_classes dimensions when n_classes is
// specified; otherwise its dimension is the number of unique class labels
// dynamically inferred from the data based on dynamic_class_spec.
message MultiLabelClassification {
  option (task_type) = MULTI_LABEL_CLASSIFICATION;

  // The label column; an example can have one or more labels.
  oneof label_id {
    // Label name; assumes the label is a flat, top-level field.
    string label = 1;
    // Path to the label, usable in place of a flat string when the label is
    // nested.
    Path label_path = 5;
  }

  // The weight column.
  string example_weight = 2;

  // How the set of label classes is determined: fixed count or inferred.
  oneof class_spec {
    // The exact number of unique class labels.
    uint64 n_classes = 3;
    // The maximal number of label classes is to be inferred dynamically
    // from the data.
    DynamicClassSpec dynamic_class_spec = 4;
  }
}
// Describes a top-K classification task.
// There are n_classes possible label values and the model predicts
// n_predicted_labels of them. The output is a sequence of
// n_predicted_labels labels drawn from the n_classes possible classes, in
// the order given by the predictions_order field.
// (*) MultiClassClassification is the same as TopKClassification with
//     n_predicted_labels = 1.
// (*) TopKClassification does NOT mean multi-class multi-label
//     classification: e.g., the output contains a sequence of labels, all
//     coming from the same label column in the data.
message TopKClassification {
  option (task_type) = TOP_K_CLASSIFICATION;

  // The label column, given either as a flat name or as a path.
  oneof label_id {
    // Label name; assumes the label is a flat, top-level field.
    string label = 1;
    // Path to the label, usable in place of a flat string when the label is
    // nested.
    Path label_path = 6;
  }

  // (optional) The weight column.
  string example_weight = 2;

  // (optional) The number of label classes. When unset, the solution
  // provider is expected to infer the number of classes from the data.
  uint64 n_classes = 3;

  // (optional) The number of class labels to predict. When unset, 1 is
  // assumed.
  uint64 n_predicted_labels = 4;

  // Ordering of the predicted labels in the output sequence.
  enum Order {
    UNSPECIFIED = 0;
    // Most likely prediction first, least likely last.
    SCORE_DESC = 1;
    // Least likely prediction first, most likely last.
    SCORE_ASC = 2;
  }

  Order predictions_order = 5;
}
// Describes a one-dimensional regression task.
// The output is a single real number whose range depends on the objective.
message OneDimensionalRegression {
  option (task_type) = ONE_DIMENSIONAL_REGRESSION;

  // The label column. Setting this oneof is required.
  oneof label_id {
    // Label name; assumes the label is a flat, top-level field.
    string label = 1;
    // Path to the label, usable in place of a flat string when the label is
    // nested.
    Path label_path = 3;
  }

  // (optional) The weight column.
  string weight = 2;

  // Marks a regression problem whose labels lie in [0, 1] and represent a
  // probability (e.g: probability of click).
  message Probability {}

  // Marks a regression problem whose labels are counts, i.e. integers >= 0.
  message Counts {}

  // Optional refinement of what the label values represent.
  oneof label_type {
    // When set, the label is a probability in the range [0..1].
    Probability probability = 4;
    // When set, the label corresponds to counts from a Poisson distribution.
    // Eg: Number of googlers contributing to memegen each year.
    Counts counts = 5;
  }
}
// The type of a head or meta-objective. It determines the label, the weight
// and the output type of the head. Exactly one of the task configurations
// below can be set at a time.
// TODO(martinz): add logistic regression.
message Type {
  oneof task_type {
    BinaryClassification binary_classification = 1;
    OneDimensionalRegression one_dimensional_regression = 2;
    MultiClassClassification multi_class_classification = 3;
    TopKClassification top_k_classification = 4;
    MultiLabelClassification multi_label_classification = 5;
  }
}
// A single task of a model, together with all of its properties.
// Each task corresponds to one output of the model; several tasks in the
// same problem statement correspond to different outputs of the model.
message Task {
  // Field number 3 is retired and must not be reused.
  reserved 3;

  // Specifies the label and weight columns as well as the type of the
  // prediction or classification.
  Type type = 1;

  // The task name. Names should be unique among the tasks of one
  // ProblemStatement. This is a REQUIRED field for multi-task learning
  // problems.
  string name = 5;

  // When a Problem is composed of multiple sub-tasks, the weight of each
  // task determines how important solving that sub-task is. It is used to
  // rank and select the best solution for multi-task problems.
  // Not meaningful for a problem with one task.
  // If the problem has multiple tasks and all task_weight=0 (unset), all
  // tasks are weighted equally.
  double task_weight = 2 [deprecated = true];

  // Performance metrics of this head that matter to the problem owner and
  // need to be monitored and reported. Unlike fields such as
  // "meta_optimization_target", these metrics are not automatically used in
  // meta-optimization.
  repeated PerformanceMetric performance_metric = 4;

  // True when the task is an auxiliary task in a multi-task setting.
  // Auxiliary tasks are of minor relevance for the application; they are
  // added only to improve the performance on a primary task (by providing
  // additional regularization or data augmentation). They are therefore not
  // considered in the meta optimization process, but may be utilized in the
  // learner optimization.
  bool is_auxiliary = 6;
}
// A high-level objective described by the problem statement.
// Such objectives provide a basis for ranking models and can be optimized by
// a meta optimizer (e.g. a grid search over hyperparameters). A solution
// provider may also use the meta optimization targets directly to
// heuristically select losses, etc., without any meta-optimization process.
// If not specified, the high-level meta optimization target is inferred from
// the task. The objectives do not need to be differentiable, because the
// solution provider may use a proxy function to optimize model weights.
// Target definitions include tasks, metrics, and any weighted combination of
// them.
message MetaOptimizationTarget {
  // Field number 2 is retired and must not be reused.
  reserved 2;

  // Name of the task in this problem statement that produces the prediction
  // or classification for the metric.
  string task_name = 1;

  // The performance metric to be evaluated.
  // The prediction or classification is based upon the task.
  // The label is from the type of the task, or from the override_task.
  // NOTE(review): no `override_task` field is declared in this message; the
  // mention may be stale - confirm.
  PerformanceMetric performance_metric = 3;

  // Configuration for thresholded meta-optimization targets.
  message ThresholdConfig {
    oneof type {
      // If specified, a threshold that the user wishes the metric to stay
      // under (for MINIMIZE type) or above (for MAXIMIZE type). As long as
      // the threshold is respected, the optimization process need not
      // prefer models that are higher (or lower) on the thresholded metric.
      // E.g., if `threshold` for a MAXIMIZE type metric X is .9, the
      // optimization process will prefer a solution with X = .92 over a
      // solution with X = .88, but may not prefer a solution with X = .95
      // over a solution with X = .92.
      //
      // Unless otherwise specified by the PerformanceMetric, threshold is
      // best effort. It gives no hard guarantee about the properties of the
      // final model; it serves as a "target" that guides the optimization
      // process. The user is responsible for validating that final model
      // metrics are in an acceptable range for the application.
      //
      // A problem statement may, however, be rejected if the specified
      // target is impossible to achieve. Keep this in mind when running the
      // optimization on a recurring basis, as shifts in the data could push
      // a previously achievable target to being unachievable (and thus
      // yield no solution).
      //
      // The units and range of the threshold are the same as the valid
      // output range of the associated performance_metric.
      double threshold = 1;
    }
  }

  // Describes how this target is combined with other objectives.
  oneof objective_combination {
    // When a model spec has multiple meta optimization targets, the weight
    // of each can be specified. The final objective is then a weighted
    // combination of the multiple objectives. If not specified, value is 1.
    double weight = 4 [deprecated = true];
    // Secondary meta optimization targets can be thresholded: the
    // optimization process prefers solutions above (or below) the
    // threshold, but need not prefer solutions higher (or lower) on the
    // metric once the threshold is met.
    ThresholdConfig threshold_config = 5;
  }
}
// Top-level description of a problem: descriptive metadata, the
// meta-optimization targets used to compare solutions, and the tasks of the
// model.
message ProblemStatement {
  // Description of the problem statement. For example, it should describe
  // how the problem statement was arrived at: what experiments were run,
  // what side-by-sides were considered.
  string description = 2;

  // NOTE(review): undocumented in the original; presumably identifies the
  // owner(s) of this problem statement - confirm.
  repeated string owner = 3;

  // (optional) The environment of the ProblemStatement. Specifies an
  // environment string in the SchemaProto.
  string environment = 4;

  // The target used for meta-optimization, i.e. for comparing multiple
  // solutions for this problem. For example, if two solutions have
  // different candidates, a tuning tool can use meta_optimization_target to
  // decide which candidate performs the best.
  // A repeated meta-optimization target implies the weighted sum of the
  // meta_optimization targets of any non-thresholded metrics.
  repeated MetaOptimizationTarget meta_optimization_target = 7;

  // Deprecated.
  // NOTE(review): purpose and replacement are not documented here - confirm.
  bool multi_objective = 8 [deprecated = true];

  // Field number 5 is retired and must not be reused.
  reserved 5;

  // Tasks for heads of the generated model. The field is repeated because
  // some models are multi-task models. Each task should have a unique name.
  // If you wish to directly optimize this problem statement, you need to
  // specify the objective in the task.
  repeated Task tasks = 9;
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy