// All Downloads are FREE. Search and download functionalities are using the
// official Maven repository.
//
// google.cloud.automl.v1beta1.data_stats.proto Maven / Gradle / Ivy
//
// There is a newer version: 0.141.0
// Show newest version
// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.automl.v1beta1;


option go_package = "cloud.google.com/go/automl/apiv1beta1/automlpb;automlpb";
option java_multiple_files = true;
option java_package = "com.google.cloud.automl.v1beta1";
option php_namespace = "Google\\Cloud\\AutoMl\\V1beta1";
option ruby_package = "Google::Cloud::AutoML::V1beta1";

// The data statistics of a series of values that share the same DataType.
//
// Holds three counters that are declared for every DataType, plus the `stats`
// oneof carrying the statistics specific to a single DataType.
message DataStats {
  // The data statistics specific to a DataType. As a oneof, at most one of
  // the members below is set at a time. Members start at field number 3
  // because numbers 1 and 2 are taken by the counters declared further down.
  oneof stats {
    // The statistics for FLOAT64 DataType.
    Float64Stats float64_stats = 3;

    // The statistics for STRING DataType.
    StringStats string_stats = 4;

    // The statistics for TIMESTAMP DataType.
    TimestampStats timestamp_stats = 5;

    // The statistics for ARRAY DataType.
    ArrayStats array_stats = 6;

    // The statistics for STRUCT DataType.
    StructStats struct_stats = 7;

    // The statistics for CATEGORY DataType.
    CategoryStats category_stats = 8;
  }

  // The number of distinct values in the series.
  int64 distinct_value_count = 1;

  // The number of values in the series that are null.
  int64 null_value_count = 2;

  // The number of values in the series that are valid.
  int64 valid_value_count = 9;
}

// The data statistics of a series of FLOAT64 values: mean, standard
// deviation, quantiles and a histogram.
message Float64Stats {
  // A single bucket of a histogram: a value range and the number of data
  // values that fall into it.
  message HistogramBucket {
    // The minimum value of the bucket, inclusive.
    double min = 1;

    // The maximum value of the bucket, exclusive unless max = `"Infinity"`, in
    // which case it's inclusive.
    double max = 2;

    // The number of data values that are in the bucket, i.e. are between
    // min and max values.
    int64 count = 3;
  }

  // The mean of the series.
  double mean = 1;

  // The standard deviation of the series.
  double standard_deviation = 2;

  // The k-quantile values of the data series of n values, ordered from
  // index 0 to index k.
  // The value at index i is, approximately, the i*n/k-th smallest value in the
  // series; for i = 0 and i = k these are, respectively, the min and max
  // values.
  repeated double quantiles = 3;

  // Histogram buckets of the data series. Sorted by the min value of the
  // bucket, ascendingly, and the number of the buckets is dynamically
  // generated. The buckets are non-overlapping and completely cover the whole
  // FLOAT64 range, with the min of the first bucket being `"-Infinity"` and
  // the max of the last one being `"Infinity"`.
  repeated HistogramBucket histogram_buckets = 4;
}

// The data statistics of a series of STRING values, expressed as occurrence
// counts of the most frequent unigrams.
message StringStats {
  // The statistics of a unigram: the unigram itself and how often it occurs.
  message UnigramStats {
    // The unigram.
    string value = 1;

    // The number of occurrences of this unigram in the series.
    int64 count = 2;
  }

  // The statistics of the top 20 unigrams, ordered by
  // [count][google.cloud.automl.v1beta1.StringStats.UnigramStats.count].
  repeated UnigramStats top_unigram_stats = 1;
}

// The data statistics of a series of TIMESTAMP values, broken down by
// pre-defined granularities.
message TimestampStats {
  // Stats split by a granularity that is defined by the context in which
  // this message appears (the key it is stored under in `granular_stats`).
  message GranularStats {
    // A map from granularity key to example count for that key.
    // E.g. for hour_of_day `13` means 1pm, or for month_of_year `5` means May.
    // Map iteration order is undefined; do not rely on it.
    map<int32, int64> buckets = 1;
  }

  // The string key is the pre-defined granularity. Currently supported:
  // hour_of_day, day_of_week, month_of_year.
  // Granularities finer than the granularity of timestamp data are not
  // populated (e.g. if timestamps are at day granularity, then hour_of_day
  // is not populated).
  map<string, GranularStats> granular_stats = 1;
}

// The data statistics of a series of ARRAY values.
message ArrayStats {
  // Stats of all the values of all arrays, as if they were a single long
  // series of data. The type depends on the element type of the array.
  // NOTE(review): field number 1 is not used in this message; if it belonged
  // to a removed field it should be declared `reserved` - confirm against the
  // schema history.
  DataStats member_stats = 2;
}

// The data statistics of a series of STRUCT values, kept per struct field.
message StructStats {
  // Map from a field name of the struct to data stats aggregated over series
  // of all data in that field across all the structs.
  // Map iteration order is undefined; do not rely on it.
  map<string, DataStats> field_stats = 1;
}

// The data statistics of a series of CATEGORY values, expressed as occurrence
// counts of the most frequent values.
message CategoryStats {
  // The statistics of a single CATEGORY value: the value itself and how often
  // it occurs.
  message SingleCategoryStats {
    // The CATEGORY value.
    string value = 1;

    // The number of occurrences of this value in the series.
    int64 count = 2;
  }

  // The statistics of the top 20 CATEGORY values, ordered by
  // [count][google.cloud.automl.v1beta1.CategoryStats.SingleCategoryStats.count].
  repeated SingleCategoryStats top_category_stats = 1;
}

// Correlation statistics between two series of DataType values. The two
// series may have different DataTypes, but within a single series the
// DataType must be the same.
message CorrelationStats {
  // The correlation value using the Cramer's V measure.
  double cramers_v = 1;
}




// © 2015 - 2024 Weber Informatics LLC | Privacy Policy