// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
package google.cloud.dataplex.v1;
import "google/api/field_behavior.proto";
import "google/cloud/dataplex/v1/processing.proto";
option go_package = "cloud.google.com/go/dataplex/apiv1/dataplexpb;dataplexpb";
option java_multiple_files = true;
option java_outer_classname = "DataProfileProto";
option java_package = "com.google.cloud.dataplex.v1";
// DataProfileScan related setting.
message DataProfileSpec {
  // The configuration of post scan actions of DataProfileScan job.
  message PostScanActions {
    // The configuration of BigQuery export post scan action.
    message BigQueryExport {
      // Optional. The BigQuery table to export DataProfileScan results to.
      // Format:
      // //bigquery.googleapis.com/projects/PROJECT_ID/datasets/DATASET_ID/tables/TABLE_ID
      string results_table = 1 [(google.api.field_behavior) = OPTIONAL];
    }

    // Optional. If set, results will be exported to the provided BigQuery
    // table.
    BigQueryExport bigquery_export = 1 [(google.api.field_behavior) = OPTIONAL];
  }

  // The specification for fields to include or exclude in data profile scan.
  message SelectedFields {
    // Optional. Expected input is a list of fully qualified names of fields as
    // in the schema.
    //
    // Only top-level field names for nested fields are supported.
    // For instance, if 'x' is of nested field type, listing 'x' is supported
    // but 'x.y.z' is not supported. Here 'y' and 'y.z' are nested fields of
    // 'x'.
    repeated string field_names = 1 [(google.api.field_behavior) = OPTIONAL];
  }

  // Optional. The percentage of the records to be selected from the dataset
  // for DataScan.
  //
  // * Value can range between 0.0 and 100.0 with up to 3 significant decimal
  //   digits.
  // * Sampling is not applied if `sampling_percent` is not specified, 0 or
  //   100.
  float sampling_percent = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. A filter applied to all rows in a single DataScan job.
  // The filter needs to be a valid SQL expression for a WHERE clause in
  // BigQuery standard SQL syntax.
  // Example: col1 >= 0 AND col2 < 10
  string row_filter = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Actions to take upon job completion.
  PostScanActions post_scan_actions = 4
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. The fields to include in data profile.
  //
  // If not specified, all fields at the time of profile scan job execution are
  // included, except for ones listed in `exclude_fields`.
  SelectedFields include_fields = 5 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The fields to exclude from data profile.
  //
  // If specified, the fields will be excluded from data profile, regardless of
  // `include_fields` value.
  SelectedFields exclude_fields = 6 [(google.api.field_behavior) = OPTIONAL];
}
// DataProfileResult defines the output of DataProfileScan. Each field of the
// table will have field type specific profile result.
message DataProfileResult {
  // Contains name, type, mode and field type specific profile information.
  message Profile {
    // A field within a table.
    message Field {
      // The profile information for each field type.
      message ProfileInfo {
        // The profile information for a string type field.
        message StringFieldInfo {
          // Minimum length of non-null values in the scanned data.
          int64 min_length = 1;

          // Maximum length of non-null values in the scanned data.
          int64 max_length = 2;

          // Average length of non-null values in the scanned data.
          double average_length = 3;
        }

        // The profile information for an integer type field.
        message IntegerFieldInfo {
          // Average of non-null values in the scanned data. NaN, if the field
          // has a NaN.
          double average = 1;

          // Standard deviation of non-null values in the scanned data. NaN, if
          // the field has a NaN.
          double standard_deviation = 3;

          // Minimum of non-null values in the scanned data. NaN, if the field
          // has a NaN.
          int64 min = 4;

          // A quartile divides the number of data points into four parts, or
          // quarters, of more-or-less equal size. Three main quartiles used
          // are: The first quartile (Q1) splits off the lowest 25% of data
          // from the highest 75%. It is also known as the lower or 25th
          // empirical quartile, as 25% of the data is below this point. The
          // second quartile (Q2) is the median of a data set. So, 50% of the
          // data lies below this point. The third quartile (Q3) splits off the
          // highest 25% of data from the lowest 75%. It is known as the upper
          // or 75th empirical quartile, as 75% of the data lies below this
          // point. Here, the quartiles are provided as an ordered list of
          // approximate quartile values for the scanned data, occurring in
          // order Q1, median, Q3.
          repeated int64 quartiles = 6;

          // Maximum of non-null values in the scanned data. NaN, if the field
          // has a NaN.
          int64 max = 5;
        }

        // The profile information for a double type field.
        message DoubleFieldInfo {
          // Average of non-null values in the scanned data. NaN, if the field
          // has a NaN.
          double average = 1;

          // Standard deviation of non-null values in the scanned data. NaN, if
          // the field has a NaN.
          double standard_deviation = 3;

          // Minimum of non-null values in the scanned data. NaN, if the field
          // has a NaN.
          double min = 4;

          // A quartile divides the number of data points into four parts, or
          // quarters, of more-or-less equal size. Three main quartiles used
          // are: The first quartile (Q1) splits off the lowest 25% of data
          // from the highest 75%. It is also known as the lower or 25th
          // empirical quartile, as 25% of the data is below this point. The
          // second quartile (Q2) is the median of a data set. So, 50% of the
          // data lies below this point. The third quartile (Q3) splits off the
          // highest 25% of data from the lowest 75%. It is known as the upper
          // or 75th empirical quartile, as 75% of the data lies below this
          // point. Here, the quartiles are provided as an ordered list of
          // quartile values for the scanned data, occurring in order Q1,
          // median, Q3.
          repeated double quartiles = 6;

          // Maximum of non-null values in the scanned data. NaN, if the field
          // has a NaN.
          double max = 5;
        }

        // Top N non-null values in the scanned data.
        message TopNValue {
          // String value of a top N non-null value.
          string value = 1;

          // Count of the corresponding value in the scanned data.
          int64 count = 2;

          // Ratio of the corresponding value in the field against the total
          // number of rows in the scanned data.
          double ratio = 3;
        }

        // Ratio of rows with null value against total scanned rows.
        double null_ratio = 2;

        // Ratio of rows with distinct values against total scanned rows.
        // Not available for complex non-groupable field type RECORD and fields
        // with REPEATABLE mode.
        double distinct_ratio = 3;

        // The list of top N non-null values, frequency and ratio with which
        // they occur in the scanned data. N is 10 or equal to the number of
        // distinct values in the field, whichever is smaller. Not available
        // for complex non-groupable field type RECORD and fields with
        // REPEATABLE mode.
        repeated TopNValue top_n_values = 4;

        // Structural and profile information for specific field type. Not
        // available, if mode is REPEATABLE.
        oneof field_info {
          // String type field information.
          StringFieldInfo string_profile = 101;

          // Integer type field information.
          IntegerFieldInfo integer_profile = 102;

          // Double type field information.
          DoubleFieldInfo double_profile = 103;
        }
      }

      // The name of the field.
      string name = 1;

      // The data type retrieved from the schema of the data source. For
      // instance, for a BigQuery native table, it is the [BigQuery Table
      // Schema](https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#tablefieldschema).
      // For a Dataplex Entity, it is the [Entity
      // Schema](https://cloud.google.com/dataplex/docs/reference/rpc/google.cloud.dataplex.v1#type_3).
      string type = 2;

      // The mode of the field. Possible values include:
      //
      // * REQUIRED, if it is a required field.
      // * NULLABLE, if it is an optional field.
      // * REPEATED, if it is a repeated field.
      string mode = 3;

      // Profile information for the corresponding field.
      ProfileInfo profile = 4;
    }

    // List of fields with structural and profile information for each field.
    repeated Field fields = 2;
  }

  // The result of post scan actions of DataProfileScan job.
  message PostScanActionsResult {
    // The result of BigQuery export post scan action.
    message BigQueryExportResult {
      // Execution state for the exporting.
      enum State {
        // The exporting state is unspecified.
        STATE_UNSPECIFIED = 0;

        // The exporting completed successfully.
        SUCCEEDED = 1;

        // The exporting is no longer running due to an error.
        FAILED = 2;

        // The exporting is skipped due to no valid scan result to export
        // (usually caused by scan failed).
        SKIPPED = 3;
      }

      // Output only. Execution state for the BigQuery exporting.
      State state = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

      // Output only. Additional information about the BigQuery exporting.
      string message = 2 [(google.api.field_behavior) = OUTPUT_ONLY];
    }

    // Output only. The result of BigQuery export post scan action.
    BigQueryExportResult bigquery_export_result = 1
        [(google.api.field_behavior) = OUTPUT_ONLY];
  }

  // The count of rows scanned.
  int64 row_count = 3;

  // The profile information per field.
  Profile profile = 4;

  // The data scanned for this result.
  ScannedData scanned_data = 5;

  // Output only. The result of post scan actions.
  PostScanActionsResult post_scan_actions_result = 6
      [(google.api.field_behavior) = OUTPUT_ONLY];
}