// All downloads are free. Search and download functionality uses the official
// Maven repository.
//
// google.cloud.dataplex.v1.datascans.proto (Maven / Gradle / Ivy)

// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.dataplex.v1;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/dataplex/v1/data_profile.proto";
import "google/cloud/dataplex/v1/data_quality.proto";
import "google/cloud/dataplex/v1/processing.proto";
import "google/cloud/dataplex/v1/resources.proto";
import "google/cloud/dataplex/v1/service.proto";
import "google/longrunning/operations.proto";
import "google/protobuf/empty.proto";
import "google/protobuf/field_mask.proto";
import "google/protobuf/timestamp.proto";

option go_package = "cloud.google.com/go/dataplex/apiv1/dataplexpb;dataplexpb";
option java_multiple_files = true;
option java_outer_classname = "DataScansProto";
option java_package = "com.google.cloud.dataplex.v1";

// DataScanService manages DataScan resources, which can be configured to run
// various types of data scanning workloads and generate enriched metadata
// (e.g. Data Profile, Data Quality) for the data source.
//
// Create, update and delete are long-running operations; the remaining
// methods return their response message directly.
service DataScanService {
  option (google.api.default_host) = "dataplex.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-platform";

  // Creates a DataScan resource.
  //
  // Returns a long-running operation whose response type is `DataScan` and
  // whose metadata type is `OperationMetadata`.
  rpc CreateDataScan(CreateDataScanRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1/{parent=projects/*/locations/*}/dataScans"
      body: "data_scan"
    };
    option (google.api.method_signature) = "parent,data_scan,data_scan_id";
    option (google.longrunning.operation_info) = {
      response_type: "DataScan"
      metadata_type: "OperationMetadata"
    };
  }

  // Updates a DataScan resource.
  //
  // The resource to patch is addressed through `data_scan.name`. Returns a
  // long-running operation whose response type is `DataScan` and whose
  // metadata type is `OperationMetadata`.
  rpc UpdateDataScan(UpdateDataScanRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      patch: "/v1/{data_scan.name=projects/*/locations/*/dataScans/*}"
      body: "data_scan"
    };
    option (google.api.method_signature) = "data_scan,update_mask";
    option (google.longrunning.operation_info) = {
      response_type: "DataScan"
      metadata_type: "OperationMetadata"
    };
  }

  // Deletes a DataScan resource.
  //
  // Returns a long-running operation whose response type is
  // `google.protobuf.Empty` and whose metadata type is `OperationMetadata`.
  rpc DeleteDataScan(DeleteDataScanRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      delete: "/v1/{name=projects/*/locations/*/dataScans/*}"
    };
    option (google.api.method_signature) = "name";
    option (google.longrunning.operation_info) = {
      response_type: "google.protobuf.Empty"
      metadata_type: "OperationMetadata"
    };
  }

  // Gets a DataScan resource.
  rpc GetDataScan(GetDataScanRequest) returns (DataScan) {
    option (google.api.http) = {
      get: "/v1/{name=projects/*/locations/*/dataScans/*}"
    };
    option (google.api.method_signature) = "name";
  }

  // Lists DataScans under the given parent location.
  rpc ListDataScans(ListDataScansRequest) returns (ListDataScansResponse) {
    option (google.api.http) = {
      get: "/v1/{parent=projects/*/locations/*}/dataScans"
    };
    option (google.api.method_signature) = "parent";
  }

  // Runs an on-demand execution of a DataScan.
  //
  // The DataScanJob created by the request is returned in the response.
  rpc RunDataScan(RunDataScanRequest) returns (RunDataScanResponse) {
    option (google.api.http) = {
      post: "/v1/{name=projects/*/locations/*/dataScans/*}:run"
      body: "*"
    };
    option (google.api.method_signature) = "name";
  }

  // Gets a DataScanJob resource.
  rpc GetDataScanJob(GetDataScanJobRequest) returns (DataScanJob) {
    option (google.api.http) = {
      get: "/v1/{name=projects/*/locations/*/dataScans/*/jobs/*}"
    };
    option (google.api.method_signature) = "name";
  }

  // Lists DataScanJobs under the given DataScan.
  rpc ListDataScanJobs(ListDataScanJobsRequest)
      returns (ListDataScanJobsResponse) {
    option (google.api.http) = {
      get: "/v1/{parent=projects/*/locations/*/dataScans/*}/jobs"
    };
    option (google.api.method_signature) = "parent";
  }

  // Generates recommended data quality rules based on the results of a data
  // profiling scan.
  //
  // Use the recommendations to build rules for a data quality scan.
  //
  // `name` may identify either a data scan or a data scan job; the primary
  // HTTP binding covers the former and the additional binding the latter.
  rpc GenerateDataQualityRules(GenerateDataQualityRulesRequest)
      returns (GenerateDataQualityRulesResponse) {
    option (google.api.http) = {
      post: "/v1/{name=projects/*/locations/*/dataScans/*}:generateDataQualityRules"
      body: "*"
      additional_bindings {
        post: "/v1/{name=projects/*/locations/*/dataScans/*/jobs/*}:generateDataQualityRules"
        body: "*"
      }
    };
    option (google.api.method_signature) = "name";
  }
}

// Request message for `DataScanService.CreateDataScan`.
message CreateDataScanRequest {
  // Required. Resource name of the parent location, in the form
  // `projects/{project}/locations/{location_id}`, where `project` is a
  // *project_id* or *project_number* and `location_id` is a GCP region.
  string parent = 1 [
    (google.api.resource_reference) = {
      type: "locations.googleapis.com/Location"
    },
    (google.api.field_behavior) = REQUIRED
  ];

  // Required. The DataScan resource to create.
  DataScan data_scan = 2
      [(google.api.field_behavior) = REQUIRED];

  // Required. Identifier for the DataScan. It must:
  //
  // * contain only lowercase letters, numbers and hyphens;
  // * start with a letter;
  // * end with a number or a letter;
  // * be between 1-63 characters long;
  // * be unique within the customer project / location.
  string data_scan_id = 3
      [(google.api.field_behavior) = REQUIRED];

  // Optional. When set, the request is only validated and no mutation is
  // performed. Defaults to `false`.
  bool validate_only = 4
      [(google.api.field_behavior) = OPTIONAL];
}

// Request message for `DataScanService.UpdateDataScan`.
message UpdateDataScanRequest {
  // Required. The DataScan resource carrying the new values.
  //
  // Only the fields named in `update_mask` are updated.
  DataScan data_scan = 1
      [(google.api.field_behavior) = REQUIRED];

  // Required. Mask selecting the fields to update.
  google.protobuf.FieldMask update_mask = 2 [
    (google.api.field_behavior) = REQUIRED
  ];

  // Optional. When set, the request is only validated and no mutation is
  // performed. Defaults to `false`.
  bool validate_only = 3
      [(google.api.field_behavior) = OPTIONAL];
}

// Request message for `DataScanService.DeleteDataScan`.
message DeleteDataScanRequest {
  // Required. Resource name of the DataScan to delete, in the form
  // `projects/{project}/locations/{location_id}/dataScans/{data_scan_id}`,
  // where `project` is a *project_id* or *project_number* and `location_id`
  // is a GCP region.
  string name = 1 [
    (google.api.resource_reference) = {
      type: "dataplex.googleapis.com/DataScan"
    },
    (google.api.field_behavior) = REQUIRED
  ];
}

// Request message for `DataScanService.GetDataScan`.
message GetDataScanRequest {
  // Controls how much of the DataScan is returned.
  enum DataScanView {
    // No view given; the API falls back to the `BASIC` view.
    DATA_SCAN_VIEW_UNSPECIFIED = 0;

    // Basic view, which leaves out *spec* and *result*.
    BASIC = 1;

    // Full view; everything is included.
    FULL = 10;
  }

  // Required. Resource name of the DataScan, in the form
  // `projects/{project}/locations/{location_id}/dataScans/{data_scan_id}`,
  // where `project` is a *project_id* or *project_number* and `location_id`
  // is a GCP region.
  string name = 1 [
    (google.api.resource_reference) = {
      type: "dataplex.googleapis.com/DataScan"
    },
    (google.api.field_behavior) = REQUIRED
  ];

  // Optional. The DataScan view to return. Defaults to `BASIC`.
  DataScanView view = 2
      [(google.api.field_behavior) = OPTIONAL];
}

// Request message for `DataScanService.ListDataScans`.
message ListDataScansRequest {
  // Required. Resource name of the parent location, in the form
  // `projects/{project}/locations/{location_id}`, where `project` is a
  // *project_id* or *project_number* and `location_id` is a GCP region.
  string parent = 1 [
    (google.api.resource_reference) = {
      type: "locations.googleapis.com/Location"
    },
    (google.api.field_behavior) = REQUIRED
  ];

  // Optional. Upper bound on the number of dataScans to return; the service
  // may return fewer. When unspecified, at most 500 scans are returned.
  // The maximum value is 1000; larger values are coerced to 1000.
  int32 page_size = 2
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Page token obtained from a previous `ListDataScans` call,
  // supplied to fetch the subsequent page. While paginating, every other
  // parameter passed to `ListDataScans` must match the call that produced
  // the page token.
  string page_token = 3
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Filter request.
  string filter = 4
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Fields to order the result by (`name` or `create_time`).
  // The ordering is undefined when this is not specified.
  string order_by = 5
      [(google.api.field_behavior) = OPTIONAL];
}

// Response message for `DataScanService.ListDataScans`.
message ListDataScansResponse {
  // DataScans (`BASIC` view only) under the given parent location.
  repeated DataScan data_scans = 1;

  // Token to retrieve the next page of results, or empty if there are no more
  // results in the list.
  string next_page_token = 2;

  // Locations that could not be reached.
  repeated string unreachable = 3;
}

// Request message for `DataScanService.RunDataScan`.
message RunDataScanRequest {
  // Required. Resource name of the DataScan to run, in the form
  // `projects/{project}/locations/{location_id}/dataScans/{data_scan_id}`,
  // where `project` is a *project_id* or *project_number* and `location_id`
  // is a GCP region.
  //
  // Only **OnDemand** data scans are allowed.
  string name = 1 [
    (google.api.resource_reference) = {
      type: "dataplex.googleapis.com/DataScan"
    },
    (google.api.field_behavior) = REQUIRED
  ];
}

// Response message for `DataScanService.RunDataScan`.
message RunDataScanResponse {
  // DataScanJob created by the RunDataScan request.
  DataScanJob job = 1;
}

// Request message for `DataScanService.GetDataScanJob`.
message GetDataScanJobRequest {
  // Controls how much of the DataScanJob is returned.
  enum DataScanJobView {
    // No view given; the API falls back to the `BASIC` view.
    DATA_SCAN_JOB_VIEW_UNSPECIFIED = 0;

    // Basic view, which leaves out *spec* and *result*.
    BASIC = 1;

    // Full view; everything is included.
    FULL = 10;
  }

  // Required. Resource name of the DataScanJob, in the form
  // `projects/{project}/locations/{location_id}/dataScans/{data_scan_id}/jobs/{data_scan_job_id}`,
  // where `project` is a *project_id* or *project_number* and `location_id`
  // is a GCP region.
  string name = 1 [
    (google.api.resource_reference) = {
      type: "dataplex.googleapis.com/DataScanJob"
    },
    (google.api.field_behavior) = REQUIRED
  ];

  // Optional. The DataScanJob view to return. Defaults to `BASIC`.
  DataScanJobView view = 2
      [(google.api.field_behavior) = OPTIONAL];
}

// Request message for `DataScanService.ListDataScanJobs`.
message ListDataScanJobsRequest {
  // Required. Resource name of the parent DataScan, in the form
  // `projects/{project}/locations/{location_id}/dataScans/{data_scan_id}`,
  // where `project` is a *project_id* or *project_number* and `location_id`
  // is a GCP region.
  string parent = 1 [
    (google.api.resource_reference) = {
      type: "dataplex.googleapis.com/DataScan"
    },
    (google.api.field_behavior) = REQUIRED
  ];

  // Optional. Upper bound on the number of DataScanJobs to return; the
  // service may return fewer. When unspecified, at most 10 DataScanJobs are
  // returned. The maximum value is 1000; larger values are coerced to 1000.
  int32 page_size = 2
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Page token obtained from a previous `ListDataScanJobs` call,
  // supplied to fetch the subsequent page. While paginating, every other
  // parameter passed to `ListDataScanJobs` must match the call that produced
  // the page token.
  string page_token = 3
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Expression used to filter the results of the ListDataScanJobs
  // request.
  //
  // When unspecified, all datascan jobs are returned. Several filters can be
  // combined with the `AND` and `OR` logical operators. Filters are
  // case-sensitive.
  //
  // Allowed fields are:
  //
  // - `start_time`
  // - `end_time`
  //
  // Both `start_time` and `end_time` expect RFC-3339 formatted strings (e.g.
  // 2018-10-08T18:30:00-07:00).
  //
  // For instance, 'start_time > 2018-10-08T00:00:00.123456789Z AND end_time <
  // 2018-10-09T00:00:00.123456789Z' limits results to DataScanJobs between
  // the specified start and end times.
  string filter = 4
      [(google.api.field_behavior) = OPTIONAL];
}

// Response message for `DataScanService.ListDataScanJobs`.
message ListDataScanJobsResponse {
  // DataScanJobs (`BASIC` view only) under a given dataScan.
  repeated DataScanJob data_scan_jobs = 1;

  // Token to retrieve the next page of results, or empty if there are no more
  // results in the list.
  string next_page_token = 2;
}

// Request message for `DataScanService.GenerateDataQualityRules`, carrying
// the details needed to generate data quality rule recommendations.
message GenerateDataQualityRulesRequest {
  // Required. One of the following names:
  //
  // * The name of a data scan that has at least one successful, completed
  //   data profiling job.
  // * The name of a successful, completed data profiling job (that is, a data
  //   scan job whose job type is data profiling).
  string name = 1
      [(google.api.field_behavior) = REQUIRED];
}

// Response message for `DataScanService.GenerateDataQualityRules`, carrying
// the data quality rule recommendations.
message GenerateDataQualityRulesResponse {
  // The data quality rules that Dataplex generates based on the results
  // of a data profiling scan.
  repeated DataQualityRule rule = 1;
}

// Represents a user-visible job which provides the insights for the related
// data source.
//
// For example:
//
// * Data Quality: generates queries based on the rules and runs against the
//   data to get data quality check results.
// * Data Profile: analyzes the data in table(s) and generates insights about
//   the structure, content and relationships (such as null percent,
//   cardinality, min/max/mean, etc).
message DataScan {
  option (google.api.resource) = {
    type: "dataplex.googleapis.com/DataScan"
    pattern: "projects/{project}/locations/{location}/dataScans/{dataScan}"
  };

  // DataScan execution settings.
  message ExecutionSpec {
    // Optional. Spec related to how often and when a scan should be triggered.
    //
    // If not specified, the default is `OnDemand`, which means the scan will
    // not run until the user calls `RunDataScan` API.
    Trigger trigger = 1 [(google.api.field_behavior) = OPTIONAL];

    // Spec related to incremental scan of the data.
    //
    // When an option is selected for incremental scan, it cannot be unset or
    // changed. If not specified, a data scan will run for all data in the
    // table.
    oneof incremental {
      // Immutable. The unnested field (of type *Date* or *Timestamp*) that
      // contains values which monotonically increase over time.
      //
      // If not specified, a data scan will run for all data in the table.
      string field = 100 [(google.api.field_behavior) = IMMUTABLE];
    }
  }

  // Status of the data scan execution.
  message ExecutionStatus {
    // The time when the latest DataScanJob started.
    google.protobuf.Timestamp latest_job_start_time = 4;

    // The time when the latest DataScanJob ended.
    google.protobuf.Timestamp latest_job_end_time = 5;

    // Optional. The time when the DataScanJob execution was created.
    google.protobuf.Timestamp latest_job_create_time = 6
        [(google.api.field_behavior) = OPTIONAL];
  }

  // Output only. The relative resource name of the scan, of the form:
  // `projects/{project}/locations/{location_id}/dataScans/{datascan_id}`,
  // where `project` refers to a *project_id* or *project_number* and
  // `location_id` refers to a GCP region.
  string name = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. System generated globally unique ID for the scan. This ID will
  // be different if the scan is deleted and re-created with the same name.
  string uid = 2 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Optional. Description of the scan.
  //
  // * Must be between 1-1024 characters.
  string description = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. User friendly display name.
  //
  // * Must be between 1-256 characters.
  string display_name = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. User-defined labels for the scan, as string key/value pairs.
  map<string, string> labels = 5 [(google.api.field_behavior) = OPTIONAL];

  // Output only. Current state of the DataScan.
  //
  // NOTE(review): `State` is not declared in this file; presumably it is
  // resolved from one of the imported dataplex protos. Confirm.
  State state = 6 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The time when the scan was created.
  google.protobuf.Timestamp create_time = 7
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The time when the scan was last updated.
  google.protobuf.Timestamp update_time = 8
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Required. The data source for DataScan.
  DataSource data = 9 [(google.api.field_behavior) = REQUIRED];

  // Optional. DataScan execution settings.
  //
  // If not specified, the fields in it will use their default values.
  ExecutionSpec execution_spec = 10 [(google.api.field_behavior) = OPTIONAL];

  // Output only. Status of the data scan execution.
  ExecutionStatus execution_status = 11
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The type of DataScan.
  DataScanType type = 12 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Data Scan related setting.
  // It is required and immutable, which means that once data_quality_spec is
  // set, it cannot be changed to data_profile_spec.
  oneof spec {
    // DataQualityScan related setting.
    DataQualitySpec data_quality_spec = 100;

    // DataProfileScan related setting.
    DataProfileSpec data_profile_spec = 101;
  }

  // The result of the data scan.
  oneof result {
    // Output only. The result of the data quality scan.
    DataQualityResult data_quality_result = 200
        [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. The result of the data profile scan.
    DataProfileResult data_profile_result = 201
        [(google.api.field_behavior) = OUTPUT_ONLY];
  }
}

// A DataScanJob represents an instance of DataScan execution.
//
// Every field of this message is marked output only.
message DataScanJob {
  option (google.api.resource) = {
    type: "dataplex.googleapis.com/DataScanJob"
    pattern: "projects/{project}/locations/{location}/dataScans/{dataScan}/jobs/{job}"
  };

  // Execution state for the DataScanJob.
  //
  // Note: number 6 is not assigned to any value declared here.
  enum State {
    // The DataScanJob state is unspecified.
    STATE_UNSPECIFIED = 0;

    // The DataScanJob is running.
    RUNNING = 1;

    // The DataScanJob is canceling.
    CANCELING = 2;

    // The DataScanJob cancellation was successful.
    CANCELLED = 3;

    // The DataScanJob completed successfully.
    SUCCEEDED = 4;

    // The DataScanJob is no longer running due to an error.
    FAILED = 5;

    // The DataScanJob has been created but has not started to run yet.
    PENDING = 7;
  }

  // Output only. The relative resource name of the DataScanJob, of the form:
  // `projects/{project}/locations/{location_id}/dataScans/{datascan_id}/jobs/{job_id}`,
  // where `project` refers to a *project_id* or *project_number* and
  // `location_id` refers to a GCP region.
  string name = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. System generated globally unique ID for the DataScanJob.
  string uid = 2 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The time when the DataScanJob was started.
  google.protobuf.Timestamp start_time = 3
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The time when the DataScanJob ended.
  google.protobuf.Timestamp end_time = 4
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Execution state for the DataScanJob.
  State state = 5 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Additional information about the current state.
  string message = 6 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The type of the parent DataScan.
  DataScanType type = 7 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Data Scan related setting. At most one of the members is set.
  oneof spec {
    // Output only. DataQualityScan related setting.
    DataQualitySpec data_quality_spec = 100
        [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. DataProfileScan related setting.
    DataProfileSpec data_profile_spec = 101
        [(google.api.field_behavior) = OUTPUT_ONLY];
  }

  // The result of the data scan. At most one of the members is set.
  oneof result {
    // Output only. The result of the data quality scan.
    DataQualityResult data_quality_result = 200
        [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. The result of the data profile scan.
    DataProfileResult data_profile_result = 201
        [(google.api.field_behavior) = OUTPUT_ONLY];
  }
}

// The type of DataScan.
//
// Used by the `type` field of both `DataScan` and `DataScanJob`.
enum DataScanType {
  // The DataScan type is unspecified.
  DATA_SCAN_TYPE_UNSPECIFIED = 0;

  // Data Quality scan.
  DATA_QUALITY = 1;

  // Data Profile scan.
  DATA_PROFILE = 2;
}




// © 2015 - 2024 Weber Informatics LLC | Privacy Policy