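// NOTE(review): the lines below appear to be residue from the web page this
// file was copied from, not part of the schema. They are kept as comments so
// that the file remains parseable.
//
// All downloads are free. Search and download functionality uses the official
// Maven repository.
//
// google.cloud.dataproc.v1.clusters.proto Maven / Gradle / Ivy
//
// There is a newer version: 4.53.0
// Show newest version
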
// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.dataproc.v1;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/dataproc/v1/operations.proto";
import "google/cloud/dataproc/v1/shared.proto";
import "google/longrunning/operations.proto";
import "google/protobuf/duration.proto";
import "google/protobuf/empty.proto";
import "google/protobuf/field_mask.proto";
import "google/protobuf/timestamp.proto";
import "google/protobuf/wrappers.proto";
import "google/type/interval.proto";

// Go import path of the generated package, followed (after `;`) by the Go
// package name.
option go_package = "cloud.google.com/go/dataproc/v2/apiv1/dataprocpb;dataprocpb";
// Emit each top-level message, enum, and service as its own Java file rather
// than nesting everything inside the outer class.
option java_multiple_files = true;
// Name of the Java outer (wrapper) class generated for this file.
option java_outer_classname = "ClustersProto";
// Java package that the generated classes are placed in.
option java_package = "com.google.cloud.dataproc.v1";

// The ClusterController service provides methods to manage clusters
// of Compute Engine instances.
//
// CreateCluster, UpdateCluster, StopCluster, StartCluster, DeleteCluster, and
// DiagnoseCluster return a
// [google.longrunning.Operation][google.longrunning.Operation]; GetCluster and
// ListClusters return their result directly.
service ClusterController {
  // Default API endpoint for this service.
  option (google.api.default_host) = "dataproc.googleapis.com";
  // OAuth scope declared for the methods of this service.
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-platform";

  // Creates a cluster in a project. The returned
  // [Operation.metadata][google.longrunning.Operation.metadata] will be
  // [ClusterOperationMetadata](https://cloud.google.com/dataproc/docs/reference/rpc/google.cloud.dataproc.v1#clusteroperationmetadata).
  // After the operation completes,
  // [Operation.response][google.longrunning.Operation.response] contains the
  // [Cluster][google.cloud.dataproc.v1.Cluster].
  rpc CreateCluster(CreateClusterRequest)
      returns (google.longrunning.Operation) {
    // REST mapping: `project_id` and `region` are taken from the URL path and
    // the request's `cluster` field is the HTTP body.
    option (google.api.http) = {
      post: "/v1/projects/{project_id}/regions/{region}/clusters"
      body: "cluster"
    };
    option (google.api.method_signature) = "project_id,region,cluster";
    option (google.longrunning.operation_info) = {
      response_type: "Cluster"
      metadata_type: "google.cloud.dataproc.v1.ClusterOperationMetadata"
    };
  }

  // Updates a cluster in a project. The returned
  // [Operation.metadata][google.longrunning.Operation.metadata] will be
  // [ClusterOperationMetadata](https://cloud.google.com/dataproc/docs/reference/rpc/google.cloud.dataproc.v1#clusteroperationmetadata).
  // The cluster must be in a
  // [`RUNNING`][google.cloud.dataproc.v1.ClusterStatus.State] state or an error
  // is returned.
  rpc UpdateCluster(UpdateClusterRequest)
      returns (google.longrunning.Operation) {
    // REST mapping: the cluster is addressed by the URL path and the request's
    // `cluster` field is the HTTP body.
    option (google.api.http) = {
      patch: "/v1/projects/{project_id}/regions/{region}/clusters/{cluster_name}"
      body: "cluster"
    };
    option (google.api.method_signature) =
        "project_id,region,cluster_name,cluster,update_mask";
    option (google.longrunning.operation_info) = {
      response_type: "Cluster"
      metadata_type: "google.cloud.dataproc.v1.ClusterOperationMetadata"
    };
  }

  // Stops a cluster in a project. The returned
  // [Operation.metadata][google.longrunning.Operation.metadata] will be
  // ClusterOperationMetadata, and the operation's response type is
  // [Cluster][google.cloud.dataproc.v1.Cluster].
  rpc StopCluster(StopClusterRequest) returns (google.longrunning.Operation) {
    // REST mapping: custom `:stop` verb; all request fields that are not bound
    // by the URL path are sent in the HTTP body.
    option (google.api.http) = {
      post: "/v1/projects/{project_id}/regions/{region}/clusters/{cluster_name}:stop"
      body: "*"
    };
    option (google.longrunning.operation_info) = {
      response_type: "Cluster"
      metadata_type: "google.cloud.dataproc.v1.ClusterOperationMetadata"
    };
  }

  // Starts a cluster in a project. The returned
  // [Operation.metadata][google.longrunning.Operation.metadata] will be
  // ClusterOperationMetadata, and the operation's response type is
  // [Cluster][google.cloud.dataproc.v1.Cluster].
  rpc StartCluster(StartClusterRequest) returns (google.longrunning.Operation) {
    // REST mapping: custom `:start` verb; all request fields that are not
    // bound by the URL path are sent in the HTTP body.
    option (google.api.http) = {
      post: "/v1/projects/{project_id}/regions/{region}/clusters/{cluster_name}:start"
      body: "*"
    };
    option (google.longrunning.operation_info) = {
      response_type: "Cluster"
      metadata_type: "google.cloud.dataproc.v1.ClusterOperationMetadata"
    };
  }

  // Deletes a cluster in a project. The returned
  // [Operation.metadata][google.longrunning.Operation.metadata] will be
  // [ClusterOperationMetadata](https://cloud.google.com/dataproc/docs/reference/rpc/google.cloud.dataproc.v1#clusteroperationmetadata).
  // The operation's response type is `google.protobuf.Empty`.
  rpc DeleteCluster(DeleteClusterRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      delete: "/v1/projects/{project_id}/regions/{region}/clusters/{cluster_name}"
    };
    option (google.api.method_signature) = "project_id,region,cluster_name";
    option (google.longrunning.operation_info) = {
      response_type: "google.protobuf.Empty"
      metadata_type: "google.cloud.dataproc.v1.ClusterOperationMetadata"
    };
  }

  // Gets the resource representation for a cluster in a project.
  rpc GetCluster(GetClusterRequest) returns (Cluster) {
    option (google.api.http) = {
      get: "/v1/projects/{project_id}/regions/{region}/clusters/{cluster_name}"
    };
    option (google.api.method_signature) = "project_id,region,cluster_name";
  }

  // Lists all regions/{region}/clusters in a project alphabetically.
  rpc ListClusters(ListClustersRequest) returns (ListClustersResponse) {
    option (google.api.http) = {
      get: "/v1/projects/{project_id}/regions/{region}/clusters"
    };
    // Two method signatures are declared: one without and one with `filter`.
    option (google.api.method_signature) = "project_id,region";
    option (google.api.method_signature) = "project_id,region,filter";
  }

  // Gets cluster diagnostic information. The returned
  // [Operation.metadata][google.longrunning.Operation.metadata] will be
  // [ClusterOperationMetadata](https://cloud.google.com/dataproc/docs/reference/rpc/google.cloud.dataproc.v1#clusteroperationmetadata).
  // After the operation completes,
  // [Operation.response][google.longrunning.Operation.response]
  // contains
  // [DiagnoseClusterResults](https://cloud.google.com/dataproc/docs/reference/rpc/google.cloud.dataproc.v1#diagnoseclusterresults).
  rpc DiagnoseCluster(DiagnoseClusterRequest)
      returns (google.longrunning.Operation) {
    // REST mapping: custom `:diagnose` verb; all request fields that are not
    // bound by the URL path are sent in the HTTP body.
    option (google.api.http) = {
      post: "/v1/projects/{project_id}/regions/{region}/clusters/{cluster_name}:diagnose"
      body: "*"
    };
    option (google.api.method_signature) = "project_id,region,cluster_name";
    option (google.longrunning.operation_info) = {
      response_type: "DiagnoseClusterResults"
      metadata_type: "google.cloud.dataproc.v1.ClusterOperationMetadata"
    };
  }
}

// Describes the identifying information, config, and status of
// a Dataproc cluster.
message Cluster {
  // Required. The Google Cloud Platform project ID that the cluster belongs to.
  string project_id = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The cluster name, which must be unique within a project.
  // The name must start with a lowercase letter, and can contain
  // up to 51 lowercase letters, numbers, and hyphens. It cannot end
  // with a hyphen. The name of a deleted cluster can be reused.
  string cluster_name = 2 [(google.api.field_behavior) = REQUIRED];

  // Optional. The cluster config for a cluster of Compute Engine Instances.
  // Note that Dataproc may set default values, and values may change
  // when clusters are updated.
  //
  // Exactly one of
  // [config][google.cloud.dataproc.v1.Cluster.config] or
  // [virtual_cluster_config][google.cloud.dataproc.v1.Cluster.virtual_cluster_config]
  // must be specified.
  ClusterConfig config = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The virtual cluster config is used when creating a Dataproc
  // cluster that does not directly control the underlying compute resources,
  // for example, when creating a [Dataproc-on-GKE
  // cluster](https://cloud.google.com/dataproc/docs/guides/dpgke/dataproc-gke-overview).
  // Dataproc may set default values, and values may change when
  // clusters are updated. Exactly one of
  // [config][google.cloud.dataproc.v1.Cluster.config] or
  // [virtual_cluster_config][google.cloud.dataproc.v1.Cluster.virtual_cluster_config]
  // must be specified.
  VirtualClusterConfig virtual_cluster_config = 10
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. The labels to associate with this cluster.
  // Label **keys** must contain 1 to 63 characters, and must conform to
  // [RFC 1035](https://www.ietf.org/rfc/rfc1035.txt).
  // Label **values** may be empty, but, if present, must contain 1 to 63
  // characters, and must conform to [RFC
  // 1035](https://www.ietf.org/rfc/rfc1035.txt). No more than 32 labels can be
  // associated with a cluster.
  //
  // Both keys and values are strings; a `map` field must declare its key and
  // value types explicitly.
  map<string, string> labels = 8 [(google.api.field_behavior) = OPTIONAL];

  // Output only. Cluster status.
  ClusterStatus status = 4 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The previous cluster status.
  repeated ClusterStatus status_history = 7
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. A cluster UUID (Unique Universal Identifier). Dataproc
  // generates this value when it creates the cluster.
  string cluster_uuid = 6 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Contains cluster daemon metrics such as HDFS and YARN stats.
  //
  // **Beta Feature**: This report is available for testing purposes only. It
  // may be changed before final release.
  ClusterMetrics metrics = 9 [(google.api.field_behavior) = OUTPUT_ONLY];
}

// The config of a Dataproc cluster made up of Compute Engine instances.
message ClusterConfig {
  // Optional. The Cloud Storage bucket in which job dependencies, config
  // files, and job driver console output are staged.
  // When no staging bucket is given, Dataproc picks a Cloud Storage location
  // (US, ASIA, or EU) based on the Compute Engine zone the cluster is deployed
  // in, and then creates and manages a project-level, per-location bucket for
  // that location (see
  // [Dataproc staging and temp
  // buckets](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/staging-bucket)).
  // **This field requires a Cloud Storage bucket name, not a `gs://...` URI to
  // a Cloud Storage bucket.**
  string config_bucket = 1 [
    (google.api.field_behavior) = OPTIONAL
  ];

  // Optional. The Cloud Storage bucket that holds ephemeral cluster and job
  // data, such as Spark and MapReduce history files. When no temp bucket is
  // given, Dataproc picks a Cloud Storage location (US, ASIA, or EU) based on
  // the Compute Engine zone the cluster is deployed in, and then creates and
  // manages a project-level, per-location bucket for that location. The
  // default bucket has a TTL of 90 days; a bucket that you specify can use any
  // TTL, or none (see
  // [Dataproc staging and temp
  // buckets](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/staging-bucket)).
  // **This field requires a Cloud Storage bucket name, not a `gs://...` URI to
  // a Cloud Storage bucket.**
  string temp_bucket = 2 [
    (google.api.field_behavior) = OPTIONAL
  ];

  // Optional. Compute Engine config settings shared by all instances in the
  // cluster.
  GceClusterConfig gce_cluster_config = 8 [
    (google.api.field_behavior) = OPTIONAL
  ];

  // Optional. Compute Engine config settings for the cluster's master
  // instance.
  InstanceGroupConfig master_config = 9 [
    (google.api.field_behavior) = OPTIONAL
  ];

  // Optional. Compute Engine config settings for the cluster's worker
  // instances.
  InstanceGroupConfig worker_config = 10 [
    (google.api.field_behavior) = OPTIONAL
  ];

  // Optional. Compute Engine config settings for the cluster's secondary
  // worker instances.
  InstanceGroupConfig secondary_worker_config = 12 [
    (google.api.field_behavior) = OPTIONAL
  ];

  // Optional. Config settings for the cluster software.
  SoftwareConfig software_config = 13 [
    (google.api.field_behavior) = OPTIONAL
  ];

  // Optional. Commands executed on each node once config has completed.
  // Executables run on the master and on all worker nodes by default.
  // To run an executable only on a master or only on a worker node, test the
  // node's `role` metadata, as shown below using `curl` (`wget` works as
  // well):
  //
  //     ROLE=$(curl -H Metadata-Flavor:Google
  //     http://metadata/computeMetadata/v1/instance/attributes/dataproc-role)
  //     if [[ "${ROLE}" == 'Master' ]]; then
  //       ... master specific actions ...
  //     else
  //       ... worker specific actions ...
  //     fi
  repeated NodeInitializationAction initialization_actions = 11 [
    (google.api.field_behavior) = OPTIONAL
  ];

  // Optional. Encryption settings for the cluster.
  EncryptionConfig encryption_config = 15 [
    (google.api.field_behavior) = OPTIONAL
  ];

  // Optional. Autoscaling config for the policy associated with the cluster.
  // The cluster does not autoscale if this field is unset.
  AutoscalingConfig autoscaling_config = 18 [
    (google.api.field_behavior) = OPTIONAL
  ];

  // Optional. Security settings for the cluster.
  SecurityConfig security_config = 16 [
    (google.api.field_behavior) = OPTIONAL
  ];

  // Optional. Lifecycle setting for the cluster.
  LifecycleConfig lifecycle_config = 17 [
    (google.api.field_behavior) = OPTIONAL
  ];

  // Optional. Port/endpoint configuration for this cluster.
  EndpointConfig endpoint_config = 19 [
    (google.api.field_behavior) = OPTIONAL
  ];

  // Optional. Metastore configuration.
  MetastoreConfig metastore_config = 20 [
    (google.api.field_behavior) = OPTIONAL
  ];

  // Optional. The config for Dataproc metrics.
  DataprocMetricConfig dataproc_metric_config = 23 [
    (google.api.field_behavior) = OPTIONAL
  ];

  // Optional. The node group settings.
  repeated AuxiliaryNodeGroup auxiliary_node_groups = 25 [
    (google.api.field_behavior) = OPTIONAL
  ];
}

// The config of a Dataproc cluster that does not directly control its
// underlying compute resources, for example a [Dataproc-on-GKE
// cluster](https://cloud.google.com/dataproc/docs/guides/dpgke/dataproc-gke-overview).
message VirtualClusterConfig {
  // Optional. The Cloud Storage bucket in which job dependencies, config
  // files, and job driver console output are staged.
  // When no staging bucket is given, Dataproc picks a Cloud Storage location
  // (US, ASIA, or EU) based on the Compute Engine zone the cluster is deployed
  // in, and then creates and manages a project-level, per-location bucket for
  // that location (see
  // [Dataproc staging and temp
  // buckets](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/staging-bucket)).
  // **This field requires a Cloud Storage bucket name, not a `gs://...` URI to
  // a Cloud Storage bucket.**
  string staging_bucket = 1 [
    (google.api.field_behavior) = OPTIONAL
  ];

  // The infrastructure that the cluster runs on.
  oneof infrastructure_config {
    // Required. The configuration for running the Dataproc cluster on
    // Kubernetes.
    KubernetesClusterConfig kubernetes_cluster_config = 6 [
      (google.api.field_behavior) = REQUIRED
    ];
  }

  // Optional. Configuration of the auxiliary services this cluster uses.
  AuxiliaryServicesConfig auxiliary_services_config = 7 [
    (google.api.field_behavior) = OPTIONAL
  ];
}

// Configuration of the auxiliary services used by a cluster.
message AuxiliaryServicesConfig {
  // Optional. The Hive Metastore configuration for this workload.
  MetastoreConfig metastore_config = 1 [
    (google.api.field_behavior) = OPTIONAL
  ];

  // Optional. The Spark History Server configuration for the workload.
  SparkHistoryServerConfig spark_history_server_config = 2 [
    (google.api.field_behavior) = OPTIONAL
  ];
}

// Endpoint config for this cluster.
message EndpointConfig {
  // Output only. The map of port descriptions to URLs. Will only be populated
  // if enable_http_port_access is true.
  //
  // Keys are the port descriptions and values are the URLs; a `map` field
  // must declare its key and value types explicitly.
  map<string, string> http_ports = 1
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Optional. If true, enable http access to specific ports on the cluster
  // from external sources. Defaults to false.
  bool enable_http_port_access = 2 [(google.api.field_behavior) = OPTIONAL];
}

// Config of the autoscaling policy associated with the cluster.
message AutoscalingConfig {
  // Optional. The autoscaling policy that the cluster uses.
  //
  // Only resource names that include the project ID and the location (region)
  // are valid. Examples:
  //
  // * `https://www.googleapis.com/compute/v1/projects/[project_id]/locations/[dataproc_region]/autoscalingPolicies/[policy_id]`
  // * `projects/[project_id]/locations/[dataproc_region]/autoscalingPolicies/[policy_id]`
  //
  // Note that the policy must be in the same project and Dataproc region.
  string policy_uri = 1 [
    (google.api.field_behavior) = OPTIONAL
  ];
}

// Encryption settings for the cluster.
message EncryptionConfig {
  // Optional. The Cloud KMS key resource name used to encrypt the persistent
  // disks of all instances in the cluster. For more information, see
  // [Use CMEK with cluster data](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/customer-managed-encryption#use_cmek_with_cluster_data).
  string gce_pd_kms_key_name = 1 [
    (google.api.field_behavior) = OPTIONAL
  ];

  // Optional. The Cloud KMS key resource name used to encrypt cluster
  // persistent disks and job arguments. For more information, see
  // [Use CMEK with cluster data](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/customer-managed-encryption#use_cmek_with_cluster_data).
  //
  // When this key resource name is provided, the job arguments listed below
  // are encrypted using CMEK for jobs of the corresponding type that are
  // submitted to the cluster:
  //
  // * [FlinkJob
  // args](https://cloud.google.com/dataproc/docs/reference/rest/v1/FlinkJob)
  // * [HadoopJob
  // args](https://cloud.google.com/dataproc/docs/reference/rest/v1/HadoopJob)
  // * [SparkJob
  // args](https://cloud.google.com/dataproc/docs/reference/rest/v1/SparkJob)
  // * [SparkRJob
  // args](https://cloud.google.com/dataproc/docs/reference/rest/v1/SparkRJob)
  // * [PySparkJob
  // args](https://cloud.google.com/dataproc/docs/reference/rest/v1/PySparkJob)
  // * [SparkSqlJob](https://cloud.google.com/dataproc/docs/reference/rest/v1/SparkSqlJob)
  //   scriptVariables and queryList.queries
  // * [HiveJob](https://cloud.google.com/dataproc/docs/reference/rest/v1/HiveJob)
  //   scriptVariables and queryList.queries
  // * [PigJob](https://cloud.google.com/dataproc/docs/reference/rest/v1/PigJob)
  //   scriptVariables and queryList.queries
  // * [PrestoJob](https://cloud.google.com/dataproc/docs/reference/rest/v1/PrestoJob)
  //   scriptVariables and queryList.queries
  string kms_key = 2 [
    (google.api.field_behavior) = OPTIONAL,
    (google.api.resource_reference) = { type: "cloudkms.googleapis.com/CryptoKey" }
  ];
}

// Common config settings for resources of Compute Engine cluster
// instances, applicable to all instances in the cluster.
message GceClusterConfig {
  // `PrivateIpv6GoogleAccess` controls whether and how Dataproc cluster nodes
  // can communicate with Google Services through gRPC over IPv6.
  // These values are directly mapped to corresponding values in the
  // [Compute Engine Instance
  // fields](https://cloud.google.com/compute/docs/reference/rest/v1/instances).
  enum PrivateIpv6GoogleAccess {
    // If unspecified, Compute Engine default behavior will apply, which
    // is the same as
    // [INHERIT_FROM_SUBNETWORK][google.cloud.dataproc.v1.GceClusterConfig.PrivateIpv6GoogleAccess.INHERIT_FROM_SUBNETWORK].
    PRIVATE_IPV6_GOOGLE_ACCESS_UNSPECIFIED = 0;

    // Private access to and from Google Services configuration
    // inherited from the subnetwork configuration. This is the
    // default Compute Engine behavior.
    INHERIT_FROM_SUBNETWORK = 1;

    // Enables outbound private IPv6 access to Google Services from the Dataproc
    // cluster.
    OUTBOUND = 2;

    // Enables bidirectional private IPv6 access between Google Services and the
    // Dataproc cluster.
    BIDIRECTIONAL = 3;
  }

  // Optional. The Compute Engine zone where the Dataproc cluster will be
  // located. If omitted, the service will pick a zone in the cluster's Compute
  // Engine region. On a get request, zone will always be present.
  //
  // A full URL, partial URI, or short name are valid. Examples:
  //
  // * `https://www.googleapis.com/compute/v1/projects/[project_id]/zones/[zone]`
  // * `projects/[project_id]/zones/[zone]`
  // * `[zone]`
  string zone_uri = 1 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The Compute Engine network to be used for machine
  // communications. Cannot be specified with `subnetwork_uri`. If neither
  // `network_uri` nor `subnetwork_uri` is specified, the "default" network of
  // the project is used, if it exists. Cannot be a "Custom Subnet Network" (see
  // [Using Subnetworks](https://cloud.google.com/compute/docs/subnetworks) for
  // more information).
  //
  // A full URL, partial URI, or short name are valid. Examples:
  //
  // * `https://www.googleapis.com/compute/v1/projects/[project_id]/global/networks/default`
  // * `projects/[project_id]/global/networks/default`
  // * `default`
  string network_uri = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The Compute Engine subnetwork to be used for machine
  // communications. Cannot be specified with `network_uri`.
  //
  // A full URL, partial URI, or short name are valid. Examples:
  //
  // * `https://www.googleapis.com/compute/v1/projects/[project_id]/regions/[region]/subnetworks/sub0`
  // * `projects/[project_id]/regions/[region]/subnetworks/sub0`
  // * `sub0`
  string subnetwork_uri = 6 [(google.api.field_behavior) = OPTIONAL];

  // Optional. This setting applies to subnetwork-enabled networks. It is set to
  // `true` by default in clusters created with image versions 2.2.x.
  //
  // When set to `true`:
  //
  // * All cluster VMs have internal IP addresses.
  // * [Google Private Access](https://cloud.google.com/vpc/docs/private-google-access)
  //   must be enabled to access Dataproc and other Google Cloud APIs.
  // * Off-cluster dependencies must be configured to be accessible
  //   without external IP addresses.
  //
  // When set to `false`:
  //
  // * Cluster VMs are not restricted to internal IP addresses.
  // * Ephemeral external IP addresses are assigned to each cluster VM.
  optional bool internal_ip_only = 7 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The type of IPv6 access for a cluster.
  PrivateIpv6GoogleAccess private_ipv6_google_access = 12
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. The [Dataproc service
  // account](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/service-accounts#service_accounts_in_dataproc)
  // (also see [VM Data Plane
  // identity](https://cloud.google.com/dataproc/docs/concepts/iam/dataproc-principals#vm_service_account_data_plane_identity))
  // used by Dataproc cluster VM instances to access Google Cloud Platform
  // services.
  //
  // If not specified, the
  // [Compute Engine default service
  // account](https://cloud.google.com/compute/docs/access/service-accounts#default_service_account)
  // is used.
  string service_account = 8 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The URIs of service account scopes to be included in
  // Compute Engine instances. The following base set of scopes is always
  // included:
  //
  // * https://www.googleapis.com/auth/cloud.useraccounts.readonly
  // * https://www.googleapis.com/auth/devstorage.read_write
  // * https://www.googleapis.com/auth/logging.write
  //
  // If no scopes are specified, the following defaults are also provided:
  //
  // * https://www.googleapis.com/auth/bigquery
  // * https://www.googleapis.com/auth/bigtable.admin.table
  // * https://www.googleapis.com/auth/bigtable.data
  // * https://www.googleapis.com/auth/devstorage.full_control
  repeated string service_account_scopes = 3
      [(google.api.field_behavior) = OPTIONAL];

  // The Compute Engine network tags to add to all instances (see [Tagging
  // instances](https://cloud.google.com/vpc/docs/add-remove-network-tags)).
  repeated string tags = 4;

  // Optional. The Compute Engine metadata entries to add to all instances (see
  // [Project and instance
  // metadata](https://cloud.google.com/compute/docs/storing-retrieving-metadata#project_and_instance_metadata)).
  //
  // Each entry is a string key mapped to a string value; a `map` field must
  // declare its key and value types explicitly.
  map<string, string> metadata = 5 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Reservation Affinity for consuming Zonal reservation.
  ReservationAffinity reservation_affinity = 11
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Node Group Affinity for sole-tenant clusters.
  NodeGroupAffinity node_group_affinity = 13
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Shielded Instance Config for clusters using [Compute Engine
  // Shielded
  // VMs](https://cloud.google.com/security/shielded-cloud/shielded-vm).
  ShieldedInstanceConfig shielded_instance_config = 14
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Confidential Instance Config for clusters using [Confidential
  // VMs](https://cloud.google.com/compute/confidential-vm/docs).
  ConfidentialInstanceConfig confidential_instance_config = 15
      [(google.api.field_behavior) = OPTIONAL];
}

// Node Group Affinity for clusters that use sole-tenant node groups.
// **The Dataproc `NodeGroupAffinity` resource is not related to the
// Dataproc [NodeGroup][google.cloud.dataproc.v1.NodeGroup] resource.**
message NodeGroupAffinity {
  // Required. The URI of the sole-tenant [node group
  // resource](https://cloud.google.com/compute/docs/reference/rest/v1/nodeGroups)
  // on which the cluster will be created.
  //
  // A full URL, partial URI, or node group name are valid. Examples:
  //
  // * `https://www.googleapis.com/compute/v1/projects/[project_id]/zones/[zone]/nodeGroups/node-group-1`
  // * `projects/[project_id]/zones/[zone]/nodeGroups/node-group-1`
  // * `node-group-1`
  string node_group_uri = 1 [
    (google.api.field_behavior) = REQUIRED
  ];
}

// Shielded Instance Config for clusters that use [Compute Engine Shielded
// VMs](https://cloud.google.com/security/shielded-cloud/shielded-vm).
message ShieldedInstanceConfig {
  // Optional. Whether Secure Boot is enabled on the instances.
  optional bool enable_secure_boot = 1 [
    (google.api.field_behavior) = OPTIONAL
  ];

  // Optional. Whether the vTPM is enabled on the instances.
  optional bool enable_vtpm = 2 [
    (google.api.field_behavior) = OPTIONAL
  ];

  // Optional. Whether integrity monitoring is enabled on the instances.
  optional bool enable_integrity_monitoring = 3 [
    (google.api.field_behavior) = OPTIONAL
  ];
}

// Confidential Instance Config for clusters that use [Confidential
// VMs](https://cloud.google.com/compute/confidential-vm/docs).
message ConfidentialInstanceConfig {
  // Optional. Whether confidential compute should be enabled on the instance.
  bool enable_confidential_compute = 1 [
    (google.api.field_behavior) = OPTIONAL
  ];
}

// Config settings for the Compute Engine resources of an instance group,
// such as a master or worker group.
message InstanceGroupConfig {
  // Controls the use of preemptible instances within the group.
  enum Preemptibility {
    // Preemptibility is unspecified; the system chooses the appropriate
    // setting for each instance group.
    PREEMPTIBILITY_UNSPECIFIED = 0;

    // Instances are non-preemptible.
    //
    // This option is allowed for all instance groups and is the only valid
    // value for Master and Worker instance groups.
    NON_PREEMPTIBLE = 1;

    // Instances are
    // [preemptible](https://cloud.google.com/compute/docs/instances/preemptible).
    //
    // This option is allowed only for
    // [secondary worker](https://cloud.google.com/dataproc/docs/concepts/compute/secondary-vms)
    // groups.
    PREEMPTIBLE = 2;

    // Instances are
    // [Spot VMs](https://cloud.google.com/compute/docs/instances/spot).
    //
    // This option is allowed only for
    // [secondary worker](https://cloud.google.com/dataproc/docs/concepts/compute/secondary-vms)
    // groups. Spot VMs are the latest version of
    // [preemptible VMs](https://cloud.google.com/compute/docs/instances/preemptible),
    // and provide additional features.
    SPOT = 3;
  }

  // Optional. The number of VM instances in the instance group.
  // For [HA
  // cluster](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/high-availability)
  // [master_config][google.cloud.dataproc.v1.ClusterConfig.master_config]
  // groups, **must be set to 3**.
  // For standard cluster
  // [master_config][google.cloud.dataproc.v1.ClusterConfig.master_config]
  // groups, **must be set to 1**.
  int32 num_instances = 1 [
    (google.api.field_behavior) = OPTIONAL
  ];

  // Output only. The list of instance names. Dataproc derives the names
  // from `cluster_name`, `num_instances`, and the instance group.
  repeated string instance_names = 2 [
    (google.api.field_behavior) = OUTPUT_ONLY
  ];

  // Output only. List of references to Compute Engine instances.
  repeated InstanceReference instance_references = 11 [
    (google.api.field_behavior) = OUTPUT_ONLY
  ];

  // Optional. The Compute Engine image resource used for cluster instances.
  //
  // The URI can represent an image or image family.
  //
  // Image examples:
  //
  // * `https://www.googleapis.com/compute/v1/projects/[project_id]/global/images/[image-id]`
  // * `projects/[project_id]/global/images/[image-id]`
  // * `image-id`
  //
  // Image family examples. Dataproc will use the most recent
  // image from the family:
  //
  // * `https://www.googleapis.com/compute/v1/projects/[project_id]/global/images/family/[custom-image-family-name]`
  // * `projects/[project_id]/global/images/family/[custom-image-family-name]`
  //
  // If the URI is unspecified, it will be inferred from
  // `SoftwareConfig.image_version` or the system default.
  string image_uri = 3 [
    (google.api.field_behavior) = OPTIONAL
  ];

  // Optional. The Compute Engine machine type used for cluster instances.
  //
  // A full URL, partial URI, or short name are valid. Examples:
  //
  // * `https://www.googleapis.com/compute/v1/projects/[project_id]/zones/[zone]/machineTypes/n1-standard-2`
  // * `projects/[project_id]/zones/[zone]/machineTypes/n1-standard-2`
  // * `n1-standard-2`
  //
  // **Auto Zone Exception**: If you are using the Dataproc
  // [Auto Zone
  // Placement](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement)
  // feature, you must use the short name of the machine type
  // resource, for example, `n1-standard-2`.
  string machine_type_uri = 4 [
    (google.api.field_behavior) = OPTIONAL
  ];

  // Optional. Disk option config settings.
  DiskConfig disk_config = 5 [
    (google.api.field_behavior) = OPTIONAL
  ];

  // Output only. Specifies that this instance group contains preemptible
  // instances.
  bool is_preemptible = 6 [
    (google.api.field_behavior) = OUTPUT_ONLY
  ];

  // Optional. Specifies the preemptibility of the instance group.
  //
  // The default value for master and worker groups is
  // `NON_PREEMPTIBLE`. This default cannot be changed.
  //
  // The default value for secondary instances is
  // `PREEMPTIBLE`.
  Preemptibility preemptibility = 10 [
    (google.api.field_behavior) = OPTIONAL
  ];

  // Output only. The config for the Compute Engine Instance Group
  // Manager that manages this group.
  // This is only used for preemptible instance groups.
  ManagedGroupConfig managed_group_config = 7 [
    (google.api.field_behavior) = OUTPUT_ONLY
  ];

  // Optional. The Compute Engine accelerator configuration for these
  // instances.
  repeated AcceleratorConfig accelerators = 8 [
    (google.api.field_behavior) = OPTIONAL
  ];

  // Optional. Specifies the minimum CPU platform for the instance group.
  // See [Dataproc -> Minimum CPU
  // Platform](https://cloud.google.com/dataproc/docs/concepts/compute/dataproc-min-cpu).
  string min_cpu_platform = 9 [
    (google.api.field_behavior) = OPTIONAL
  ];

  // Optional. The minimum number of primary worker instances to create.
  // If `min_num_instances` is set, cluster creation will succeed if
  // the number of primary workers created is at least equal to the
  // `min_num_instances` number.
  //
  // Example: Cluster creation request with `num_instances` = `5` and
  // `min_num_instances` = `3`:
  //
  // *  If 4 VMs are created and 1 instance fails,
  //    the failed VM is deleted. The cluster is
  //    resized to 4 instances and placed in a `RUNNING` state.
  // *  If 2 instances are created and 3 instances fail,
  //    the cluster is placed in an `ERROR` state. The failed VMs
  //    are not deleted.
  int32 min_num_instances = 12 [
    (google.api.field_behavior) = OPTIONAL
  ];

  // Optional. Instance flexibility policy allowing a mixture of VM shapes and
  // provisioning models.
  InstanceFlexibilityPolicy instance_flexibility_policy = 13 [
    (google.api.field_behavior) = OPTIONAL
  ];

  // Optional. Configuration to handle the startup of instances during the
  // cluster create and update process.
  StartupConfig startup_config = 14 [
    (google.api.field_behavior) = OPTIONAL
  ];
}

// Configuration to handle the startup of instances during cluster create and
// update process.
message StartupConfig {
  // Optional. The config setting to enable cluster creation/update to be
  // successful only after required_registration_fraction of instances are up
  // and running. This configuration is applicable to only secondary workers for
  // now. The cluster will fail if required_registration_fraction of instances
  // are not available. This will include instance creation, agent registration,
  // and service registration (if enabled).
  //
  // The field is declared `optional`, so it has explicit presence: an unset
  // value can be distinguished from an explicitly provided `0`.
  optional double required_registration_fraction = 1
      [(google.api.field_behavior) = OPTIONAL];
}

// A reference to a Compute Engine instance. Identifies the instance by name
// and ID and carries the public keys used for sharing data with it.
message InstanceReference {
  // The user-friendly name of the Compute Engine instance.
  string instance_name = 1;

  // The unique identifier of the Compute Engine instance.
  string instance_id = 2;

  // The public RSA key used for sharing data with this instance.
  string public_key = 3;

  // The public ECIES key used for sharing data with this instance.
  string public_ecies_key = 4;
}

// Specifies the resources used to actively manage an instance group.
// Every field in this message is output only.
message ManagedGroupConfig {
  // Output only. The name of the Instance Template used for the Managed
  // Instance Group.
  string instance_template_name = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The name of the Instance Group Manager for this group.
  string instance_group_manager_name = 2
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The partial URI to the instance group manager for this group.
  // For example:
  // `projects/my-project/regions/us-central1/instanceGroupManagers/my-igm`.
  string instance_group_manager_uri = 3
      [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Instance flexibility policy allowing a mixture of VM shapes and provisioning
// models.
message InstanceFlexibilityPolicy {
  // Defines machine types and a rank to which the machine types belong.
  message InstanceSelection {
    // Optional. Full machine-type names, e.g. "n1-standard-16".
    repeated string machine_types = 1 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Preference of this instance selection. Lower number means
    // higher preference. Dataproc will first try to create a VM based on the
    // machine-type with priority rank and fall back to the next rank based on
    // availability. Machine types and instance selections with the same
    // priority have the same preference.
    int32 rank = 2 [(google.api.field_behavior) = OPTIONAL];
  }

  // Defines a mapping from machine types to the number of VMs that are created
  // with each machine type.
  message InstanceSelectionResult {
    // Output only. Full machine-type name, e.g. "n1-standard-16".
    optional string machine_type = 1
        [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. Number of VMs provisioned with the machine_type.
    optional int32 vm_count = 2 [(google.api.field_behavior) = OUTPUT_ONLY];
  }

  // Optional. List of instance selection options that the group will use when
  // creating new VMs.
  repeated InstanceSelection instance_selection_list = 2
      [(google.api.field_behavior) = OPTIONAL];

  // Output only. A list of instance selection results in the group.
  repeated InstanceSelectionResult instance_selection_results = 3
      [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Specifies the type and number of accelerator cards attached to the instances
// of an instance group. See [GPUs on Compute
// Engine](https://cloud.google.com/compute/docs/gpus/).
message AcceleratorConfig {
  // Full URL, partial URI, or short name of the accelerator type resource to
  // expose to this instance. See
  // [Compute Engine
  // AcceleratorTypes](https://cloud.google.com/compute/docs/reference/v1/acceleratorTypes).
  //
  // Examples:
  //
  // * `https://www.googleapis.com/compute/v1/projects/[project_id]/zones/[zone]/acceleratorTypes/nvidia-tesla-t4`
  // * `projects/[project_id]/zones/[zone]/acceleratorTypes/nvidia-tesla-t4`
  // * `nvidia-tesla-t4`
  //
  // **Auto Zone Exception**: If you are using the Dataproc
  // [Auto Zone
  // Placement](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement)
  // feature, you must use the short name of the accelerator type
  // resource, for example, `nvidia-tesla-t4`.
  string accelerator_type_uri = 1;

  // The number of accelerator cards of this type exposed to this instance.
  int32 accelerator_count = 2;
}

// Specifies the config of disk options for a group of VM instances: the boot
// disk type and size, and the number and interface of attached local SSDs.
message DiskConfig {
  // Optional. Type of the boot disk (default is "pd-standard").
  // Valid values: "pd-balanced" (Persistent Disk Balanced Solid State Drive),
  // "pd-ssd" (Persistent Disk Solid State Drive),
  // or "pd-standard" (Persistent Disk Hard Disk Drive).
  // See [Disk types](https://cloud.google.com/compute/docs/disks#disk-types).
  string boot_disk_type = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Size in GB of the boot disk (default is 500 GB).
  int32 boot_disk_size_gb = 1 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Number of attached SSDs, from 0 to 8 (default is 0).
  // If SSDs are not attached, the boot disk is used to store runtime logs and
  // [HDFS](https://hadoop.apache.org/docs/r1.2.1/hdfs_user_guide.html) data.
  // If one or more SSDs are attached, this runtime bulk
  // data is spread across them, and the boot disk contains only basic
  // config and installed binaries.
  //
  // Note: Local SSD options may vary by machine type and number of vCPUs
  // selected.
  int32 num_local_ssds = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Interface type of local SSDs (default is "scsi").
  // Valid values: "scsi" (Small Computer System Interface),
  // "nvme" (Non-Volatile Memory Express).
  // See [local SSD
  // performance](https://cloud.google.com/compute/docs/disks/local-ssd#performance).
  string local_ssd_interface = 4 [(google.api.field_behavior) = OPTIONAL];
}

// Node group identification and configuration information.
message AuxiliaryNodeGroup {
  // Required. Node group configuration.
  NodeGroup node_group = 1 [(google.api.field_behavior) = REQUIRED];

  // Optional. A node group ID. Generated if not specified.
  //
  // The ID must contain only letters (a-z, A-Z), numbers (0-9),
  // underscores (_), and hyphens (-). It cannot begin or end with an
  // underscore or hyphen, and must consist of 3 to 33 characters.
  string node_group_id = 2 [(google.api.field_behavior) = OPTIONAL];
}

// Dataproc Node Group.
// **The Dataproc `NodeGroup` resource is not related to the
// Dataproc [NodeGroupAffinity][google.cloud.dataproc.v1.NodeGroupAffinity]
// resource.**
message NodeGroup {
  option (google.api.resource) = {
    type: "dataproc.googleapis.com/NodeGroup"
    pattern: "projects/{project}/regions/{region}/clusters/{cluster}/nodeGroups/{node_group}"
  };

  // Node pool roles.
  enum Role {
    // Required unspecified role.
    ROLE_UNSPECIFIED = 0;

    // Job drivers run on the node pool.
    DRIVER = 1;
  }

  // The Node group [resource name](https://aip.dev/122).
  string name = 1;

  // Required. Node group roles.
  repeated Role roles = 2 [(google.api.field_behavior) = REQUIRED];

  // Optional. The node group instance group configuration.
  InstanceGroupConfig node_group_config = 3
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Node group labels.
  //
  // * Label **keys** must consist of from 1 to 63 characters and conform to
  //   [RFC 1035](https://www.ietf.org/rfc/rfc1035.txt).
  // * Label **values** can be empty. If specified, they must consist of from
  //   1 to 63 characters and conform to
  //   [RFC 1035](https://www.ietf.org/rfc/rfc1035.txt).
  // * The node group must have no more than 32 labels.
  map<string, string> labels = 4 [(google.api.field_behavior) = OPTIONAL];
}

// Specifies an executable to run on a fully configured node and a
// timeout period for executable completion.
message NodeInitializationAction {
  // Required. Cloud Storage URI of executable file.
  string executable_file = 1 [(google.api.field_behavior) = REQUIRED];

  // Optional. Amount of time executable has to complete. Default is
  // 10 minutes (see JSON representation of
  // [Duration](https://developers.google.com/protocol-buffers/docs/proto3#json)).
  //
  // Cluster creation fails with an explanatory error message (the
  // name of the executable that caused the error and the exceeded timeout
  // period) if the executable has not completed by the end of the timeout
  // period.
  google.protobuf.Duration execution_timeout = 2
      [(google.api.field_behavior) = OPTIONAL];
}

// The status of a cluster and its instances.
message ClusterStatus {
  // The cluster state.
  //
  // Values are not declared in numeric order (`ERROR_DUE_TO_UPDATE = 9` sits
  // between `ERROR = 3` and `DELETING = 4`); the numbers, not the declaration
  // order, identify the values on the wire.
  enum State {
    // The cluster state is unknown.
    UNKNOWN = 0;

    // The cluster is being created and set up. It is not ready for use.
    CREATING = 1;

    // The cluster is currently running and healthy. It is ready for use.
    //
    // **Note:** The cluster state changes from "creating" to "running" status
    // after the master node(s), first two primary worker nodes (and the last
    // primary worker node if primary workers > 2) are running.
    RUNNING = 2;

    // The cluster encountered an error. It is not ready for use.
    ERROR = 3;

    // The cluster has encountered an error while being updated. Jobs can
    // be submitted to the cluster, but the cluster cannot be updated.
    ERROR_DUE_TO_UPDATE = 9;

    // The cluster is being deleted. It cannot be used.
    DELETING = 4;

    // The cluster is being updated. It continues to accept and process jobs.
    UPDATING = 5;

    // The cluster is being stopped. It cannot be used.
    STOPPING = 6;

    // The cluster is currently stopped. It is not ready for use.
    STOPPED = 7;

    // The cluster is being started. It is not ready for use.
    STARTING = 8;

    // The cluster is being repaired. It is not ready for use.
    REPAIRING = 10;
  }

  // The cluster substate.
  enum Substate {
    // The cluster substate is unknown.
    UNSPECIFIED = 0;

    // The cluster is known to be in an unhealthy state
    // (for example, critical daemons are not running or HDFS capacity is
    // exhausted).
    //
    // Applies to RUNNING state.
    UNHEALTHY = 1;

    // The agent-reported status is out of date (may occur if
    // Dataproc loses communication with Agent).
    //
    // Applies to RUNNING state.
    STALE_STATUS = 2;
  }

  // Output only. The cluster's state.
  State state = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Optional. Output only. Details of cluster's state.
  string detail = 2 [
    (google.api.field_behavior) = OUTPUT_ONLY,
    (google.api.field_behavior) = OPTIONAL
  ];

  // Output only. Time when this state was entered (see JSON representation of
  // [Timestamp](https://developers.google.com/protocol-buffers/docs/proto3#json)).
  google.protobuf.Timestamp state_start_time = 3
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Additional state information that includes
  // status reported by the agent.
  Substate substate = 4 [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Security-related configuration, including encryption, Kerberos, etc.
message SecurityConfig {
  // Optional. Kerberos-related configuration.
  KerberosConfig kerberos_config = 1 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Identity-related configuration, including service account based
  // secure multi-tenancy user mappings.
  IdentityConfig identity_config = 2 [(google.api.field_behavior) = OPTIONAL];
}

// Specifies Kerberos-related configuration. Every field in this message is
// optional.
message KerberosConfig {
  // Optional. Flag to indicate whether to Kerberize the cluster (default:
  // false). Set this field to true to enable Kerberos on a cluster.
  bool enable_kerberos = 1 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The Cloud Storage URI of a KMS encrypted file containing the root
  // principal password.
  string root_principal_password_uri = 2
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. The URI of the KMS key used to encrypt sensitive
  // files.
  string kms_key_uri = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The Cloud Storage URI of the keystore file used for SSL
  // encryption. If not provided, Dataproc will provide a self-signed
  // certificate.
  string keystore_uri = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The Cloud Storage URI of the truststore file used for SSL
  // encryption. If not provided, Dataproc will provide a self-signed
  // certificate.
  string truststore_uri = 5 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The Cloud Storage URI of a KMS encrypted file containing the
  // password to the user-provided keystore. For the self-signed certificate,
  // this password is generated by Dataproc.
  string keystore_password_uri = 6 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The Cloud Storage URI of a KMS encrypted file containing the
  // password to the user-provided key. For the self-signed certificate, this
  // password is generated by Dataproc.
  string key_password_uri = 7 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The Cloud Storage URI of a KMS encrypted file containing the
  // password to the user-provided truststore. For the self-signed certificate,
  // this password is generated by Dataproc.
  string truststore_password_uri = 8 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The remote realm the Dataproc on-cluster KDC will trust, should
  // the user enable cross-realm trust.
  string cross_realm_trust_realm = 9 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The KDC (IP or hostname) for the remote trusted realm in a
  // cross-realm trust relationship.
  string cross_realm_trust_kdc = 10 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The admin server (IP or hostname) for the remote trusted realm in
  // a cross-realm trust relationship.
  string cross_realm_trust_admin_server = 11
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. The Cloud Storage URI of a KMS encrypted file containing the
  // shared password between the on-cluster Kerberos realm and the remote
  // trusted realm, in a cross-realm trust relationship.
  string cross_realm_trust_shared_password_uri = 12
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. The Cloud Storage URI of a KMS encrypted file containing the
  // master key of the KDC database.
  string kdc_db_key_uri = 13 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The lifetime of the ticket granting ticket, in hours.
  // If not specified, or if the user specifies 0, the default value 10
  // is used.
  int32 tgt_lifetime_hours = 14 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The name of the on-cluster Kerberos realm.
  // If not specified, the uppercased domain of hostnames will be the realm.
  string realm = 15 [(google.api.field_behavior) = OPTIONAL];
}

// Identity related configuration, including service account based
// secure multi-tenancy user mappings.
message IdentityConfig {
  // Required. Map of user to service account.
  map<string, string> user_service_account_mapping = 1
      [(google.api.field_behavior) = REQUIRED];
}

// Specifies the selection and config of software inside the cluster.
message SoftwareConfig {
  // Optional. The version of software inside the cluster. It must be one of the
  // supported [Dataproc
  // Versions](https://cloud.google.com/dataproc/docs/concepts/versioning/dataproc-versions#supported-dataproc-image-versions),
  // such as "1.2" (including a subminor version, such as "1.2.29"), or the
  // ["preview"
  // version](https://cloud.google.com/dataproc/docs/concepts/versioning/dataproc-versions#other_versions).
  // If unspecified, it defaults to the latest Debian version.
  string image_version = 1 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The properties to set on daemon config files.
  //
  // Property keys are specified in `prefix:property` format, for example
  // `core:hadoop.tmp.dir`. The following are supported prefixes
  // and their mappings:
  //
  // * capacity-scheduler: `capacity-scheduler.xml`
  // * core:   `core-site.xml`
  // * distcp: `distcp-default.xml`
  // * hdfs:   `hdfs-site.xml`
  // * hive:   `hive-site.xml`
  // * mapred: `mapred-site.xml`
  // * pig:    `pig.properties`
  // * spark:  `spark-defaults.conf`
  // * yarn:   `yarn-site.xml`
  //
  // For more information, see [Cluster
  // properties](https://cloud.google.com/dataproc/docs/concepts/cluster-properties).
  map<string, string> properties = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The set of components to activate on the cluster.
  repeated Component optional_components = 3
      [(google.api.field_behavior) = OPTIONAL];
}

// Specifies the cluster auto-delete schedule configuration.
message LifecycleConfig {
  // Optional. The duration to keep the cluster alive while idling (when no jobs
  // are running). Passing this threshold will cause the cluster to be
  // deleted. Minimum value is 5 minutes; maximum value is 14 days (see JSON
  // representation of
  // [Duration](https://developers.google.com/protocol-buffers/docs/proto3#json)).
  google.protobuf.Duration idle_delete_ttl = 1
      [(google.api.field_behavior) = OPTIONAL];

  // Either the exact time the cluster should be deleted at or
  // the cluster maximum age. As members of a `oneof`, at most one of the two
  // fields below can be set at a time.
  oneof ttl {
    // Optional. The time when cluster will be auto-deleted (see JSON
    // representation of
    // [Timestamp](https://developers.google.com/protocol-buffers/docs/proto3#json)).
    google.protobuf.Timestamp auto_delete_time = 2
        [(google.api.field_behavior) = OPTIONAL];

    // Optional. The lifetime duration of cluster. The cluster will be
    // auto-deleted at the end of this period. Minimum value is 10 minutes;
    // maximum value is 14 days (see JSON representation of
    // [Duration](https://developers.google.com/protocol-buffers/docs/proto3#json)).
    google.protobuf.Duration auto_delete_ttl = 3
        [(google.api.field_behavior) = OPTIONAL];
  }

  // Output only. The time when cluster became idle (most recent job finished)
  // and became eligible for deletion due to idleness (see JSON representation
  // of
  // [Timestamp](https://developers.google.com/protocol-buffers/docs/proto3#json)).
  google.protobuf.Timestamp idle_start_time = 4
      [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Specifies a Metastore configuration.
message MetastoreConfig {
  // Required. Resource name of an existing Dataproc Metastore service.
  // The field is annotated as a reference to a
  // `metastore.googleapis.com/Service` resource.
  //
  // Example:
  //
  // * `projects/[project_id]/locations/[dataproc_region]/services/[service-name]`
  string dataproc_metastore_service = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "metastore.googleapis.com/Service"
    }
  ];
}

// Contains cluster daemon metrics, such as HDFS and YARN stats.
//
// **Beta Feature**: This report is available for testing purposes only. It may
// be changed before final release.
message ClusterMetrics {
  // The HDFS metrics, keyed by metric name.
  map<string, int64> hdfs_metrics = 1;

  // The YARN metrics, keyed by metric name.
  map<string, int64> yarn_metrics = 2;
}

// Dataproc metric config.
message DataprocMetricConfig {
  // A source for the collection of Dataproc custom metrics (see
  // [Custom metrics](https://cloud.google.com/dataproc/docs/guides/dataproc-metrics#custom_metrics)).
  enum MetricSource {
    // Required unspecified metric source.
    METRIC_SOURCE_UNSPECIFIED = 0;

    // Monitoring agent metrics. If this source is enabled,
    // Dataproc enables the monitoring agent in Compute Engine,
    // and collects monitoring agent metrics, which are published
    // with an `agent.googleapis.com` prefix.
    MONITORING_AGENT_DEFAULTS = 1;

    // HDFS metric source.
    HDFS = 2;

    // Spark metric source.
    SPARK = 3;

    // YARN metric source.
    YARN = 4;

    // Spark History Server metric source.
    SPARK_HISTORY_SERVER = 5;

    // Hiveserver2 metric source.
    HIVESERVER2 = 6;

    // Hive Metastore metric source.
    HIVEMETASTORE = 7;

    // Flink metric source.
    FLINK = 8;
  }

  // A Dataproc custom metric.
  message Metric {
    // Required. A standard set of metrics is collected unless `metricOverrides`
    // are specified for the metric source (see
    // [Custom metrics](https://cloud.google.com/dataproc/docs/guides/dataproc-metrics#custom_metrics)
    // for more information).
    MetricSource metric_source = 1 [(google.api.field_behavior) = REQUIRED];

    // Optional. Specify one or more
    // [Custom metrics](https://cloud.google.com/dataproc/docs/guides/dataproc-metrics#custom_metrics)
    // to collect for the metric source (for the `SPARK` metric source, any
    // [Spark metric](https://spark.apache.org/docs/latest/monitoring.html#metrics)
    // can be specified).
    //
    // Provide metrics in the following format:
    // METRIC_SOURCE:INSTANCE:GROUP:METRIC
    // Use camelcase as appropriate.
    //
    // Examples:
    //
    // ```
    // yarn:ResourceManager:QueueMetrics:AppsCompleted
    // spark:driver:DAGScheduler:job.allJobs
    // sparkHistoryServer:JVM:Memory:NonHeapMemoryUsage.committed
    // hiveserver2:JVM:Memory:NonHeapMemoryUsage.used
    // ```
    //
    // Notes:
    //
    // * Only the specified overridden metrics are collected for the
    //   metric source. For example, if one or more `spark:executive` metrics
    //   are listed as metric overrides, other `SPARK` metrics are not
    //   collected. The collection of the metrics for other enabled custom
    //   metric sources is unaffected. For example, if both `SPARK` and `YARN`
    //   metric sources are enabled, and overrides are provided for Spark
    //   metrics only, all YARN metrics are collected.
    repeated string metric_overrides = 2
        [(google.api.field_behavior) = OPTIONAL];
  }

  // Required. Metrics sources to enable.
  repeated Metric metrics = 1 [(google.api.field_behavior) = REQUIRED];
}

// A request to create a cluster.
message CreateClusterRequest {
  // Required. The ID of the Google Cloud Platform project that the cluster
  // belongs to.
  string project_id = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The Dataproc region in which to handle the request.
  string region = 3 [(google.api.field_behavior) = REQUIRED];

  // Required. The cluster to create.
  Cluster cluster = 2 [(google.api.field_behavior) = REQUIRED];

  // Optional. A unique ID used to identify the request. If the server receives
  // two
  // [CreateClusterRequest](https://cloud.google.com/dataproc/docs/reference/rpc/google.cloud.dataproc.v1#google.cloud.dataproc.v1.CreateClusterRequest)s
  // with the same ID, then the second request will be ignored and the
  // first [google.longrunning.Operation][google.longrunning.Operation] created
  // and stored in the backend is returned.
  //
  // It is recommended to always set this value to a
  // [UUID](https://en.wikipedia.org/wiki/Universally_unique_identifier).
  //
  // The ID must contain only letters (a-z, A-Z), numbers (0-9),
  // underscores (_), and hyphens (-). The maximum length is 40 characters.
  string request_id = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Failure action when primary worker creation fails.
  FailureAction action_on_failed_primary_workers = 5
      [(google.api.field_behavior) = OPTIONAL];
}

// A request to update a cluster.
message UpdateClusterRequest {
  // Required. The ID of the Google Cloud Platform project the
  // cluster belongs to.
  string project_id = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The Dataproc region in which to handle the request.
  string region = 5 [(google.api.field_behavior) = REQUIRED];

  // Required. The cluster name.
  string cluster_name = 2 [(google.api.field_behavior) = REQUIRED];

  // Required. The changes to the cluster.
  Cluster cluster = 3 [(google.api.field_behavior) = REQUIRED];

  // Optional. Timeout for graceful YARN decommissioning. Graceful
  // decommissioning allows removing nodes from the cluster without
  // interrupting jobs in progress. Timeout specifies how long to wait for jobs
  // in progress to finish before forcefully removing nodes (and potentially
  // interrupting jobs). Default timeout is 0 (for forceful decommission), and
  // the maximum allowed timeout is 1 day. (see JSON representation of
  // [Duration](https://developers.google.com/protocol-buffers/docs/proto3#json)).
  //
  // Only supported on Dataproc image versions 1.2 and higher.
  google.protobuf.Duration graceful_decommission_timeout = 6
      [(google.api.field_behavior) = OPTIONAL];

  // Required. Specifies the path, relative to `Cluster`, of
  // the field to update. For example, to change the number of workers
  // in a cluster to 5, the `update_mask` parameter would be
  // specified as `config.worker_config.num_instances`,
  // and the `PATCH` request body would specify the new value, as follows:
  //
  //     {
  //       "config":{
  //         "workerConfig":{
  //           "numInstances":"5"
  //         }
  //       }
  //     }
  // Similarly, to change the number of preemptible workers in a cluster to 5,
  // the `update_mask` parameter would be
  // `config.secondary_worker_config.num_instances`, and the `PATCH` request
  // body would be set as follows:
  //
  //     {
  //       "config":{
  //         "secondaryWorkerConfig":{
  //           "numInstances":"5"
  //         }
  //       }
  //     }
  // Note: Currently, only the following fields can be updated:
  //
  //  
  //  
  //  
  //  
  //  
  //  
  //  
  //  
  //  
  //  
  //  
  //  
  //  
  //  
  //  
  //  
  //  
  //  
  //  
  //  
  //  
  //  
  //  
MaskPurpose
labelsUpdate labels
config.worker_config.num_instancesResize primary worker group
config.secondary_worker_config.num_instancesResize secondary worker group
config.autoscaling_config.policy_uriUse, stop using, or // change autoscaling policies
google.protobuf.FieldMask update_mask = 4
      [(google.api.field_behavior) = REQUIRED];

  // Optional. A unique ID used to identify the request. If the server
  // receives two
  // [UpdateClusterRequest](https://cloud.google.com/dataproc/docs/reference/rpc/google.cloud.dataproc.v1#google.cloud.dataproc.v1.UpdateClusterRequest)s
  // with the same id, then the second request will be ignored and the
  // first [google.longrunning.Operation][google.longrunning.Operation] created
  // and stored in the backend is returned.
  //
  // It is recommended to always set this value to a
  // [UUID](https://en.wikipedia.org/wiki/Universally_unique_identifier).
  //
  // The ID must contain only letters (a-z, A-Z), numbers (0-9),
  // underscores (_), and hyphens (-). The maximum length is 40 characters.
  string request_id = 7 [(google.api.field_behavior) = OPTIONAL];
}

// A request to stop a cluster.
message StopClusterRequest {
  // Required. The ID of the Google Cloud Platform project the
  // cluster belongs to.
  string project_id = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The Dataproc region in which to handle the request.
  string region = 2 [(google.api.field_behavior) = REQUIRED];

  // Required. The cluster name.
  string cluster_name = 3 [(google.api.field_behavior) = REQUIRED];

  // Optional. Specifying the `cluster_uuid` means the RPC will fail
  // (with error NOT_FOUND) if a cluster with the specified UUID does not exist.
  string cluster_uuid = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. A unique ID used to identify the request. If the server
  // receives two
  // [StopClusterRequest](https://cloud.google.com/dataproc/docs/reference/rpc/google.cloud.dataproc.v1#google.cloud.dataproc.v1.StopClusterRequest)s
  // with the same id, then the second request will be ignored and the
  // first [google.longrunning.Operation][google.longrunning.Operation] created
  // and stored in the backend is returned.
  //
  // Recommendation: Set this value to a
  // [UUID](https://en.wikipedia.org/wiki/Universally_unique_identifier).
  //
  // The ID must contain only letters (a-z, A-Z), numbers (0-9),
  // underscores (_), and hyphens (-). The maximum length is 40 characters.
  string request_id = 5 [(google.api.field_behavior) = OPTIONAL];
}

// A request to start a cluster.
message StartClusterRequest {
  // Required. The ID of the Google Cloud Platform project the
  // cluster belongs to.
  string project_id = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The Dataproc region in which to handle the request.
  string region = 2 [(google.api.field_behavior) = REQUIRED];

  // Required. The cluster name.
  string cluster_name = 3 [(google.api.field_behavior) = REQUIRED];

  // Optional. Specifying the `cluster_uuid` means the RPC will fail
  // (with error NOT_FOUND) if a cluster with the specified UUID does not exist.
  string cluster_uuid = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. A unique ID used to identify the request. If the server
  // receives two
  // [StartClusterRequest](https://cloud.google.com/dataproc/docs/reference/rpc/google.cloud.dataproc.v1#google.cloud.dataproc.v1.StartClusterRequest)s
  // with the same id, then the second request will be ignored and the
  // first [google.longrunning.Operation][google.longrunning.Operation] created
  // and stored in the backend is returned.
  //
  // Recommendation: Set this value to a
  // [UUID](https://en.wikipedia.org/wiki/Universally_unique_identifier).
  //
  // The ID must contain only letters (a-z, A-Z), numbers (0-9),
  // underscores (_), and hyphens (-). The maximum length is 40 characters.
  string request_id = 5 [(google.api.field_behavior) = OPTIONAL];
}

// A request to delete a cluster.
message DeleteClusterRequest {
  // Required. The ID of the Google Cloud Platform project that the cluster
  // belongs to.
  string project_id = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The Dataproc region in which to handle the request.
  string region = 3 [(google.api.field_behavior) = REQUIRED];

  // Required. The cluster name.
  string cluster_name = 2 [(google.api.field_behavior) = REQUIRED];

  // Optional. Specifying the `cluster_uuid` means the RPC should fail
  // (with error NOT_FOUND) if a cluster with the specified UUID does not
  // exist.
  string cluster_uuid = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. A unique ID used to identify the request. If the server
  // receives two
  // [DeleteClusterRequest](https://cloud.google.com/dataproc/docs/reference/rpc/google.cloud.dataproc.v1#google.cloud.dataproc.v1.DeleteClusterRequest)s
  // with the same id, then the second request will be ignored and the
  // first [google.longrunning.Operation][google.longrunning.Operation] created
  // and stored in the backend is returned.
  //
  // It is recommended to always set this value to a
  // [UUID](https://en.wikipedia.org/wiki/Universally_unique_identifier).
  //
  // The ID must contain only letters (a-z, A-Z), numbers (0-9),
  // underscores (_), and hyphens (-). The maximum length is 40 characters.
  string request_id = 5 [(google.api.field_behavior) = OPTIONAL];
}

// Request to get the resource representation for a cluster in a project.
message GetClusterRequest {
  // Required. The ID of the Google Cloud Platform project that the cluster
  // belongs to.
  string project_id = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The Dataproc region in which to handle the request.
  string region = 3 [(google.api.field_behavior) = REQUIRED];

  // Required. The cluster name.
  string cluster_name = 2 [(google.api.field_behavior) = REQUIRED];
}

// A request to list the clusters in a project.
message ListClustersRequest {
  // Required. The ID of the Google Cloud Platform project that the cluster
  // belongs to.
  string project_id = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The Dataproc region in which to handle the request.
  string region = 4 [(google.api.field_behavior) = REQUIRED];

  // Optional. A filter constraining the clusters to list. Filters are
  // case-sensitive and have the following syntax:
  //
  // field = value [AND [field = value]] ...
  //
  // where **field** is one of `status.state`, `clusterName`, or `labels.[KEY]`,
  // and `[KEY]` is a label key. **value** can be `*` to match all values.
  // `status.state` can be one of the following: `ACTIVE`, `INACTIVE`,
  // `CREATING`, `RUNNING`, `ERROR`, `DELETING`, `UPDATING`, `STOPPING`, or
  // `STOPPED`. `ACTIVE` contains the `CREATING`, `UPDATING`, and `RUNNING`
  // states. `INACTIVE` contains the `DELETING`, `ERROR`, `STOPPING`, and
  // `STOPPED` states. `clusterName` is the name of the cluster provided at
  // creation time. Only the logical `AND` operator is supported;
  // space-separated items are treated as having an implicit `AND` operator.
  //
  // Example filter:
  //
  // status.state = ACTIVE AND clusterName = mycluster
  // AND labels.env = staging AND labels.starred = *
  string filter = 5 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The standard List page size.
  int32 page_size = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The standard List page token.
  string page_token = 3 [(google.api.field_behavior) = OPTIONAL];
}

// The list of all clusters in a project.
message ListClustersResponse {
  // Output only. The clusters in the project.
  repeated Cluster clusters = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. This token is included in the response if there are more
  // results to fetch. To fetch additional results, provide this value as the
  // `page_token` in a subsequent `ListClustersRequest`.
  string next_page_token = 2 [(google.api.field_behavior) = OUTPUT_ONLY];
}

// A request to collect cluster diagnostic information.
message DiagnoseClusterRequest {
  // Defines who has access to the diagnostic tarball.
  enum TarballAccess {
    // Tarball Access unspecified. Falls back to default access of the bucket.
    TARBALL_ACCESS_UNSPECIFIED = 0;

    // Google Cloud Support group has read access to the
    // diagnostic tarball.
    GOOGLE_CLOUD_SUPPORT = 1;

    // Google Cloud Dataproc Diagnose service account has read access to the
    // diagnostic tarball.
    GOOGLE_DATAPROC_DIAGNOSE = 2;
  }

  // Required. The ID of the Google Cloud Platform project that the cluster
  // belongs to.
  string project_id = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The Dataproc region in which to handle the request.
  string region = 3 [(google.api.field_behavior) = REQUIRED];

  // Required. The cluster name.
  string cluster_name = 2 [(google.api.field_behavior) = REQUIRED];

  // Optional. The output Cloud Storage directory for the diagnostic
  // tarball. If not specified, a task-specific directory in the cluster's
  // staging bucket will be used.
  string tarball_gcs_dir = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The access type to the diagnostic tarball. If not
  // specified, falls back to default access of the bucket.
  TarballAccess tarball_access = 5 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Time interval in which diagnosis should be carried out on the
  // cluster.
  google.type.Interval diagnosis_interval = 6
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Specifies a list of jobs on which diagnosis is to be performed.
  // Format: projects/{project}/regions/{region}/jobs/{job}
  repeated string jobs = 10 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Specifies a list of yarn applications on which diagnosis is to be
  // performed.
  repeated string yarn_application_ids = 11
      [(google.api.field_behavior) = OPTIONAL];
}

// The location of diagnostic output.
message DiagnoseClusterResults {
  // Output only. The Cloud Storage URI of the diagnostic output.
  // The output report is a plain text file with a summary of collected
  // diagnostics.
  string output_uri = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Reservation Affinity for consuming Zonal reservation.
message ReservationAffinity {
  // Indicates whether to consume capacity from a reservation or not.
  enum Type {
    // Unspecified; this is the enum's zero value.
    TYPE_UNSPECIFIED = 0;

    // Do not consume from any allocated capacity.
    NO_RESERVATION = 1;

    // Consume any reservation available.
    ANY_RESERVATION = 2;

    // Must consume from a specific reservation. Must specify key value fields
    // for specifying the reservations.
    SPECIFIC_RESERVATION = 3;
  }

  // Optional. Type of reservation to consume.
  Type consume_reservation_type = 1 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Corresponds to the label key of reservation resource.
  string key = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Corresponds to the label values of reservation resource.
  repeated string values = 3 [(google.api.field_behavior) = OPTIONAL];
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy