
google.cloud.dataproc.v1.clusters.proto Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of proto-google-cloud-dataproc-v1 Show documentation
Show all versions of proto-google-cloud-dataproc-v1 Show documentation
PROTO library for proto-google-cloud-dataproc-v1
// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
package google.cloud.dataproc.v1;
import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/dataproc/v1/operations.proto";
import "google/cloud/dataproc/v1/shared.proto";
import "google/longrunning/operations.proto";
import "google/protobuf/duration.proto";
import "google/protobuf/empty.proto";
import "google/protobuf/field_mask.proto";
import "google/protobuf/timestamp.proto";
import "google/protobuf/wrappers.proto";
import "google/type/interval.proto";
option go_package = "cloud.google.com/go/dataproc/v2/apiv1/dataprocpb;dataprocpb";
option java_multiple_files = true;
option java_outer_classname = "ClustersProto";
option java_package = "com.google.cloud.dataproc.v1";
// The ClusterControllerService provides methods to manage clusters
// of Compute Engine instances.
//
// All mutating RPCs below are long-running: they return a
// google.longrunning.Operation whose metadata type is
// ClusterOperationMetadata (see each `operation_info` block).
service ClusterController {
option (google.api.default_host) = "dataproc.googleapis.com";
option (google.api.oauth_scopes) =
"https://www.googleapis.com/auth/cloud-platform";
// Creates a cluster in a project. The returned
// [Operation.metadata][google.longrunning.Operation.metadata] will be
// [ClusterOperationMetadata](https://cloud.google.com/dataproc/docs/reference/rpc/google.cloud.dataproc.v1#clusteroperationmetadata).
rpc CreateCluster(CreateClusterRequest)
returns (google.longrunning.Operation) {
option (google.api.http) = {
post: "/v1/projects/{project_id}/regions/{region}/clusters"
body: "cluster"
};
option (google.api.method_signature) = "project_id,region,cluster";
option (google.longrunning.operation_info) = {
response_type: "Cluster"
metadata_type: "google.cloud.dataproc.v1.ClusterOperationMetadata"
};
}
// Updates a cluster in a project. The returned
// [Operation.metadata][google.longrunning.Operation.metadata] will be
// [ClusterOperationMetadata](https://cloud.google.com/dataproc/docs/reference/rpc/google.cloud.dataproc.v1#clusteroperationmetadata).
// The cluster must be in a
// [`RUNNING`][google.cloud.dataproc.v1.ClusterStatus.State] state or an error
// is returned.
rpc UpdateCluster(UpdateClusterRequest)
returns (google.longrunning.Operation) {
option (google.api.http) = {
patch: "/v1/projects/{project_id}/regions/{region}/clusters/{cluster_name}"
body: "cluster"
};
option (google.api.method_signature) =
"project_id,region,cluster_name,cluster,update_mask";
option (google.longrunning.operation_info) = {
response_type: "Cluster"
metadata_type: "google.cloud.dataproc.v1.ClusterOperationMetadata"
};
}
// Stops a cluster in a project. The returned Operation resolves to the
// stopped Cluster (see `operation_info` below). No flattened
// method_signature is published for this RPC.
rpc StopCluster(StopClusterRequest) returns (google.longrunning.Operation) {
option (google.api.http) = {
post: "/v1/projects/{project_id}/regions/{region}/clusters/{cluster_name}:stop"
body: "*"
};
option (google.longrunning.operation_info) = {
response_type: "Cluster"
metadata_type: "google.cloud.dataproc.v1.ClusterOperationMetadata"
};
}
// Starts a cluster in a project. The returned Operation resolves to the
// started Cluster (see `operation_info` below). No flattened
// method_signature is published for this RPC.
rpc StartCluster(StartClusterRequest) returns (google.longrunning.Operation) {
option (google.api.http) = {
post: "/v1/projects/{project_id}/regions/{region}/clusters/{cluster_name}:start"
body: "*"
};
option (google.longrunning.operation_info) = {
response_type: "Cluster"
metadata_type: "google.cloud.dataproc.v1.ClusterOperationMetadata"
};
}
// Deletes a cluster in a project. The returned
// [Operation.metadata][google.longrunning.Operation.metadata] will be
// [ClusterOperationMetadata](https://cloud.google.com/dataproc/docs/reference/rpc/google.cloud.dataproc.v1#clusteroperationmetadata).
rpc DeleteCluster(DeleteClusterRequest)
returns (google.longrunning.Operation) {
option (google.api.http) = {
delete: "/v1/projects/{project_id}/regions/{region}/clusters/{cluster_name}"
};
option (google.api.method_signature) = "project_id,region,cluster_name";
option (google.longrunning.operation_info) = {
response_type: "google.protobuf.Empty"
metadata_type: "google.cloud.dataproc.v1.ClusterOperationMetadata"
};
}
// Gets the resource representation for a cluster in a project.
rpc GetCluster(GetClusterRequest) returns (Cluster) {
option (google.api.http) = {
get: "/v1/projects/{project_id}/regions/{region}/clusters/{cluster_name}"
};
option (google.api.method_signature) = "project_id,region,cluster_name";
}
// Lists all regions/{region}/clusters in a project alphabetically.
rpc ListClusters(ListClustersRequest) returns (ListClustersResponse) {
option (google.api.http) = {
get: "/v1/projects/{project_id}/regions/{region}/clusters"
};
// method_signature is a repeated option: two flattened signatures are
// published, one without and one with the optional `filter` argument.
option (google.api.method_signature) = "project_id,region";
option (google.api.method_signature) = "project_id,region,filter";
}
// Gets cluster diagnostic information. The returned
// [Operation.metadata][google.longrunning.Operation.metadata] will be
// [ClusterOperationMetadata](https://cloud.google.com/dataproc/docs/reference/rpc/google.cloud.dataproc.v1#clusteroperationmetadata).
// After the operation completes,
// [Operation.response][google.longrunning.Operation.response]
// contains
// [DiagnoseClusterResults](https://cloud.google.com/dataproc/docs/reference/rpc/google.cloud.dataproc.v1#diagnoseclusterresults).
rpc DiagnoseCluster(DiagnoseClusterRequest)
returns (google.longrunning.Operation) {
option (google.api.http) = {
post: "/v1/projects/{project_id}/regions/{region}/clusters/{cluster_name}:diagnose"
body: "*"
};
option (google.api.method_signature) = "project_id,region,cluster_name";
option (google.longrunning.operation_info) = {
response_type: "DiagnoseClusterResults"
metadata_type: "google.cloud.dataproc.v1.ClusterOperationMetadata"
};
}
}
// Describes the identifying information, config, and status of
// a Dataproc cluster
message Cluster {
// Required. The Google Cloud Platform project ID that the cluster belongs to.
string project_id = 1 [(google.api.field_behavior) = REQUIRED];
// Required. The cluster name, which must be unique within a project.
// The name must start with a lowercase letter, and can contain
// up to 51 lowercase letters, numbers, and hyphens. It cannot end
// with a hyphen. The name of a deleted cluster can be reused.
string cluster_name = 2 [(google.api.field_behavior) = REQUIRED];
// Optional. The cluster config for a cluster of Compute Engine Instances.
// Note that Dataproc may set default values, and values may change
// when clusters are updated.
//
// Exactly one of ClusterConfig or VirtualClusterConfig must be specified.
ClusterConfig config = 3 [(google.api.field_behavior) = OPTIONAL];
// Optional. The virtual cluster config is used when creating a Dataproc
// cluster that does not directly control the underlying compute resources,
// for example, when creating a [Dataproc-on-GKE
// cluster](https://cloud.google.com/dataproc/docs/guides/dpgke/dataproc-gke-overview).
// Dataproc may set default values, and values may change when
// clusters are updated. Exactly one of
// [config][google.cloud.dataproc.v1.Cluster.config] or
// [virtual_cluster_config][google.cloud.dataproc.v1.Cluster.virtual_cluster_config]
// must be specified.
VirtualClusterConfig virtual_cluster_config = 10
[(google.api.field_behavior) = OPTIONAL];
// Optional. The labels to associate with this cluster.
// Label **keys** must contain 1 to 63 characters, and must conform to
// [RFC 1035](https://www.ietf.org/rfc/rfc1035.txt).
// Label **values** may be empty, but, if present, must contain 1 to 63
// characters, and must conform to [RFC
// 1035](https://www.ietf.org/rfc/rfc1035.txt). No more than 32 labels can be
// associated with a cluster.
// FIX(review): restored the `<string, string>` type parameters that were
// stripped from this map field; `map labels` is not valid proto3.
map<string, string> labels = 8 [(google.api.field_behavior) = OPTIONAL];
// Output only. Cluster status.
ClusterStatus status = 4 [(google.api.field_behavior) = OUTPUT_ONLY];
// Output only. The previous cluster status.
repeated ClusterStatus status_history = 7
[(google.api.field_behavior) = OUTPUT_ONLY];
// Output only. A cluster UUID (Unique Universal Identifier). Dataproc
// generates this value when it creates the cluster.
string cluster_uuid = 6 [(google.api.field_behavior) = OUTPUT_ONLY];
// Output only. Contains cluster daemon metrics such as HDFS and YARN stats.
//
// **Beta Feature**: This report is available for testing purposes only. It
// may be changed before final release.
ClusterMetrics metrics = 9 [(google.api.field_behavior) = OUTPUT_ONLY];
}
// The cluster config.
//
// NOTE(review): field numbers 3-7, 14, 21, 22, and 24 are skipped in this
// message — presumably retired or held for other uses; do not reuse them.
message ClusterConfig {
// Optional. A Cloud Storage bucket used to stage job
// dependencies, config files, and job driver console output.
// If you do not specify a staging bucket, Cloud
// Dataproc will determine a Cloud Storage location (US,
// ASIA, or EU) for your cluster's staging bucket according to the
// Compute Engine zone where your cluster is deployed, and then create
// and manage this project-level, per-location bucket (see
// [Dataproc staging and temp
// buckets](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/staging-bucket)).
// **This field requires a Cloud Storage bucket name, not a `gs://...` URI to
// a Cloud Storage bucket.**
string config_bucket = 1 [(google.api.field_behavior) = OPTIONAL];
// Optional. A Cloud Storage bucket used to store ephemeral cluster and jobs
// data, such as Spark and MapReduce history files. If you do not specify a
// temp bucket, Dataproc will determine a Cloud Storage location (US, ASIA, or
// EU) for your cluster's temp bucket according to the Compute Engine zone
// where your cluster is deployed, and then create and manage this
// project-level, per-location bucket. The default bucket has a TTL of 90
// days, but you can use any TTL (or none) if you specify a bucket (see
// [Dataproc staging and temp
// buckets](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/staging-bucket)).
// **This field requires a Cloud Storage bucket name, not a `gs://...` URI to
// a Cloud Storage bucket.**
string temp_bucket = 2 [(google.api.field_behavior) = OPTIONAL];
// Optional. The shared Compute Engine config settings for
// all instances in a cluster.
GceClusterConfig gce_cluster_config = 8
[(google.api.field_behavior) = OPTIONAL];
// Optional. The Compute Engine config settings for
// the cluster's master instance.
InstanceGroupConfig master_config = 9
[(google.api.field_behavior) = OPTIONAL];
// Optional. The Compute Engine config settings for
// the cluster's worker instances.
InstanceGroupConfig worker_config = 10
[(google.api.field_behavior) = OPTIONAL];
// Optional. The Compute Engine config settings for
// a cluster's secondary worker instances.
InstanceGroupConfig secondary_worker_config = 12
[(google.api.field_behavior) = OPTIONAL];
// Optional. The config settings for cluster software.
SoftwareConfig software_config = 13 [(google.api.field_behavior) = OPTIONAL];
// Optional. Commands to execute on each node after config is
// completed. By default, executables are run on master and all worker nodes.
// You can test a node's `role` metadata to run an executable on
// a master or worker node, as shown below using `curl` (you can also use
// `wget`):
//
// ROLE=$(curl -H Metadata-Flavor:Google
// http://metadata/computeMetadata/v1/instance/attributes/dataproc-role)
// if [[ "${ROLE}" == 'Master' ]]; then
// ... master specific actions ...
// else
// ... worker specific actions ...
// fi
repeated NodeInitializationAction initialization_actions = 11
[(google.api.field_behavior) = OPTIONAL];
// Optional. Encryption settings for the cluster.
EncryptionConfig encryption_config = 15
[(google.api.field_behavior) = OPTIONAL];
// Optional. Autoscaling config for the policy associated with the cluster.
// Cluster does not autoscale if this field is unset.
AutoscalingConfig autoscaling_config = 18
[(google.api.field_behavior) = OPTIONAL];
// Optional. Security settings for the cluster.
SecurityConfig security_config = 16 [(google.api.field_behavior) = OPTIONAL];
// Optional. Lifecycle setting for the cluster.
LifecycleConfig lifecycle_config = 17
[(google.api.field_behavior) = OPTIONAL];
// Optional. Port/endpoint configuration for this cluster.
EndpointConfig endpoint_config = 19 [(google.api.field_behavior) = OPTIONAL];
// Optional. Metastore configuration.
MetastoreConfig metastore_config = 20
[(google.api.field_behavior) = OPTIONAL];
// Optional. The config for Dataproc metrics.
DataprocMetricConfig dataproc_metric_config = 23
[(google.api.field_behavior) = OPTIONAL];
// Optional. The node group settings.
repeated AuxiliaryNodeGroup auxiliary_node_groups = 25
[(google.api.field_behavior) = OPTIONAL];
}
// The Dataproc cluster config for a cluster that does not directly control the
// underlying compute resources, such as a [Dataproc-on-GKE
// cluster](https://cloud.google.com/dataproc/docs/guides/dpgke/dataproc-gke-overview).
message VirtualClusterConfig {
// Optional. A Cloud Storage bucket used to stage job
// dependencies, config files, and job driver console output.
// If you do not specify a staging bucket, Cloud
// Dataproc will determine a Cloud Storage location (US,
// ASIA, or EU) for your cluster's staging bucket according to the
// Compute Engine zone where your cluster is deployed, and then create
// and manage this project-level, per-location bucket (see
// [Dataproc staging and temp
// buckets](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/staging-bucket)).
// **This field requires a Cloud Storage bucket name, not a `gs://...` URI to
// a Cloud Storage bucket.**
string staging_bucket = 1 [(google.api.field_behavior) = OPTIONAL];
// The infrastructure the virtual cluster runs on. Currently a single-member
// oneof; additional backends may be added as new members in the future.
oneof infrastructure_config {
// Required. The configuration for running the Dataproc cluster on
// Kubernetes.
KubernetesClusterConfig kubernetes_cluster_config = 6
[(google.api.field_behavior) = REQUIRED];
}
// Optional. Configuration of auxiliary services used by this cluster.
AuxiliaryServicesConfig auxiliary_services_config = 7
[(google.api.field_behavior) = OPTIONAL];
}
// Auxiliary services configuration for a Cluster.
// Both members are optional; an empty message means no auxiliary services
// are configured.
message AuxiliaryServicesConfig {
// Optional. The Hive Metastore configuration for this workload.
MetastoreConfig metastore_config = 1 [(google.api.field_behavior) = OPTIONAL];
// Optional. The Spark History Server configuration for the workload.
SparkHistoryServerConfig spark_history_server_config = 2
[(google.api.field_behavior) = OPTIONAL];
}
// Endpoint config for this cluster.
message EndpointConfig {
// Output only. The map of port descriptions to URLs. Will only be populated
// if enable_http_port_access is true.
// FIX(review): restored the `<string, string>` type parameters that were
// stripped from this map field; a bare `map` is not valid proto3.
map<string, string> http_ports = 1
[(google.api.field_behavior) = OUTPUT_ONLY];
// Optional. If true, enable http access to specific ports on the cluster
// from external sources. Defaults to false.
bool enable_http_port_access = 2 [(google.api.field_behavior) = OPTIONAL];
}
// Autoscaling Policy config associated with the cluster.
message AutoscalingConfig {
// Optional. The autoscaling policy used by the cluster.
//
// Only resource names including project ID and location (region) are valid.
// Examples:
//
// * `https://www.googleapis.com/compute/v1/projects/[project_id]/locations/[dataproc_region]/autoscalingPolicies/[policy_id]`
// * `projects/[project_id]/locations/[dataproc_region]/autoscalingPolicies/[policy_id]`
//
// Note that the policy must be in the same project and Dataproc region.
string policy_uri = 1 [(google.api.field_behavior) = OPTIONAL];
}
// Encryption settings for the cluster.
message EncryptionConfig {
// Optional. The Cloud KMS key resource name to use for persistent disk
// encryption for all instances in the cluster. See [Use CMEK with cluster
// data]
// (https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/customer-managed-encryption#use_cmek_with_cluster_data)
// for more information.
string gce_pd_kms_key_name = 1 [(google.api.field_behavior) = OPTIONAL];
// Optional. The Cloud KMS key resource name to use for cluster persistent
// disk and job argument encryption. See [Use CMEK with cluster data]
// (https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/customer-managed-encryption#use_cmek_with_cluster_data)
// for more information.
//
// When this key resource name is provided, the following job arguments of
// the following job types submitted to the cluster are encrypted using CMEK:
//
// * [FlinkJob
// args](https://cloud.google.com/dataproc/docs/reference/rest/v1/FlinkJob)
// * [HadoopJob
// args](https://cloud.google.com/dataproc/docs/reference/rest/v1/HadoopJob)
// * [SparkJob
// args](https://cloud.google.com/dataproc/docs/reference/rest/v1/SparkJob)
// * [SparkRJob
// args](https://cloud.google.com/dataproc/docs/reference/rest/v1/SparkRJob)
// * [PySparkJob
// args](https://cloud.google.com/dataproc/docs/reference/rest/v1/PySparkJob)
// * [SparkSqlJob](https://cloud.google.com/dataproc/docs/reference/rest/v1/SparkSqlJob)
// scriptVariables and queryList.queries
// * [HiveJob](https://cloud.google.com/dataproc/docs/reference/rest/v1/HiveJob)
// scriptVariables and queryList.queries
// * [PigJob](https://cloud.google.com/dataproc/docs/reference/rest/v1/PigJob)
// scriptVariables and queryList.queries
// * [PrestoJob](https://cloud.google.com/dataproc/docs/reference/rest/v1/PrestoJob)
// scriptVariables and queryList.queries
string kms_key = 2 [
(google.api.field_behavior) = OPTIONAL,
(google.api.resource_reference) = {
type: "cloudkms.googleapis.com/CryptoKey"
}
];
}
// Common config settings for resources of Compute Engine cluster
// instances, applicable to all instances in the cluster.
message GceClusterConfig {
// `PrivateIpv6GoogleAccess` controls whether and how Dataproc cluster nodes
// can communicate with Google Services through gRPC over IPv6.
// These values are directly mapped to corresponding values in the
// [Compute Engine Instance
// fields](https://cloud.google.com/compute/docs/reference/rest/v1/instances).
enum PrivateIpv6GoogleAccess {
// If unspecified, Compute Engine default behavior will apply, which
// is the same as
// [INHERIT_FROM_SUBNETWORK][google.cloud.dataproc.v1.GceClusterConfig.PrivateIpv6GoogleAccess.INHERIT_FROM_SUBNETWORK].
PRIVATE_IPV6_GOOGLE_ACCESS_UNSPECIFIED = 0;
// Private access to and from Google Services configuration
// inherited from the subnetwork configuration. This is the
// default Compute Engine behavior.
INHERIT_FROM_SUBNETWORK = 1;
// Enables outbound private IPv6 access to Google Services from the Dataproc
// cluster.
OUTBOUND = 2;
// Enables bidirectional private IPv6 access between Google Services and the
// Dataproc cluster.
BIDIRECTIONAL = 3;
}
// Optional. The Compute Engine zone where the Dataproc cluster will be
// located. If omitted, the service will pick a zone in the cluster's Compute
// Engine region. On a get request, zone will always be present.
//
// A full URL, partial URI, or short name are valid. Examples:
//
// * `https://www.googleapis.com/compute/v1/projects/[project_id]/zones/[zone]`
// * `projects/[project_id]/zones/[zone]`
// * `[zone]`
string zone_uri = 1 [(google.api.field_behavior) = OPTIONAL];
// Optional. The Compute Engine network to be used for machine
// communications. Cannot be specified with subnetwork_uri. If neither
// `network_uri` nor `subnetwork_uri` is specified, the "default" network of
// the project is used, if it exists. Cannot be a "Custom Subnet Network" (see
// [Using Subnetworks](https://cloud.google.com/compute/docs/subnetworks) for
// more information).
//
// A full URL, partial URI, or short name are valid. Examples:
//
// * `https://www.googleapis.com/compute/v1/projects/[project_id]/global/networks/default`
// * `projects/[project_id]/global/networks/default`
// * `default`
string network_uri = 2 [(google.api.field_behavior) = OPTIONAL];
// Optional. The Compute Engine subnetwork to be used for machine
// communications. Cannot be specified with network_uri.
//
// A full URL, partial URI, or short name are valid. Examples:
//
// * `https://www.googleapis.com/compute/v1/projects/[project_id]/regions/[region]/subnetworks/sub0`
// * `projects/[project_id]/regions/[region]/subnetworks/sub0`
// * `sub0`
string subnetwork_uri = 6 [(google.api.field_behavior) = OPTIONAL];
// Optional. This setting applies to subnetwork-enabled networks. It is set to
// `true` by default in clusters created with image versions 2.2.x.
//
// When set to `true`:
//
// * All cluster VMs have internal IP addresses.
// * [Google Private Access]
// (https://cloud.google.com/vpc/docs/private-google-access)
// must be enabled to access Dataproc and other Google Cloud APIs.
// * Off-cluster dependencies must be configured to be accessible
// without external IP addresses.
//
// When set to `false`:
//
// * Cluster VMs are not restricted to internal IP addresses.
// * Ephemeral external IP addresses are assigned to each cluster VM.
optional bool internal_ip_only = 7 [(google.api.field_behavior) = OPTIONAL];
// Optional. The type of IPv6 access for a cluster.
PrivateIpv6GoogleAccess private_ipv6_google_access = 12
[(google.api.field_behavior) = OPTIONAL];
// Optional. The [Dataproc service
// account](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/service-accounts#service_accounts_in_dataproc)
// (also see [VM Data Plane
// identity](https://cloud.google.com/dataproc/docs/concepts/iam/dataproc-principals#vm_service_account_data_plane_identity))
// used by Dataproc cluster VM instances to access Google Cloud Platform
// services.
//
// If not specified, the
// [Compute Engine default service
// account](https://cloud.google.com/compute/docs/access/service-accounts#default_service_account)
// is used.
string service_account = 8 [(google.api.field_behavior) = OPTIONAL];
// Optional. The URIs of service account scopes to be included in
// Compute Engine instances. The following base set of scopes is always
// included:
//
// * https://www.googleapis.com/auth/cloud.useraccounts.readonly
// * https://www.googleapis.com/auth/devstorage.read_write
// * https://www.googleapis.com/auth/logging.write
//
// If no scopes are specified, the following defaults are also provided:
//
// * https://www.googleapis.com/auth/bigquery
// * https://www.googleapis.com/auth/bigtable.admin.table
// * https://www.googleapis.com/auth/bigtable.data
// * https://www.googleapis.com/auth/devstorage.full_control
repeated string service_account_scopes = 3
[(google.api.field_behavior) = OPTIONAL];
// The Compute Engine network tags to add to all instances (see [Tagging
// instances](https://cloud.google.com/vpc/docs/add-remove-network-tags)).
repeated string tags = 4;
// Optional. The Compute Engine metadata entries to add to all instances (see
// [Project and instance
// metadata](https://cloud.google.com/compute/docs/storing-retrieving-metadata#project_and_instance_metadata)).
// FIX(review): restored the `<string, string>` type parameters that were
// stripped from this map field; a bare `map` is not valid proto3.
map<string, string> metadata = 5 [(google.api.field_behavior) = OPTIONAL];
// Optional. Reservation Affinity for consuming Zonal reservation.
ReservationAffinity reservation_affinity = 11
[(google.api.field_behavior) = OPTIONAL];
// Optional. Node Group Affinity for sole-tenant clusters.
NodeGroupAffinity node_group_affinity = 13
[(google.api.field_behavior) = OPTIONAL];
// Optional. Shielded Instance Config for clusters using [Compute Engine
// Shielded
// VMs](https://cloud.google.com/security/shielded-cloud/shielded-vm).
ShieldedInstanceConfig shielded_instance_config = 14
[(google.api.field_behavior) = OPTIONAL];
// Optional. Confidential Instance Config for clusters using [Confidential
// VMs](https://cloud.google.com/compute/confidential-vm/docs).
ConfidentialInstanceConfig confidential_instance_config = 15
[(google.api.field_behavior) = OPTIONAL];
}
// Node Group Affinity for clusters using sole-tenant node groups.
// **The Dataproc `NodeGroupAffinity` resource is not related to the
// Dataproc [NodeGroup][google.cloud.dataproc.v1.NodeGroup] resource.**
message NodeGroupAffinity {
// Required. The URI of a
// sole-tenant [node group
// resource](https://cloud.google.com/compute/docs/reference/rest/v1/nodeGroups)
// that the cluster will be created on.
//
// A full URL, partial URI, or node group name are valid. Examples:
//
// * `https://www.googleapis.com/compute/v1/projects/[project_id]/zones/[zone]/nodeGroups/node-group-1`
// * `projects/[project_id]/zones/[zone]/nodeGroups/node-group-1`
// * `node-group-1`
//
// This is the only field of the message and it is REQUIRED, so an
// empty NodeGroupAffinity is never valid.
string node_group_uri = 1 [(google.api.field_behavior) = REQUIRED];
}
// Shielded Instance Config for clusters using [Compute Engine Shielded
// VMs](https://cloud.google.com/security/shielded-cloud/shielded-vm).
// All fields use explicit presence (`optional`), so "unset" is
// distinguishable from an explicit `false`.
message ShieldedInstanceConfig {
// Optional. Defines whether instances have Secure Boot enabled.
optional bool enable_secure_boot = 1 [(google.api.field_behavior) = OPTIONAL];
// Optional. Defines whether instances have the vTPM enabled.
optional bool enable_vtpm = 2 [(google.api.field_behavior) = OPTIONAL];
// Optional. Defines whether instances have integrity monitoring enabled.
optional bool enable_integrity_monitoring = 3
[(google.api.field_behavior) = OPTIONAL];
}
// Confidential Instance Config for clusters using [Confidential
// VMs](https://cloud.google.com/compute/confidential-vm/docs).
message ConfidentialInstanceConfig {
// Optional. Defines whether the instance should have confidential compute
// enabled.
// Note: implicit presence — an unset field is indistinguishable from
// `false` on the wire.
bool enable_confidential_compute = 1 [(google.api.field_behavior) = OPTIONAL];
}
// The config settings for Compute Engine resources in
// an instance group, such as a master or worker group.
message InstanceGroupConfig {
// Controls the use of preemptible instances within the group.
enum Preemptibility {
// Preemptibility is unspecified, the system will choose the
// appropriate setting for each instance group.
PREEMPTIBILITY_UNSPECIFIED = 0;
// Instances are non-preemptible.
//
// This option is allowed for all instance groups and is the only valid
// value for Master and Worker instance groups.
NON_PREEMPTIBLE = 1;
// Instances are [preemptible]
// (https://cloud.google.com/compute/docs/instances/preemptible).
//
// This option is allowed only for [secondary worker]
// (https://cloud.google.com/dataproc/docs/concepts/compute/secondary-vms)
// groups.
PREEMPTIBLE = 2;
// Instances are [Spot VMs]
// (https://cloud.google.com/compute/docs/instances/spot).
//
// This option is allowed only for [secondary worker]
// (https://cloud.google.com/dataproc/docs/concepts/compute/secondary-vms)
// groups. Spot VMs are the latest version of [preemptible VMs]
// (https://cloud.google.com/compute/docs/instances/preemptible), and
// provide additional features.
SPOT = 3;
}
// Optional. The number of VM instances in the instance group.
// For [HA
// cluster](/dataproc/docs/concepts/configuring-clusters/high-availability)
// [master_config](#FIELDS.master_config) groups, **must be set to 3**.
// For standard cluster [master_config](#FIELDS.master_config) groups,
// **must be set to 1**.
int32 num_instances = 1 [(google.api.field_behavior) = OPTIONAL];
// Output only. The list of instance names. Dataproc derives the names
// from `cluster_name`, `num_instances`, and the instance group.
repeated string instance_names = 2
[(google.api.field_behavior) = OUTPUT_ONLY];
// Output only. List of references to Compute Engine instances.
repeated InstanceReference instance_references = 11
[(google.api.field_behavior) = OUTPUT_ONLY];
// Optional. The Compute Engine image resource used for cluster instances.
//
// The URI can represent an image or image family.
//
// Image examples:
//
// * `https://www.googleapis.com/compute/v1/projects/[project_id]/global/images/[image-id]`
// * `projects/[project_id]/global/images/[image-id]`
// * `image-id`
//
// Image family examples. Dataproc will use the most recent
// image from the family:
//
// * `https://www.googleapis.com/compute/v1/projects/[project_id]/global/images/family/[custom-image-family-name]`
// * `projects/[project_id]/global/images/family/[custom-image-family-name]`
//
// If the URI is unspecified, it will be inferred from
// `SoftwareConfig.image_version` or the system default.
string image_uri = 3 [(google.api.field_behavior) = OPTIONAL];
// Optional. The Compute Engine machine type used for cluster instances.
//
// A full URL, partial URI, or short name are valid. Examples:
//
// * `https://www.googleapis.com/compute/v1/projects/[project_id]/zones/[zone]/machineTypes/n1-standard-2`
// * `projects/[project_id]/zones/[zone]/machineTypes/n1-standard-2`
// * `n1-standard-2`
//
// **Auto Zone Exception**: If you are using the Dataproc
// [Auto Zone
// Placement](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement)
// feature, you must use the short name of the machine type
// resource, for example, `n1-standard-2`.
string machine_type_uri = 4 [(google.api.field_behavior) = OPTIONAL];
// Optional. Disk option config settings.
DiskConfig disk_config = 5 [(google.api.field_behavior) = OPTIONAL];
// Output only. Specifies that this instance group contains preemptible
// instances.
bool is_preemptible = 6 [(google.api.field_behavior) = OUTPUT_ONLY];
// Optional. Specifies the preemptibility of the instance group.
//
// The default value for master and worker groups is
// `NON_PREEMPTIBLE`. This default cannot be changed.
//
// The default value for secondary instances is
// `PREEMPTIBLE`.
Preemptibility preemptibility = 10 [(google.api.field_behavior) = OPTIONAL];
// Output only. The config for Compute Engine Instance Group
// Manager that manages this group.
// This is only used for preemptible instance groups.
ManagedGroupConfig managed_group_config = 7
[(google.api.field_behavior) = OUTPUT_ONLY];
// Optional. The Compute Engine accelerator configuration for these
// instances.
repeated AcceleratorConfig accelerators = 8
[(google.api.field_behavior) = OPTIONAL];
// Optional. Specifies the minimum cpu platform for the Instance Group.
// See [Dataproc -> Minimum CPU
// Platform](https://cloud.google.com/dataproc/docs/concepts/compute/dataproc-min-cpu).
string min_cpu_platform = 9 [(google.api.field_behavior) = OPTIONAL];
// Optional. The minimum number of primary worker instances to create.
// If `min_num_instances` is set, cluster creation will succeed if
// the number of primary workers created is at least equal to the
// `min_num_instances` number.
//
// Example: Cluster creation request with `num_instances` = `5` and
// `min_num_instances` = `3`:
//
// * If 4 VMs are created and 1 instance fails,
// the failed VM is deleted. The cluster is
// resized to 4 instances and placed in a `RUNNING` state.
// * If 2 instances are created and 3 instances fail,
// the cluster is placed in an `ERROR` state. The failed VMs
// are not deleted.
int32 min_num_instances = 12 [(google.api.field_behavior) = OPTIONAL];
// Optional. Instance flexibility Policy allowing a mixture of VM shapes and
// provisioning models.
InstanceFlexibilityPolicy instance_flexibility_policy = 13
[(google.api.field_behavior) = OPTIONAL];
// Optional. Configuration to handle the startup of instances during cluster
// create and update process.
StartupConfig startup_config = 14 [(google.api.field_behavior) = OPTIONAL];
}
// Configuration to handle the startup of instances during cluster create and
// update process.
message StartupConfig {
// Optional. The config setting to enable cluster creation/update to be
// successful only after required_registration_fraction of instances are up
// and running. This configuration is applicable to only secondary workers for
// now. The cluster will fail if required_registration_fraction of instances
// are not available. This will include instance creation, agent registration,
// and service registration (if enabled).
// Uses explicit presence (`optional`), so "unset" differs from 0.0.
optional double required_registration_fraction = 1
[(google.api.field_behavior) = OPTIONAL];
}
// A reference to a Compute Engine instance.
message InstanceReference {
  // The user-friendly name of the Compute Engine instance.
  string instance_name = 1;

  // The unique identifier of the Compute Engine instance.
  string instance_id = 2;

  // The public RSA key used for sharing data with this instance.
  string public_key = 3;

  // The public ECIES key used for sharing data with this instance.
  string public_ecies_key = 4;
}
// Specifies the resources used to actively manage an instance group.
message ManagedGroupConfig {
  // Output only. The name of the Instance Template used for the Managed
  // Instance Group.
  string instance_template_name = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The name of the Instance Group Manager for this group.
  string instance_group_manager_name = 2
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The partial URI to the instance group manager for this group.
  // E.g. projects/my-project/regions/us-central1/instanceGroupManagers/my-igm.
  string instance_group_manager_uri = 3
      [(google.api.field_behavior) = OUTPUT_ONLY];
}
// Instance flexibility Policy allowing a mixture of VM shapes and provisioning
// models.
message InstanceFlexibilityPolicy {
  // Defines machines types and a rank to which the machines types belong.
  message InstanceSelection {
    // Optional. Full machine-type names, e.g. "n1-standard-16".
    repeated string machine_types = 1 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Preference of this instance selection. Lower number means
    // higher preference. Dataproc will first try to create a VM based on the
    // machine-type with priority rank and fallback to next rank based on
    // availability. Machine types and instance selections with the same
    // priority have the same preference.
    int32 rank = 2 [(google.api.field_behavior) = OPTIONAL];
  }

  // Defines a mapping from machine types to the number of VMs that are created
  // with each machine type.
  message InstanceSelectionResult {
    // Output only. Full machine-type names, e.g. "n1-standard-16".
    optional string machine_type = 1
        [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. Number of VM provisioned with the machine_type.
    optional int32 vm_count = 2 [(google.api.field_behavior) = OUTPUT_ONLY];
  }

  // NOTE(review): field number 1 is unused in this message — presumably
  // reserved upstream; confirm before reusing it.

  // Optional. List of instance selection options that the group will use when
  // creating new VMs.
  repeated InstanceSelection instance_selection_list = 2
      [(google.api.field_behavior) = OPTIONAL];

  // Output only. A list of instance selection results in the group.
  repeated InstanceSelectionResult instance_selection_results = 3
      [(google.api.field_behavior) = OUTPUT_ONLY];
}
// Specifies the type and number of accelerator cards attached to the instances
// of an instance. See [GPUs on Compute
// Engine](https://cloud.google.com/compute/docs/gpus/).
message AcceleratorConfig {
  // Full URL, partial URI, or short name of the accelerator type resource to
  // expose to this instance. See
  // [Compute Engine
  // AcceleratorTypes](https://cloud.google.com/compute/docs/reference/v1/acceleratorTypes).
  //
  // Examples:
  //
  // * `https://www.googleapis.com/compute/v1/projects/[project_id]/zones/[zone]/acceleratorTypes/nvidia-tesla-t4`
  // * `projects/[project_id]/zones/[zone]/acceleratorTypes/nvidia-tesla-t4`
  // * `nvidia-tesla-t4`
  //
  // **Auto Zone Exception**: If you are using the Dataproc
  // [Auto Zone
  // Placement](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement)
  // feature, you must use the short name of the accelerator type
  // resource, for example, `nvidia-tesla-t4`.
  string accelerator_type_uri = 1;

  // The number of the accelerator cards of this type exposed to this instance.
  int32 accelerator_count = 2;
}
// Specifies the config of disk options for a group of VM instances.
message DiskConfig {
  // Optional. Type of the boot disk (default is "pd-standard").
  // Valid values: "pd-balanced" (Persistent Disk Balanced Solid State Drive),
  // "pd-ssd" (Persistent Disk Solid State Drive),
  // or "pd-standard" (Persistent Disk Hard Disk Drive).
  // See [Disk types](https://cloud.google.com/compute/docs/disks#disk-types).
  string boot_disk_type = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Size in GB of the boot disk (default is 500GB).
  int32 boot_disk_size_gb = 1 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Number of attached SSDs, from 0 to 8 (default is 0).
  // If SSDs are not attached, the boot disk is used to store runtime logs and
  // [HDFS](https://hadoop.apache.org/docs/r1.2.1/hdfs_user_guide.html) data.
  // If one or more SSDs are attached, this runtime bulk
  // data is spread across them, and the boot disk contains only basic
  // config and installed binaries.
  //
  // Note: Local SSD options may vary by machine type and number of vCPUs
  // selected.
  int32 num_local_ssds = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Interface type of local SSDs (default is "scsi").
  // Valid values: "scsi" (Small Computer System Interface),
  // "nvme" (Non-Volatile Memory Express).
  // See [local SSD
  // performance](https://cloud.google.com/compute/docs/disks/local-ssd#performance).
  string local_ssd_interface = 4 [(google.api.field_behavior) = OPTIONAL];
}
// Node group identification and configuration information.
message AuxiliaryNodeGroup {
  // Required. Node group configuration.
  NodeGroup node_group = 1 [(google.api.field_behavior) = REQUIRED];

  // Optional. A node group ID. Generated if not specified.
  //
  // The ID must contain only letters (a-z, A-Z), numbers (0-9),
  // underscores (_), and hyphens (-). Cannot begin or end with underscore
  // or hyphen. Must consist of from 3 to 33 characters.
  string node_group_id = 2 [(google.api.field_behavior) = OPTIONAL];
}
// Dataproc Node Group.
// **The Dataproc `NodeGroup` resource is not related to the
// Dataproc [NodeGroupAffinity][google.cloud.dataproc.v1.NodeGroupAffinity]
// resource.**
message NodeGroup {
  option (google.api.resource) = {
    type: "dataproc.googleapis.com/NodeGroup"
    pattern: "projects/{project}/regions/{region}/clusters/{cluster}/nodeGroups/{node_group}"
  };

  // Node pool roles.
  enum Role {
    // Required unspecified role.
    ROLE_UNSPECIFIED = 0;

    // Job drivers run on the node pool.
    DRIVER = 1;
  }

  // The Node group [resource name](https://aip.dev/122).
  string name = 1;

  // Required. Node group roles.
  repeated Role roles = 2 [(google.api.field_behavior) = REQUIRED];

  // Optional. The node group instance group configuration.
  InstanceGroupConfig node_group_config = 3
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Node group labels.
  //
  // * Label **keys** must consist of from 1 to 63 characters and conform to
  //   [RFC 1035](https://www.ietf.org/rfc/rfc1035.txt).
  // * Label **values** can be empty. If specified, they must consist of from
  //   1 to 63 characters and conform to [RFC 1035]
  //   (https://www.ietf.org/rfc/rfc1035.txt).
  // * The node group must have no more than 32 labels.
  //
  // Restored the `<string, string>` type parameters that were lost in
  // extraction: a bare `map` is not valid proto3 syntax.
  map<string, string> labels = 4 [(google.api.field_behavior) = OPTIONAL];
}
// Specifies an executable to run on a fully configured node and a
// timeout period for executable completion.
message NodeInitializationAction {
  // Required. Cloud Storage URI of executable file.
  string executable_file = 1 [(google.api.field_behavior) = REQUIRED];

  // Optional. Amount of time executable has to complete. Default is
  // 10 minutes (see JSON representation of
  // [Duration](https://developers.google.com/protocol-buffers/docs/proto3#json)).
  //
  // Cluster creation fails with an explanatory error message (the
  // name of the executable that caused the error and the exceeded timeout
  // period) if the executable is not completed at end of the timeout period.
  google.protobuf.Duration execution_timeout = 2
      [(google.api.field_behavior) = OPTIONAL];
}
// The status of a cluster and its instances.
message ClusterStatus {
  // The cluster state.
  enum State {
    // The cluster state is unknown.
    UNKNOWN = 0;

    // The cluster is being created and set up. It is not ready for use.
    CREATING = 1;

    // The cluster is currently running and healthy. It is ready for use.
    //
    // **Note:** The cluster state changes from "creating" to "running" status
    // after the master node(s), first two primary worker nodes (and the last
    // primary worker node if primary workers > 2) are running.
    RUNNING = 2;

    // The cluster encountered an error. It is not ready for use.
    ERROR = 3;

    // The cluster has encountered an error while being updated. Jobs can
    // be submitted to the cluster, but the cluster cannot be updated.
    ERROR_DUE_TO_UPDATE = 9;

    // The cluster is being deleted. It cannot be used.
    DELETING = 4;

    // The cluster is being updated. It continues to accept and process jobs.
    UPDATING = 5;

    // The cluster is being stopped. It cannot be used.
    STOPPING = 6;

    // The cluster is currently stopped. It is not ready for use.
    STOPPED = 7;

    // The cluster is being started. It is not ready for use.
    STARTING = 8;

    // The cluster is being repaired. It is not ready for use.
    REPAIRING = 10;
  }

  // The cluster substate.
  enum Substate {
    // The cluster substate is unknown.
    UNSPECIFIED = 0;

    // The cluster is known to be in an unhealthy state
    // (for example, critical daemons are not running or HDFS capacity is
    // exhausted).
    //
    // Applies to RUNNING state.
    UNHEALTHY = 1;

    // The agent-reported status is out of date (may occur if
    // Dataproc loses communication with Agent).
    //
    // Applies to RUNNING state.
    STALE_STATUS = 2;
  }

  // Output only. The cluster's state.
  State state = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Optional. Output only. Details of cluster's state.
  string detail = 2 [
    (google.api.field_behavior) = OUTPUT_ONLY,
    (google.api.field_behavior) = OPTIONAL
  ];

  // Output only. Time when this state was entered (see JSON representation of
  // [Timestamp](https://developers.google.com/protocol-buffers/docs/proto3#json)).
  google.protobuf.Timestamp state_start_time = 3
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Additional state information that includes
  // status reported by the agent.
  Substate substate = 4 [(google.api.field_behavior) = OUTPUT_ONLY];
}
// Security related configuration, including encryption, Kerberos, etc.
message SecurityConfig {
  // Optional. Kerberos related configuration.
  KerberosConfig kerberos_config = 1 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Identity related configuration, including service account based
  // secure multi-tenancy user mappings.
  IdentityConfig identity_config = 2 [(google.api.field_behavior) = OPTIONAL];
}
// Specifies Kerberos related configuration.
message KerberosConfig {
  // Optional. Flag to indicate whether to Kerberize the cluster (default:
  // false). Set this field to true to enable Kerberos on a cluster.
  bool enable_kerberos = 1 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The Cloud Storage URI of a KMS encrypted file containing the
  // root principal password.
  string root_principal_password_uri = 2
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. The URI of the KMS key used to encrypt sensitive
  // files.
  string kms_key_uri = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The Cloud Storage URI of the keystore file used for SSL
  // encryption. If not provided, Dataproc will provide a self-signed
  // certificate.
  string keystore_uri = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The Cloud Storage URI of the truststore file used for SSL
  // encryption. If not provided, Dataproc will provide a self-signed
  // certificate.
  string truststore_uri = 5 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The Cloud Storage URI of a KMS encrypted file containing the
  // password to the user provided keystore. For the self-signed certificate,
  // this password is generated by Dataproc.
  string keystore_password_uri = 6 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The Cloud Storage URI of a KMS encrypted file containing the
  // password to the user provided key. For the self-signed certificate, this
  // password is generated by Dataproc.
  string key_password_uri = 7 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The Cloud Storage URI of a KMS encrypted file containing the
  // password to the user provided truststore. For the self-signed certificate,
  // this password is generated by Dataproc.
  string truststore_password_uri = 8 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The remote realm the Dataproc on-cluster KDC will trust, should
  // the user enable cross realm trust.
  string cross_realm_trust_realm = 9 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The KDC (IP or hostname) for the remote trusted realm in a cross
  // realm trust relationship.
  string cross_realm_trust_kdc = 10 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The admin server (IP or hostname) for the remote trusted realm
  // in a cross realm trust relationship.
  string cross_realm_trust_admin_server = 11
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. The Cloud Storage URI of a KMS encrypted file containing the
  // shared password between the on-cluster Kerberos realm and the remote
  // trusted realm, in a cross realm trust relationship.
  string cross_realm_trust_shared_password_uri = 12
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. The Cloud Storage URI of a KMS encrypted file containing the
  // master key of the KDC database.
  string kdc_db_key_uri = 13 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The lifetime of the ticket granting ticket, in hours.
  // If not specified, or user specifies 0, then default value 10
  // will be used.
  int32 tgt_lifetime_hours = 14 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The name of the on-cluster Kerberos realm.
  // If not specified, the uppercased domain of hostnames will be the realm.
  string realm = 15 [(google.api.field_behavior) = OPTIONAL];
}
// Identity related configuration, including service account based
// secure multi-tenancy user mappings.
message IdentityConfig {
  // Required. Map of user to service account.
  //
  // Restored the `<string, string>` type parameters that were lost in
  // extraction: a bare `map` is not valid proto3 syntax.
  map<string, string> user_service_account_mapping = 1
      [(google.api.field_behavior) = REQUIRED];
}
// Specifies the selection and config of software inside the cluster.
message SoftwareConfig {
  // Optional. The version of software inside the cluster. It must be one of
  // the supported [Dataproc
  // Versions](https://cloud.google.com/dataproc/docs/concepts/versioning/dataproc-versions#supported-dataproc-image-versions),
  // such as "1.2" (including a subminor version, such as "1.2.29"), or the
  // ["preview"
  // version](https://cloud.google.com/dataproc/docs/concepts/versioning/dataproc-versions#other_versions).
  // If unspecified, it defaults to the latest Debian version.
  string image_version = 1 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The properties to set on daemon config files.
  //
  // Property keys are specified in `prefix:property` format, for example
  // `core:hadoop.tmp.dir`. The following are supported prefixes
  // and their mappings:
  //
  // * capacity-scheduler: `capacity-scheduler.xml`
  // * core: `core-site.xml`
  // * distcp: `distcp-default.xml`
  // * hdfs: `hdfs-site.xml`
  // * hive: `hive-site.xml`
  // * mapred: `mapred-site.xml`
  // * pig: `pig.properties`
  // * spark: `spark-defaults.conf`
  // * yarn: `yarn-site.xml`
  //
  // For more information, see [Cluster
  // properties](https://cloud.google.com/dataproc/docs/concepts/cluster-properties).
  //
  // Restored the `<string, string>` type parameters that were lost in
  // extraction: a bare `map` is not valid proto3 syntax.
  map<string, string> properties = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The set of components to activate on the cluster.
  repeated Component optional_components = 3
      [(google.api.field_behavior) = OPTIONAL];
}
// Specifies the cluster auto-delete schedule configuration.
message LifecycleConfig {
  // Optional. The duration to keep the cluster alive while idling (when no
  // jobs are running). Passing this threshold will cause the cluster to be
  // deleted. Minimum value is 5 minutes; maximum value is 14 days (see JSON
  // representation of
  // [Duration](https://developers.google.com/protocol-buffers/docs/proto3#json)).
  google.protobuf.Duration idle_delete_ttl = 1
      [(google.api.field_behavior) = OPTIONAL];

  // Either the exact time the cluster should be deleted at or
  // the cluster maximum age.
  oneof ttl {
    // Optional. The time when cluster will be auto-deleted (see JSON
    // representation of
    // [Timestamp](https://developers.google.com/protocol-buffers/docs/proto3#json)).
    google.protobuf.Timestamp auto_delete_time = 2
        [(google.api.field_behavior) = OPTIONAL];

    // Optional. The lifetime duration of cluster. The cluster will be
    // auto-deleted at the end of this period. Minimum value is 10 minutes;
    // maximum value is 14 days (see JSON representation of
    // [Duration](https://developers.google.com/protocol-buffers/docs/proto3#json)).
    google.protobuf.Duration auto_delete_ttl = 3
        [(google.api.field_behavior) = OPTIONAL];
  }

  // Output only. The time when cluster became idle (most recent job finished)
  // and became eligible for deletion due to idleness (see JSON representation
  // of
  // [Timestamp](https://developers.google.com/protocol-buffers/docs/proto3#json)).
  google.protobuf.Timestamp idle_start_time = 4
      [(google.api.field_behavior) = OUTPUT_ONLY];
}
// Specifies a Metastore configuration.
message MetastoreConfig {
  // Required. Resource name of an existing Dataproc Metastore service.
  //
  // Example:
  //
  // * `projects/[project_id]/locations/[dataproc_region]/services/[service-name]`
  string dataproc_metastore_service = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "metastore.googleapis.com/Service"
    }
  ];
}
// Contains cluster daemon metrics, such as HDFS and YARN stats.
//
// **Beta Feature**: This report is available for testing purposes only. It may
// be changed before final release.
message ClusterMetrics {
  // The HDFS metrics.
  //
  // Restored the map type parameters lost in extraction (a bare `map` is not
  // valid proto3). Value types follow the published Dataproc v1 API
  // (`double` for HDFS, `int64` for YARN) — confirm against upstream.
  map<string, double> hdfs_metrics = 1;

  // YARN metrics.
  map<string, int64> yarn_metrics = 2;
}
// Dataproc metric config.
message DataprocMetricConfig {
  // A source for the collection of Dataproc custom metrics (see [Custom
  // metrics]
  // (https://cloud.google.com//dataproc/docs/guides/dataproc-metrics#custom_metrics)).
  enum MetricSource {
    // Required unspecified metric source.
    METRIC_SOURCE_UNSPECIFIED = 0;

    // Monitoring agent metrics. If this source is enabled,
    // Dataproc enables the monitoring agent in Compute Engine,
    // and collects monitoring agent metrics, which are published
    // with an `agent.googleapis.com` prefix.
    MONITORING_AGENT_DEFAULTS = 1;

    // HDFS metric source.
    HDFS = 2;

    // Spark metric source.
    SPARK = 3;

    // YARN metric source.
    YARN = 4;

    // Spark History Server metric source.
    SPARK_HISTORY_SERVER = 5;

    // Hiveserver2 metric source.
    HIVESERVER2 = 6;

    // hivemetastore metric source
    HIVEMETASTORE = 7;

    // flink metric source
    FLINK = 8;
  }

  // A Dataproc custom metric.
  message Metric {
    // Required. A standard set of metrics is collected unless `metricOverrides`
    // are specified for the metric source (see [Custom metrics]
    // (https://cloud.google.com/dataproc/docs/guides/dataproc-metrics#custom_metrics)
    // for more information).
    MetricSource metric_source = 1 [(google.api.field_behavior) = REQUIRED];

    // Optional. Specify one or more [Custom metrics]
    // (https://cloud.google.com/dataproc/docs/guides/dataproc-metrics#custom_metrics)
    // to collect for the metric source (for the `SPARK` metric source, any
    // [Spark metric]
    // (https://spark.apache.org/docs/latest/monitoring.html#metrics) can be
    // specified).
    //
    // Provide metrics in the following format:
    // METRIC_SOURCE:INSTANCE:GROUP:METRIC
    // Use camelcase as appropriate.
    //
    // Examples:
    //
    // ```
    // yarn:ResourceManager:QueueMetrics:AppsCompleted
    // spark:driver:DAGScheduler:job.allJobs
    // sparkHistoryServer:JVM:Memory:NonHeapMemoryUsage.committed
    // hiveserver2:JVM:Memory:NonHeapMemoryUsage.used
    // ```
    //
    // Notes:
    //
    // * Only the specified overridden metrics are collected for the
    //   metric source. For example, if one or more `spark:executive` metrics
    //   are listed as metric overrides, other `SPARK` metrics are not
    //   collected. The collection of the metrics for other enabled custom
    //   metric sources is unaffected. For example, if both `SPARK` and `YARN`
    //   metric sources are enabled, and overrides are provided for Spark
    //   metrics only, all YARN metrics are collected.
    repeated string metric_overrides = 2
        [(google.api.field_behavior) = OPTIONAL];
  }

  // Required. Metrics sources to enable.
  repeated Metric metrics = 1 [(google.api.field_behavior) = REQUIRED];
}
// A request to create a cluster.
message CreateClusterRequest {
  // Required. The ID of the Google Cloud Platform project that the cluster
  // belongs to.
  string project_id = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The Dataproc region in which to handle the request.
  string region = 3 [(google.api.field_behavior) = REQUIRED];

  // Required. The cluster to create.
  Cluster cluster = 2 [(google.api.field_behavior) = REQUIRED];

  // Optional. A unique ID used to identify the request. If the server receives
  // two
  // [CreateClusterRequest](https://cloud.google.com/dataproc/docs/reference/rpc/google.cloud.dataproc.v1#google.cloud.dataproc.v1.CreateClusterRequest)s
  // with the same id, then the second request will be ignored and the
  // first [google.longrunning.Operation][google.longrunning.Operation] created
  // and stored in the backend is returned.
  //
  // It is recommended to always set this value to a
  // [UUID](https://en.wikipedia.org/wiki/Universally_unique_identifier).
  //
  // The ID must contain only letters (a-z, A-Z), numbers (0-9),
  // underscores (_), and hyphens (-). The maximum length is 40 characters.
  string request_id = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Failure action when primary worker creation fails.
  FailureAction action_on_failed_primary_workers = 5
      [(google.api.field_behavior) = OPTIONAL];
}
// A request to update a cluster.
message UpdateClusterRequest {
  // Required. The ID of the Google Cloud Platform project the
  // cluster belongs to.
  string project_id = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The Dataproc region in which to handle the request.
  string region = 5 [(google.api.field_behavior) = REQUIRED];

  // Required. The cluster name.
  string cluster_name = 2 [(google.api.field_behavior) = REQUIRED];

  // Required. The changes to the cluster.
  Cluster cluster = 3 [(google.api.field_behavior) = REQUIRED];

  // Optional. Timeout for graceful YARN decommissioning. Graceful
  // decommissioning allows removing nodes from the cluster without
  // interrupting jobs in progress. Timeout specifies how long to wait for jobs
  // in progress to finish before forcefully removing nodes (and potentially
  // interrupting jobs). Default timeout is 0 (for forceful decommission), and
  // the maximum allowed timeout is 1 day. (see JSON representation of
  // [Duration](https://developers.google.com/protocol-buffers/docs/proto3#json)).
  //
  // Only supported on Dataproc image versions 1.2 and higher.
  google.protobuf.Duration graceful_decommission_timeout = 6
      [(google.api.field_behavior) = OPTIONAL];

  // Required. Specifies the path, relative to `Cluster`, of
  // the field to update. For example, to change the number of workers
  // in a cluster to 5, the `update_mask` parameter would be
  // specified as `config.worker_config.num_instances`,
  // and the `PATCH` request body would specify the new value, as follows:
  //
  //     {
  //       "config":{
  //         "workerConfig":{
  //           "numInstances":"5"
  //         }
  //       }
  //     }
  //
  // Similarly, to change the number of preemptible workers in a cluster to 5,
  // the `update_mask` parameter would be
  // `config.secondary_worker_config.num_instances`, and the `PATCH` request
  // body would be set as follows:
  //
  //     {
  //       "config":{
  //         "secondaryWorkerConfig":{
  //           "numInstances":"5"
  //         }
  //       }
  //     }
  //
  // Note: Currently, only the following fields can be updated
  // (mask -> purpose):
  //
  // * `labels`: Update labels
  // * `config.worker_config.num_instances`: Resize primary worker group
  // * `config.secondary_worker_config.num_instances`: Resize secondary
  //   worker group
  // * `config.autoscaling_config.policy_uri`: Use, stop using, or change
  //   autoscaling policies
  google.protobuf.FieldMask update_mask = 4
      [(google.api.field_behavior) = REQUIRED];

  // Optional. A unique ID used to identify the request. If the server
  // receives two
  // [UpdateClusterRequest](https://cloud.google.com/dataproc/docs/reference/rpc/google.cloud.dataproc.v1#google.cloud.dataproc.v1.UpdateClusterRequest)s
  // with the same id, then the second request will be ignored and the
  // first [google.longrunning.Operation][google.longrunning.Operation] created
  // and stored in the backend is returned.
  //
  // It is recommended to always set this value to a
  // [UUID](https://en.wikipedia.org/wiki/Universally_unique_identifier).
  //
  // The ID must contain only letters (a-z, A-Z), numbers (0-9),
  // underscores (_), and hyphens (-). The maximum length is 40 characters.
  string request_id = 7 [(google.api.field_behavior) = OPTIONAL];
}
// A request to stop a cluster.
message StopClusterRequest {
  // Required. The ID of the Google Cloud Platform project the
  // cluster belongs to.
  string project_id = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The Dataproc region in which to handle the request.
  string region = 2 [(google.api.field_behavior) = REQUIRED];

  // Required. The cluster name.
  string cluster_name = 3 [(google.api.field_behavior) = REQUIRED];

  // Optional. Specifying the `cluster_uuid` means the RPC will fail
  // (with error NOT_FOUND) if a cluster with the specified UUID does not
  // exist.
  string cluster_uuid = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. A unique ID used to identify the request. If the server
  // receives two
  // [StopClusterRequest](https://cloud.google.com/dataproc/docs/reference/rpc/google.cloud.dataproc.v1#google.cloud.dataproc.v1.StopClusterRequest)s
  // with the same id, then the second request will be ignored and the
  // first [google.longrunning.Operation][google.longrunning.Operation] created
  // and stored in the backend is returned.
  //
  // Recommendation: Set this value to a
  // [UUID](https://en.wikipedia.org/wiki/Universally_unique_identifier).
  //
  // The ID must contain only letters (a-z, A-Z), numbers (0-9),
  // underscores (_), and hyphens (-). The maximum length is 40 characters.
  string request_id = 5 [(google.api.field_behavior) = OPTIONAL];
}
// A request to start a cluster.
message StartClusterRequest {
  // Required. The ID of the Google Cloud Platform project the
  // cluster belongs to.
  string project_id = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The Dataproc region in which to handle the request.
  string region = 2 [(google.api.field_behavior) = REQUIRED];

  // Required. The cluster name.
  string cluster_name = 3 [(google.api.field_behavior) = REQUIRED];

  // Optional. Specifying the `cluster_uuid` means the RPC will fail
  // (with error NOT_FOUND) if a cluster with the specified UUID does not
  // exist.
  string cluster_uuid = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. A unique ID used to identify the request. If the server
  // receives two
  // [StartClusterRequest](https://cloud.google.com/dataproc/docs/reference/rpc/google.cloud.dataproc.v1#google.cloud.dataproc.v1.StartClusterRequest)s
  // with the same id, then the second request will be ignored and the
  // first [google.longrunning.Operation][google.longrunning.Operation] created
  // and stored in the backend is returned.
  //
  // Recommendation: Set this value to a
  // [UUID](https://en.wikipedia.org/wiki/Universally_unique_identifier).
  //
  // The ID must contain only letters (a-z, A-Z), numbers (0-9),
  // underscores (_), and hyphens (-). The maximum length is 40 characters.
  string request_id = 5 [(google.api.field_behavior) = OPTIONAL];
}
// A request to delete a cluster.
message DeleteClusterRequest {
  // Required. The ID of the Google Cloud Platform project that the cluster
  // belongs to.
  string project_id = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The Dataproc region in which to handle the request.
  string region = 3 [(google.api.field_behavior) = REQUIRED];

  // Required. The cluster name.
  string cluster_name = 2 [(google.api.field_behavior) = REQUIRED];

  // Optional. Specifying the `cluster_uuid` means the RPC should fail
  // (with error NOT_FOUND) if a cluster with the specified UUID does not
  // exist.
  string cluster_uuid = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. A unique ID used to identify the request. If the server
  // receives two
  // [DeleteClusterRequest](https://cloud.google.com/dataproc/docs/reference/rpc/google.cloud.dataproc.v1#google.cloud.dataproc.v1.DeleteClusterRequest)s
  // with the same id, then the second request will be ignored and the
  // first [google.longrunning.Operation][google.longrunning.Operation] created
  // and stored in the backend is returned.
  //
  // It is recommended to always set this value to a
  // [UUID](https://en.wikipedia.org/wiki/Universally_unique_identifier).
  //
  // The ID must contain only letters (a-z, A-Z), numbers (0-9),
  // underscores (_), and hyphens (-). The maximum length is 40 characters.
  string request_id = 5 [(google.api.field_behavior) = OPTIONAL];
}
// Request to get the resource representation for a cluster in a project.
message GetClusterRequest {
  // Required. The ID of the Google Cloud Platform project that the cluster
  // belongs to.
  string project_id = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The Dataproc region in which to handle the request.
  string region = 3 [(google.api.field_behavior) = REQUIRED];

  // Required. The cluster name.
  string cluster_name = 2 [(google.api.field_behavior) = REQUIRED];
}
// A request to list the clusters in a project.
message ListClustersRequest {
  // Required. The ID of the Google Cloud Platform project that the cluster
  // belongs to.
  string project_id = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The Dataproc region in which to handle the request.
  string region = 4 [(google.api.field_behavior) = REQUIRED];

  // Optional. A filter constraining the clusters to list. Filters are
  // case-sensitive and have the following syntax:
  //
  //     field = value [AND [field = value]] ...
  //
  // where **field** is one of `status.state`, `clusterName`, or `labels.[KEY]`,
  // and `[KEY]` is a label key. **value** can be `*` to match all values.
  // `status.state` can be one of the following: `ACTIVE`, `INACTIVE`,
  // `CREATING`, `RUNNING`, `ERROR`, `DELETING`, `UPDATING`, `STOPPING`, or
  // `STOPPED`. `ACTIVE` contains the `CREATING`, `UPDATING`, and `RUNNING`
  // states. `INACTIVE` contains the `DELETING`, `ERROR`, `STOPPING`, and
  // `STOPPED` states. `clusterName` is the name of the cluster provided at
  // creation time. Only the logical `AND` operator is supported;
  // space-separated items are treated as having an implicit `AND` operator.
  //
  // Example filter:
  //
  //     status.state = ACTIVE AND clusterName = mycluster
  //     AND labels.env = staging AND labels.starred = *
  string filter = 5 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The standard List page size.
  int32 page_size = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The standard List page token.
  string page_token = 3 [(google.api.field_behavior) = OPTIONAL];
}
// The list of all clusters in a project.
message ListClustersResponse {
  // Output only. The clusters in the project.
  repeated Cluster clusters = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. This token is included in the response if there are more
  // results to fetch. To fetch additional results, provide this value as the
  // `page_token` in a subsequent `ListClustersRequest`.
  string next_page_token = 2 [(google.api.field_behavior) = OUTPUT_ONLY];
}
// A request to collect cluster diagnostic information.
message DiagnoseClusterRequest {
  // Defines who has access to the diagnostic tarball.
  enum TarballAccess {
    // Tarball access unspecified. Falls back to the default access of the
    // bucket.
    TARBALL_ACCESS_UNSPECIFIED = 0;

    // Google Cloud Support group has read access to the
    // diagnostic tarball.
    GOOGLE_CLOUD_SUPPORT = 1;

    // Google Cloud Dataproc Diagnose service account has read access to the
    // diagnostic tarball.
    GOOGLE_DATAPROC_DIAGNOSE = 2;
  }

  // Required. The ID of the Google Cloud Platform project that the cluster
  // belongs to.
  string project_id = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The Dataproc region in which to handle the request.
  string region = 3 [(google.api.field_behavior) = REQUIRED];

  // Required. The cluster name.
  string cluster_name = 2 [(google.api.field_behavior) = REQUIRED];

  // Optional. The output Cloud Storage directory for the diagnostic
  // tarball. If not specified, a task-specific directory in the cluster's
  // staging bucket will be used.
  string tarball_gcs_dir = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The access type to the diagnostic tarball. If not
  // specified, falls back to the default access of the bucket.
  TarballAccess tarball_access = 5 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Time interval in which diagnosis should be carried out on the
  // cluster.
  google.type.Interval diagnosis_interval = 6
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Specifies a list of jobs on which diagnosis is to be performed.
  // Format: projects/{project}/regions/{region}/jobs/{job}
  repeated string jobs = 10 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Specifies a list of yarn applications on which diagnosis is to be
  // performed.
  repeated string yarn_application_ids = 11
      [(google.api.field_behavior) = OPTIONAL];
}
// The location of diagnostic output.
message DiagnoseClusterResults {
  // Output only. The Cloud Storage URI of the diagnostic output.
  // The output report is a plain text file with a summary of collected
  // diagnostics.
  string output_uri = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
}
// Reservation Affinity for consuming Zonal reservation.
message ReservationAffinity {
  // Indicates whether to consume capacity from a reservation or not.
  enum Type {
    // Reservation consumption type unspecified.
    TYPE_UNSPECIFIED = 0;

    // Do not consume from any allocated capacity.
    NO_RESERVATION = 1;

    // Consume any reservation available.
    ANY_RESERVATION = 2;

    // Must consume from a specific reservation. Must specify key value fields
    // for specifying the reservations.
    SPECIFIC_RESERVATION = 3;
  }

  // Optional. Type of reservation to consume.
  Type consume_reservation_type = 1 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Corresponds to the label key of reservation resource.
  string key = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Corresponds to the label values of reservation resource.
  repeated string values = 3 [(google.api.field_behavior) = OPTIONAL];
}
// NOTE(review): the line below is website-footer residue from the page this
// file was extracted from, not part of the proto; commented out so the file
// remains syntactically valid. Remove once confirmed against the upstream
// googleapis source.
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy