// All downloads are free. Search and download functionality uses the official Maven repository.

// google.cloud.dataproc.v1.batches.proto Maven / Gradle / Ivy

// There is a newer version: 4.53.0
// Show newest version
// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.dataproc.v1;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/dataproc/v1/shared.proto";
import "google/longrunning/operations.proto";
import "google/protobuf/empty.proto";
import "google/protobuf/timestamp.proto";

option go_package = "cloud.google.com/go/dataproc/v2/apiv1/dataprocpb;dataprocpb";
option java_multiple_files = true;
option java_outer_classname = "BatchesProto";
option java_package = "com.google.cloud.dataproc.v1";

// BatchController provides the methods used to manage batch workloads.
service BatchController {
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-platform";
  option (google.api.default_host) = "dataproc.googleapis.com";

  // Creates a batch workload. The workload executes asynchronously, and the
  // call returns a long-running operation.
  rpc CreateBatch(CreateBatchRequest) returns (google.longrunning.Operation) {
    option (google.longrunning.operation_info) = {
      response_type: "Batch"
      metadata_type: "google.cloud.dataproc.v1.BatchOperationMetadata"
    };
    option (google.api.method_signature) = "parent,batch,batch_id";
    option (google.api.http) = {
      post: "/v1/{parent=projects/*/locations/*}/batches"
      body: "batch"
    };
  }

  // Returns the resource representation of a batch workload.
  rpc GetBatch(GetBatchRequest) returns (Batch) {
    option (google.api.method_signature) = "name";
    option (google.api.http) = {
      get: "/v1/{name=projects/*/locations/*/batches/*}"
    };
  }

  // Lists batch workloads.
  rpc ListBatches(ListBatchesRequest) returns (ListBatchesResponse) {
    option (google.api.method_signature) = "parent";
    option (google.api.http) = {
      get: "/v1/{parent=projects/*/locations/*}/batches"
    };
  }

  // Deletes the batch workload resource. The delete fails, and the response
  // returns `FAILED_PRECONDITION`, if the batch is not in a terminal state.
  rpc DeleteBatch(DeleteBatchRequest) returns (google.protobuf.Empty) {
    option (google.api.method_signature) = "name";
    option (google.api.http) = {
      delete: "/v1/{name=projects/*/locations/*/batches/*}"
    };
  }
}

// Request message for creating a batch workload.
message CreateBatchRequest {
  // Required. The parent resource under which this batch will be created.
  string parent = 1 [
    (google.api.resource_reference) = {
      child_type: "dataproc.googleapis.com/Batch"
    },
    (google.api.field_behavior) = REQUIRED
  ];

  // Required. The batch to create.
  Batch batch = 2 [
    (google.api.field_behavior) = REQUIRED
  ];

  // Optional. The ID to use for the batch. It becomes the final component of
  // the batch's resource name.
  //
  // This value must be 4-63 characters. Valid characters are `/[a-z][0-9]-/`.
  string batch_id = 3 [
    (google.api.field_behavior) = OPTIONAL
  ];

  // Optional. A unique ID that identifies the request. If the service
  // receives two
  // [CreateBatchRequest](https://cloud.google.com/dataproc/docs/reference/rpc/google.cloud.dataproc.v1#google.cloud.dataproc.v1.CreateBatchRequest)s
  // carrying the same request_id, the second request is ignored and the
  // Operation corresponding to the first Batch created and stored in the
  // backend is returned.
  //
  // Recommendation: Set this value to a
  // [UUID](https://en.wikipedia.org/wiki/Universally_unique_identifier).
  //
  // The value may contain only letters (a-z, A-Z), numbers (0-9),
  // underscores (_), and hyphens (-). Its maximum length is 40 characters.
  string request_id = 4 [
    (google.api.field_behavior) = OPTIONAL
  ];
}

// Request message for fetching the resource representation of a batch
// workload.
message GetBatchRequest {
  // Required. The fully qualified name of the batch to retrieve, in the
  // format
  // "projects/PROJECT_ID/locations/DATAPROC_REGION/batches/BATCH_ID"
  string name = 1 [
    (google.api.resource_reference) = { type: "dataproc.googleapis.com/Batch" },
    (google.api.field_behavior) = REQUIRED
  ];
}

// Request message for listing the batch workloads in a project.
message ListBatchesRequest {
  // Required. The parent that owns this collection of batches.
  string parent = 1 [
    (google.api.resource_reference) = {
      child_type: "dataproc.googleapis.com/Batch"
    },
    (google.api.field_behavior) = REQUIRED
  ];

  // Optional. The maximum number of batches to return in each response.
  // The service may return fewer than this value.
  // The default page size is 20, and the maximum page size is 1000.
  int32 page_size = 2 [
    (google.api.field_behavior) = OPTIONAL
  ];

  // Optional. A page token received from a previous `ListBatches` call.
  // Supply this token to retrieve the subsequent page.
  string page_token = 3 [
    (google.api.field_behavior) = OPTIONAL
  ];

  // Optional. A filter for the batches to return in the response.
  //
  // A filter is a logical expression that constrains the values of various
  // fields in each batch resource. Filters are case sensitive, and may
  // contain multiple clauses combined with logical operators (AND/OR).
  // Supported fields are `batch_id`, `batch_uuid`, `state`, and `create_time`.
  //
  // For example, `state = RUNNING and create_time < "2023-01-01T00:00:00Z"`
  // selects batches in state RUNNING that were created before 2023-01-01.
  //
  // See https://google.aip.dev/assets/misc/ebnf-filtering.txt for a detailed
  // description of the filter syntax and a list of supported comparisons.
  string filter = 4 [
    (google.api.field_behavior) = OPTIONAL
  ];

  // Optional. Field(s) on which to sort the list of batches.
  //
  // Currently the only supported sort orders are unspecified (empty) and
  // `create_time desc`, which sorts the most recently created batches first.
  //
  // See https://google.aip.dev/132#ordering for more details.
  string order_by = 5 [
    (google.api.field_behavior) = OPTIONAL
  ];
}

// Response message carrying a list of batch workloads.
message ListBatchesResponse {
  // The batches from the specified collection.
  repeated Batch batches = 1;

  // A token that can be sent as `page_token` to retrieve the next page.
  // When this field is omitted, there are no subsequent pages.
  string next_page_token = 2;

  // Output only. List of Batches that could not be included in the response.
  // Attempting to get one of these resources may indicate why it was left out
  // of the list response.
  repeated string unreachable = 3 [
    (google.api.field_behavior) = OUTPUT_ONLY
  ];
}

// A request to delete a batch workload.
message DeleteBatchRequest {
  // Required. The fully qualified name of the batch to delete
  // in the format
  // "projects/PROJECT_ID/locations/DATAPROC_REGION/batches/BATCH_ID"
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = { type: "dataproc.googleapis.com/Batch" }
  ];
}

// A representation of a batch workload in the service.
message Batch {
  option (google.api.resource) = {
    type: "dataproc.googleapis.com/Batch"
    pattern: "projects/{project}/locations/{location}/batches/{batch}"
  };

  // The batch state.
  enum State {
    // The batch state is unknown.
    STATE_UNSPECIFIED = 0;

    // The batch is created before running.
    PENDING = 1;

    // The batch is running.
    RUNNING = 2;

    // The batch is cancelling.
    CANCELLING = 3;

    // The batch cancellation was successful.
    CANCELLED = 4;

    // The batch completed successfully.
    SUCCEEDED = 5;

    // The batch is no longer running due to an error.
    FAILED = 6;
  }

  // Historical state information.
  message StateHistory {
    // Output only. The state of the batch at this point in history.
    State state = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. Details about the state at this point in history.
    string state_message = 2 [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. The time when the batch entered the historical state.
    google.protobuf.Timestamp state_start_time = 3
        [(google.api.field_behavior) = OUTPUT_ONLY];
  }

  // Output only. The resource name of the batch.
  string name = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. A batch UUID (Universally Unique Identifier). The service
  // generates this value when it creates the batch.
  string uuid = 2 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The time when the batch was created.
  google.protobuf.Timestamp create_time = 3
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // The application/framework-specific portion of the batch configuration.
  // At most one of the following workload types can be set.
  oneof batch_config {
    // Optional. PySpark batch config.
    PySparkBatch pyspark_batch = 4 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Spark batch config.
    SparkBatch spark_batch = 5 [(google.api.field_behavior) = OPTIONAL];

    // Optional. SparkR batch config.
    SparkRBatch spark_r_batch = 6 [(google.api.field_behavior) = OPTIONAL];

    // Optional. SparkSql batch config.
    SparkSqlBatch spark_sql_batch = 7 [(google.api.field_behavior) = OPTIONAL];
  }

  // Output only. Runtime information about batch execution.
  RuntimeInfo runtime_info = 8 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The state of the batch.
  State state = 9 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Batch state details, such as a failure
  // description if the state is `FAILED`.
  string state_message = 10 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The time when the batch entered its current state.
  google.protobuf.Timestamp state_time = 11
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The email address of the user who created the batch.
  string creator = 12 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Optional. The labels to associate with this batch.
  // Label **keys** must contain 1 to 63 characters, and must conform to
  // [RFC 1035](https://www.ietf.org/rfc/rfc1035.txt).
  // Label **values** may be empty, but, if present, must contain 1 to 63
  // characters, and must conform to [RFC
  // 1035](https://www.ietf.org/rfc/rfc1035.txt). No more than 32 labels can be
  // associated with a batch.
  map<string, string> labels = 13 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Runtime configuration for the batch execution.
  RuntimeConfig runtime_config = 14 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Environment configuration for the batch execution.
  EnvironmentConfig environment_config = 15
      [(google.api.field_behavior) = OPTIONAL];

  // Output only. The resource name of the operation associated with this batch.
  string operation = 16 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Historical state information for the batch.
  repeated StateHistory state_history = 17
      [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Configuration for running an
// [Apache
// PySpark](https://spark.apache.org/docs/latest/api/python/getting_started/quickstart.html)
// batch workload.
message PySparkBatch {
  // Required. The HCFS URI of the main Python file to use as the Spark
  // driver. Must be a .py file.
  string main_python_file_uri = 1 [
    (google.api.field_behavior) = REQUIRED
  ];

  // Optional. The arguments to pass to the driver. Do not include arguments
  // that can be set as batch properties, such as `--conf`, because a
  // collision can occur that causes an incorrect batch submission.
  repeated string args = 2 [
    (google.api.field_behavior) = OPTIONAL
  ];

  // Optional. HCFS file URIs of Python files to pass to the PySpark
  // framework. Supported file types: `.py`, `.egg`, and `.zip`.
  repeated string python_file_uris = 3 [
    (google.api.field_behavior) = OPTIONAL
  ];

  // Optional. HCFS URIs of jar files to add to the classpath of the
  // Spark driver and tasks.
  repeated string jar_file_uris = 4 [
    (google.api.field_behavior) = OPTIONAL
  ];

  // Optional. HCFS URIs of files to be placed in the working directory of
  // each executor.
  repeated string file_uris = 5 [
    (google.api.field_behavior) = OPTIONAL
  ];

  // Optional. HCFS URIs of archives to be extracted into the working
  // directory of each executor. Supported file types:
  // `.jar`, `.tar`, `.tar.gz`, `.tgz`, and `.zip`.
  repeated string archive_uris = 6 [
    (google.api.field_behavior) = OPTIONAL
  ];
}

// Configuration for running an [Apache Spark](https://spark.apache.org/)
// batch workload.
message SparkBatch {
  // Specifies the main method to call to drive the Spark workload. Provide
  // either the jar file that contains the main class or the main class name.
  // To pass both a main jar and a main class in that jar, add the jar to
  // `jar_file_uris`, and then specify the main class name in `main_class`.
  oneof driver {
    // Optional. The HCFS URI of the jar file that contains the main class.
    string main_jar_file_uri = 1 [
      (google.api.field_behavior) = OPTIONAL
    ];

    // Optional. The name of the driver main class. The jar file containing
    // the class must be in the classpath or specified in `jar_file_uris`.
    string main_class = 2 [
      (google.api.field_behavior) = OPTIONAL
    ];
  }

  // Optional. The arguments to pass to the driver. Do not include arguments
  // that can be set as batch properties, such as `--conf`, because a
  // collision can occur that causes an incorrect batch submission.
  repeated string args = 3 [
    (google.api.field_behavior) = OPTIONAL
  ];

  // Optional. HCFS URIs of jar files to add to the classpath of the
  // Spark driver and tasks.
  repeated string jar_file_uris = 4 [
    (google.api.field_behavior) = OPTIONAL
  ];

  // Optional. HCFS URIs of files to be placed in the working directory of
  // each executor.
  repeated string file_uris = 5 [
    (google.api.field_behavior) = OPTIONAL
  ];

  // Optional. HCFS URIs of archives to be extracted into the working
  // directory of each executor. Supported file types:
  // `.jar`, `.tar`, `.tar.gz`, `.tgz`, and `.zip`.
  repeated string archive_uris = 6 [
    (google.api.field_behavior) = OPTIONAL
  ];
}

// Configuration for running an
// [Apache SparkR](https://spark.apache.org/docs/latest/sparkr.html)
// batch workload.
message SparkRBatch {
  // Required. The HCFS URI of the main R file to use as the driver.
  // Must be a `.R` or `.r` file.
  string main_r_file_uri = 1 [
    (google.api.field_behavior) = REQUIRED
  ];

  // Optional. The arguments to pass to the Spark driver. Do not include
  // arguments that can be set as batch properties, such as `--conf`, because
  // a collision can occur that causes an incorrect batch submission.
  repeated string args = 2 [
    (google.api.field_behavior) = OPTIONAL
  ];

  // Optional. HCFS URIs of files to be placed in the working directory of
  // each executor.
  repeated string file_uris = 3 [
    (google.api.field_behavior) = OPTIONAL
  ];

  // Optional. HCFS URIs of archives to be extracted into the working
  // directory of each executor. Supported file types:
  // `.jar`, `.tar`, `.tar.gz`, `.tgz`, and `.zip`.
  repeated string archive_uris = 4 [
    (google.api.field_behavior) = OPTIONAL
  ];
}

// A configuration for running
// [Apache Spark SQL](https://spark.apache.org/sql/) queries as a batch
// workload.
message SparkSqlBatch {
  // Required. The HCFS URI of the script that contains Spark SQL queries to
  // execute.
  string query_file_uri = 1 [(google.api.field_behavior) = REQUIRED];

  // Optional. Mapping of query variable names to values (equivalent to the
  // Spark SQL command: `SET name="value";`).
  map<string, string> query_variables = 2
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of jar files to be added to the Spark CLASSPATH.
  repeated string jar_file_uris = 3 [(google.api.field_behavior) = OPTIONAL];
}




// © 2015 - 2025 Weber Informatics LLC | Privacy Policy