
com.google.cloud.dataflow.sdk.options.DataflowPipelineWorkerPoolOptions Maven / Gradle / Ivy


Google Cloud Dataflow Java SDK provides a simple, Java-based interface for processing virtually any size data using Google Cloud resources. This artifact includes the entire Dataflow Java SDK.

There is a newer version: 2.5.0
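
For orientation, here is a minimal usage sketch showing how the worker pool options defined below are typically constructed; the class name and flag values are illustrative, while PipelineOptionsFactory is the SDK's standard entry point for building options objects:

import com.google.cloud.dataflow.sdk.options.DataflowPipelineWorkerPoolOptions;
import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;

public final class WorkerPoolOptionsExample {
  public static void main(String[] args) {
    // Command-line flags such as --numWorkers=5 --maxNumWorkers=20 --zone=us-central1-f
    // are parsed into a strongly typed options proxy.
    DataflowPipelineWorkerPoolOptions options = PipelineOptionsFactory.fromArgs(args)
        .withValidation()
        .as(DataflowPipelineWorkerPoolOptions.class);

    // The same properties can also be set programmatically.
    options.setNumWorkers(5);
    options.setWorkerMachineType("n1-standard-4"); // machine type is illustrative
  }
}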
/*
 * Copyright (C) 2015 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package com.google.cloud.dataflow.sdk.options;

import com.google.cloud.dataflow.sdk.annotations.Experimental;
import com.google.cloud.dataflow.sdk.runners.DataflowPipelineRunner;

import com.fasterxml.jackson.annotation.JsonIgnore;

import java.util.List;

import javax.annotation.Nullable;

/**
 * Options that are used to configure the Dataflow pipeline worker pool.
 */
@Description("Options that are used to configure the Dataflow pipeline worker pool.")
public interface DataflowPipelineWorkerPoolOptions extends PipelineOptions {
  /**
   * Number of workers to use when executing the Dataflow job. Note that selection of an autoscaling
   * algorithm other than {@code NONE} will affect the size of the worker pool. If left unspecified,
   * the Dataflow service will determine the number of workers.
   */
  @Description("Number of workers to use when executing the Dataflow job. Note that "
      + "selection of an autoscaling algorithm other then \"NONE\" will affect the "
      + "size of the worker pool. If left unspecified, the Dataflow service will "
      + "determine the number of workers.")
  int getNumWorkers();
  void setNumWorkers(int value);

  /**
   * Type of autoscaling algorithm to use.
   */
  enum AutoscalingAlgorithmType {
    /** Use numWorkers machines. Do not autoscale the worker pool. */
    NONE("AUTOSCALING_ALGORITHM_NONE"),

    /**
     * Autoscale the worker pool size up to maxNumWorkers until the job completes.
     *
     * @deprecated use {@link #THROUGHPUT_BASED} instead; both map to the same service-side
     *     setting, {@code AUTOSCALING_ALGORITHM_BASIC}.
     */
    @Deprecated
    BASIC("AUTOSCALING_ALGORITHM_BASIC"),

    /** Autoscale the workerpool based on throughput (up to maxNumWorkers). */
    THROUGHPUT_BASED("AUTOSCALING_ALGORITHM_BASIC");

    private final String algorithm;

    private AutoscalingAlgorithmType(String algorithm) {
      this.algorithm = algorithm;
    }

    /** Returns the string representation of this type. */
    public String getAlgorithm() {
      return this.algorithm;
    }
  }
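
  // A minimal usage sketch, with illustrative values: cap the worker pool at 50
  // workers and let the service scale it based on throughput.
  //
  //   DataflowPipelineWorkerPoolOptions options =
  //       PipelineOptionsFactory.create().as(DataflowPipelineWorkerPoolOptions.class);
  //   options.setAutoscalingAlgorithm(AutoscalingAlgorithmType.THROUGHPUT_BASED);
  //   options.setMaxNumWorkers(50);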

  /**
   * The autoscaling algorithm to use for the workerpool.
   *
   * <ul>
   *   <li>NONE: does not change the size of the worker pool.</li>
   *   <li>BASIC (deprecated): autoscale the worker pool size up to maxNumWorkers until the job
   *       completes.</li>
   *   <li>THROUGHPUT_BASED: autoscale the workerpool based on throughput (up to
   *       maxNumWorkers).</li>
   * </ul>
   */
  @Description("The autoscaling algorithm to use for the workerpool. "
      + "NONE: does not change the size of the worker pool. "
      + "BASIC (deprecated): autoscale the worker pool size up to maxNumWorkers until the job "
      + "completes. "
      + "THROUGHPUT_BASED: autoscale the workerpool based on throughput (up to maxNumWorkers).")
  AutoscalingAlgorithmType getAutoscalingAlgorithm();
  void setAutoscalingAlgorithm(AutoscalingAlgorithmType value);

  /**
   * The maximum number of workers to use for the workerpool. This option limits the size of the
   * workerpool for the lifetime of the job, including pipeline updates.
   * If left unspecified, the Dataflow service will compute a ceiling.
   */
  @Description("The maximum number of workers to use for the workerpool. This option limits the "
      + "size of the workerpool for the lifetime of the job, including pipeline updates. "
      + "If left unspecified, the Dataflow service will compute a ceiling.")
  int getMaxNumWorkers();
  void setMaxNumWorkers(int value);

  /**
   * Remote worker disk size, in gigabytes, or 0 to use the default size.
   */
  @Description("Remote worker disk size, in gigabytes, or 0 to use the default size.")
  int getDiskSizeGb();
  void setDiskSizeGb(int value);

  /**
   * Docker container image that executes Dataflow worker harness, residing in Google Container
   * Registry.
   */
  @Default.InstanceFactory(WorkerHarnessContainerImageFactory.class)
  @Description("Docker container image that executes Dataflow worker harness, residing in Google "
      + "Container Registry.")
  @Hidden
  String getWorkerHarnessContainerImage();
  void setWorkerHarnessContainerImage(String value);

  /**
   * Returns the default Docker container image that executes Dataflow worker harness, residing in
   * Google Container Registry.
   */
  class WorkerHarnessContainerImageFactory implements DefaultValueFactory<String> {
    @Override
    public String create(PipelineOptions options) {
      DataflowPipelineOptions dataflowOptions = options.as(DataflowPipelineOptions.class);
      if (dataflowOptions.isStreaming()) {
        return DataflowPipelineRunner.STREAMING_WORKER_HARNESS_CONTAINER_IMAGE;
      } else {
        return DataflowPipelineRunner.BATCH_WORKER_HARNESS_CONTAINER_IMAGE;
      }
    }
  }

  /**
   * GCE <a href="https://cloud.google.com/compute/docs/networking">network</a> for launching
   * workers.
   *
   * <p>Default is up to the Dataflow service.
   */
  @Description("GCE network for launching workers. For more information, see the reference "
      + "documentation https://cloud.google.com/compute/docs/networking. "
      + "Default is up to the Dataflow service.")
  String getNetwork();
  void setNetwork(String value);

  /**
   * GCE <a href="https://cloud.google.com/compute/docs/networking">subnetwork</a> for launching
   * workers.
   *
   * <p>Default is up to the Dataflow service. Expected format is
   * regions/REGION/subnetworks/SUBNETWORK.
   *
   * <p>You may also need to specify the network option.
   */
  @Description("GCE subnetwork for launching workers. For more information, see the reference "
      + "documentation https://cloud.google.com/compute/docs/networking. "
      + "Default is up to the Dataflow service.")
  String getSubnetwork();
  void setSubnetwork(String value);

  /**
   * GCE <a href="https://developers.google.com/compute/docs/zones">availability zone</a> for
   * launching workers.
   *
   * <p>Default is up to the Dataflow service.
   */
  @Description("GCE availability zone for launching workers. See "
      + "https://developers.google.com/compute/docs/zones for a list of valid options. "
      + "Default is up to the Dataflow service.")
  String getZone();
  void setZone(String value);

  /**
   * Machine type to create Dataflow worker VMs as.
   *
   * <p>See <a href="https://cloud.google.com/compute/docs/machine-types">GCE machine types</a>
   * for a list of valid options.
   *
   * <p>If unset, the Dataflow service will choose a reasonable default.
   */
  @Description("Machine type to create Dataflow worker VMs as. See "
      + "https://cloud.google.com/compute/docs/machine-types for a list of valid options. "
      + "If unset, the Dataflow service will choose a reasonable default.")
  String getWorkerMachineType();
  void setWorkerMachineType(String value);

  /**
   * The policy for tearing down the workers spun up by the service.
   *
   * @deprecated the Dataflow service will only support the TEARDOWN_ALWAYS policy in the future.
   */
  @Deprecated
  enum TeardownPolicy {
    /**
     * All VMs created for a Dataflow job are deleted when the job finishes, regardless of whether
     * it fails or succeeds.
     */
    TEARDOWN_ALWAYS("TEARDOWN_ALWAYS"),
    /**
     * All VMs created for a Dataflow job are left running when the job finishes, regardless of
     * whether it fails or succeeds.
     */
    TEARDOWN_NEVER("TEARDOWN_NEVER"),
    /**
     * All VMs created for a Dataflow job are deleted when the job succeeds, but are left running
     * when it fails. (This is typically used for debugging failing jobs by SSHing into the
     * workers.)
     */
    TEARDOWN_ON_SUCCESS("TEARDOWN_ON_SUCCESS");

    private final String teardownPolicy;

    TeardownPolicy(String teardownPolicy) {
      this.teardownPolicy = teardownPolicy;
    }

    public String getTeardownPolicyName() {
      return this.teardownPolicy;
    }
  }

  /**
   * The teardown policy for the VMs.
   *
   * <p>If unset, the Dataflow service will choose a reasonable default.
   */
  @Description("The teardown policy for the VMs. If unset, the Dataflow service will "
      + "choose a reasonable default.")
  TeardownPolicy getTeardownPolicy();
  void setTeardownPolicy(TeardownPolicy value);

  /**
   * List of local files to make available to workers.
   *
   * <p>Files are placed on the worker's classpath.
   *
   * <p>The default value is the list of jars from the main program's classpath.
   */
  @Description("Files to stage on GCS and make available to workers. "
      + "Files are placed on the worker's classpath. "
      + "The default value is all files from the classpath.")
  @JsonIgnore
  List<String> getFilesToStage();
  void setFilesToStage(List<String> value);

  /**
   * Specifies what type of persistent disk should be used. The value should be a full or partial
   * URL of a disk type resource, e.g., zones/us-central1-f/disks/pd-standard. For more
   * information, see the
   * <a href="https://cloud.google.com/compute/docs/reference/latest/diskTypes">API reference
   * documentation for DiskTypes</a>.
   */
  @Description("Specifies what type of persistent disk should be used. The value should be a full "
      + "or partial URL of a disk type resource, e.g., zones/us-central1-f/disks/pd-standard. For "
      + "more information, see the API reference documentation for DiskTypes: "
      + "https://cloud.google.com/compute/docs/reference/latest/diskTypes")
  String getWorkerDiskType();
  void setWorkerDiskType(String value);

  /**
   * Specifies whether worker pools should be started with public IP addresses.
   *
   * <p>WARNING: This feature is experimental. You must be whitelisted to use it.
   */
  @Description("Specifies whether worker pools should be started with public IP addresses. "
      + "WARNING: This feature is experimental. You must be whitelisted to use it.")
  @Experimental
  @JsonIgnore
  @Nullable
  Boolean getUsePublicIps();
  void setUsePublicIps(@Nullable Boolean value);
}
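
The getWorkerHarnessContainerImage() property above shows the SDK's default-value mechanism: the @Default.InstanceFactory annotation names a DefaultValueFactory that is consulted only when the user has not set the option explicitly. Here is a minimal sketch of the same pattern; ExampleOptions, its property, and the returned constant are invented for illustration:

import com.google.cloud.dataflow.sdk.options.Default;
import com.google.cloud.dataflow.sdk.options.DefaultValueFactory;
import com.google.cloud.dataflow.sdk.options.Description;
import com.google.cloud.dataflow.sdk.options.PipelineOptions;

public interface ExampleOptions extends PipelineOptions {
  @Description("Staging location; a default is computed when the user does not set one.")
  @Default.InstanceFactory(StagingLocationFactory.class)
  String getStagingLocation();
  void setStagingLocation(String value);

  /** Supplies the default value, much as WorkerHarnessContainerImageFactory does above. */
  class StagingLocationFactory implements DefaultValueFactory<String> {
    @Override
    public String create(PipelineOptions options) {
      // A real factory would usually derive the value from other options
      // (compare the isStreaming() branch in WorkerHarnessContainerImageFactory).
      return "gs://example-bucket/staging"; // hypothetical bucket
    }
  }
}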




