
com.google.cloud.dataflow.sdk.options.DataflowPipelineWorkerPoolOptions Maven / Gradle / Ivy


Google Cloud Dataflow Java SDK provides a simple, Java-based interface for processing virtually any size data using Google Cloud resources. This artifact includes the entire Dataflow Java SDK.

There is a newer version: 2.5.0
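
For orientation, here is a minimal usage sketch showing how the worker pool options defined below are typically constructed; the class name and flag values are illustrative, while PipelineOptionsFactory is the SDK's standard entry point for building options objects:

import com.google.cloud.dataflow.sdk.options.DataflowPipelineWorkerPoolOptions;
import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;

public final class WorkerPoolOptionsExample {
  public static void main(String[] args) {
    // Command-line flags such as --numWorkers=5 --maxNumWorkers=20 --zone=us-central1-f
    // are parsed into a strongly typed options proxy.
    DataflowPipelineWorkerPoolOptions options = PipelineOptionsFactory.fromArgs(args)
        .withValidation()
        .as(DataflowPipelineWorkerPoolOptions.class);

    // The same properties can also be set programmatically.
    options.setNumWorkers(5);
    options.setWorkerMachineType("n1-standard-4"); // machine type is illustrative
  }
}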
/*
 * Copyright (C) 2015 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package com.google.cloud.dataflow.sdk.options;

import com.google.cloud.dataflow.sdk.annotations.Experimental;
import com.google.cloud.dataflow.sdk.runners.DataflowPipelineRunner;

import com.fasterxml.jackson.annotation.JsonIgnore;

import java.util.List;

import javax.annotation.Nullable;

/**
 * Options that are used to configure the Dataflow pipeline worker pool.
 */
@Description("Options that are used to configure the Dataflow pipeline worker pool.")
public interface DataflowPipelineWorkerPoolOptions extends PipelineOptions {
  /**
   * Number of workers to use when executing the Dataflow job. Note that selection of an autoscaling
   * algorithm other than {@code NONE} will affect the size of the worker pool. If left unspecified,
   * the Dataflow service will determine the number of workers.
   */
  @Description("Number of workers to use when executing the Dataflow job. Note that "
      + "selection of an autoscaling algorithm other then \"NONE\" will affect the "
      + "size of the worker pool. If left unspecified, the Dataflow service will "
      + "determine the number of workers.")
  int getNumWorkers();
  void setNumWorkers(int value);

  /**
   * Type of autoscaling algorithm to use.
   */
  enum AutoscalingAlgorithmType {
    /** Use numWorkers machines. Do not autoscale the worker pool. */
    NONE("AUTOSCALING_ALGORITHM_NONE"),

    /**
     * Autoscale the worker pool size up to maxNumWorkers until the job completes.
     *
     * @deprecated use {@link #THROUGHPUT_BASED} instead; both map to the same service-side
     *     setting, {@code AUTOSCALING_ALGORITHM_BASIC}.
     */
    @Deprecated
    BASIC("AUTOSCALING_ALGORITHM_BASIC"),

    /** Autoscale the workerpool based on throughput (up to maxNumWorkers). */
    THROUGHPUT_BASED("AUTOSCALING_ALGORITHM_BASIC");

    private final String algorithm;

    private AutoscalingAlgorithmType(String algorithm) {
      this.algorithm = algorithm;
    }

    /** Returns the string representation of this type. */
    public String getAlgorithm() {
      return this.algorithm;
    }
  }
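
  // A minimal usage sketch, with illustrative values: cap the worker pool at 50
  // workers and let the service scale it based on throughput.
  //
  //   DataflowPipelineWorkerPoolOptions options =
  //       PipelineOptionsFactory.create().as(DataflowPipelineWorkerPoolOptions.class);
  //   options.setAutoscalingAlgorithm(AutoscalingAlgorithmType.THROUGHPUT_BASED);
  //   options.setMaxNumWorkers(50);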

  /**
   * The autoscaling algorithm to use for the workerpool.
   *
   * <ul>
   *   <li>NONE: does not change the size of the worker pool.</li>
   *   <li>BASIC (deprecated): autoscale the worker pool size up to maxNumWorkers until the job
   *       completes.</li>
   *   <li>THROUGHPUT_BASED: autoscale the workerpool based on throughput (up to
   *       maxNumWorkers).</li>
   * </ul>
   */
  @Description("The autoscaling algorithm to use for the workerpool. "
      + "NONE: does not change the size of the worker pool. "
      + "BASIC (deprecated): autoscale the worker pool size up to maxNumWorkers until the job "
      + "completes. "
      + "THROUGHPUT_BASED: autoscale the workerpool based on throughput (up to maxNumWorkers).")
  AutoscalingAlgorithmType getAutoscalingAlgorithm();
  void setAutoscalingAlgorithm(AutoscalingAlgorithmType value);

  /**
   * The maximum number of workers to use for the workerpool. This option limits the size of the
   * workerpool for the lifetime of the job, including pipeline updates.
   * If left unspecified, the Dataflow service will compute a ceiling.
   */
  @Description("The maximum number of workers to use for the workerpool. This option limits the "
      + "size of the workerpool for the lifetime of the job, including pipeline updates. "
      + "If left unspecified, the Dataflow service will compute a ceiling.")
  int getMaxNumWorkers();
  void setMaxNumWorkers(int value);

  /**
   * Remote worker disk size, in gigabytes, or 0 to use the default size.
   */
  @Description("Remote worker disk size, in gigabytes, or 0 to use the default size.")
  int getDiskSizeGb();
  void setDiskSizeGb(int value);

  /**
   * Docker container image that executes Dataflow worker harness, residing in Google Container
   * Registry.
   */
  @Default.InstanceFactory(WorkerHarnessContainerImageFactory.class)
  @Description("Docker container image that executes Dataflow worker harness, residing in Google "
      + "Container Registry.")
  @Hidden
  String getWorkerHarnessContainerImage();
  void setWorkerHarnessContainerImage(String value);

  /**
   * Returns the default Docker container image that executes Dataflow worker harness, residing in
   * Google Container Registry.
   */
  class WorkerHarnessContainerImageFactory implements DefaultValueFactory<String> {
    @Override
    public String create(PipelineOptions options) {
      DataflowPipelineOptions dataflowOptions = options.as(DataflowPipelineOptions.class);
      if (dataflowOptions.isStreaming()) {
        return DataflowPipelineRunner.STREAMING_WORKER_HARNESS_CONTAINER_IMAGE;
      } else {
        return DataflowPipelineRunner.BATCH_WORKER_HARNESS_CONTAINER_IMAGE;
      }
    }
  }

  /**
   * GCE <a href="https://cloud.google.com/compute/docs/networking">network</a> for launching
   * workers.
   *
   * <p>Default is up to the Dataflow service.
   */
  @Description("GCE network for launching workers. For more information, see the reference "
      + "documentation https://cloud.google.com/compute/docs/networking. "
      + "Default is up to the Dataflow service.")
  String getNetwork();
  void setNetwork(String value);

  /**
   * GCE <a href="https://cloud.google.com/compute/docs/networking">subnetwork</a> for launching
   * workers.
   *
   * <p>Default is up to the Dataflow service. Expected format is
   * regions/REGION/subnetworks/SUBNETWORK.
   *
   * <p>You may also need to specify the network option.
   */
  @Description("GCE subnetwork for launching workers. For more information, see the reference "
      + "documentation https://cloud.google.com/compute/docs/networking. "
      + "Default is up to the Dataflow service.")
  String getSubnetwork();
  void setSubnetwork(String value);

  /**
   * GCE <a href="https://developers.google.com/compute/docs/zones">availability zone</a> for
   * launching workers.
   *
   * <p>Default is up to the Dataflow service.
   */
  @Description("GCE availability zone for launching workers. See "
      + "https://developers.google.com/compute/docs/zones for a list of valid options. "
      + "Default is up to the Dataflow service.")
  String getZone();
  void setZone(String value);

  /**
   * Machine type to create Dataflow worker VMs as.
   *
   * <p>See <a href="https://cloud.google.com/compute/docs/machine-types">GCE machine types</a>
   * for a list of valid options.
   *
   * <p>If unset, the Dataflow service will choose a reasonable default.
   */
  @Description("Machine type to create Dataflow worker VMs as. See "
      + "https://cloud.google.com/compute/docs/machine-types for a list of valid options. "
      + "If unset, the Dataflow service will choose a reasonable default.")
  String getWorkerMachineType();
  void setWorkerMachineType(String value);

  /**
   * The policy for tearing down the workers spun up by the service.
   *
   * @deprecated the Dataflow service will only support the TEARDOWN_ALWAYS policy in the future.
   */
  @Deprecated
  enum TeardownPolicy {
    /**
     * All VMs created for a Dataflow job are deleted when the job finishes, regardless of whether
     * it fails or succeeds.
     */
    TEARDOWN_ALWAYS("TEARDOWN_ALWAYS"),
    /**
     * All VMs created for a Dataflow job are left running when the job finishes, regardless of
     * whether it fails or succeeds.
     */
    TEARDOWN_NEVER("TEARDOWN_NEVER"),
    /**
     * All VMs created for a Dataflow job are deleted when the job succeeds, but are left running
     * when it fails. (This is typically used for debugging failing jobs by SSHing into the
     * workers.)
     */
    TEARDOWN_ON_SUCCESS("TEARDOWN_ON_SUCCESS");

    private final String teardownPolicy;

    TeardownPolicy(String teardownPolicy) {
      this.teardownPolicy = teardownPolicy;
    }

    public String getTeardownPolicyName() {
      return this.teardownPolicy;
    }
  }

  /**
   * The teardown policy for the VMs.
   *
   * <p>If unset, the Dataflow service will choose a reasonable default.
   */
  @Description("The teardown policy for the VMs. If unset, the Dataflow service will "
      + "choose a reasonable default.")
  TeardownPolicy getTeardownPolicy();
  void setTeardownPolicy(TeardownPolicy value);

  /**
   * List of local files to make available to workers.
   *
   * <p>Files are placed on the worker's classpath.
   *
   * <p>The default value is the list of jars from the main program's classpath.
   */
  @Description("Files to stage on GCS and make available to workers. "
      + "Files are placed on the worker's classpath. "
      + "The default value is all files from the classpath.")
  @JsonIgnore
  List<String> getFilesToStage();
  void setFilesToStage(List<String> value);

  /**
   * Specifies what type of persistent disk should be used. The value should be a full or partial
   * URL of a disk type resource, e.g., zones/us-central1-f/disks/pd-standard. For more
   * information, see the
   * <a href="https://cloud.google.com/compute/docs/reference/latest/diskTypes">API reference
   * documentation for DiskTypes</a>.
   */
  @Description("Specifies what type of persistent disk should be used. The value should be a full "
      + "or partial URL of a disk type resource, e.g., zones/us-central1-f/disks/pd-standard. For "
      + "more information, see the API reference documentation for DiskTypes: "
      + "https://cloud.google.com/compute/docs/reference/latest/diskTypes")
  String getWorkerDiskType();
  void setWorkerDiskType(String value);

  /**
   * Specifies whether worker pools should be started with public IP addresses.
   *
   * <p>WARNING: This feature is experimental. You must be whitelisted to use it.
   */
  @Description("Specifies whether worker pools should be started with public IP addresses. "
      + "WARNING: This feature is experimental. You must be whitelisted to use it.")
  @Experimental
  @JsonIgnore
  @Nullable
  Boolean getUsePublicIps();
  void setUsePublicIps(@Nullable Boolean value);
}
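
The getWorkerHarnessContainerImage() property above shows the SDK's default-value mechanism: the @Default.InstanceFactory annotation names a DefaultValueFactory that is consulted only when the user has not set the option explicitly. Here is a minimal sketch of the same pattern; ExampleOptions, its property, and the returned constant are invented for illustration:

import com.google.cloud.dataflow.sdk.options.Default;
import com.google.cloud.dataflow.sdk.options.DefaultValueFactory;
import com.google.cloud.dataflow.sdk.options.Description;
import com.google.cloud.dataflow.sdk.options.PipelineOptions;

public interface ExampleOptions extends PipelineOptions {
  @Description("Staging location; a default is computed when the user does not set one.")
  @Default.InstanceFactory(StagingLocationFactory.class)
  String getStagingLocation();
  void setStagingLocation(String value);

  /** Supplies the default value, much as WorkerHarnessContainerImageFactory does above. */
  class StagingLocationFactory implements DefaultValueFactory<String> {
    @Override
    public String create(PipelineOptions options) {
      // A real factory would usually derive the value from other options
      // (compare the isStreaming() branch in WorkerHarnessContainerImageFactory).
      return "gs://example-bucket/staging"; // hypothetical bucket
    }
  }
}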




