org.apache.beam.runners.dataflow.options.DataflowPipelineWorkerPoolOptions

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.beam.runners.dataflow.options;

import com.fasterxml.jackson.annotation.JsonIgnore;
import java.util.List;
import javax.annotation.Nullable;
import org.apache.beam.runners.dataflow.DataflowRunnerInfo;
import org.apache.beam.sdk.annotations.Experimental;
import org.apache.beam.sdk.options.Default;
import org.apache.beam.sdk.options.DefaultValueFactory;
import org.apache.beam.sdk.options.Description;
import org.apache.beam.sdk.options.Hidden;
import org.apache.beam.sdk.options.PipelineOptions;

/**
 * Options that are used to configure the Dataflow pipeline worker pool.
 */
@Description("Options that are used to configure the Dataflow pipeline worker pool.")
public interface DataflowPipelineWorkerPoolOptions extends PipelineOptions {
  /**
   * Number of workers to use when executing the Dataflow job. Note that selection of an
   * autoscaling algorithm other than {@code NONE} will affect the size of the worker pool. If
   * left unspecified, the Dataflow service will determine the number of workers.
   */
  @Description("Number of workers to use when executing the Dataflow job. Note that "
      + "selection of an autoscaling algorithm other than \"NONE\" will affect the "
      + "size of the worker pool. If left unspecified, the Dataflow service will "
      + "determine the number of workers.")
  int getNumWorkers();
  void setNumWorkers(int value);
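
  // Editor's note (illustrative, not part of the original source): when an
  // autoscaling algorithm other than NONE is selected, the Dataflow service
  // treats numWorkers as the initial size of the worker pool and
  // maxNumWorkers (below) as its upper bound.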

  /**
   * Type of autoscaling algorithm to use.
   */
  @Experimental(Experimental.Kind.AUTOSCALING)
  enum AutoscalingAlgorithmType {
    /** Use numWorkers machines. Do not autoscale the worker pool. */
    NONE("AUTOSCALING_ALGORITHM_NONE"),

    /** @deprecated use {@link #THROUGHPUT_BASED} instead. */
    @Deprecated
    BASIC("AUTOSCALING_ALGORITHM_BASIC"),

    /** Autoscale the worker pool based on throughput (up to maxNumWorkers). */
    THROUGHPUT_BASED("AUTOSCALING_ALGORITHM_BASIC");

    private final String algorithm;

    private AutoscalingAlgorithmType(String algorithm) {
      this.algorithm = algorithm;
    }

    /** Returns the string representation of this type. */
    public String getAlgorithm() {
      return this.algorithm;
    }
  }
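
  // Illustrative example (not in the original source): the deprecated BASIC
  // constant and THROUGHPUT_BASED share the same service-side string, so
  //
  //   AutoscalingAlgorithmType.THROUGHPUT_BASED.getAlgorithm()
  //   AutoscalingAlgorithmType.BASIC.getAlgorithm()
  //
  // both return "AUTOSCALING_ALGORITHM_BASIC".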

  /**
   * [Experimental] The autoscaling algorithm to use for the worker pool.
   *
   * <ul>
   *   <li>NONE: does not change the size of the worker pool.
   *   <li>BASIC (deprecated): autoscale the worker pool size up to maxNumWorkers until the job
   *       completes.
   *   <li>THROUGHPUT_BASED: autoscale the worker pool based on throughput (up to maxNumWorkers).
   * </ul>
   */
  @Description("[Experimental] The autoscaling algorithm to use for the worker pool. "
      + "NONE: does not change the size of the worker pool. "
      + "BASIC (deprecated): autoscale the worker pool size up to maxNumWorkers until the job "
      + "completes. "
      + "THROUGHPUT_BASED: autoscale the worker pool based on throughput (up to maxNumWorkers).")
  @Experimental(Experimental.Kind.AUTOSCALING)
  AutoscalingAlgorithmType getAutoscalingAlgorithm();
  void setAutoscalingAlgorithm(AutoscalingAlgorithmType value);

  /**
   * The maximum number of workers to use for the worker pool. This option limits the size of the
   * worker pool for the lifetime of the job, including pipeline updates. If left unspecified, the
   * Dataflow service will compute a ceiling.
   */
  @Description("The maximum number of workers to use for the worker pool. This option limits the "
      + "size of the worker pool for the lifetime of the job, including pipeline updates. "
      + "If left unspecified, the Dataflow service will compute a ceiling.")
  int getMaxNumWorkers();
  void setMaxNumWorkers(int value);

  /** Remote worker disk size, in gigabytes, or 0 to use the default size. */
  @Description("Remote worker disk size, in gigabytes, or 0 to use the default size.")
  int getDiskSizeGb();
  void setDiskSizeGb(int value);

  /**
   * Docker container image that executes the Dataflow worker harness, residing in Google
   * Container Registry.
   */
  @Default.InstanceFactory(WorkerHarnessContainerImageFactory.class)
  @Description("Docker container image that executes the Dataflow worker harness, residing in "
      + "Google Container Registry.")
  @Hidden
  String getWorkerHarnessContainerImage();
  void setWorkerHarnessContainerImage(String value);

  /**
   * Returns the default Docker container image that executes the Dataflow worker harness,
   * residing in Google Container Registry.
   */
  class WorkerHarnessContainerImageFactory implements DefaultValueFactory<String> {
    @Override
    public String create(PipelineOptions options) {
      String containerVersion = DataflowRunnerInfo.getDataflowRunnerInfo().getContainerVersion();
      return String.format("dataflow.gcr.io/v1beta3/IMAGE:%s", containerVersion);
    }
  }

  /**
   * GCE network for launching workers. For more information, see the reference documentation:
   * https://cloud.google.com/compute/docs/networking
   *
   * <p>Default is up to the Dataflow service.
   */
  @Description("GCE network for launching workers. For more information, see the reference "
      + "documentation https://cloud.google.com/compute/docs/networking. "
      + "Default is up to the Dataflow service.")
  String getNetwork();
  void setNetwork(String value);

  /**
   * GCE subnetwork for launching workers. For more information, see the reference documentation:
   * https://cloud.google.com/compute/docs/networking
   *
   * <p>Default is up to the Dataflow service. Expected format is
   * regions/REGION/subnetworks/SUBNETWORK or the fully qualified subnetwork name, beginning with
   * https://..., e.g. https://www.googleapis.com/compute/alpha/projects/PROJECT/
   * regions/REGION/subnetworks/SUBNETWORK
   */
  @Description("GCE subnetwork for launching workers. For more information, see the reference "
      + "documentation https://cloud.google.com/compute/docs/networking. "
      + "Default is up to the Dataflow service.")
  String getSubnetwork();
  void setSubnetwork(String value);

  /**
   * GCE availability zone for launching workers. See
   * https://developers.google.com/compute/docs/zones for a list of valid options.
   *
   * <p>Default is up to the Dataflow service.
   */
  @Description("GCE availability zone for launching workers. See "
      + "https://developers.google.com/compute/docs/zones for a list of valid options. "
      + "Default is up to the Dataflow service.")
  String getZone();
  void setZone(String value);

  /**
   * Machine type to create Dataflow worker VMs as.
   *
   * <p>See https://cloud.google.com/compute/docs/machine-types for a list of valid options.
   *
   * <p>If unset, the Dataflow service will choose a reasonable default.
   */
  @Description("Machine type to create Dataflow worker VMs as. See "
      + "https://cloud.google.com/compute/docs/machine-types for a list of valid options. "
      + "If unset, the Dataflow service will choose a reasonable default.")
  String getWorkerMachineType();
  void setWorkerMachineType(String value);

  /**
   * List of local files to make available to workers.
   *
   * <p>Files are placed on the worker's classpath.
   *
   * <p>The default value is the list of jars from the main program's classpath.
   */
  @Description("Files to stage on GCS and make available to workers. "
      + "Files are placed on the worker's classpath. "
      + "The default value is all files from the classpath.")
  @JsonIgnore
  List<String> getFilesToStage();
  void setFilesToStage(List<String> value);

  /**
   * Specifies what type of persistent disk is used. The value is a full disk type resource, e.g.,
   * compute.googleapis.com/projects//zones//diskTypes/pd-ssd. For more information, see the API
   * reference documentation for DiskTypes:
   * https://cloud.google.com/compute/docs/reference/latest/diskTypes
   */
  @Description("Specifies what type of persistent disk is used. The "
      + "value is a full URL of a disk type resource, e.g., "
      + "compute.googleapis.com/projects//zones//diskTypes/pd-ssd. For more "
      + "information, see the API reference documentation for DiskTypes: "
      + "https://cloud.google.com/compute/docs/reference/latest/diskTypes")
  String getWorkerDiskType();
  void setWorkerDiskType(String value);

  /**
   * Specifies whether worker pools should be started with public IP addresses.
   *
   * <p>WARNING: This feature is experimental. You must be whitelisted to use it.
   */
  @Description("Specifies whether worker pools should be started with public IP addresses. "
      + "WARNING: This feature is experimental. You must be whitelisted to use it.")
  @Experimental
  @JsonIgnore
  @Nullable
  Boolean getUsePublicIps();
  void setUsePublicIps(@Nullable Boolean value);
}
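
Usage sketch (editor's addition, not part of the source file above): the snippet below shows one common way these options are populated, assuming the Beam Java SDK and the Dataflow runner are on the classpath. Flag names follow the standard PipelineOptionsFactory convention of matching the getter names (e.g. --numWorkers for getNumWorkers()); pipeline construction itself is elided, and the machine type and zone values are hypothetical.

import org.apache.beam.runners.dataflow.options.DataflowPipelineWorkerPoolOptions;
import org.apache.beam.runners.dataflow.options.DataflowPipelineWorkerPoolOptions.AutoscalingAlgorithmType;
import org.apache.beam.sdk.options.PipelineOptionsFactory;

public class WorkerPoolOptionsExample {
  public static void main(String[] args) {
    // Parse command-line flags such as:
    //   --numWorkers=5 --maxNumWorkers=20 --autoscalingAlgorithm=THROUGHPUT_BASED
    DataflowPipelineWorkerPoolOptions options =
        PipelineOptionsFactory.fromArgs(args)
            .as(DataflowPipelineWorkerPoolOptions.class);

    // Values can also be set programmatically via the setters defined above.
    options.setNumWorkers(5);
    options.setMaxNumWorkers(20);
    options.setAutoscalingAlgorithm(AutoscalingAlgorithmType.THROUGHPUT_BASED);
    options.setWorkerMachineType("n1-standard-4"); // hypothetical machine type
    options.setZone("us-central1-f");              // hypothetical zone

    // Prints the service-side algorithm string, "AUTOSCALING_ALGORITHM_BASIC".
    System.out.println(options.getAutoscalingAlgorithm().getAlgorithm());
  }
}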




