All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.google.cloud.dataflow.sdk.options.GcsOptions Maven / Gradle / Ivy

/*
 * Copyright (C) 2015 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package com.google.cloud.dataflow.sdk.options;

import com.google.cloud.dataflow.sdk.util.AppEngineEnvironment;
import com.google.cloud.dataflow.sdk.util.GcsUtil;
import com.google.common.util.concurrent.MoreExecutors;
import com.google.common.util.concurrent.ThreadFactoryBuilder;

import com.fasterxml.jackson.annotation.JsonIgnore;

import java.util.concurrent.ExecutorService;
import java.util.concurrent.SynchronousQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;

/**
 * Options used to configure Google Cloud Storage.
 */
public interface GcsOptions extends
    ApplicationNameOptions, GcpOptions, PipelineOptions {
  /**
   * The GcsUtil instance that should be used to communicate with Google Cloud Storage.
   */
  @JsonIgnore
  @Description("The GcsUtil instance that should be used to communicate with Google Cloud Storage.")
  @Default.InstanceFactory(GcsUtil.GcsUtilFactory.class)
  @Hidden
  GcsUtil getGcsUtil();
  void setGcsUtil(GcsUtil value);

  /**
   * The ExecutorService instance to use to create threads, can be overridden to specify an
   * ExecutorService that is compatible with the users environment. If unset, the
   * default is to create an ExecutorService with an unbounded number of threads; this
   * is compatible with Google AppEngine.
   */
  @JsonIgnore
  @Description("The ExecutorService instance to use to create multiple threads. Can be overridden "
      + "to specify an ExecutorService that is compatible with the users environment. If unset, "
      + "the default is to create an ExecutorService with an unbounded number of threads; this "
      + "is compatible with Google AppEngine.")
  @Default.InstanceFactory(ExecutorServiceFactory.class)
  @Hidden
  ExecutorService getExecutorService();
  void setExecutorService(ExecutorService value);

  /**
   * GCS endpoint to use. If unspecified, uses the default endpoint.
   */
  @JsonIgnore
  @Hidden
  @Description("The URL for the GCS API.")
  String getGcsEndpoint();
  void setGcsEndpoint(String value);

  /**
   * Returns the default {@link ExecutorService} to use within the Dataflow SDK. The
   * {@link ExecutorService} is compatible with AppEngine.
   */
  public static class ExecutorServiceFactory implements DefaultValueFactory {
    @Override
    public ExecutorService create(PipelineOptions options) {
      ThreadFactoryBuilder threadFactoryBuilder = new ThreadFactoryBuilder();
      threadFactoryBuilder.setThreadFactory(MoreExecutors.platformThreadFactory());
      if (!AppEngineEnvironment.IS_APP_ENGINE) {
        // AppEngine doesn't allow modification of threads to be daemon threads.
        threadFactoryBuilder.setDaemon(true);
      }
      /* The SDK requires an unbounded thread pool because a step may create X writers
       * each requiring their own thread to perform the writes otherwise a writer may
       * block causing deadlock for the step because the writers buffer is full.
       * Also, the MapTaskExecutor launches the steps in reverse order and completes
       * them in forward order thus requiring enough threads so that each step's writers
       * can be active.
       */
      return new ThreadPoolExecutor(
          0, Integer.MAX_VALUE, // Allow an unlimited number of re-usable threads.
          Long.MAX_VALUE, TimeUnit.NANOSECONDS, // Keep non-core threads alive forever.
          new SynchronousQueue(),
          threadFactoryBuilder.build());
    }
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy