All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.spotify.scio.bigtable.BigtableDoFn Maven / Gradle / Ivy

/*
 * Copyright 2017 Spotify AB.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package com.spotify.scio.bigtable;

import com.google.cloud.bigtable.config.BigtableOptions;
import com.google.cloud.bigtable.grpc.BigtableSession;
import com.google.common.util.concurrent.ListenableFuture;
import com.spotify.scio.transforms.BaseAsyncLookupDoFn;
import com.spotify.scio.transforms.GuavaAsyncLookupDoFn;
import java.io.IOException;
import org.apache.beam.sdk.transforms.DoFn;

/**
 * A {@link DoFn} that performs asynchronous lookup using Google Cloud Bigtable.
 *
 * @param  input element type.
 * @param  Bigtable lookup value type.
 */
public abstract class BigtableDoFn extends GuavaAsyncLookupDoFn {

  private final BigtableOptions options;

  /** Perform asynchronous Bigtable lookup. */
  public abstract ListenableFuture asyncLookup(BigtableSession session, A input);

  /**
   * Create a {@link BigtableDoFn} instance.
   *
   * @param options Bigtable options.
   */
  public BigtableDoFn(BigtableOptions options) {
    this(options, 1000);
  }

  /**
   * Create a {@link BigtableDoFn} instance.
   *
   * @param options Bigtable options.
   * @param maxPendingRequests maximum number of pending requests on every cloned DoFn. This
   *     prevents runner from timing out and retrying bundles.
   */
  public BigtableDoFn(BigtableOptions options, int maxPendingRequests) {
    this(options, maxPendingRequests, new BaseAsyncLookupDoFn.NoOpCacheSupplier<>());
  }

  /**
   * Create a {@link BigtableDoFn} instance.
   *
   * @param options Bigtable options.
   * @param maxPendingRequests maximum number of pending requests on every cloned DoFn. This
   *     prevents runner from timing out and retrying bundles.
   * @param cacheSupplier supplier for lookup cache.
   */
  public BigtableDoFn(
      BigtableOptions options,
      int maxPendingRequests,
      BaseAsyncLookupDoFn.CacheSupplier cacheSupplier) {
    super(maxPendingRequests, cacheSupplier);
    this.options = options;
  }

  /**
   * Create a {@link BigtableDoFn} instance.
   *
   * @param options Bigtable options.
   * @param maxPendingRequests maximum number of pending requests on every cloned DoFn. This
   *     prevents runner from timing out and retrying bundles.
   * @param deduplicate if an attempt should be made to de-duplicate simultaneous requests for the
   *     same input
   * @param cacheSupplier supplier for lookup cache.
   */
  public BigtableDoFn(
      BigtableOptions options,
      int maxPendingRequests,
      boolean deduplicate,
      BaseAsyncLookupDoFn.CacheSupplier cacheSupplier) {
    super(maxPendingRequests, deduplicate, cacheSupplier);
    this.options = options;
  }

  @Override
  public ResourceType getResourceType() {
    // BigtableSession is backed by a gRPC thread safe client
    return ResourceType.PER_INSTANCE;
  }

  protected BigtableSession newClient() {
    try {
      return new BigtableSession(options);
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
  }
}