All downloads are free. The search and download functionality uses the official Maven repository.

com.spotify.scio.bigtable.BigtableBulkWriter Maven / Gradle / Ivy

There is a newer version: 0.14.8
Show newest version
/*
 * Copyright 2018 Spotify AB.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package com.spotify.scio.bigtable;

import com.google.bigtable.v2.Mutation;
import com.google.cloud.bigtable.config.BigtableOptions;
import com.google.protobuf.ByteString;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.ThreadLocalRandom;
import org.apache.beam.sdk.io.gcp.bigtable.BigtableServiceHelper;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.GroupByKey;
import org.apache.beam.sdk.transforms.PTransform;
import org.apache.beam.sdk.transforms.ParDo;
import org.apache.beam.sdk.transforms.display.DisplayData;
import org.apache.beam.sdk.transforms.windowing.AfterProcessingTime;
import org.apache.beam.sdk.transforms.windowing.GlobalWindows;
import org.apache.beam.sdk.transforms.windowing.Repeatedly;
import org.apache.beam.sdk.transforms.windowing.Window;
import org.apache.beam.sdk.values.KV;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.PDone;
import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.annotations.VisibleForTesting;
import org.joda.time.Duration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class BigtableBulkWriter
    extends PTransform>>, PDone> {

  private static final Logger LOG = LoggerFactory.getLogger(BigtableBulkWriter.class);

  private final BigtableOptions bigtableOptions;
  private final String tableName;
  private final int numOfShards;
  private final Duration flushInterval;

  public BigtableBulkWriter(
      final String tableName,
      final BigtableOptions bigtableOptions,
      final int numOfShards,
      final Duration flushInterval) {
    this.bigtableOptions = bigtableOptions;
    this.tableName = tableName;
    this.numOfShards = numOfShards;
    this.flushInterval = flushInterval;
  }

  @Override
  public PDone expand(PCollection>> input) {
    createBulkShards(input, numOfShards, flushInterval)
        .apply("Bigtable BulkWrite", ParDo.of(new BigtableBulkWriterFn()));
    return PDone.in(input.getPipeline());
  }

  @VisibleForTesting
  static PCollection>>> createBulkShards(
      final PCollection>> input,
      final int numOfShards,
      final Duration flushInterval) {
    return input
        .apply("Assign To Shard", ParDo.of(new AssignToShard(numOfShards)))
        .apply(
            "Window",
            Window.>>>into(new GlobalWindows())
                .triggering(
                    Repeatedly.forever(
                        AfterProcessingTime.pastFirstElementInPane().plusDelayOf(flushInterval)))
                .discardingFiredPanes()
                .withAllowedLateness(Duration.ZERO))
        .apply("Group By Shard", GroupByKey.create())
        .apply(
            "Gets Mutations",
            ParDo.of(
                new DoFn<
                    KV>>>,
                    Iterable>>>() {
                  @ProcessElement
                  public void process(
                      @Element KV>>> element,
                      OutputReceiver>>> out) {
                    out.output(element.getValue());
                  }
                }));
  }

  private class BigtableBulkWriterFn
      extends DoFn>>, Void> {

    private BigtableServiceHelper.Writer bigtableWriter;
    private long recordsWritten;
    private final ConcurrentLinkedQueue failures;

    public BigtableBulkWriterFn() {
      this.failures = new ConcurrentLinkedQueue<>();
    }

    @StartBundle
    public void startBundle(StartBundleContext c) throws IOException {
      bigtableWriter =
          new BigtableServiceHelper(bigtableOptions, c.getPipelineOptions())
              .openForWriting(tableName);
      recordsWritten = 0;
    }

    @ProcessElement
    public void processElement(@Element Iterable>> element)
        throws Exception {
      checkForFailures(failures);
      for (KV> r : element) {
        bigtableWriter
            .writeRecord(r)
            .whenComplete(
                (mutationResult, exception) -> {
                  if (exception != null) {
                    failures.add(new BigtableWriteException(r, exception));
                  }
                });
        ++recordsWritten;
      }
    }

    @FinishBundle
    public void finishBundle() throws Exception {
      // close the writer and wait for all writes to complete
      bigtableWriter.close();
      checkForFailures(failures);
      LOG.debug("Wrote {} records", recordsWritten);
    }

    @Override
    public void populateDisplayData(DisplayData.Builder builder) {
      super.populateDisplayData(builder);
      builder.add(DisplayData.item("Records Written", recordsWritten));
    }

    /** If any write has asynchronously failed, fail the bundle with a useful error. */
    private void checkForFailures(final ConcurrentLinkedQueue failures)
        throws IOException {
      // Note that this function is never called by multiple threads and is the only place that
      // we remove from failures, so this code is safe.
      if (failures.isEmpty()) {
        return;
      }

      StringBuilder logEntry = new StringBuilder();
      int i = 0;
      List suppressed = new ArrayList<>();
      for (; i < 10 && !failures.isEmpty(); ++i) {
        BigtableWriteException exc = failures.remove();
        logEntry.append("\n").append(exc.getMessage());
        if (exc.getCause() != null) {
          logEntry.append(": ").append(exc.getCause().getMessage());
        }
        suppressed.add(exc);
      }
      String message =
          String.format(
              "At least %d errors occurred writing to Bigtable. First %d errors: %s",
              i + failures.size(), i, logEntry.toString());
      LOG.error(message);
      IOException exception = new IOException(message);
      for (BigtableWriteException e : suppressed) {
        exception.addSuppressed(e);
      }
      throw exception;
    }

    /** An exception that puts information about the failed record being written in its message. */
    class BigtableWriteException extends IOException {

      public BigtableWriteException(
          final KV> record, Throwable cause) {
        super(
            String.format(
                "Error mutating row %s with mutations %s",
                record.getKey().toStringUtf8(), record.getValue()),
            cause);
      }
    }
  }

  static class AssignToShard
      extends DoFn<
          KV>, KV>>> {

    private final int numOfShards;

    AssignToShard(final int numOfShards) {
      this.numOfShards = numOfShards;
    }

    @ProcessElement
    public void processElement(
        @Element KV> element,
        OutputReceiver>>> out) {
      // assign this element to a random shard
      final long shard = ThreadLocalRandom.current().nextLong(numOfShards);
      out.output(KV.of(shard, element));
    }
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy