// com.spotify.scio.bigtable.BigtableBulkWriter, from the scio-google-cloud-platform_2.12
// artifact (Scio add-on for Google Cloud Platform; available via Maven / Gradle / Ivy).
/*
* Copyright 2018 Spotify AB.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package com.spotify.scio.bigtable;
import com.google.bigtable.v2.Mutation;
import com.google.cloud.bigtable.config.BigtableOptions;
import com.google.protobuf.ByteString;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.ThreadLocalRandom;
import org.apache.beam.sdk.io.gcp.bigtable.BigtableServiceHelper;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.GroupByKey;
import org.apache.beam.sdk.transforms.PTransform;
import org.apache.beam.sdk.transforms.ParDo;
import org.apache.beam.sdk.transforms.display.DisplayData;
import org.apache.beam.sdk.transforms.windowing.AfterProcessingTime;
import org.apache.beam.sdk.transforms.windowing.GlobalWindows;
import org.apache.beam.sdk.transforms.windowing.Repeatedly;
import org.apache.beam.sdk.transforms.windowing.Window;
import org.apache.beam.sdk.values.KV;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.PDone;
import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.annotations.VisibleForTesting;
import org.joda.time.Duration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class BigtableBulkWriter
extends PTransform>>, PDone> {
private static final Logger LOG = LoggerFactory.getLogger(BigtableBulkWriter.class);
private final BigtableOptions bigtableOptions;
private final String tableName;
private final int numOfShards;
private final Duration flushInterval;
public BigtableBulkWriter(
final String tableName,
final BigtableOptions bigtableOptions,
final int numOfShards,
final Duration flushInterval) {
this.bigtableOptions = bigtableOptions;
this.tableName = tableName;
this.numOfShards = numOfShards;
this.flushInterval = flushInterval;
}
@Override
public PDone expand(PCollection>> input) {
createBulkShards(input, numOfShards, flushInterval)
.apply("Bigtable BulkWrite", ParDo.of(new BigtableBulkWriterFn()));
return PDone.in(input.getPipeline());
}
@VisibleForTesting
static PCollection>>> createBulkShards(
final PCollection>> input,
final int numOfShards,
final Duration flushInterval) {
return input
.apply("Assign To Shard", ParDo.of(new AssignToShard(numOfShards)))
.apply(
"Window",
Window.>>>into(new GlobalWindows())
.triggering(
Repeatedly.forever(
AfterProcessingTime.pastFirstElementInPane().plusDelayOf(flushInterval)))
.discardingFiredPanes()
.withAllowedLateness(Duration.ZERO))
.apply("Group By Shard", GroupByKey.create())
.apply(
"Gets Mutations",
ParDo.of(
new DoFn<
KV>>>,
Iterable>>>() {
@ProcessElement
public void process(
@Element KV>>> element,
OutputReceiver>>> out) {
out.output(element.getValue());
}
}));
}
private class BigtableBulkWriterFn
extends DoFn>>, Void> {
private BigtableServiceHelper.Writer bigtableWriter;
private long recordsWritten;
private final ConcurrentLinkedQueue failures;
public BigtableBulkWriterFn() {
this.failures = new ConcurrentLinkedQueue<>();
}
@StartBundle
public void startBundle(StartBundleContext c) throws IOException {
bigtableWriter =
new BigtableServiceHelper(bigtableOptions, c.getPipelineOptions())
.openForWriting(tableName);
recordsWritten = 0;
}
@ProcessElement
public void processElement(@Element Iterable>> element)
throws Exception {
checkForFailures(failures);
for (KV> r : element) {
bigtableWriter
.writeRecord(r)
.whenComplete(
(mutationResult, exception) -> {
if (exception != null) {
failures.add(new BigtableWriteException(r, exception));
}
});
++recordsWritten;
}
}
@FinishBundle
public void finishBundle() throws Exception {
// close the writer and wait for all writes to complete
bigtableWriter.close();
checkForFailures(failures);
LOG.debug("Wrote {} records", recordsWritten);
}
@Override
public void populateDisplayData(DisplayData.Builder builder) {
super.populateDisplayData(builder);
builder.add(DisplayData.item("Records Written", recordsWritten));
}
/** If any write has asynchronously failed, fail the bundle with a useful error. */
private void checkForFailures(final ConcurrentLinkedQueue failures)
throws IOException {
// Note that this function is never called by multiple threads and is the only place that
// we remove from failures, so this code is safe.
if (failures.isEmpty()) {
return;
}
StringBuilder logEntry = new StringBuilder();
int i = 0;
List suppressed = new ArrayList<>();
for (; i < 10 && !failures.isEmpty(); ++i) {
BigtableWriteException exc = failures.remove();
logEntry.append("\n").append(exc.getMessage());
if (exc.getCause() != null) {
logEntry.append(": ").append(exc.getCause().getMessage());
}
suppressed.add(exc);
}
String message =
String.format(
"At least %d errors occurred writing to Bigtable. First %d errors: %s",
i + failures.size(), i, logEntry.toString());
LOG.error(message);
IOException exception = new IOException(message);
for (BigtableWriteException e : suppressed) {
exception.addSuppressed(e);
}
throw exception;
}
/** An exception that puts information about the failed record being written in its message. */
class BigtableWriteException extends IOException {
public BigtableWriteException(
final KV> record, Throwable cause) {
super(
String.format(
"Error mutating row %s with mutations %s",
record.getKey().toStringUtf8(), record.getValue()),
cause);
}
}
}
static class AssignToShard
extends DoFn<
KV>, KV>>> {
private final int numOfShards;
AssignToShard(final int numOfShards) {
this.numOfShards = numOfShards;
}
@ProcessElement
public void processElement(
@Element KV> element,
OutputReceiver>>> out) {
// assign this element to a random shard
final long shard = ThreadLocalRandom.current().nextLong(numOfShards);
out.output(KV.of(shard, element));
}
}
}