All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.google.cloud.bigquery.storage.v1beta2.StreamWriterV2 Maven / Gradle / Ivy

/*
 * Copyright 2020 Google LLC
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.google.cloud.bigquery.storage.v1beta2;

import com.google.api.core.ApiFuture;
import com.google.api.core.SettableApiFuture;
import com.google.api.gax.core.CredentialsProvider;
import com.google.api.gax.rpc.FixedHeaderProvider;
import com.google.api.gax.rpc.TransportChannelProvider;
import com.google.cloud.bigquery.storage.v1beta2.AppendRowsRequest.ProtoData;
import com.google.cloud.bigquery.storage.v1beta2.StreamConnection.DoneCallback;
import com.google.cloud.bigquery.storage.v1beta2.StreamConnection.RequestCallback;
import com.google.common.base.Preconditions;
import com.google.common.util.concurrent.Uninterruptibles;
import com.google.protobuf.Int64Value;
import io.grpc.Status;
import io.grpc.Status.Code;
import io.grpc.StatusRuntimeException;
import java.io.IOException;
import java.util.Deque;
import java.util.LinkedList;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.locks.Condition;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
import java.util.logging.Logger;
import javax.annotation.concurrent.GuardedBy;

/**
 * A BigQuery Stream Writer that can be used to write data into BigQuery Table.
 *
 * 

TODO: Support batching. * *

TODO: Support schema change. */ public class StreamWriterV2 implements AutoCloseable { private static final Logger log = Logger.getLogger(StreamWriterV2.class.getName()); private Lock lock; private Condition hasMessageInWaitingQueue; private Condition inflightReduced; /* * The identifier of stream to write to. */ private final String streamName; /* * The proto schema of rows to write. */ private final ProtoSchema writerSchema; /* * Max allowed inflight requests in the stream. Method append is blocked at this. */ private final long maxInflightRequests; /* * Max allowed inflight bytes in the stream. Method append is blocked at this. */ private final long maxInflightBytes; /* * TraceId for debugging purpose. */ private final String traceId; /* * Tracks current inflight requests in the stream. */ @GuardedBy("lock") private long inflightRequests = 0; /* * Tracks current inflight bytes in the stream. */ @GuardedBy("lock") private long inflightBytes = 0; /* * Indicates whether user has called Close() or not. */ @GuardedBy("lock") private boolean userClosed = false; /* * The final status of connection. Set to nonnull when connection is permanently closed. */ @GuardedBy("lock") private Throwable connectionFinalStatus = null; /* * Contains requests buffered in the client and not yet sent to server. */ @GuardedBy("lock") private final Deque waitingRequestQueue; /* * Contains sent append requests waiting for response from server. */ @GuardedBy("lock") private final Deque inflightRequestQueue; /* * A client used to interact with BigQuery. */ private BigQueryWriteClient client; /* * If true, the client above is created by this writer and should be closed. */ private boolean ownsBigQueryWriteClient = false; /* * Wraps the underlying bi-directional stream connection with server. */ private StreamConnection streamConnection; /* * A separate thread to handle actual communication with server. */ private Thread appendThread; /** The maximum size of one request. Defined by the API. */ public static long getApiMaxRequestBytes() { return 10L * 1000L * 1000L; // 10 megabytes (https://en.wikipedia.org/wiki/Megabyte) } private StreamWriterV2(Builder builder) throws IOException { this.lock = new ReentrantLock(); this.hasMessageInWaitingQueue = lock.newCondition(); this.inflightReduced = lock.newCondition(); this.streamName = builder.streamName; if (builder.writerSchema == null) { throw new StatusRuntimeException( Status.fromCode(Code.INVALID_ARGUMENT) .withDescription("Writer schema must be provided when building this writer.")); } this.writerSchema = builder.writerSchema; this.maxInflightRequests = builder.maxInflightRequest; this.maxInflightBytes = builder.maxInflightBytes; this.traceId = builder.traceId; this.waitingRequestQueue = new LinkedList(); this.inflightRequestQueue = new LinkedList(); if (builder.client == null) { BigQueryWriteSettings stubSettings = BigQueryWriteSettings.newBuilder() .setCredentialsProvider(builder.credentialsProvider) .setTransportChannelProvider(builder.channelProvider) .setEndpoint(builder.endpoint) // (b/185842996): Temporily fix this by explicitly providing the header. .setHeaderProvider( FixedHeaderProvider.create( "x-goog-request-params", "write_stream=" + this.streamName)) .build(); this.client = BigQueryWriteClient.create(stubSettings); this.ownsBigQueryWriteClient = true; } else { this.client = builder.client; this.ownsBigQueryWriteClient = false; } this.streamConnection = new StreamConnection( this.client, new RequestCallback() { @Override public void run(AppendRowsResponse response) { requestCallback(response); } }, new DoneCallback() { @Override public void run(Throwable finalStatus) { doneCallback(finalStatus); } }); this.appendThread = new Thread( new Runnable() { @Override public void run() { appendLoop(); } }); this.appendThread.start(); } /** * Schedules the writing of a message. * *

Example of writing a message. * *

{@code
   * AppendRowsRequest message;
   * ApiFuture messageIdFuture = writer.append(message);
   * ApiFutures.addCallback(messageIdFuture, new ApiFutureCallback() {
   *   public void onSuccess(AppendRowsResponse response) {
   *     if (!response.hasError()) {
   *       System.out.println("written with offset: " + response.getAppendResult().getOffset());
   *     } else {
   *       System.out.println("received an in stream error: " + response.getError().toString());
   *     }
   *   }
   *
   *   public void onFailure(Throwable t) {
   *     System.out.println("failed to write: " + t);
   *   }
   * }, MoreExecutors.directExecutor());
   * }
* * @param rows the rows in serialized format to write to BigQuery. * @param offset the offset of the first row. * @return the append response wrapped in a future. */ public ApiFuture append(ProtoRows rows, long offset) { AppendRowsRequest.Builder requestBuilder = AppendRowsRequest.newBuilder(); requestBuilder.setProtoRows(ProtoData.newBuilder().setRows(rows).build()); if (offset >= 0) { requestBuilder.setOffset(Int64Value.of(offset)); } return appendInternal(requestBuilder.build()); } private ApiFuture appendInternal(AppendRowsRequest message) { AppendRequestAndResponse requestWrapper = new AppendRequestAndResponse(message); if (requestWrapper.messageSize > getApiMaxRequestBytes()) { requestWrapper.appendResult.setException( new StatusRuntimeException( Status.fromCode(Code.INVALID_ARGUMENT) .withDescription( "MessageSize is too large. Max allow: " + getApiMaxRequestBytes() + " Actual: " + requestWrapper.messageSize))); return requestWrapper.appendResult; } this.lock.lock(); try { if (userClosed) { requestWrapper.appendResult.setException( new StatusRuntimeException( Status.fromCode(Status.Code.FAILED_PRECONDITION) .withDescription("Stream is already closed"))); return requestWrapper.appendResult; } if (connectionFinalStatus != null) { requestWrapper.appendResult.setException( new StatusRuntimeException( Status.fromCode(Status.Code.FAILED_PRECONDITION) .withDescription( "Stream is closed due to " + connectionFinalStatus.toString()))); return requestWrapper.appendResult; } ++this.inflightRequests; this.inflightBytes += requestWrapper.messageSize; waitingRequestQueue.addLast(requestWrapper); hasMessageInWaitingQueue.signal(); maybeWaitForInflightQuota(); return requestWrapper.appendResult; } finally { this.lock.unlock(); } } @GuardedBy("lock") private void maybeWaitForInflightQuota() { while (this.inflightRequests >= this.maxInflightRequests || this.inflightBytes >= this.maxInflightBytes) { try { inflightReduced.await(100, TimeUnit.MILLISECONDS); } catch (InterruptedException e) { log.warning( "Interrupted while waiting for inflight quota. Stream: " + streamName + " Error: " + e.toString()); throw new StatusRuntimeException( Status.fromCode(Code.CANCELLED) .withCause(e) .withDescription("Interrupted while waiting for quota.")); } } } /** Close the stream writer. Shut down all resources. */ @Override public void close() { log.info("User closing stream: " + streamName); this.lock.lock(); try { this.userClosed = true; } finally { this.lock.unlock(); } log.info("Waiting for append thread to finish. Stream: " + streamName); try { appendThread.join(); log.info("User close complete. Stream: " + streamName); } catch (InterruptedException e) { // Unexpected. Just swallow the exception with logging. log.warning( "Append handler join is interrupted. Stream: " + streamName + " Error: " + e.toString()); } if (this.ownsBigQueryWriteClient) { this.client.close(); try { this.client.awaitTermination(1, TimeUnit.MINUTES); } catch (InterruptedException ignored) { } } } /* * This loop is executed in a separate thread. * * It takes requests from waiting queue and sends them to server. */ private void appendLoop() { boolean isFirstRequestInConnection = true; Deque localQueue = new LinkedList(); while (!waitingQueueDrained()) { this.lock.lock(); try { hasMessageInWaitingQueue.await(100, TimeUnit.MILLISECONDS); while (!this.waitingRequestQueue.isEmpty()) { AppendRequestAndResponse requestWrapper = this.waitingRequestQueue.pollFirst(); this.inflightRequestQueue.addLast(requestWrapper); localQueue.addLast(requestWrapper); } } catch (InterruptedException e) { log.warning( "Interrupted while waiting for message. Stream: " + streamName + " Error: " + e.toString()); } finally { this.lock.unlock(); } if (localQueue.isEmpty()) { continue; } // TODO: Add reconnection here. while (!localQueue.isEmpty()) { AppendRowsRequest preparedRequest = prepareRequestBasedOnPosition( localQueue.pollFirst().message, isFirstRequestInConnection); this.streamConnection.send(preparedRequest); isFirstRequestInConnection = false; } } log.info("Cleanup starts. Stream: " + streamName); // At this point, the waiting queue is drained, so no more requests. // We can close the stream connection and handle the remaining inflight requests. this.streamConnection.close(); waitForDoneCallback(); // At this point, there cannot be more callback. It is safe to clean up all inflight requests. log.info( "Stream connection is fully closed. Cleaning up inflight requests. Stream: " + streamName); cleanupInflightRequests(); log.info("Append thread is done. Stream: " + streamName); } /* * Returns true if waiting queue is drain, a.k.a. no more requests in the waiting queue. * * It serves as a signal to append thread that there cannot be any more requests in the waiting * queue and it can prepare to stop. */ private boolean waitingQueueDrained() { this.lock.lock(); try { return (this.userClosed || this.connectionFinalStatus != null) && this.waitingRequestQueue.isEmpty(); } finally { this.lock.unlock(); } } private void waitForDoneCallback() { log.info("Waiting for done callback from stream connection. Stream: " + streamName); while (true) { this.lock.lock(); try { if (connectionFinalStatus != null) { // Done callback is received, return. return; } } finally { this.lock.unlock(); } Uninterruptibles.sleepUninterruptibly(100, TimeUnit.MILLISECONDS); } } private AppendRowsRequest prepareRequestBasedOnPosition( AppendRowsRequest original, boolean isFirstRequest) { AppendRowsRequest.Builder requestBuilder = original.toBuilder(); if (isFirstRequest) { if (this.writerSchema != null) { requestBuilder.getProtoRowsBuilder().setWriterSchema(this.writerSchema); } requestBuilder.setWriteStream(this.streamName); if (this.traceId != null) { requestBuilder.setTraceId(this.traceId); } } else { requestBuilder.clearWriteStream(); requestBuilder.getProtoRowsBuilder().clearWriterSchema(); } return requestBuilder.build(); } private void cleanupInflightRequests() { Throwable finalStatus; Deque localQueue = new LinkedList(); this.lock.lock(); try { finalStatus = this.connectionFinalStatus; while (!this.inflightRequestQueue.isEmpty()) { localQueue.addLast(pollInflightRequestQueue()); } } finally { this.lock.unlock(); } log.info( "Cleaning " + localQueue.size() + " inflight requests with error: " + finalStatus.toString()); while (!localQueue.isEmpty()) { localQueue.pollFirst().appendResult.setException(finalStatus); } } private void requestCallback(AppendRowsResponse response) { AppendRequestAndResponse requestWrapper; this.lock.lock(); try { requestWrapper = pollInflightRequestQueue(); } finally { this.lock.unlock(); } if (response.hasError()) { StatusRuntimeException exception = new StatusRuntimeException( Status.fromCodeValue(response.getError().getCode()) .withDescription(response.getError().getMessage())); requestWrapper.appendResult.setException(exception); } else { requestWrapper.appendResult.set(response); } } private void doneCallback(Throwable finalStatus) { log.info( "Received done callback. Stream: " + streamName + " Final status: " + finalStatus.toString()); this.lock.lock(); try { this.connectionFinalStatus = finalStatus; } finally { this.lock.unlock(); } } @GuardedBy("lock") private AppendRequestAndResponse pollInflightRequestQueue() { AppendRequestAndResponse requestWrapper = this.inflightRequestQueue.pollFirst(); --this.inflightRequests; this.inflightBytes -= requestWrapper.messageSize; this.inflightReduced.signal(); return requestWrapper; } /** * Constructs a new {@link StreamWriterV2.Builder} using the given stream and client. AppendRows * needs special headers to be added to client, so a passed in client will not work. This should * be used by test only. */ public static StreamWriterV2.Builder newBuilder(String streamName, BigQueryWriteClient client) { return new StreamWriterV2.Builder(streamName, client); } /** Constructs a new {@link StreamWriterV2.Builder} using the given stream. */ public static StreamWriterV2.Builder newBuilder(String streamName) { return new StreamWriterV2.Builder(streamName); } /** A builder of {@link StreamWriterV2}s. */ public static final class Builder { private static final long DEFAULT_MAX_INFLIGHT_REQUESTS = 1000L; private static final long DEFAULT_MAX_INFLIGHT_BYTES = 100 * 1024 * 1024; // 100Mb. private String streamName; private BigQueryWriteClient client; private ProtoSchema writerSchema = null; private long maxInflightRequest = DEFAULT_MAX_INFLIGHT_REQUESTS; private long maxInflightBytes = DEFAULT_MAX_INFLIGHT_BYTES; private String endpoint = BigQueryWriteSettings.getDefaultEndpoint(); private TransportChannelProvider channelProvider = BigQueryWriteSettings.defaultGrpcTransportProviderBuilder().setChannelsPerCpu(1).build(); private CredentialsProvider credentialsProvider = BigQueryWriteSettings.defaultCredentialsProviderBuilder().build(); private String traceId = null; private Builder(String streamName) { this.streamName = Preconditions.checkNotNull(streamName); this.client = null; } private Builder(String streamName, BigQueryWriteClient client) { this.streamName = Preconditions.checkNotNull(streamName); this.client = Preconditions.checkNotNull(client); } /** Sets the proto schema of the rows. */ public Builder setWriterSchema(ProtoSchema writerSchema) { this.writerSchema = writerSchema; return this; } public Builder setMaxInflightRequests(long value) { this.maxInflightRequest = value; return this; } public Builder setMaxInflightBytes(long value) { this.maxInflightBytes = value; return this; } /** Gives the ability to override the gRPC endpoint. */ public Builder setEndpoint(String endpoint) { this.endpoint = Preconditions.checkNotNull(endpoint, "Endpoint is null."); return this; } /** * {@code ChannelProvider} to use to create Channels, which must point at Cloud BigQuery Storage * API endpoint. * *

For performance, this client benefits from having multiple underlying connections. See * {@link com.google.api.gax.grpc.InstantiatingGrpcChannelProvider.Builder#setPoolSize(int)}. */ public Builder setChannelProvider(TransportChannelProvider channelProvider) { this.channelProvider = Preconditions.checkNotNull(channelProvider, "ChannelProvider is null."); return this; } /** {@code CredentialsProvider} to use to create Credentials to authenticate calls. */ public Builder setCredentialsProvider(CredentialsProvider credentialsProvider) { this.credentialsProvider = Preconditions.checkNotNull(credentialsProvider, "CredentialsProvider is null."); return this; } /** * Sets traceId for debuging purpose. TraceId must follow the format of * CustomerDomain:DebugString, e.g. DATAFLOW:job_id_x. */ public Builder setTraceId(String traceId) { int colonIndex = traceId.indexOf(':'); if (colonIndex == -1 || colonIndex == 0 || colonIndex == traceId.length() - 1) { throw new IllegalArgumentException( "TraceId must follow the format of A:B. Actual:" + traceId); } this.traceId = traceId; return this; } /** Builds the {@code StreamWriterV2}. */ public StreamWriterV2 build() throws IOException { return new StreamWriterV2(this); } } // Class that wraps AppendRowsRequest and its corresponding Response future. private static final class AppendRequestAndResponse { final SettableApiFuture appendResult; final AppendRowsRequest message; final long messageSize; AppendRequestAndResponse(AppendRowsRequest message) { this.appendResult = SettableApiFuture.create(); this.message = message; this.messageSize = message.getProtoRows().getSerializedSize(); } } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy