
// io.cdap.plugin.gcp.publisher.source.PubSubSubscriberUtil (Maven / Gradle / Ivy)
/*
* Copyright © 2020 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package io.cdap.plugin.gcp.publisher.source;
import com.google.api.gax.core.FixedCredentialsProvider;
import com.google.api.gax.retrying.RetrySettings;
import com.google.api.gax.rpc.ApiException;
import com.google.api.gax.rpc.StatusCode;
import com.google.auth.Credentials;
import com.google.cloud.pubsub.v1.SubscriptionAdminClient;
import com.google.cloud.pubsub.v1.SubscriptionAdminSettings;
import com.google.pubsub.v1.PushConfig;
import io.cdap.cdap.api.data.format.StructuredRecord;
import io.cdap.cdap.etl.api.streaming.StreamingContext;
import io.cdap.plugin.gcp.common.GCPUtils;
import org.apache.spark.storage.StorageLevel;
import org.apache.spark.streaming.api.java.JavaDStream;
import org.apache.spark.streaming.dstream.DStream;
import org.apache.spark.streaming.dstream.ReceiverInputDStream;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.threeten.bp.Duration;
import scala.collection.JavaConverters;
import scala.reflect.ClassTag;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Set;
import java.util.function.BooleanSupplier;
import java.util.function.Predicate;
import java.util.function.Supplier;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import javax.annotation.Nullable;
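/**
 * Utility class for Pub/Sub subscribers: creates a JavaDStream of received messages and provides
 * helpers for subscription creation, credential loading, and retry/backoff handling.
 */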
public final class PubSubSubscriberUtil {
protected static final Logger LOG = LoggerFactory.getLogger(PubSubSubscriberUtil.class);
// Retryable status codes. These need to be handled in case Pub/Sub throws a StatusRuntimeException.
// Each constant holds the HTTP status value that the corresponding StatusCode.Code reports.
// NOTE(review): matching is done on the HTTP status value rather than on the StatusCode.Code itself; if the
// code-to-HTTP mapping is not one-to-one, codes other than the five named here would match as well - confirm.
private static final int RESOURCE_EXHAUSTED = StatusCode.Code.RESOURCE_EXHAUSTED.getHttpStatusCode();
private static final int CANCELLED = StatusCode.Code.CANCELLED.getHttpStatusCode();
private static final int INTERNAL = StatusCode.Code.INTERNAL.getHttpStatusCode();
private static final int UNAVAILABLE = StatusCode.Code.UNAVAILABLE.getHttpStatusCode();
private static final int DEADLINE_EXCEEDED = StatusCode.Code.DEADLINE_EXCEEDED.getHttpStatusCode();
// Parameterized (instead of a raw Set) so lookups are type-checked against Integer status values.
private static final Set<Integer> RETRYABLE_STATUS_CODES =
  Stream.of(RESOURCE_EXHAUSTED, CANCELLED, INTERNAL, UNAVAILABLE, DEADLINE_EXCEEDED)
    .collect(Collectors.toSet());
// Maximum number of attempts for retried Pub/Sub calls.
private static final int MAX_ATTEMPTS = 5;
// Private no-arg constructor: the class only exposes static members and is not meant to be instantiated.
private PubSubSubscriberUtil() {
}
/**
 * Get a JavaDStream of received PubSubMessages.
 *
 * <p>Messages are auto-acknowledged unless the streaming context reports that preview is enabled.
 *
 * @param streamingContext the streaming context
 * @param config the subscriber configuration
 * @param mappingFunction function forwarded to {@code getInputDStream} to map each received message
 * @return JavaDStream of all received pub/sub messages.
 * @throws Exception when the credentials could not be loaded.
 */
public static JavaDStream getStream(
  StreamingContext streamingContext, PubSubSubscriberConfig config,
  SerializableFunction mappingFunction) throws Exception {
  // Acknowledgement is switched off when preview is enabled.
  final boolean autoAcknowledge = !streamingContext.isPreviewEnabled();
  return getInputDStream(streamingContext, config, autoAcknowledge, mappingFunction);
}
/**
 * Get a merged JavaDStream containing all received messages from multiple receivers.
 *
 * <p>When the state store is enabled on the streaming context, a single {@code PubSubDirectDStream} is
 * created (the mapping function is handed to it) and returned. Otherwise one {@code PubSubInputDStream}
 * is created per configured reader, the receiver streams are unioned, and the mapping function is applied
 * to every message of the union.
 *
 * @param streamingContext the streaming context
 * @param config subscriber config
 * @param autoAcknowledge if the messages should be acknowledged or not.
 * @param mappingFn function used to map each received message
 * @return JavaDStream containing all received messages.
 */
@SuppressWarnings("unchecked")
protected static JavaDStream getInputDStream(StreamingContext streamingContext,
                                             PubSubSubscriberConfig config,
                                             boolean autoAcknowledge,
                                             SerializableFunction mappingFn) {
  // Both branches need the same class tag, so it is created once.
  ClassTag tag = scala.reflect.ClassTag$.MODULE$.apply(PubSubMessage.class);

  if (streamingContext.isStateStoreEnabled()) {
    PubSubDirectDStream pubSubDirectDStream = new PubSubDirectDStream(streamingContext, config,
                                                                      streamingContext.getBatchInterval(),
                                                                      autoAcknowledge,
                                                                      mappingFn);
    return new JavaDStream<>(pubSubDirectDStream, tag);
  }

  int numberOfReaders = config.getNumberOfReaders();
  // Element type restored: the union below consumes a sequence of DStreams of PubSubMessage.
  ArrayList<DStream<PubSubMessage>> receivers = new ArrayList<>(numberOfReaders);
  for (int i = 0; i < numberOfReaders; i++) {
    receivers.add(
      new PubSubInputDStream(streamingContext.getSparkStreamingContext().ssc(), config, StorageLevel.MEMORY_ONLY(),
                             autoAcknowledge));
  }

  DStream dStream = streamingContext.getSparkStreamingContext().ssc()
    .union(JavaConverters.collectionAsScalaIterableConverter(receivers).asScala().toSeq(), tag);
  return new JavaDStream<>(dStream, tag).map(message -> mappingFn.apply(message));
}
/**
 * Create a new subscription (if needed) for the supplied topic.
 *
 * <p>Creation is attempted up to {@code MAX_ATTEMPTS} times for as long as {@code preCheck} returns
 * {@code true}. An {@code ALREADY_EXISTS} response is treated as success. Retryable failures are followed
 * by a backoff sleep, except after the final attempt, where sleeping would only delay the failure.
 *
 * @param preCheck Any checks that need to be applied before each retry.
 * @param backoffConfig {@link BackoffConfig} for retries.
 * @param subscription Subscription name string.
 * @param topic Topic name string.
 * @param clientSupplier Supplier for creating {@link SubscriptionAdminClient}
 * @param isRetryableException Predicate for checking if the exception is retryable.
 * @throws InterruptedException If the wait for retry is interrupted.
 * @throws IOException If {@link SubscriptionAdminClient} cannot be created.
 * @throws ApiException If the failure is neither {@code ALREADY_EXISTS} nor retryable.
 * @throws RuntimeException If the subscription was not created because the attempts were used up or
 *                          {@code preCheck} returned {@code false}; the last {@link ApiException} (if any)
 *                          is attached as the cause.
 */
public static void createSubscription(BooleanSupplier preCheck, BackoffConfig backoffConfig, String subscription,
                                      String topic, Supplier<SubscriptionAdminClient> clientSupplier,
                                      Predicate<ApiException> isRetryableException)
  throws InterruptedException, IOException {
  int backoff = backoffConfig.getInitialBackoffMs();
  int attempts = MAX_ATTEMPTS;
  ApiException lastApiException = null;
  while (preCheck.getAsBoolean() && attempts-- > 0) {
    try {
      // NOTE(review): the client obtained from the supplier is not closed here. If the supplier creates a
      // fresh client on every call, this method should close it - confirm ownership with the callers.
      SubscriptionAdminClient subscriptionAdminClient = clientSupplier.get();
      int ackDeadline = 60; // 60 seconds before resending the message.
      subscriptionAdminClient.createSubscription(
        subscription, topic, PushConfig.getDefaultInstance(), ackDeadline);
      return;
    } catch (ApiException ae) {
      lastApiException = ae;
      // If the subscription already exists, ignore the error.
      if (ae.getStatusCode().getCode() == StatusCode.Code.ALREADY_EXISTS) {
        return;
      }
      // Anything that is not retryable is surfaced to the caller unchanged.
      if (!isRetryableException.test(ae)) {
        throw ae;
      }
      // Back off only when another attempt will follow.
      if (attempts > 0) {
        backoff = sleepAndIncreaseBackoff(preCheck, backoff, backoffConfig);
      }
    }
  }
  throw new RuntimeException(
    String.format("Unable to create subscription '%s' for topic '%s': attempts were exhausted or the "
                    + "pre-check stopped further attempts.", subscription, topic),
    lastApiException);
}
/**
 * Decides whether a failed request should be retried. The exception's own retryable flag is consulted
 * first; if it is not set, the HTTP status value of the exception's status code is looked up in
 * {@code RETRYABLE_STATUS_CODES}.
 *
 * In testing, we noticed the client was wrapping some network exceptions declaring those as not retryable,
 * even when the Pub/Sub documentation states that the request may be retryable.
 *
 * @param ae the API Exception
 * @return boolean stating whether we should retry this request.
 */
public static boolean isApiExceptionRetryable(ApiException ae) {
  if (ae.isRetryable()) {
    return true;
  }
  int httpStatus = ae.getStatusCode().getCode().getHttpStatusCode();
  return RETRYABLE_STATUS_CODES.contains(httpStatus);
}
/**
 * Provides the mapping function that converts a PubSubMessage to a StructuredRecord, backed by a
 * {@code PubSubStructuredRecordConverter} built from the given config.
 *
 * @param config {@link GoogleSubscriberConfig}
 * @return {@link SerializableFunction}
 */
public static SerializableFunction getMappingFunction(GoogleSubscriberConfig config) {
  SerializableFunction converter = new PubSubStructuredRecordConverter(config);
  return converter;
}
/**
 * Builds {@link RetrySettings} from the default {@link BackoffConfig}: the initial delay, maximum delay and
 * delay multiplier are taken from the config, and the number of attempts is set to {@code MAX_ATTEMPTS}.
 *
 * @return the retry settings
 */
public static RetrySettings getRetrySettings() {
  BackoffConfig defaults = BackoffConfig.defaultInstance();
  RetrySettings.Builder settings = RetrySettings.newBuilder()
    .setInitialRetryDelay(Duration.ofMillis(defaults.getInitialBackoffMs()))
    .setMaxRetryDelay(Duration.ofMillis(defaults.getMaximumBackoffMs()))
    .setRetryDelayMultiplier(defaults.getBackoffFactor())
    .setMaxAttempts(MAX_ATTEMPTS);
  return settings.build();
}
/**
 * Loads {@link Credentials} for the given service account through {@code GCPUtils}.
 *
 * @param serviceAccount Service account string, could be a path or JSON; {@code null} yields {@code null}.
 * @param serviceAccountFilePath Indicates whether first param is service account path.
 * @return {@link Credentials} or null.
 * @throws RuntimeException wrapping the {@link IOException} raised while loading the credentials.
 */
@Nullable
public static Credentials createCredentials(String serviceAccount, boolean serviceAccountFilePath) {
  // No service account supplied: nothing to load.
  if (serviceAccount == null) {
    return null;
  }
  try {
    return GCPUtils.loadServiceAccountCredentials(serviceAccount, serviceAccountFilePath);
  } catch (IOException e) {
    throw new RuntimeException("Error creating credentials from service account.", e);
  }
}
/**
 * Call provided supplier with retries.
 *
 * <p>An {@link ApiException} that {@code isApiExceptionRetryable} accepts triggers a backoff sleep and
 * another attempt; any other {@link ApiException} is rethrown immediately. No sleep is performed after
 * the final attempt, since no further call would follow it.
 *
 * @param supplier The supplier to be invoked
 * @param backoffConfig {@link BackoffConfig} for the retries
 * @param maxAttempts Integer indicating max number of attempts
 * @param <T> Type of the value returned by the supplier
 * @return Value returned by supplier
 * @throws Exception Any exception that is not retryable or exceeded retry
 */
public static <T> T callWithRetry(Supplier<T> supplier,
                                  BackoffConfig backoffConfig, int maxAttempts) throws Exception {
  int backoff = backoffConfig.getInitialBackoffMs();
  // Counted down on a copy so the requested limit stays available for the failure message.
  int attemptsLeft = maxAttempts;
  ApiException lastApiException = null;
  while (attemptsLeft-- > 0) {
    try {
      return supplier.get();
    } catch (ApiException ae) {
      lastApiException = ae;
      // Anything that is not retryable is surfaced to the caller unchanged.
      if (!isApiExceptionRetryable(ae)) {
        throw ae;
      }
      // Back off only when another attempt will follow.
      if (attemptsLeft > 0) {
        backoff = sleepAndIncreaseBackoff(() -> true, backoff, backoffConfig);
      }
    }
  }
  // Also reached when maxAttempts is not positive; the cause is null in that case.
  throw new RuntimeException(
    String.format("Call did not succeed within %d attempt(s).", maxAttempts), lastApiException);
}
/**
 * Creates a {@link SubscriptionAdminClient}. When credentials are supplied they are installed through a
 * {@link FixedCredentialsProvider}; otherwise the settings builder is used as created.
 *
 * @param credentials credentials to use, may be {@code null}
 * @return the created client
 * @throws IOException if building the settings or creating the client fails
 */
private static SubscriptionAdminClient buildSubscriptionAdminClient(Credentials credentials) throws IOException {
  SubscriptionAdminSettings.Builder settingsBuilder = SubscriptionAdminSettings.newBuilder();
  boolean hasCredentials = credentials != null;
  if (hasCredentials) {
    settingsBuilder.setCredentialsProvider(FixedCredentialsProvider.create(credentials));
  }
  SubscriptionAdminSettings settings = settingsBuilder.build();
  return SubscriptionAdminClient.create(settings);
}
/**
 * Sleeps for the current backoff (only if {@code preCheck} still returns {@code true}) and returns the
 * backoff value to use next.
 *
 * @param preCheck check evaluated before sleeping
 * @param backoff current backoff in milliseconds
 * @param backoffConfig {@link BackoffConfig} providing the growth factor and the maximum
 * @return the updated backoff in milliseconds
 * @throws InterruptedException if the sleep is interrupted
 */
private static int sleepAndIncreaseBackoff(BooleanSupplier preCheck, int backoff,
                                           BackoffConfig backoffConfig) throws InterruptedException {
  boolean shouldSleep = preCheck.getAsBoolean();
  if (shouldSleep) {
    LOG.trace("Backoff - Sleeping for {} ms.", backoff);
    Thread.sleep(backoff);
  }
  // The backoff grows whether or not a sleep took place.
  int nextBackoff = calculateUpdatedBackoff(backoff, backoffConfig);
  return nextBackoff;
}
/**
 * Multiplies the backoff by the configured factor (truncated to an int) and caps the result at the
 * configured maximum.
 *
 * @param backoff current backoff in milliseconds
 * @param backoffConfig {@link BackoffConfig} providing the factor and the maximum
 * @return the next backoff in milliseconds
 */
private static int calculateUpdatedBackoff(int backoff, BackoffConfig backoffConfig) {
  int scaled = (int) (backoff * backoffConfig.getBackoffFactor());
  int ceiling = backoffConfig.getMaximumBackoffMs();
  return Math.min(scaled, ceiling);
}
}