All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.spotify.google.cloud.pubsub.client.Publisher Maven / Gradle / Ivy

There is a newer version: 1.34
Show newest version
/*
 * Copyright (c) 2011-2015 Spotify AB
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.spotify.google.cloud.pubsub.client;

import com.google.common.util.concurrent.MoreExecutors;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.Closeable;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.RejectedExecutionException;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ScheduledThreadPoolExecutor;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;

import static java.util.Collections.unmodifiableList;
import static java.util.concurrent.TimeUnit.MILLISECONDS;
import static java.util.concurrent.TimeUnit.SECONDS;

/**
 * A tool for publishing larger volumes of messages to Google Pub/Sub. Does concurrent per-topic batching in order to
 * provide good throughput with large volumes of messages across many different topics.
 *
 * 

Messages are gathered into batches, bounded by specified batch size and maximum latency. The publisher waits up * to the specified max latency before sending a batch of messages for a topic. If enough messages to fill a batch are * submitted before the max latency deadline, then the batch is sent immediately. * *

This batching strategy trades publish request quota for increased publishing latency as outgoing messages might * spend more time waiting in the publisher per-topic queues before getting sent. The rationale for this strategy is to * avoid (expensive and empirically observed) excessive numbers of very small batch publish requests during off-peak. */ public class Publisher implements Closeable { private static final Logger log = LoggerFactory.getLogger(Publisher.class); /** * A listener for monitoring operations performed by the {@link Publisher}. */ public interface Listener { /** * Called when a new {@link Publisher} is instantiated. * * @param publisher The {@link Publisher} */ void publisherCreated(Publisher publisher); /** * Called when a {@link Publisher} is closed. * * @param publisher The {@link Publisher} */ void publisherClosed(Publisher publisher); /** * Called when a {@link Publisher} receieves a new message for publication. * * @param publisher The {@link Publisher} * @param topic The message topic. * @param message The message. * @param future The future result. */ void publishingMessage(Publisher publisher, String topic, Message message, CompletableFuture future); /** * Called when a {@link Publisher} is sending a batch of messages to Google Cloud Pub/Sub. * * @param publisher The {@link Publisher} * @param topic The topic of the message batch. * @param batch The batch of messages being sent. * @param future The future result of the entire batch. */ @Deprecated void sendingBatch(Publisher publisher, String topic, List batch, CompletableFuture> future); /** * Called when a {@link Publisher} is sending a batch of messages to Google Cloud Pub/Sub. * * @param publisher The {@link Publisher} * @param topic The topic of the message batch. * @param batch The batch of messages being sent. * @param future The future result of the entire batch. */ default void sendingBatch(Publisher publisher, String topic, List batch, PubsubFuture> future) { sendingBatch(publisher, topic, batch, (CompletableFuture>) future); } /** * Called when a topic is enqueued as pending for future batch sending due to the publisher hitting the concurrency * limit. * * @param publisher The {@link Publisher} * @param topic The topic. * @param outstanding The current number of outstanding batch requests to Google Cloud Pub/Sub. * @param concurrency The configured concurrency limit. */ void topicPending(Publisher publisher, String topic, int outstanding, int concurrency); } private final Pubsub pubsub; private final String project; private final int queueSize; private final int batchSize; private final int concurrency; private final long maxLatencyMs; private final Listener listener; private final AtomicInteger outstanding = new AtomicInteger(); private final ConcurrentLinkedQueue pendingTopics = new ConcurrentLinkedQueue<>(); private final ConcurrentMap topics = new ConcurrentHashMap<>(); private final CompletableFuture closeFuture = new CompletableFuture<>(); private final ScheduledExecutorService scheduler = MoreExecutors.getExitingScheduledExecutorService(new ScheduledThreadPoolExecutor(1)); private final AtomicInteger schedulerQueueSize = new AtomicInteger(); private Publisher(final Builder builder) { this.pubsub = Objects.requireNonNull(builder.pubsub, "pubsub"); this.project = Objects.requireNonNull(builder.project, "project"); this.concurrency = builder.concurrency; this.batchSize = builder.batchSize; this.queueSize = Optional.ofNullable(builder.queueSize).orElseGet(() -> batchSize * 10); this.maxLatencyMs = builder.maxLatencyMs; this.listener = builder.listener == null ? new ListenerAdapter() : builder.listener; listener.publisherCreated(this); } /** * Publish a message on a specific topic. * * @param topic The topic name to publish on. Note that this is the short name, not the fully qualified name * including project. The project to publish on is configured using the {@link Builder}. * @param message The message to publish. * @return A future that is fulfilled with the resulting Google Pub/Sub message ID when the message has been * successfully published. */ public CompletableFuture publish(final String topic, final Message message) { final TopicQueue queue = topics.computeIfAbsent(topic, TopicQueue::new); final CompletableFuture future = queue.send(message); listener.publishingMessage(this, topic, message, future); return future; } /** * Close this {@link Publisher}. This will also close the underlying {@link Pubsub} client. */ @Override public void close() { // TODO (dano): fail outstanding futures scheduler.shutdownNow(); try { scheduler.awaitTermination(30, SECONDS); } catch (InterruptedException e) { Thread.currentThread().interrupt(); } closeFuture.complete(null); listener.publisherClosed(Publisher.this); } /** * Get a future that is completed when this {@link Publisher} is closed. */ public CompletableFuture closeFuture() { return closeFuture.thenApply(ignore -> null); } /** * Get the current number of outstanding (batch) requests to Google Pub/Sub. */ public int outstandingRequests() { return outstanding.get(); } /** * Get the total number of scheduled requests. */ public int scheduledQueueSize() { return schedulerQueueSize.get(); } /** * Get the current number of topics that are pending batch sending to Google Pub/Sub. */ public int pendingTopics() { return pendingTopics.size(); } /** * Get the Google Cloud project this {@link Publisher} is publishing to. */ public String project() { return project; } /** * Get the concurrent Google Pub/Sub request limit. */ public int concurrency() { return concurrency; } /** * Get the current queue size for the given topic name. Returns 0 if the topic does not exist. * * @param topic the topic name */ public int topicQueueSize(final String topic) { final TopicQueue topicQueue = this.topics.get(topic); return topicQueue == null ? 0 : topicQueue.queue.size(); } /** * Get the per-topic queue size. */ public int queueSize() { return queueSize; } /** * Get the max Google Pub/Sub request batch size. */ public int batchSize() { return batchSize; } /** * The per-topic queue of messages. */ private class TopicQueue { private final AtomicInteger size = new AtomicInteger(); private final ConcurrentLinkedQueue queue = new ConcurrentLinkedQueue<>(); private final String topic; private volatile boolean pending; private final AtomicBoolean scheduled = new AtomicBoolean(); private TopicQueue(final String topic) { this.topic = topic; } /** * Enqueue a message for sending on this topic queue. */ private CompletableFuture send(final Message message) { final CompletableFuture future = new CompletableFuture<>(); // Enforce queue size limit int currentSize; int newSize; do { currentSize = size.get(); newSize = currentSize + 1; if (newSize > queueSize) { future.completeExceptionally(new QueueFullException()); return future; } } while (!size.compareAndSet(currentSize, newSize)); // Enqueue outgoing message queue.add(new QueuedMessage(message, future)); // Schedule future batch sending scheduleSend(newSize); return future; } /** * Schedule this topic for future enqueuing for batch sending. If the batch size has been reached, enqueue for * sending immediately. * * @param queueSize The current number of enqueued messages in this topic. */ private void scheduleSend(final int queueSize) { // Bail if this topic is already enqueued for sending. if (pending) { return; } // Reached the batch size? Enqueue topic for sending immediately. if (queueSize >= batchSize) { enqueueSend(); return; } // Schedule this topic for later enqueuing, allowing more messages to gather into a larger batch. if (scheduled.compareAndSet(false, true)) { try { scheduler.schedule(this::scheduledEnqueueSend, maxLatencyMs, MILLISECONDS); schedulerQueueSize.incrementAndGet(); } catch (RejectedExecutionException ignore) { // Race with a call to close(). Ignore. } } } /** * Decrements the scheduled queue counter and enqueues the request. */ private void scheduledEnqueueSend() { schedulerQueueSize.decrementAndGet(); // Clear the scheduled flag before enqueuing or sending. scheduled.set(false); enqueueSendWithErrorLogging(); } /** * A wrapper around enqueueSend which catches and logs any exceptions that are thrown. This is * called by the executor, which will silently swallow exceptions if we don't handle them here. */ private void enqueueSendWithErrorLogging() { try { enqueueSend(); } catch (Exception e) { log.error("Error while enqueueing or sending messages on background thread", e); } } /** * Enqueue this topic for batch sending. If the request concurrency level is below the limit, send immediately. */ private void enqueueSend() { final int currentOutstanding = outstanding.get(); // Below outstanding limit so we can send immediately? if (currentOutstanding < concurrency) { sendBatch(); return; } // Enqueue as pending for sending by the earliest available concurrent request slot pending = true; pendingTopics.offer(this); // Tell the listener that a topic became pending for sending as early as possible. listener.topicPending(Publisher.this, topic, currentOutstanding, concurrency); // Attempt to send pending to guard against losing a race while enqueuing this topic as pending. sendPending(); } /** * Send a batch of messages. */ private int sendBatch() { final List batch = new ArrayList<>(); final List> futures = new ArrayList<>(); // Drain queue up to batch size while (batch.size() < batchSize) { final QueuedMessage message = queue.poll(); if (message == null) { break; } batch.add(message.message); futures.add(message.future); } // Was there anything to send? if (batch.size() == 0) { return 0; } // Decrement the queue size counter size.updateAndGet(i -> i - batch.size()); // Send the batch request and increment the outstanding request counter outstanding.incrementAndGet(); final PubsubFuture> batchFuture = pubsub.publish(project, topic, batch); listener.sendingBatch(Publisher.this, topic, unmodifiableList(batch), batchFuture); batchFuture.whenComplete( (List messageIds, Throwable ex) -> { // Decrement the outstanding request counter outstanding.decrementAndGet(); // Fail all futures if the batch request failed if (ex != null) { futures.forEach(f -> f.completeExceptionally(ex)); return; } // Verify that the number of message id's and messages match up if (futures.size() != messageIds.size()) { futures.forEach(f -> f.completeExceptionally( new PubsubException( "message id count mismatch: " + futures.size() + " != " + messageIds.size()))); } // Complete each future with the appropriate message id for (int i = 0; i < futures.size(); i++) { final String messageId = messageIds.get(i); final CompletableFuture future = futures.get(i); future.complete(messageId); } }) // When batch is complete, process pending topics. .whenComplete((v, t) -> sendPending()); return batch.size(); } } /** * Send any pending topics. */ private void sendPending() { while (outstanding.get() < concurrency) { final TopicQueue queue = pendingTopics.poll(); if (queue == null) { return; } queue.pending = false; final int sent = queue.sendBatch(); // Did we send a whole batch? Then there might be more messages in the queue. Mark as pending again. if (sent == batchSize) { queue.pending = true; pendingTopics.offer(queue); } } } /** * An outgoing message with the future that should be completed when the message has been published. */ private static class QueuedMessage { private final Message message; private final CompletableFuture future; public QueuedMessage(final Message message, final CompletableFuture future) { this.message = message; this.future = future; } } /** * Create a builder that can be used to build a {@link Publisher}. */ public static Builder builder() { return new Builder(); } /** * A builder that can be used to build a {@link Publisher}. */ public static class Builder { private Pubsub pubsub; private String project; private Integer queueSize; private int batchSize = 1000; private int concurrency = 64; private Listener listener; private long maxLatencyMs = 100; /** * Set the {@link Pubsub} client to use. * *

Note: The client should be configured to at least allow as many connections as the concurrency level of this * {@link Publisher}.

*/ public Builder pubsub(final Pubsub pubsub) { this.pubsub = pubsub; return this; } /** * Set the Google Cloud project to publish to. */ public Builder project(final String project) { this.project = project; return this; } /** * Set the maximum batch size. Default is {@code 1000}, which is also the maximum Google Cloud Pub/Sub batch size. */ public Builder batchSize(final int batchSize) { this.batchSize = batchSize; return this; } /** * Set the per-topic queue size. Default is {@code batchSize * concurrency * 10}. */ public Builder queueSize(final Integer queueSize) { this.queueSize = queueSize; return this; } /** * Set the Google Cloud Pub/Sub request concurrency level. Default is {@code 64}. */ public Builder concurrency(final int concurrency) { this.concurrency = concurrency; return this; } /** * Set the maximum latency in millis before sending an incomplete Google Cloud Pub/Sub publish batch request. * Default is {@code 100 ms}. */ public Builder maxLatencyMs(final long maxLatencyMs) { this.maxLatencyMs = maxLatencyMs; return this; } /** * Set a {@link Listener} for monitoring operations performed by the {@link Publisher}. */ public Builder listener(final Listener listener) { this.listener = listener; return this; } /** * Build a {@link Publisher}. */ public Publisher build() { return new Publisher(this); } } public static class ListenerAdapter implements Listener { @Override public void publisherCreated(final Publisher publisher) { } @Override public void publisherClosed(final Publisher publisher) { } @Override public void publishingMessage(final Publisher publisher, final String topic, final Message message, final CompletableFuture future) { } @Override public void sendingBatch(final Publisher publisher, final String topic, final List batch, final PubsubFuture> future) { } @Override public void sendingBatch(final Publisher publisher, final String topic, final List batch, final CompletableFuture> future) { } @Override public void topicPending(final Publisher publisher, final String topic, final int outstanding, final int concurrency) { } } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy