/*
* Copyright 2017-2024 O2 Czech Republic, a.s.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package cz.o2.proxima.direct.io.pubsub;
import static cz.o2.proxima.direct.core.commitlog.ObserverUtils.asOnNextContext;
import static cz.o2.proxima.direct.core.commitlog.ObserverUtils.asRepartitionContext;
import com.google.api.gax.batching.FlowControlSettings;
import com.google.api.gax.batching.FlowController.LimitExceededBehavior;
import com.google.api.gax.rpc.AlreadyExistsException;
import com.google.cloud.pubsub.v1.AckReplyConsumer;
import com.google.cloud.pubsub.v1.MessageReceiver;
import com.google.cloud.pubsub.v1.Subscriber;
import com.google.cloud.pubsub.v1.SubscriptionAdminClient;
import com.google.protobuf.FieldMask;
import com.google.pubsub.v1.ProjectSubscriptionName;
import com.google.pubsub.v1.ProjectTopicName;
import com.google.pubsub.v1.PushConfig;
import com.google.pubsub.v1.Subscription;
import com.google.pubsub.v1.UpdateSubscriptionRequest;
import cz.o2.proxima.core.annotations.Stable;
import cz.o2.proxima.core.functional.UnaryFunction;
import cz.o2.proxima.core.storage.AbstractStorage;
import cz.o2.proxima.core.storage.Partition;
import cz.o2.proxima.core.storage.StreamElement;
import cz.o2.proxima.core.storage.commitlog.Position;
import cz.o2.proxima.core.time.WatermarkEstimator;
import cz.o2.proxima.core.time.WatermarkEstimatorFactory;
import cz.o2.proxima.core.time.WatermarkSupplier;
import cz.o2.proxima.direct.core.Context;
import cz.o2.proxima.direct.core.commitlog.CommitLogObserver;
import cz.o2.proxima.direct.core.commitlog.CommitLogObserver.OffsetCommitter;
import cz.o2.proxima.direct.core.commitlog.CommitLogReader;
import cz.o2.proxima.direct.core.commitlog.ObserveHandle;
import cz.o2.proxima.direct.core.commitlog.Offset;
import cz.o2.proxima.internal.com.google.common.annotations.VisibleForTesting;
import cz.o2.proxima.internal.com.google.common.base.Preconditions;
import cz.o2.proxima.internal.com.google.common.collect.Iterables;
import cz.o2.proxima.internal.com.google.common.collect.Lists;
import cz.o2.proxima.io.pubsub.util.PubSubUtils;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.UUID;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.atomic.AtomicReference;
import java.util.function.Consumer;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import javax.annotation.Nullable;
import lombok.Getter;
import lombok.extern.slf4j.Slf4j;
import org.threeten.bp.Duration;
/** A {@link CommitLogReader} for Google PubSub. */
@Stable
@Slf4j
class PubSubReader extends AbstractStorage implements CommitLogReader {
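/**
* Single logical partition representing a PubSub subscription. PubSub exposes no real
* partitioning, so splitting only yields copies of this partition.
*/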
static final class PubSubPartition implements Partition {
@Getter private final String consumerName;
PubSubPartition(String consumerName) {
this.consumerName = Objects.requireNonNull(consumerName);
}
@Override
public int getId() {
return 0;
}
@Override
public boolean isSplittable() {
return true;
}
@Override
public Collection<Partition> split(int desiredCount) {
log.info("Splitting partition {} into {} parts", this, desiredCount);
return IntStream.range(0, desiredCount).mapToObj(i -> this).collect(Collectors.toList());
}
@Override
public String toString() {
return "PubSubPartition(" + consumerName + ")";
}
}
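/**
* Offset carrying the consumer (subscription) name and the last committed watermark. PubSub
* offsets do not represent externalizable read positions.
*/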
@VisibleForTesting
static class PubSubOffset implements Offset {
@Getter private final String consumerName;
@Getter private final long watermark;
PubSubOffset(String consumerName, long watermark) {
this.consumerName = Objects.requireNonNull(consumerName);
this.watermark = watermark;
}
@Override
public Partition getPartition() {
return new PubSubPartition(consumerName);
}
@Override
public String toString() {
return "PubSubOffset(consumerName=" + consumerName + ", watermark=" + watermark + ")";
}
@Override
public boolean equals(Object obj) {
if (obj instanceof PubSubOffset) {
PubSubOffset off = (PubSubOffset) obj;
return off.consumerName.equals(consumerName) && off.watermark == watermark;
}
return false;
}
@Override
public int hashCode() {
return Objects.hash(consumerName, watermark);
}
}
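/** Callback invoked for each received element; returning {@code false} terminates consumption. */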
@FunctionalInterface
private interface PubSubConsumer extends Serializable {
boolean consume(StreamElement elem, WatermarkSupplier watermark, AckReplyConsumer ack);
}
private final PubSubAccessor accessor;
private final Context context;
private final String project;
private final String topic;
private final int maxAckDeadline;
private final int subscriptionAckDeadline;
private final boolean subscriptionAutoCreate;
private final WatermarkEstimatorFactory watermarkFactory;
private transient ExecutorService executor;
PubSubReader(PubSubAccessor accessor, Context context) {
super(accessor.getEntityDescriptor(), accessor.getUri());
this.accessor = accessor;
this.context = context;
this.project = accessor.getProject();
this.topic = accessor.getTopic();
this.maxAckDeadline = accessor.getMaxAckDeadline();
this.subscriptionAckDeadline = accessor.getSubscriptionAckDeadline();
this.subscriptionAutoCreate = accessor.isSubscriptionAutoCreate();
this.watermarkFactory = accessor.getWatermarkConfiguration().getWatermarkEstimatorFactory();
}
@Override
public List<Partition> getPartitions() {
// PubSub has only a single (splittable) partition from the client's perspective
return Arrays.asList(new PubSubPartition(asConsumerName(null)));
}
@Override
public ObserveHandle observe(
@Nullable String name, Position position, CommitLogObserver observer) {
return observe(name, position, Long.MIN_VALUE, observer);
}
private ObserveHandle observe(
@Nullable String name, Position position, long minWatermark, CommitLogObserver observer) {
validatePosition(position);
String consumerName = asConsumerName(name);
AtomicLong committedWatermark = new AtomicLong(minWatermark);
return consume(
consumerName,
(e, w, c) -> {
OffsetCommitter committer =
(succ, exc) -> {
if (succ) {
log.debug("Confirming message {} to PubSub", e);
committedWatermark.set(w.getWatermark());
c.ack();
} else {
if (exc != null) {
log.warn("Error during processing of {}", e, exc);
} else {
log.info("Nacking message {} by request", e);
}
c.nack();
}
};
try {
long watermark = w.getWatermark();
Offset offset = new PubSubOffset(consumerName, watermark);
boolean ret = observer.onNext(e, asOnNextContext(committer, offset));
if (!ret) {
observer.onCompleted();
}
return ret;
} catch (Exception ex) {
log.error("Error calling onNext", ex);
committer.fail(ex);
throw new RuntimeException(ex);
}
},
observer::onError,
null,
() -> {},
observer::onCancelled,
committedWatermark);
}
@Override
public ObserveHandle observePartitions(
@Nullable String name,
Collection<Partition> partitions,
Position position,
boolean stopAtCurrent,
CommitLogObserver observer) {
validateNotStopAtCurrent(stopAtCurrent);
name = findConsumerFromPartitions(name, partitions);
return observe(name, position, observer);
}
/**
* Observe PubSub in a bulk fashion. Note that due to the current PubSub implementation the bulk
* commit must happen before the ack timeout. If a message is not acknowledged before this
* timeout, it will be redelivered, which results in duplicate messages.
*
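* <p>A minimal usage sketch; {@code process} stands for hypothetical user code:
*
* <pre>{@code
* // 'reader' is a CommitLogReader created for a PubSub storage family
* ObserveHandle handle =
*     reader.observeBulk(
*         "my-consumer",
*         Position.NEWEST,
*         false,
*         new CommitLogObserver() {
*           public boolean onNext(StreamElement element, OnNextContext context) {
*             process(element);
*             // the bulk commit must complete before the subscription ack deadline
*             context.confirm();
*             return true;
*           }
*           public boolean onError(Throwable error) {
*             return false;
*           }
*         });
* }</pre>
*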
* @param name name of the observer subscription
* @param position must be set to NEWEST
* @param stopAtCurrent throws {@link UnsupportedOperationException} when {@code true}
* @param observer the observer of data
* @return handle to interact with the observation thread
*/
@Override
public ObserveHandle observeBulk(
@Nullable String name, Position position, boolean stopAtCurrent, CommitLogObserver observer) {
return observeBulk(name, position, stopAtCurrent, Long.MIN_VALUE, observer);
}
private ObserveHandle observeBulk(
@Nullable String name,
Position position,
boolean stopAtCurrent,
long minWatermark,
CommitLogObserver observer) {
validateNotStopAtCurrent(stopAtCurrent);
validatePosition(position);
AtomicReference<List<AckReplyConsumer>> unconfirmed = new AtomicReference<>(new ArrayList<>());
Object lock = new Object();
Object listLock = new Object();
AtomicLong globalOffset = new AtomicLong();
String consumerName = asConsumerName(name);
AtomicLong committedWatermark = new AtomicLong(minWatermark);
PubSubPartition partition = new PubSubPartition(consumerName);
return consume(
consumerName,
(e, w, c) -> {
final long confirmUntil;
synchronized (listLock) {
List<AckReplyConsumer> list = unconfirmed.get();
list.add(c);
confirmUntil = list.size() + globalOffset.get();
}
OffsetCommitter committer =
createBulkCommitter(
listLock, confirmUntil, globalOffset, unconfirmed, w, committedWatermark);
// observers are not required to be thread-safe, so we must
// ensure explicit synchronization here
synchronized (lock) {
try {
Offset offset = new PubSubOffset(consumerName, w.getWatermark());
if (!observer.onNext(e, asOnNextContext(committer, offset))) {
observer.onCompleted();
return false;
}
return true;
} catch (Exception ex) {
log.error("Error calling on next", ex);
committer.fail(ex);
throw new RuntimeException(ex);
}
}
},
observer::onError,
() -> observer.onRepartition(asRepartitionContext(Arrays.asList(partition))),
() -> observer.onRepartition(asRepartitionContext(Arrays.asList(partition))),
observer::onCancelled,
committedWatermark);
}
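/**
* Create an {@link OffsetCommitter} that, upon commit, acks (or nacks) all messages accumulated
* up to {@code confirmUntil} and advances the committed watermark on success.
*/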
private OffsetCommitter createBulkCommitter(
Object listLock,
long confirmUntil,
AtomicLong globalOffset,
AtomicReference<List<AckReplyConsumer>> unconfirmed,
WatermarkSupplier watermarkSupplier,
AtomicLong committedWatermark) {
return (succ, exc) -> {
// the commit may be invoked from a different
// thread, so we need to synchronize here
synchronized (listLock) {
int confirmCount = (int) (confirmUntil - globalOffset.get());
if (confirmCount > 0) {
final Consumer<AckReplyConsumer> apply;
if (succ) {
log.debug("Bulk confirming {} messages", confirmCount);
apply = AckReplyConsumer::ack;
committedWatermark.set(watermarkSupplier.getWatermark());
} else {
if (exc != null) {
log.warn("Error during processing of last bulk", exc);
} else {
log.info("Nacking last bulk by request");
}
apply = AckReplyConsumer::nack;
}
List<AckReplyConsumer> list = unconfirmed.get();
for (int i = 0; i < confirmCount; i++) {
apply.accept(list.get(i));
}
globalOffset.addAndGet(confirmCount);
unconfirmed.set(Lists.newArrayList(list.subList(confirmCount, list.size())));
}
}
};
}
@Override
public ObserveHandle observeBulkPartitions(
@Nullable String name,
Collection<Partition> partitions,
Position position,
boolean stopAtCurrent,
CommitLogObserver observer) {
name = findConsumerFromPartitions(name, partitions);
return observeBulkWithMinWatermark(name, position, stopAtCurrent, Long.MIN_VALUE, observer);
}
private ObserveHandle observeBulkWithMinWatermark(
@Nullable String name,
Position position,
boolean stopAtCurrent,
long minWatermark,
CommitLogObserver observer) {
validateNotStopAtCurrent(stopAtCurrent);
return observeBulk(name, position, false, minWatermark, observer);
}
@Override
public ObserveHandle observeBulkOffsets(
Collection<Offset> offsets, boolean stopAtCurrent, CommitLogObserver observer) {
List<String> names =
offsets.stream()
.map(o -> ((PubSubOffset) o).getConsumerName())
.distinct()
.collect(Collectors.toList());
Preconditions.checkArgument(
names.size() == 1, "Offsets should be reading same consumer, got %s", names);
String name = Iterables.getOnlyElement(names);
long watermark =
offsets.stream()
.mapToLong(o -> ((PubSubOffset) o).getWatermark())
.min()
.orElse(Long.MIN_VALUE);
return observeBulkWithMinWatermark(
asConsumerName(name), Position.NEWEST, stopAtCurrent, watermark, observer);
}
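/**
* Create a {@link Subscriber} for the given subscription, creating the subscription first when
* automatic subscription creation is enabled.
*/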
@VisibleForTesting
Subscriber newSubscriber(ProjectSubscriptionName subscription, MessageReceiver receiver) {
if (subscriptionAutoCreate) {
try (SubscriptionAdminClient client = SubscriptionAdminClient.create()) {
createSubscription(client, subscription);
} catch (Exception ex) {
throw new RuntimeException(ex);
}
}
return Subscriber.newBuilder(subscription, receiver)
.setMaxAckExtensionPeriod(Duration.ofMillis(maxAckDeadline))
.setFlowControlSettings(
FlowControlSettings.newBuilder()
.setLimitExceededBehavior(LimitExceededBehavior.Ignore)
.build())
.build();
}
WatermarkEstimator createWatermarkEstimator(long minWatermark) {
WatermarkEstimator res = watermarkFactory.create();
res.setMinWatermark(minWatermark);
return res;
}
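/**
* Create the subscription if it does not exist yet. If it already exists, verify it is bound to
* the configured topic and align its ack deadline with the configured value.
*/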
private void createSubscription(
SubscriptionAdminClient client, ProjectSubscriptionName subscription) {
try {
ProjectTopicName topicName = ProjectTopicName.of(project, topic);
client.createSubscription(
subscription, topicName, PushConfig.newBuilder().build(), this.subscriptionAckDeadline);
log.info(
"Automatically creating subscription {} for topic {} with ackDeadline {}"
+ " as requested",
subscription,
topicName,
subscriptionAckDeadline);
} catch (AlreadyExistsException ex) {
Subscription subs = client.getSubscription(subscription);
if (!subs.getTopic().equals(ProjectTopicName.of(project, topic).toString())) {
throw new IllegalStateException(
"Existed subscription "
+ subscription.getSubscription()
+ " use topic "
+ subs.getTopic()
+ " which is different than configured "
+ ProjectTopicName.of(project, topic)
+ ".");
}
if (subs.getAckDeadlineSeconds() != this.subscriptionAckDeadline) {
client.updateSubscription(
UpdateSubscriptionRequest.newBuilder()
.setUpdateMask(FieldMask.newBuilder().addPaths("ack_deadline_seconds").build())
.setSubscription(
Subscription.newBuilder()
.setAckDeadlineSeconds(this.subscriptionAckDeadline)
.setName(subscription.toString())
.build())
.build());
log.info(
"Subscription ack deadline {} for subscription {} was different than "
+ "configured: {}. Subscription updated.",
subs.getAckDeadlineSeconds(),
subscription,
this.subscriptionAckDeadline);
} else {
log.debug("Subscription {} already exists. Skipping creation.", subscription);
}
}
}
private void validatePosition(Position position) {
if (position == Position.OLDEST) {
failUnsupported();
}
}
private void validateNotStopAtCurrent(boolean stopAtCurrent) {
if (stopAtCurrent) {
failUnsupported();
}
}
private void failUnsupported() {
throw new UnsupportedOperationException("PubSub can observe only current data.");
}
private String asConsumerName(String name) {
return name != null ? name : "unnamed-consumer-" + UUID.randomUUID().toString();
}
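/**
* Start consumption of the given subscription.
*
* @param consumerName name of the subscription to consume
* @param consumer callback invoked for each parsed {@link StreamElement}
* @param errorHandler called on processing error; returning {@code true} restarts consumption
* @param onInit optional callback invoked once the subscriber is running
* @param onRestart callback invoked when consumption is restarted after an error
* @param onCancel callback invoked when the returned handle is closed
* @param committedWatermark holder of the last committed watermark, used to seed the watermark
*     estimator and reported in committed offsets
* @return handle to control the consumption
*/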
private ObserveHandle consume(
String consumerName,
PubSubConsumer consumer,
UnaryFunction<Throwable, Boolean> errorHandler,
@Nullable Runnable onInit,
Runnable onRestart,
Runnable onCancel,
AtomicLong committedWatermark) {
ProjectSubscriptionName subscription = ProjectSubscriptionName.of(project, consumerName);
AtomicReference<Subscriber> subscriber = new AtomicReference<>();
AtomicBoolean stopProcessing = new AtomicBoolean();
AtomicReference<MessageReceiver> receiver = new AtomicReference<>();
WatermarkEstimator watermarkEstimator = createWatermarkEstimator(committedWatermark.get());
receiver.set(
createMessageReceiver(
subscription,
subscriber,
stopProcessing,
consumer,
watermarkEstimator,
errorHandler,
onRestart,
receiver));
subscriber.set(newSubscriber(subscription, receiver.get()));
subscriber.get().startAsync();
if (onInit != null) {
executor()
.submit(
() -> {
subscriber.get().awaitRunning();
if (onInit != null) {
onInit.run();
}
});
}
return new ObserveHandle() {
@Override
public void close() {
log.debug("Cancelling observer {}", consumerName);
stopProcessing.set(true);
Subscriber sub = stopAsync(subscriber);
if (sub != null) {
sub.awaitTerminated();
}
onCancel.run();
}
@Override
public List<Offset> getCommittedOffsets() {
return Collections.singletonList(new PubSubOffset(consumerName, committedWatermark.get()));
}
@Override
public void resetOffsets(List<Offset> offsets) {
// nop
}
@Override
public List<Offset> getCurrentOffsets() {
return getCommittedOffsets();
}
@Override
public void waitUntilReady() {
subscriber.get().awaitRunning();
}
};
}
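/**
* Create a {@link MessageReceiver} that converts incoming PubSub messages to
* {@link StreamElement}s, updates the watermark estimator and passes the elements to the given
* consumer. Unparseable messages are acked and skipped.
*/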
private MessageReceiver createMessageReceiver(
ProjectSubscriptionName subscription,
AtomicReference<Subscriber> subscriber,
AtomicBoolean stopProcessing,
PubSubConsumer consumer,
WatermarkEstimator watermarkEstimator,
UnaryFunction<Throwable, Boolean> errorHandler,
Runnable onRestart,
AtomicReference<MessageReceiver> receiver) {
return (m, c) -> {
try {
log.trace("Received message {}", m);
if (stopProcessing.get()) {
log.debug("Returning rejected message {}", m);
c.nack();
return;
}
Optional<StreamElement> elem =
PubSubUtils.toStreamElement(
getEntityDescriptor(), m.getMessageId(), m.getData().toByteArray());
if (elem.isPresent()) {
long current = watermarkEstimator.getWatermark();
watermarkEstimator.update(elem.get());
if (watermarkEstimator.getWatermark() < current) {
log.warn(
"Element {} is moving watermark backwards of {} ms. "
+ "If this happens too often, then it is likely you need to extend "
+ "ack deadline.",
elem.get(),
current - watermarkEstimator.getWatermark());
}
if (!consumer.consume(elem.get(), watermarkEstimator, c)) {
log.info("Terminating consumption by request.");
stopAsync(subscriber);
}
} else {
log.warn("Skipping unparseable element {}", m);
c.ack();
}
} catch (Throwable ex) {
log.error("Failed to consume element {}", m, ex);
if (Boolean.TRUE.equals(errorHandler.apply(ex))) {
log.info("Restarting consumption by request.");
stopAsync(subscriber).awaitTerminated();
onRestart.run();
subscriber.set(newSubscriber(subscription, receiver.get()));
subscriber.get().startAsync().awaitRunning();
} else {
log.info("Terminating consumption after error.");
stopAsync(subscriber);
}
}
};
}
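/** Stop the current subscriber asynchronously (if any) and return it, or {@code null}. */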
Subscriber stopAsync(AtomicReference<Subscriber> subscriber) {
return Optional.ofNullable(subscriber.getAndSet(null))
.map(
s -> {
log.info("Closing subscriber {}", s);
s.stopAsync();
return s;
})
.orElse(null);
}
ExecutorService executor() {
if (executor == null) {
executor = context.getExecutorService();
}
return executor;
}
@Override
public boolean hasExternalizableOffsets() {
// all offsets represent the same read position
return false;
}
@Override
public Factory<?> asFactory() {
final PubSubAccessor accessor = this.accessor;
final Context context = this.context;
return repo -> new PubSubReader(accessor, context);
}
private String findConsumerFromPartitions(String name, Collection<Partition> partitions) {
if (name != null) {
return name;
}
Set<String> names =
partitions.stream()
.map(p -> ((PubSubPartition) p).getConsumerName())
.collect(Collectors.toSet());
Preconditions.checkArgument(
names.size() == 1,
"Please provide partitions originating from single #split partition. Got %s",
partitions);
return Iterables.getOnlyElement(names);
}
}