/**
 * Copyright (c) 2019 - 2024 StreamNative, Inc. All Rights Reserved.
*/
/**
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.streamnative.pulsar.handlers.kop.storage;
import com.google.common.annotations.VisibleForTesting;
import io.netty.buffer.ByteBuf;
import io.netty.buffer.ByteBufInputStream;
import io.netty.buffer.ByteBufOutputStream;
import io.netty.buffer.Unpooled;
import io.streamnative.pulsar.handlers.kop.SystemTopicClient;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.TreeMap;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.Executor;
import java.util.concurrent.TimeUnit;
import lombok.extern.slf4j.Slf4j;
import org.apache.kafka.common.errors.NotLeaderOrFollowerException;
import org.apache.pulsar.client.api.Message;
import org.apache.pulsar.client.api.MessageId;
import org.apache.pulsar.client.api.Producer;
import org.apache.pulsar.client.api.Reader;
import org.apache.pulsar.common.util.FutureUtil;
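/**
 * Buffers {@link ProducerStateManagerSnapshot}s on a single compacted Pulsar system topic:
 * snapshots are written keyed by topic-partition, and the latest snapshot per key is
 * cached locally after replaying the topic.
 */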
@Slf4j
public class ProducerStateManagerSnapshotBufferPartition {
private static final long OP_TIMEOUT_NS = TimeUnit.SECONDS.toNanos(2);
private static final int MAX_RETRIES = 3;
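    // latest snapshot per Kafka topic-partition, rebuilt by replaying the compacted system topic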
    private final Map<String, ProducerStateManagerSnapshot> latestSnapshots = new ConcurrentHashMap<>();
private final String topic;
private final SystemTopicClient pulsarClient;
private final Executor executor;
    private CompletableFuture<Reader<ByteBuffer>> reader;
    private CompletableFuture<Producer<ByteBuffer>> producer;
    private CompletableFuture<Void> currentReadHandle;
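    /**
     * Lazily create the compacted-topic reader used to rebuild the local cache.
     * Reading from {@link MessageId#earliest} with {@code readCompacted(true)} yields
     * only the latest message per key once compaction has run.
     */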
    private synchronized CompletableFuture<Reader<ByteBuffer>> ensureReaderHandle() {
if (reader == null) {
reader = pulsarClient.newReaderBuilder()
.topic(topic)
.startMessageId(MessageId.earliest)
.readCompacted(true)
.createAsync();
}
return reader;
}
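    /**
     * Lazily create the producer used to append snapshots to the system topic.
     * Batching is disabled so every snapshot is an individually keyed message,
     * which topic compaction can later deduplicate per key.
     */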
    private synchronized CompletableFuture<Producer<ByteBuffer>> ensureProducerHandle() {
if (producer == null) {
producer = pulsarClient.newProducerBuilder()
.enableBatching(false)
.topic(topic)
.blockIfQueueFull(true)
.createAsync();
}
return producer;
}
    private CompletableFuture<Void> readNextMessageIfAvailable(Reader<ByteBuffer> reader) {
return readNextMessageIfAvailableWithRetry(reader, MAX_RETRIES, 0);
}
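    /**
     * Drain all currently available messages from the reader, retrying a timed-out
     * read up to {@code maxRetry} attempts in total. The retry counter resets to zero
     * after every successfully processed message.
     */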
    protected CompletableFuture<Void> readNextMessageIfAvailableWithRetry(Reader<ByteBuffer> reader,
                                                                          int maxRetry,
                                                                          int retryCnt) {
return reader
.hasMessageAvailableAsync()
.thenCompose(hasMessageAvailable -> {
if (hasMessageAvailable == null
|| !hasMessageAvailable) {
return CompletableFuture.completedFuture(null);
} else {
                        CompletableFuture<Message<ByteBuffer>> opMessage = reader.readNextAsync();
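                        // a read that does not complete within OP_TIMEOUT_NS resolves to null,
                        // which the retry branch below treats as "try again"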
opMessage.completeOnTimeout(null, OP_TIMEOUT_NS, TimeUnit.NANOSECONDS);
return opMessage.thenComposeAsync(msg -> {
if (msg == null) {
if (retryCnt < maxRetry - 1) {
return readNextMessageIfAvailableWithRetry(reader, maxRetry, retryCnt + 1);
}
log.warn("Failed to read message after {} retries", maxRetry);
return CompletableFuture.completedFuture(null);
}
processMessage(msg);
return readNextMessageIfAvailableWithRetry(reader, maxRetry, 0);
}, executor);
}
}).exceptionally(err -> {
log.error("[{}] Error reading message", topic, err);
return null;
});
}
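    /**
     * Make sure the local snapshot cache reflects everything written to the topic.
     * When {@code beforeWrite} is true and a read is already in flight, a second
     * read is chained after it, so a writer never acts on stale data.
     */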
    private synchronized CompletableFuture<Void> ensureLatestData(boolean beforeWrite) {
if (currentReadHandle != null) {
if (beforeWrite) {
                // we are about to write, so we must chain a fresh read
                // after the in-flight one completes; otherwise the write
                // could act on stale data
if (log.isDebugEnabled()) {
log.debug("A read was already pending, starting a new one in order to ensure consistency");
}
return currentReadHandle
.thenCompose(___ -> ensureLatestData(false));
}
            // otherwise just piggyback on the in-flight read operation
return currentReadHandle;
}
        // please note that the read operation is async,
        // and it is not executed inside this synchronized block
        CompletableFuture<Reader<ByteBuffer>> readerHandle = ensureReaderHandle();
        final CompletableFuture<Void> newReadHandle =
readerHandle.thenCompose(this::readNextMessageIfAvailable);
currentReadHandle = newReadHandle;
return newReadHandle.thenApply((__) -> {
endReadLoop(newReadHandle);
return null;
});
}
    private synchronized void endReadLoop(CompletableFuture<?> handle) {
if (handle == currentReadHandle) {
currentReadHandle = null;
}
}
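    /**
     * Persist a snapshot to the system topic, keyed by topic-partition so that
     * compaction retains only the most recent one. Fails with
     * {@link NotLeaderOrFollowerException} if a newer snapshot is already stored,
     * which indicates that ownership of the partition has moved to another broker.
     */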
    public CompletableFuture<Void> write(ProducerStateManagerSnapshot snapshot) {
ByteBuffer serialized = serialize(snapshot);
if (serialized == null) {
// cannot serialise, skip
return CompletableFuture.completedFuture(null);
}
return ensureProducerHandle().thenCompose(opProducer -> {
            // refresh the local cache before writing, so the
            // offset check below sees the latest stored snapshot
return ensureLatestData(true)
.thenCompose((___) -> {
ProducerStateManagerSnapshot latest = latestSnapshots.get(snapshot.topicPartition());
if (latest != null && latest.offset() > snapshot.offset()) {
log.error("Topic ownership changed for {}. Found a snapshot at {} "
+ "while trying to write the snapshot at {}", snapshot.topicPartition(),
latest.offset(), snapshot.offset());
return FutureUtil.failedFuture(new NotLeaderOrFollowerException("No more owner of "
+ "ProducerState for topic " + topic));
}
return opProducer
.newMessage()
.key(snapshot.topicPartition()) // leverage compaction
.value(serialized)
.sendAsync()
.thenApply((msgId) -> {
if (log.isDebugEnabled()) {
log.debug("{} written {} as {}", this, snapshot, msgId);
}
latestSnapshots.put(snapshot.topicPartition(), snapshot);
return null;
});
});
});
}
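    /**
     * Serialize a snapshot into the layout read back by {@link #deserialize}:
     * topic partition and UUID (UTF, empty string for a missing UUID), offset (long),
     * then the producer entries, ongoing transactions, and aborted-transaction index,
     * each prefixed by its element count. Nullable fields are encoded as -1.
     * Returns null if serialization fails.
     */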
protected static ByteBuffer serialize(ProducerStateManagerSnapshot snapshot) {
ByteBuf byteBuf = Unpooled.buffer();
        try (DataOutputStream dataOutputStream =
                     new DataOutputStream(new ByteBufOutputStream(byteBuf))) {
dataOutputStream.writeUTF(snapshot.topicPartition());
if (snapshot.topicUUID() != null) {
dataOutputStream.writeUTF(snapshot.topicUUID());
} else {
// topics created from Pulsar don't have the UUID
dataOutputStream.writeUTF("");
}
dataOutputStream.writeLong(snapshot.offset());
dataOutputStream.writeInt(snapshot.producers().size());
            for (Map.Entry<Long, ProducerStateEntry> entry : snapshot.producers().entrySet()) {
ProducerStateEntry producer = entry.getValue();
dataOutputStream.writeLong(producer.producerId);
if (producer.producerEpoch != null) {
dataOutputStream.writeInt(producer.producerEpoch);
} else {
dataOutputStream.writeInt(-1);
}
if (producer.coordinatorEpoch != null) {
dataOutputStream.writeInt(producer.coordinatorEpoch);
} else {
dataOutputStream.writeInt(-1);
}
if (producer.lastTimestamp != null) {
dataOutputStream.writeLong(producer.lastTimestamp);
} else {
dataOutputStream.writeLong(-1L);
}
if (producer.currentTxnFirstOffset.isPresent()) {
dataOutputStream.writeLong(producer.currentTxnFirstOffset.get());
} else {
dataOutputStream.writeLong(-1);
}
}
dataOutputStream.writeInt(snapshot.ongoingTxns().size());
            for (Map.Entry<Long, TxnMetadata> entry : snapshot.ongoingTxns().entrySet()) {
TxnMetadata tx = entry.getValue();
dataOutputStream.writeLong(tx.producerId);
dataOutputStream.writeLong(tx.firstOffset);
dataOutputStream.writeLong(tx.lastOffset);
}
dataOutputStream.writeInt(snapshot.abortedIndexList().size());
for (AbortedTxn tx : snapshot.abortedIndexList()) {
dataOutputStream.writeLong(tx.producerId());
dataOutputStream.writeLong(tx.firstOffset());
dataOutputStream.writeLong(tx.lastOffset());
dataOutputStream.writeLong(tx.lastStableOffset());
}
dataOutputStream.flush();
return byteBuf.nioBuffer();
} catch (IOException err) {
log.error("Cannot serialise snapshot {}", snapshot, err);
return null;
}
}
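    /**
     * Decode a snapshot written by {@link #serialize}, mapping the -1 sentinels
     * back to null/empty values. Returns null if the payload cannot be decoded.
     */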
public static ProducerStateManagerSnapshot deserialize(ByteBuffer buffer) {
        try (DataInputStream dataInputStream =
                     new DataInputStream(new ByteBufInputStream(Unpooled.wrappedBuffer(buffer)))) {
String topicPartition = dataInputStream.readUTF();
String topicUUID = dataInputStream.readUTF();
if (topicUUID.isEmpty()) {
topicUUID = null;
}
long offset = dataInputStream.readLong();
int numProducers = dataInputStream.readInt();
            Map<Long, ProducerStateEntry> producers = new HashMap<>();
for (int i = 0; i < numProducers; i++) {
long producerId = dataInputStream.readLong();
Integer producerEpoch = dataInputStream.readInt();
if (producerEpoch == -1) {
producerEpoch = null;
}
Integer coordinatorEpoch = dataInputStream.readInt();
if (coordinatorEpoch == -1) {
coordinatorEpoch = null;
}
Long lastTimestamp = dataInputStream.readLong();
if (lastTimestamp == -1) {
lastTimestamp = null;
}
Long currentTxFirstOffset = dataInputStream.readLong();
if (currentTxFirstOffset == -1) {
currentTxFirstOffset = null;
}
ProducerStateEntry entry = new ProducerStateEntry(producerId,
(producerEpoch != null) ? producerEpoch.shortValue() : null, coordinatorEpoch, lastTimestamp,
Optional.ofNullable(currentTxFirstOffset));
producers.put(producerId, entry);
}
int numOngoingTxns = dataInputStream.readInt();
            TreeMap<Long, TxnMetadata> ongoingTxns = new TreeMap<>();
for (int i = 0; i < numOngoingTxns; i++) {
long producerId = dataInputStream.readLong();
long firstOffset = dataInputStream.readLong();
long lastOffset = dataInputStream.readLong();
final var txnMetadata = new TxnMetadata(producerId, firstOffset, lastOffset, null);
ongoingTxns.put(firstOffset, txnMetadata);
}
int numAbortedIndexList = dataInputStream.readInt();
            List<AbortedTxn> abortedTxnList = new ArrayList<>();
for (int i = 0; i < numAbortedIndexList; i++) {
long producerId = dataInputStream.readLong();
long firstOffset = dataInputStream.readLong();
long lastOffset = dataInputStream.readLong();
long lastStableOffset = dataInputStream.readLong();
abortedTxnList.add(new AbortedTxn(producerId, firstOffset, lastOffset, lastStableOffset));
}
return new ProducerStateManagerSnapshot(topicPartition, topicUUID, offset,
producers, ongoingTxns, abortedTxnList);
} catch (Throwable err) {
log.error("Cannot deserialize snapshot", err);
return null;
}
}
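    /**
     * Update the local cache from a single snapshot message. Messages whose key
     * does not match the serialized topic partition are ignored.
     */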
@VisibleForTesting
    protected void processMessage(Message<ByteBuffer> msg) {
if (msg.getValue() == null) {
return;
}
        ProducerStateManagerSnapshot snapshot = deserialize(msg.getValue());
        if (snapshot != null) {
            String key = msg.hasKey() ? msg.getKey() : null;
            if (Objects.equals(key, snapshot.topicPartition())) {
                if (log.isDebugEnabled()) {
                    log.debug("found snapshot for {} ({}): {}",
                            snapshot.topicPartition(),
                            snapshot.topicUUID(),
                            snapshot);
                }
                latestSnapshots.put(snapshot.topicPartition(), snapshot);
            }
        }
}
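    /**
     * Return the latest known snapshot for the given topic partition, first
     * draining any messages not yet applied to the local cache.
     */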
    public CompletableFuture<ProducerStateManagerSnapshot> readLatestSnapshot(String topicPartition) {
if (log.isDebugEnabled()) {
log.debug("Reading latest snapshot for {}", topicPartition);
}
return ensureLatestData(false).thenApply(__ -> {
ProducerStateManagerSnapshot result = latestSnapshots.get(topicPartition);
log.info("Latest snapshot for {} is {}", topicPartition, result);
return result;
});
}
public ProducerStateManagerSnapshotBufferPartition(String topicName,
SystemTopicClient pulsarClient,
Executor executor) {
this.topic = topicName;
this.pulsarClient = pulsarClient;
this.executor = executor;
}
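    /**
     * Asynchronously close the reader and producer, if they were ever created.
     * Close failures are only logged.
     */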
public synchronized void shutdown() {
if (reader != null) {
reader.whenComplete((r, e) -> {
if (r != null) {
r.closeAsync().whenComplete((___, err) -> {
if (err != null) {
log.error("Error closing reader for {}", topic, err);
}
});
}
});
}
if (producer != null) {
producer.whenComplete((r, e) -> {
if (r != null) {
r.closeAsync().whenComplete((___, err) -> {
if (err != null) {
log.error("Error closing producer for {}", topic, err);
}
});
}
});
}
}
@Override
public String toString() {
return "PulsarTopicProducerStateManagerSnapshotBuffer{" + topic + '}';
}
}