/**
 * Copyright (c) 2019 - 2024 StreamNative, Inc. All Rights Reserved.
 */
/**
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.streamnative.pulsar.handlers.kop.storage;

import com.google.common.annotations.VisibleForTesting;
import io.netty.buffer.ByteBuf;
import io.netty.buffer.ByteBufInputStream;
import io.netty.buffer.ByteBufOutputStream;
import io.netty.buffer.Unpooled;
import io.streamnative.pulsar.handlers.kop.SystemTopicClient;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.TreeMap;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.Executor;
import java.util.concurrent.TimeUnit;
import lombok.extern.slf4j.Slf4j;
import org.apache.kafka.common.errors.NotLeaderOrFollowerException;
import org.apache.pulsar.client.api.Message;
import org.apache.pulsar.client.api.MessageId;
import org.apache.pulsar.client.api.Producer;
import org.apache.pulsar.client.api.Reader;
import org.apache.pulsar.common.util.FutureUtil;

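/**
 * Buffers {@link ProducerStateManagerSnapshot} instances on one partition of the snapshot
 * system topic. Snapshots are appended to a compacted Pulsar topic, keyed by Kafka
 * topic-partition, and the latest snapshot per partition is cached in memory by reading the
 * topic back on demand.
 *
 * <p>A minimal usage sketch; {@code snapshotTopicName}, {@code systemTopicClient} and
 * {@code executor} are placeholders supplied by the caller, not values defined here:
 * <pre>{@code
 * ProducerStateManagerSnapshotBufferPartition buffer =
 *         new ProducerStateManagerSnapshotBufferPartition(snapshotTopicName, systemTopicClient, executor);
 * buffer.write(snapshot)
 *         .thenCompose(ignore -> buffer.readLatestSnapshot(snapshot.topicPartition()))
 *         .thenAccept(latest -> { /* use the latest snapshot *&#47; });
 * }</pre>
 */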
@Slf4j
public class ProducerStateManagerSnapshotBufferPartition {

    private static final long OP_TIMEOUT_NS = TimeUnit.SECONDS.toNanos(2);
    private static final int MAX_RETRIES = 3;
    private final Map<String, ProducerStateManagerSnapshot> latestSnapshots = new ConcurrentHashMap<>();
    private final String topic;
    private final SystemTopicClient pulsarClient;
    private final Executor executor;
    private CompletableFuture<Reader<ByteBuffer>> reader;

    private CompletableFuture<Producer<ByteBuffer>> producer;

    private CompletableFuture<Void> currentReadHandle;

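    /**
     * Lazily creates, once, the reader on the snapshot topic: it reads the compacted view
     * starting from the earliest message.
     */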
    private synchronized CompletableFuture<Reader<ByteBuffer>> ensureReaderHandle() {
        if (reader == null) {
            reader = pulsarClient.newReaderBuilder()
                    .topic(topic)
                    .startMessageId(MessageId.earliest)
                    .readCompacted(true)
                    .createAsync();
        }
        return reader;
    }

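    /**
     * Lazily creates, once, the producer used to append snapshots; batching is disabled and
     * sends block when the pending-send queue is full.
     */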
    private synchronized CompletableFuture<Producer<ByteBuffer>> ensureProducerHandle() {
        if (producer == null) {
            producer = pulsarClient.newProducerBuilder()
                    .enableBatching(false)
                    .topic(topic)
                    .blockIfQueueFull(true)
                    .createAsync();
        }
        return producer;
    }

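    /**
     * Drains every message currently available from the reader and feeds it into the local
     * snapshot cache.
     */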
    private CompletableFuture<Void> readNextMessageIfAvailable(Reader<ByteBuffer> reader) {
        return readNextMessageIfAvailableWithRetry(reader, MAX_RETRIES, 0);
    }

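    /**
     * Reads the next message, retrying up to {@code maxRetry} times when a single read does not
     * complete within {@link #OP_TIMEOUT_NS}; each successfully processed message resets the
     * retry counter before continuing the loop.
     */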
    protected CompletableFuture<Void> readNextMessageIfAvailableWithRetry(Reader<ByteBuffer> reader,
                                                                          int maxRetry,
                                                                          int retryCnt) {
        return reader
                .hasMessageAvailableAsync()
                .thenCompose(hasMessageAvailable -> {
                    if (hasMessageAvailable == null
                            || !hasMessageAvailable) {
                        return CompletableFuture.completedFuture(null);
                    } else {
                        CompletableFuture<Message<ByteBuffer>> opMessage = reader.readNextAsync();
                        opMessage.completeOnTimeout(null, OP_TIMEOUT_NS, TimeUnit.NANOSECONDS);
                        return opMessage.thenComposeAsync(msg -> {
                            if (msg == null) {
                                if (retryCnt < maxRetry - 1) {
                                    return readNextMessageIfAvailableWithRetry(reader, maxRetry, retryCnt + 1);
                                }
                                log.warn("Failed to read message after {} retries", maxRetry);
                                return CompletableFuture.completedFuture(null);
                            }
                            processMessage(msg);
                            return readNextMessageIfAvailableWithRetry(reader, maxRetry, 0);
                        }, executor);
                    }
                }).exceptionally(err -> {
                    log.error("[{}] Error reading message", topic, err);
                    return null;
                });
    }


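    /**
     * Makes sure the local cache reflects the current tail of the topic. Only one read loop runs
     * at a time: concurrent callers share the pending read, while a caller on the write path
     * ({@code beforeWrite}) waits for the pending loop and then starts a fresh one so that it
     * never acts on stale data.
     */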
    private synchronized CompletableFuture<Void> ensureLatestData(boolean beforeWrite) {
        if (currentReadHandle != null) {
            if (beforeWrite) {
                // we are inside a write path, so we must start a fresh read now,
                // otherwise the write would use stale data;
                // let the current read loop finish first
                if (log.isDebugEnabled()) {
                    log.debug("A read was already pending, starting a new one in order to ensure consistency");
                }
                return currentReadHandle
                        .thenCompose(___ -> ensureLatestData(false));
            }
            // if there is an ongoing read operation then complete it
            return currentReadHandle;
        }
        // please note that the read operation is async,
        // and it is not executed inside this synchronized block
        CompletableFuture<Reader<ByteBuffer>> readerHandle = ensureReaderHandle();
        final CompletableFuture<Void> newReadHandle =
                readerHandle.thenCompose(this::readNextMessageIfAvailable);
        currentReadHandle = newReadHandle;
        return newReadHandle.thenApply((__) -> {
            endReadLoop(newReadHandle);
            return null;
        });
    }

    private synchronized void endReadLoop(CompletableFuture<?> handle) {
        if (handle == currentReadHandle) {
            currentReadHandle = null;
        }
    }

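    /**
     * Serializes the snapshot and appends it to the system topic, keyed by topic-partition so
     * that compaction retains only the latest entry. Fails with
     * {@link NotLeaderOrFollowerException} when a snapshot with a higher offset is already
     * present for the same partition, which means topic ownership has changed.
     */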
    public CompletableFuture<Void> write(ProducerStateManagerSnapshot snapshot) {
        ByteBuffer serialized = serialize(snapshot);
        if (serialized == null) {
            // cannot serialise, skip
            return CompletableFuture.completedFuture(null);
        }
        return ensureProducerHandle().thenCompose(opProducer -> {
            // nobody can write to the topic now;
            // wait for the local cache to be up-to-date
            return ensureLatestData(true)
                    .thenCompose((___) -> {
                        ProducerStateManagerSnapshot latest = latestSnapshots.get(snapshot.topicPartition());
                        if (latest != null && latest.offset() > snapshot.offset()) {
                            log.error("Topic ownership changed for {}. Found a snapshot at {} "
                                    + "while trying to write the snapshot at {}", snapshot.topicPartition(),
                                    latest.offset(), snapshot.offset());
                            return FutureUtil.failedFuture(new NotLeaderOrFollowerException("No more owner of "
                                    + "ProducerState for topic " + topic));
                        }
                        return opProducer
                                .newMessage()
                                .key(snapshot.topicPartition()) // leverage compaction
                                .value(serialized)
                                .sendAsync()
                                .thenApply((msgId) -> {
                                    if (log.isDebugEnabled()) {
                                        log.debug("{} written {} as {}", this, snapshot, msgId);
                                    }
                                    latestSnapshots.put(snapshot.topicPartition(), snapshot);
                                    return null;
                                });
                    });
        });
    }

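    /**
     * Serializes a snapshot into the wire format read back by {@link #deserialize(ByteBuffer)}:
     * topicPartition (UTF), topicUUID (UTF, empty string when absent), offset (long), then the
     * producer entries, ongoing transactions and aborted transactions, each written as a count
     * followed by fixed-width records. Null epochs/timestamps and absent transaction offsets are
     * encoded as -1. Returns null if the snapshot cannot be serialized.
     */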
    protected static ByteBuffer serialize(ProducerStateManagerSnapshot snapshot) {

        ByteBuf byteBuf = Unpooled.buffer();
        try (DataOutputStream dataOutputStream =
                     new DataOutputStream(new ByteBufOutputStream(byteBuf));) {

            dataOutputStream.writeUTF(snapshot.topicPartition());
            if (snapshot.topicUUID() != null) {
                dataOutputStream.writeUTF(snapshot.topicUUID());
            } else {
                // topics created from Pulsar don't have the UUID
                dataOutputStream.writeUTF("");
            }
            dataOutputStream.writeLong(snapshot.offset());

            dataOutputStream.writeInt(snapshot.producers().size());
            for (Map.Entry<Long, ProducerStateEntry> entry : snapshot.producers().entrySet()) {
                ProducerStateEntry producer = entry.getValue();
                dataOutputStream.writeLong(producer.producerId);
                if (producer.producerEpoch != null) {
                    dataOutputStream.writeInt(producer.producerEpoch);
                } else {
                    dataOutputStream.writeInt(-1);
                }
                if (producer.coordinatorEpoch != null) {
                    dataOutputStream.writeInt(producer.coordinatorEpoch);
                } else {
                    dataOutputStream.writeInt(-1);
                }
                if (producer.lastTimestamp != null) {
                    dataOutputStream.writeLong(producer.lastTimestamp);
                } else {
                    dataOutputStream.writeLong(-1L);
                }
                if (producer.currentTxnFirstOffset.isPresent()) {
                    dataOutputStream.writeLong(producer.currentTxnFirstOffset.get());
                } else {
                    dataOutputStream.writeLong(-1);
                }
            }

            dataOutputStream.writeInt(snapshot.ongoingTxns().size());
            for (Map.Entry<Long, TxnMetadata> entry : snapshot.ongoingTxns().entrySet()) {
                TxnMetadata tx = entry.getValue();
                dataOutputStream.writeLong(tx.producerId);
                dataOutputStream.writeLong(tx.firstOffset);
                dataOutputStream.writeLong(tx.lastOffset);
            }

            dataOutputStream.writeInt(snapshot.abortedIndexList().size());
            for (AbortedTxn tx : snapshot.abortedIndexList()) {
                dataOutputStream.writeLong(tx.producerId());
                dataOutputStream.writeLong(tx.firstOffset());
                dataOutputStream.writeLong(tx.lastOffset());
                dataOutputStream.writeLong(tx.lastStableOffset());
            }

            dataOutputStream.flush();

            return byteBuf.nioBuffer();

        } catch (IOException err) {
            log.error("Cannot serialise snapshot {}", snapshot, err);
            return null;
        }
    }

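    /**
     * Decodes a buffer produced by {@link #serialize(ProducerStateManagerSnapshot)}; returns null
     * (after logging) when the payload cannot be decoded.
     */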
    public static ProducerStateManagerSnapshot deserialize(ByteBuffer buffer) {
        try (DataInputStream dataInputStream =
                     new DataInputStream(new ByteBufInputStream(Unpooled.wrappedBuffer(buffer)));) {
            String topicPartition = dataInputStream.readUTF();
            String topicUUID = dataInputStream.readUTF();
            if (topicUUID.isEmpty()) {
                topicUUID = null;
            }
            long offset = dataInputStream.readLong();

            int numProducers = dataInputStream.readInt();
            Map<Long, ProducerStateEntry> producers = new HashMap<>();
            for (int i = 0; i < numProducers; i++) {
                long producerId = dataInputStream.readLong();
                Integer producerEpoch = dataInputStream.readInt();
                if (producerEpoch == -1) {
                    producerEpoch = null;
                }
                Integer coordinatorEpoch = dataInputStream.readInt();
                if (coordinatorEpoch == -1) {
                    coordinatorEpoch = null;
                }
                Long lastTimestamp = dataInputStream.readLong();
                if (lastTimestamp == -1) {
                    lastTimestamp = null;
                }
                Long currentTxFirstOffset = dataInputStream.readLong();
                if (currentTxFirstOffset == -1) {
                    currentTxFirstOffset = null;
                }
                ProducerStateEntry entry = new ProducerStateEntry(producerId,
                    (producerEpoch != null) ? producerEpoch.shortValue() : null, coordinatorEpoch, lastTimestamp,
                    Optional.ofNullable(currentTxFirstOffset));
                producers.put(producerId, entry);
            }

            int numOngoingTxns = dataInputStream.readInt();
            TreeMap<Long, TxnMetadata> ongoingTxns = new TreeMap<>();
            for (int i = 0; i < numOngoingTxns; i++) {
                long producerId = dataInputStream.readLong();
                long firstOffset = dataInputStream.readLong();
                long lastOffset = dataInputStream.readLong();
                final var txnMetadata = new TxnMetadata(producerId, firstOffset, lastOffset, null);
                ongoingTxns.put(firstOffset, txnMetadata);
            }

            int numAbortedIndexList = dataInputStream.readInt();
            List<AbortedTxn> abortedTxnList = new ArrayList<>();
            for (int i = 0; i < numAbortedIndexList; i++) {
                long producerId = dataInputStream.readLong();
                long firstOffset = dataInputStream.readLong();
                long lastOffset = dataInputStream.readLong();
                long lastStableOffset = dataInputStream.readLong();
                abortedTxnList.add(new AbortedTxn(producerId, firstOffset, lastOffset, lastStableOffset));
            }

            return new ProducerStateManagerSnapshot(topicPartition, topicUUID, offset,
                    producers, ongoingTxns, abortedTxnList);

        } catch (Throwable err) {
            log.error("Cannot deserialize snapshot", err);
            return null;
        }
    }

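    /**
     * Updates the local cache from a single topic message; the snapshot is accepted only when the
     * message key matches the topic-partition stored in the payload.
     */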
    @VisibleForTesting
    protected void processMessage(Message<ByteBuffer> msg) {
        if (msg.getValue() == null) {
            return;
        }
        ProducerStateManagerSnapshot deserialize = deserialize(msg.getValue());
        if (deserialize != null) {
            String key = msg.hasKey() ? msg.getKey() : null;
            if (Objects.equals(key, deserialize.topicPartition())) {
                if (log.isDebugEnabled()) {
                    log.debug("found snapshot for {} ({}): {}",
                            deserialize.topicPartition(),
                            deserialize.topicUUID(),
                            deserialize);
                }
                latestSnapshots.put(deserialize.topicPartition(), deserialize);
            }
        }
    }

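    /**
     * Returns the latest known snapshot for the given topic-partition after refreshing the local
     * cache; the future completes with null when no snapshot has been written yet.
     */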
    public CompletableFuture<ProducerStateManagerSnapshot> readLatestSnapshot(String topicPartition) {
        if (log.isDebugEnabled()) {
            log.debug("Reading latest snapshot for {}", topicPartition);
        }
        return ensureLatestData(false).thenApply(__ -> {
            ProducerStateManagerSnapshot result = latestSnapshots.get(topicPartition);
            log.info("Latest snapshot for {} is {}", topicPartition, result);
            return result;
        });
    }

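    /**
     * @param topicName    name of the snapshot system topic partition backing this buffer
     * @param pulsarClient client used to create the reader and producer on that topic
     * @param executor     executor on which read callbacks are processed
     */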
    public ProducerStateManagerSnapshotBufferPartition(String topicName,
                                                       SystemTopicClient pulsarClient,
                                                       Executor executor) {
        this.topic = topicName;
        this.pulsarClient = pulsarClient;
        this.executor = executor;
    }


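    /**
     * Closes the reader and the producer asynchronously, if they were ever created; failures to
     * close are only logged.
     */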
    public synchronized void shutdown() {
        if (reader != null) {
            reader.whenComplete((r, e) -> {
                if (r != null) {
                    r.closeAsync().whenComplete((___, err) -> {
                        if (err != null) {
                            log.error("Error closing reader for {}", topic, err);
                        }
                    });
                }
            });
        }
        if (producer != null) {
            producer.whenComplete((r, e) -> {
                if (r != null) {
                    r.closeAsync().whenComplete((___, err) -> {
                        if (err != null) {
                            log.error("Error closing producer for {}", topic, err);
                        }
                    });
                }
            });
        }
    }

    @Override
    public String toString() {
        return "PulsarTopicProducerStateManagerSnapshotBuffer{" + topic + '}';
    }
}