org.apache.pulsar.compaction.TwoPhaseCompactor

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.pulsar.compaction;

import com.google.common.collect.ImmutableMap;
import io.netty.buffer.ByteBuf;
import java.io.IOException;
import java.time.Duration;
import java.util.HashMap;
import java.util.Map;
import java.util.Optional;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.Semaphore;
import java.util.concurrent.TimeUnit;
import org.apache.bookkeeper.client.BKException;
import org.apache.bookkeeper.client.BookKeeper;
import org.apache.bookkeeper.client.LedgerHandle;
import org.apache.bookkeeper.mledger.impl.LedgerMetadataUtils;
import org.apache.commons.lang3.tuple.ImmutableTriple;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.pulsar.broker.ServiceConfiguration;
import org.apache.pulsar.client.api.MessageId;
import org.apache.pulsar.client.api.PulsarClient;
import org.apache.pulsar.client.api.RawMessage;
import org.apache.pulsar.client.api.RawReader;
import org.apache.pulsar.client.impl.MessageIdImpl;
import org.apache.pulsar.client.impl.RawBatchConverter;
import org.apache.pulsar.common.api.proto.MessageMetadata;
import org.apache.pulsar.common.protocol.Commands;
import org.apache.pulsar.common.util.FutureUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Compaction will go through the topic in two passes. The first pass
 * selects latest offset for each key in the topic. Then the second pass
 * writes these values to a ledger.
 *
 * <p>The two passes are required to avoid holding the payloads of each of
 * the latest values in memory, as the payload can be many orders of
 * magnitude larger than a message id.
 */
public class TwoPhaseCompactor extends Compactor {
    private static final Logger log = LoggerFactory.getLogger(TwoPhaseCompactor.class);
    private static final int MAX_OUTSTANDING = 500;
    private static final String COMPACTED_TOPIC_LEDGER_PROPERTY = "CompactedTopicLedger";
    private final Duration phaseOneLoopReadTimeout;

    public TwoPhaseCompactor(ServiceConfiguration conf,
                             PulsarClient pulsar,
                             BookKeeper bk,
                             ScheduledExecutorService scheduler) {
        super(conf, pulsar, bk, scheduler);
        phaseOneLoopReadTimeout = Duration.ofSeconds(
                conf.getBrokerServiceCompactionPhaseOneLoopTimeInSeconds());
    }

    @Override
    protected CompletableFuture<Long> doCompaction(RawReader reader, BookKeeper bk) {
        return reader.hasMessageAvailableAsync()
                .thenCompose(available -> {
                    if (available) {
                        return phaseOne(reader).thenCompose(
                                (r) -> phaseTwo(reader, r.from, r.to, r.lastReadId, r.latestForKey, bk));
                    } else {
                        log.info("Skip compaction of the empty topic {}", reader.getTopic());
                        return CompletableFuture.completedFuture(-1L);
                    }
                });
    }
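    /**
     * Phase one: read the topic from the beginning up to its current last entry,
     * recording for each key the message id of its latest value. Only message ids
     * are held in memory; payloads are re-read in phase two.
     */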
    private CompletableFuture<PhaseOneResult> phaseOne(RawReader reader) {
        Map<String, MessageId> latestForKey = new HashMap<>();
        CompletableFuture<PhaseOneResult> loopPromise = new CompletableFuture<>();

        reader.getLastMessageIdAsync()
                .thenAccept(lastMessageId -> {
                    log.info("Commencing phase one of compaction for {}, reading to {}",
                            reader.getTopic(), lastMessageId);
                    // Each entry is processed as a whole, discard the batchIndex part deliberately.
                    MessageIdImpl lastImpl = (MessageIdImpl) lastMessageId;
                    MessageIdImpl lastEntryMessageId = new MessageIdImpl(lastImpl.getLedgerId(),
                            lastImpl.getEntryId(), lastImpl.getPartitionIndex());
                    phaseOneLoop(reader, Optional.empty(), Optional.empty(), lastEntryMessageId,
                            latestForKey, loopPromise);
                }).exceptionally(ex -> {
                    loopPromise.completeExceptionally(ex);
                    return null;
                });

        return loopPromise;
    }

    private void phaseOneLoop(RawReader reader,
                              Optional<MessageId> firstMessageId,
                              Optional<MessageId> toMessageId,
                              MessageId lastMessageId,
                              Map<String, MessageId> latestForKey,
                              CompletableFuture<PhaseOneResult> loopPromise) {
        if (loopPromise.isDone()) {
            return;
        }
        CompletableFuture<RawMessage> future = reader.readNextAsync();
        FutureUtil.addTimeoutHandling(future,
                phaseOneLoopReadTimeout, scheduler,
                () -> FutureUtil.createTimeoutException("Timeout", getClass(), "phaseOneLoop(...)"));

        future.thenAcceptAsync(m -> {
            try {
                MessageId id = m.getMessageId();
                boolean deletedMessage = false;
                boolean replaceMessage = false;
                mxBean.addCompactionReadOp(reader.getTopic(), m.getHeadersAndPayload().readableBytes());
                MessageMetadata metadata = Commands.parseMessageMetadata(m.getHeadersAndPayload());
                if (RawBatchConverter.isReadableBatch(metadata)) {
                    try {
                        int numMessagesInBatch = metadata.getNumMessagesInBatch();
                        int deleteCnt = 0;
                        for (ImmutableTriple<MessageId, String, Integer> e : RawBatchConverter
                                .extractIdsAndKeysAndSize(m, false)) {
                            if (e != null) {
                                if (e.getRight() > 0) {
                                    MessageId old = latestForKey.put(e.getMiddle(), e.getLeft());
                                    if (old != null) {
                                        mxBean.addCompactionRemovedEvent(reader.getTopic());
                                    }
                                } else {
                                    latestForKey.remove(e.getMiddle());
                                    deleteCnt++;
                                    mxBean.addCompactionRemovedEvent(reader.getTopic());
                                }
                            }
                        }
                        if (deleteCnt == numMessagesInBatch) {
                            deletedMessage = true;
                        }
                    } catch (IOException ioe) {
                        log.info("Error decoding batch for message {}. Whole batch will be included in output",
                                id, ioe);
                    }
                } else {
                    Pair<String, Integer> keyAndSize = extractKeyAndSize(m);
                    if (keyAndSize != null) {
                        if (keyAndSize.getRight() > 0) {
                            MessageId old = latestForKey.put(keyAndSize.getLeft(), id);
                            replaceMessage = old != null;
                        } else {
                            deletedMessage = true;
                            latestForKey.remove(keyAndSize.getLeft());
                        }
                    }
                    if (replaceMessage || deletedMessage) {
                        mxBean.addCompactionRemovedEvent(reader.getTopic());
                    }
                }
                MessageId first = firstMessageId.orElse(deletedMessage ? null : id);
                MessageId to = deletedMessage ? toMessageId.orElse(null) : id;
                if (id.compareTo(lastMessageId) == 0) {
                    loopPromise.complete(new PhaseOneResult(first == null ? id : first,
                            to == null ? id : to, lastMessageId, latestForKey));
                } else {
                    phaseOneLoop(reader, Optional.ofNullable(first), Optional.ofNullable(to),
                            lastMessageId, latestForKey, loopPromise);
                }
            } finally {
                m.close();
            }
        }, scheduler).exceptionally(ex -> {
            loopPromise.completeExceptionally(ex);
            return null;
        });
    }
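    /**
     * Phase two: seek back to the first surviving message and copy the latest
     * value of each key into a newly created BookKeeper ledger. Writes are
     * pipelined, bounded by a semaphore of MAX_OUTSTANDING permits; on success
     * the reader cumulatively acknowledges up to lastReadId, attaching the
     * compacted ledger id as an ack property.
     */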
    private CompletableFuture<Long> phaseTwo(RawReader reader, MessageId from, MessageId to,
            MessageId lastReadId, Map<String, MessageId> latestForKey, BookKeeper bk) {
        Map<String, byte[]> metadata =
                LedgerMetadataUtils.buildMetadataForCompactedLedger(reader.getTopic(), to.toByteArray());
        return createLedger(bk, metadata).thenCompose((ledger) -> {
            log.info("Commencing phase two of compaction for {}, from {} to {}, compacting {} keys to ledger {}",
                    reader.getTopic(), from, to, latestForKey.size(), ledger.getId());
            return phaseTwoSeekThenLoop(reader, from, to, lastReadId, latestForKey, bk, ledger);
        });
    }

    private CompletableFuture<Long> phaseTwoSeekThenLoop(RawReader reader, MessageId from, MessageId to,
            MessageId lastReadId, Map<String, MessageId> latestForKey, BookKeeper bk, LedgerHandle ledger) {
        CompletableFuture<Long> promise = new CompletableFuture<>();

        reader.seekAsync(from).thenCompose((v) -> {
            Semaphore outstanding = new Semaphore(MAX_OUTSTANDING);
            CompletableFuture<Void> loopPromise = new CompletableFuture<>();
            phaseTwoLoop(reader, to, latestForKey, ledger, outstanding, loopPromise, MessageId.earliest);
            return loopPromise;
        }).thenCompose((v) -> closeLedger(ledger))
                .thenCompose((v) -> reader.acknowledgeCumulativeAsync(lastReadId,
                        ImmutableMap.of(COMPACTED_TOPIC_LEDGER_PROPERTY, ledger.getId())))
                .whenComplete((res, exception) -> {
                    if (exception != null) {
                        deleteLedger(bk, ledger).whenComplete((res2, exception2) -> {
                            if (exception2 != null) {
                                log.warn("Cleanup of ledger {} failed", ledger, exception2);
                            }
                            // complete with original exception
                            promise.completeExceptionally(exception);
                        });
                    } else {
                        promise.complete(ledger.getId());
                    }
                });

        return promise;
    }

    private void phaseTwoLoop(RawReader reader, MessageId to, Map<String, MessageId> latestForKey,
            LedgerHandle lh, Semaphore outstanding, CompletableFuture<Void> promise,
            MessageId lastCompactedMessageId) {
        if (promise.isDone()) {
            return;
        }
        reader.readNextAsync().thenAcceptAsync(m -> {
            if (promise.isDone()) {
                m.close();
                return;
            }

            if (m.getMessageId().compareTo(lastCompactedMessageId) <= 0) {
                phaseTwoLoop(reader, to, latestForKey, lh, outstanding, promise, lastCompactedMessageId);
                return;
            }

            try {
                MessageId id = m.getMessageId();
                Optional<RawMessage> messageToAdd = Optional.empty();
                mxBean.addCompactionReadOp(reader.getTopic(), m.getHeadersAndPayload().readableBytes());
                if (RawBatchConverter.isReadableBatch(m)) {
                    try {
                        messageToAdd = RawBatchConverter.rebatchMessage(
                                m, (key, subid) -> subid.equals(latestForKey.get(key)));
                    } catch (IOException ioe) {
                        log.info("Error decoding batch for message {}. Whole batch will be included in output",
                                id, ioe);
                        messageToAdd = Optional.of(m);
                    }
                } else {
                    Pair<String, Integer> keyAndSize = extractKeyAndSize(m);
                    MessageId msg;
                    if (keyAndSize == null) {
                        // pass through messages without a key
                        messageToAdd = Optional.of(m);
                    } else if ((msg = latestForKey.get(keyAndSize.getLeft())) != null
                            && msg.equals(id)) { // consider the message only if present in the latestForKey map
                        if (keyAndSize.getRight() <= 0) {
                            promise.completeExceptionally(new IllegalArgumentException(
                                    "Compaction phase found empty record from sorted key-map"));
                        }
                        messageToAdd = Optional.of(m);
                    }
                }

                if (messageToAdd.isPresent()) {
                    RawMessage message = messageToAdd.get();
                    try {
                        outstanding.acquire();
                        CompletableFuture<Void> addFuture = addToCompactedLedger(lh, message, reader.getTopic())
                                .whenComplete((res, exception2) -> {
                                    outstanding.release();
                                    if (exception2 != null) {
                                        promise.completeExceptionally(exception2);
                                    }
                                });
                        if (to.equals(id)) {
                            // make sure all inflight writes have finished
                            outstanding.acquire(MAX_OUTSTANDING);
                            addFuture.whenComplete((res, exception2) -> {
                                if (exception2 == null) {
                                    promise.complete(null);
                                }
                            });
                            return;
                        }
                    } catch (InterruptedException ie) {
                        Thread.currentThread().interrupt();
                        promise.completeExceptionally(ie);
                    } finally {
                        if (message != m) {
                            message.close();
                        }
                    }
                } else if (to.equals(id)) {
                    // Reached the last message id, but phase one recorded it as a deleted
                    // message, so it is not present in latestForKey. Complete the compaction.
                    try {
                        // make sure all inflight writes have finished
                        outstanding.acquire(MAX_OUTSTANDING);
                        promise.complete(null);
                    } catch (InterruptedException e) {
                        Thread.currentThread().interrupt();
                        promise.completeExceptionally(e);
                    }
                    return;
                }
                phaseTwoLoop(reader, to, latestForKey, lh, outstanding, promise, m.getMessageId());
            } finally {
                m.close();
            }
        }, scheduler).exceptionally(ex -> {
            promise.completeExceptionally(ex);
            return null;
        });
    }
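    // The helpers below adapt BookKeeper's callback-style async API (asyncCreateLedger,
    // asyncDeleteLedger, asyncClose, asyncAddEntry) to CompletableFuture.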
    private CompletableFuture<LedgerHandle> createLedger(BookKeeper bk, Map<String, byte[]> metadata) {
        CompletableFuture<LedgerHandle> bkf = new CompletableFuture<>();
        try {
            bk.asyncCreateLedger(conf.getManagedLedgerDefaultEnsembleSize(),
                    conf.getManagedLedgerDefaultWriteQuorum(),
                    conf.getManagedLedgerDefaultAckQuorum(),
                    Compactor.COMPACTED_TOPIC_LEDGER_DIGEST_TYPE,
                    Compactor.COMPACTED_TOPIC_LEDGER_PASSWORD,
                    (rc, ledger, ctx) -> {
                        if (rc != BKException.Code.OK) {
                            bkf.completeExceptionally(BKException.create(rc));
                        } else {
                            bkf.complete(ledger);
                        }
                    }, null, metadata);
        } catch (Throwable t) {
            log.error("Encountered unexpected error when creating compaction ledger", t);
            return FutureUtil.failedFuture(t);
        }
        return bkf;
    }

    private CompletableFuture<Void> deleteLedger(BookKeeper bk, LedgerHandle lh) {
        CompletableFuture<Void> bkf = new CompletableFuture<>();
        try {
            bk.asyncDeleteLedger(lh.getId(),
                    (rc, ctx) -> {
                        if (rc != BKException.Code.OK) {
                            bkf.completeExceptionally(BKException.create(rc));
                        } else {
                            bkf.complete(null);
                        }
                    }, null);
        } catch (Throwable t) {
            return FutureUtil.failedFuture(t);
        }
        return bkf;
    }

    private CompletableFuture<Void> closeLedger(LedgerHandle lh) {
        CompletableFuture<Void> bkf = new CompletableFuture<>();
        try {
            lh.asyncClose((rc, ledger, ctx) -> {
                if (rc != BKException.Code.OK) {
                    bkf.completeExceptionally(BKException.create(rc));
                } else {
                    bkf.complete(null);
                }
            }, null);
        } catch (Throwable t) {
            return FutureUtil.failedFuture(t);
        }
        return bkf;
    }

    private CompletableFuture<Void> addToCompactedLedger(LedgerHandle lh, RawMessage m, String topic) {
        CompletableFuture<Void> bkf = new CompletableFuture<>();
        ByteBuf serialized = m.serialize();
        try {
            mxBean.addCompactionWriteOp(topic, m.getHeadersAndPayload().readableBytes());
            long start = System.nanoTime();
            lh.asyncAddEntry(serialized,
                    (rc, ledger, eid, ctx) -> {
                        mxBean.addCompactionLatencyOp(topic, System.nanoTime() - start, TimeUnit.NANOSECONDS);
                        if (rc != BKException.Code.OK) {
                            bkf.completeExceptionally(BKException.create(rc));
                        } else {
                            bkf.complete(null);
                        }
                    }, null);
        } catch (Throwable t) {
            return FutureUtil.failedFuture(t);
        }
        return bkf;
    }
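    /**
     * Extracts the partition key and payload size of a single (non-batch) message,
     * or returns null when the message has no key. The uncompressed size is used
     * when available; a size of zero marks the message as a deletion (tombstone).
     */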
    private static Pair<String, Integer> extractKeyAndSize(RawMessage m) {
        ByteBuf headersAndPayload = m.getHeadersAndPayload();
        MessageMetadata msgMetadata = Commands.parseMessageMetadata(headersAndPayload);
        if (msgMetadata.hasPartitionKey()) {
            int size = headersAndPayload.readableBytes();
            if (msgMetadata.hasUncompressedSize()) {
                size = msgMetadata.getUncompressedSize();
            }
            return Pair.of(msgMetadata.getPartitionKey(), size);
        } else {
            return null;
        }
    }

    private static class PhaseOneResult {
        final MessageId from;
        final MessageId to; // last undeleted messageId
        final MessageId lastReadId; // last read messageId
        final Map<String, MessageId> latestForKey;

        PhaseOneResult(MessageId from, MessageId to, MessageId lastReadId,
                Map<String, MessageId> latestForKey) {
            this.from = from;
            this.to = to;
            this.lastReadId = lastReadId;
            this.latestForKey = latestForKey;
        }
    }

    public long getPhaseOneLoopReadTimeoutInSeconds() {
        return phaseOneLoopReadTimeout.getSeconds();
    }
}
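For orientation, here is a minimal sketch of driving this class directly. The compact(topic) entry point is inherited from Compactor; the client, BookKeeper, and scheduler wiring below is hypothetical (in the broker these objects are provided by the Pulsar service), and the service URLs are placeholders.

import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import org.apache.bookkeeper.client.BookKeeper;
import org.apache.pulsar.broker.ServiceConfiguration;
import org.apache.pulsar.client.api.PulsarClient;
import org.apache.pulsar.compaction.TwoPhaseCompactor;

public class CompactionExample {
    public static void main(String[] args) throws Exception {
        ServiceConfiguration conf = new ServiceConfiguration();
        PulsarClient client = PulsarClient.builder()
                .serviceUrl("pulsar://localhost:6650") // placeholder broker URL
                .build();
        BookKeeper bk = new BookKeeper("localhost:2181"); // placeholder ZooKeeper quorum
        ScheduledExecutorService scheduler = Executors.newSingleThreadScheduledExecutor();

        TwoPhaseCompactor compactor = new TwoPhaseCompactor(conf, client, bk, scheduler);
        // compact() (from the Compactor base class) creates a RawReader for the
        // topic, runs phase one then phase two, and completes with the id of the
        // compacted ledger (or -1 if the topic was empty).
        long ledgerId = compactor.compact("persistent://public/default/my-topic").join();
        System.out.println("Compacted topic to ledger " + ledgerId);

        scheduler.shutdown();
        bk.close();
        client.close();
    }
}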