/**
 * Copyright (c) 2019 - 2024 StreamNative, Inc.. All Rights Reserved.
 */
/**
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.streamnative.pulsar.handlers.kop.format;

import static io.streamnative.pulsar.handlers.kop.utils.KopLogValidator.validateKey;
import static java.nio.charset.StandardCharsets.UTF_8;

import com.google.common.collect.Lists;
import io.netty.buffer.ByteBuf;
import io.netty.util.concurrent.FastThreadLocal;
import io.streamnative.pulsar.handlers.kop.coordinator.group.TxnRecordsMetadata;
import io.streamnative.pulsar.handlers.kop.storage.PartitionLog;
import java.nio.ByteBuffer;
import java.util.Base64;
import java.util.Collections;
import java.util.List;
import lombok.extern.slf4j.Slf4j;
import org.apache.bookkeeper.common.util.MathUtils;
import org.apache.bookkeeper.mledger.Entry;
import org.apache.kafka.common.record.ControlRecordType;
import org.apache.kafka.common.record.MemoryRecords;
import org.apache.kafka.common.record.MutableRecordBatch;
import org.apache.kafka.common.record.Record;
import org.apache.kafka.common.record.RecordBatch;
import org.apache.pulsar.broker.service.plugin.EntryFilter;
import org.apache.pulsar.client.api.Message;
import org.apache.pulsar.client.api.Schema;
import org.apache.pulsar.client.impl.MessageImpl;
import org.apache.pulsar.common.allocator.PulsarByteBufAllocator;
import org.apache.pulsar.common.api.proto.CompressionType;
import org.apache.pulsar.common.api.proto.MarkerType;
import org.apache.pulsar.common.api.proto.MessageMetadata;
import org.apache.pulsar.common.compression.CompressionCodecProvider;
import org.apache.pulsar.common.protocol.Commands;
import org.apache.pulsar.common.protocol.Commands.ChecksumType;


/**
 * The entry formatter that uses Pulsar's format.
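 *
 * <p>{@link #encode(EncodeRequest)} converts a batch of Kafka records into a single batched Pulsar entry
 * (batch metadata plus compressed payload), while decoding is delegated to {@link AbstractEntryFormatter}.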
 */
@Slf4j
public class PulsarEntryFormatter extends AbstractEntryFormatter {

    private static final FastThreadLocal<MessageMetadata> LOCAL_MESSAGE_METADATA = new FastThreadLocal<>() {
        @Override
        protected MessageMetadata initialValue() {
            return new MessageMetadata();
        }
    };
    private static final ByteBuffer EMPTY_CONTENT = ByteBuffer.allocate(0);

    //// for Batch messages
    private static final int INITIAL_BATCH_BUFFER_SIZE = 1024;
    private static final int MAX_MESSAGE_BATCH_SIZE_BYTES = 128 * 1024;
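    // When true, control batches are marked as transaction markers so that Pulsar dispatchers skip them
    // (see controlBatchToEntry).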
    private final boolean skipDispatch;

    protected PulsarEntryFormatter(List<EntryFilter> entryFilters) {
        this(entryFilters, true);
    }

    private PulsarEntryFormatter(List<EntryFilter> entryFilters, boolean skipDispatch) {
        super(entryFilters);
        this.skipDispatch = skipDispatch;
    }

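    /**
     * Encode the Kafka records of the request into a single batched Pulsar entry.
     *
     * <p>Every Kafka record (or control batch) becomes one single message in the batch. The batch metadata is
     * initialized from the first message, the payload is compressed with the Pulsar compression type mapped from
     * the Kafka batch's compression type, and the result is serialized with a CRC32C checksum.
     */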
    @Override
    public EncodeResult encode(final EncodeRequest encodeRequest) {
        final MemoryRecords records = encodeRequest.getRecords();
        final PartitionLog.LogAppendInfo appendInfo = encodeRequest.getAppendInfo();
        final int numMessages = appendInfo.numMessages();

        long currentBatchSizeBytes = 0;
        int numMessagesInBatch = 0;
        long startConversionNanos = MathUtils.nowInNano();

        long sequenceId = -1;

        ByteBuf batchedMessageMetadataAndPayload = PulsarByteBufAllocator.DEFAULT
                .buffer(Math.min(INITIAL_BATCH_BUFFER_SIZE, MAX_MESSAGE_BATCH_SIZE_BYTES));

        List<Message<ByteBuffer>> messages = Lists.newArrayListWithExpectedSize(numMessages);
        final MessageMetadata msgMetadata = new MessageMetadata();

        var compressionType = org.apache.kafka.common.record.CompressionType.NONE;
        for (MutableRecordBatch recordBatch : records.batches()) {
            compressionType = recordBatch.compressionType();
            if (recordBatch.isControlBatch()) {
                messages.add(controlBatchToEntry(recordBatch));
            } else {
                if (recordBatch.isTransactional()) {
                    TxnRecordsMetadata.attachToMetadata(msgMetadata, recordBatch.producerId(),
                            recordBatch.producerEpoch(), ControlRecordType.UNKNOWN);
                }
                for (Record record : recordBatch) {
                    validateKey(record, encodeRequest.getAppendInfo().compactedTopic());
                    messages.add(recordToEntry(record));
                }
            }
        }

        final var pulsarCompressionType = switch (compressionType) {
            case NONE -> CompressionType.NONE;
            case SNAPPY -> CompressionType.SNAPPY;
            case ZSTD -> CompressionType.ZSTD;
            // Use LZ4 for the compression types that Pulsar does not support
            default -> CompressionType.LZ4;
        };

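        // Serialize each single message into the shared batch buffer; batch-level metadata (publish time,
        // marker type, properties) is taken from the first message.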
        for (Message<ByteBuffer> rawMessage : messages) {
            var message = (MessageImpl<ByteBuffer>) rawMessage;
            if (++numMessagesInBatch == 1) {
                // msgMetadata will set publish time here
                final var singleMetadata = message.getMessageBuilder();
                sequenceId = Commands.initBatchMessageMetadata(msgMetadata, singleMetadata);
                if (singleMetadata.hasMarkerType()) { // Control record
                    msgMetadata.setMarkerType(singleMetadata.getMarkerType());
                }
                singleMetadata.getPropertiesList().forEach(__ -> msgMetadata.addProperty()
                        .setKey(__.getKey()).setValue(__.getValue()));
            }
            currentBatchSizeBytes += message.getDataBuffer().readableBytes();
            if (log.isTraceEnabled()) {
                log.trace("recordsToByteBuf , sequenceId: {}, numMessagesInBatch: {}, currentBatchSizeBytes: {} ",
                        sequenceId, numMessagesInBatch, currentBatchSizeBytes);
            }

            final MessageMetadata msgBuilder = message.getMessageBuilder();
            batchedMessageMetadataAndPayload = Commands.serializeSingleMessageInBatchWithPayload(msgBuilder,
                    message.getDataBuffer(), batchedMessageMetadataAndPayload);
        }

        msgMetadata.setNumMessagesInBatch(numMessagesInBatch);
        msgMetadata.setProducerName(appendInfo.producerName());
        msgMetadata.setSequenceId(appendInfo.firstSequence());
        msgMetadata.setHighestSequenceId(appendInfo.lastSequence());
        final int uncompressedSize = batchedMessageMetadataAndPayload.readableBytes();
        msgMetadata.setCompression(pulsarCompressionType);
        msgMetadata.setUncompressedSize(uncompressedSize);

        final var compressor = CompressionCodecProvider.getCompressionCodec(pulsarCompressionType);
        final var compressedPayload = compressor.encode(batchedMessageMetadataAndPayload);
        batchedMessageMetadataAndPayload.release();

        ByteBuf buf = Commands.serializeMetadataAndPayload(ChecksumType.Crc32c,
                msgMetadata,
                compressedPayload);
        compressedPayload.release();

        return EncodeResult.get(records, buf, numMessages, numMessagesInBatch,
                MathUtils.elapsedNanos(startConversionNanos));
    }

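    /**
     * Decoding from Pulsar entries back to Kafka records is handled entirely by {@link AbstractEntryFormatter}.
     */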
    @Override
    public DecodeResult decode(final List<Entry> entries, final byte magic) {
        return super.decode(entries, magic);
    }

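    /**
     * Wrap a Kafka control batch (e.g. a transaction marker) into a single Pulsar message whose value is the
     * serialized batch itself.
     *
     * <p>When {@code skipDispatch} is enabled, the metadata is marked as a transaction marker so that
     * AbstractBaseDispatcher filters the entry out when dispatching to native Pulsar consumers.
     */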
    private Message<ByteBuffer> controlBatchToEntry(final RecordBatch batch) {
        final var record = batch.iterator().next();
        final var metadata = newMetadataFromKafkaRecord(record);

        final var value = ByteBuffer.allocate(batch.sizeInBytes());
        batch.writeTo(value);
        value.position(0);

        final var type = ControlRecordType.parse(record.key());
        if (skipDispatch) {
            // See AbstractBaseDispatcher#filterEntriesForConsumer, entries with the following fields will be skipped
            // when dispatching to Pulsar consumer
            metadata.setMarkerType(MarkerType.TXN_COMMIT_VALUE).setTxnidMostBits(0L).setTxnidLeastBits(0L);
        }
        TxnRecordsMetadata.attachToMetadata(metadata, batch.producerId(), batch.producerEpoch(), type);

        return MessageImpl.create(metadata, value, Schema.BYTEBUFFER, null);
    }

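    /**
     * Convert a single Kafka record into a Pulsar message. The record key, if present, is stored Base64-encoded as
     * the partition key and also as the ordering key; a record without a value becomes an empty payload with the
     * null-value flag set.
     */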
    private static Message<ByteBuffer> recordToEntry(Record record) {
        final var metadata = newMetadataFromKafkaRecord(record);

        if (record.hasKey()) {
            byte[] key = new byte[record.keySize()];
            record.key().get(key);
            metadata.setPartitionKeyB64Encoded(true).setPartitionKey(Base64.getEncoder().encodeToString(key))
                    .setOrderingKey(key);
        }

        final ByteBuffer value;
        if (record.hasValue()) {
            value = record.value();
        } else {
            value = EMPTY_CONTENT;
            metadata.setNullValue(true);
        }

        return MessageImpl.create(metadata, value, Schema.BYTEBUFFER, null);
    }

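    /**
     * Build single-message metadata from a Kafka record: sequence id, publish/event time (falling back to the
     * broker's current time when the record carries no timestamp) and record headers mapped to message properties.
     *
     * <p>A per-thread {@link MessageMetadata} instance is reused and cleared on each call to avoid per-record
     * allocations.
     */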
    private static MessageMetadata newMetadataFromKafkaRecord(final Record record) {
        final var metadata = LOCAL_MESSAGE_METADATA.get();
        metadata.clear();

        final var sequenceId = record.sequence() >= 0 ? record.sequence() : 0L;
        metadata.setSequenceId(sequenceId).setProducerName("");

        if (record.timestamp() >= 0) {
            metadata.setPublishTime(record.timestamp()).setEventTime(record.timestamp());
        } else {
            metadata.setPublishTime(System.currentTimeMillis());
        }

        for (var header : record.headers()) {
            metadata.addProperty().setKey(header.key()).setValue(new String(header.value(), UTF_8));
        }

        return metadata;
    }

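    /**
     * Create a formatter for offset control records: no entry filters are applied and {@code skipDispatch} is
     * disabled, so control batches are not marked as transaction markers.
     */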
    public static PulsarEntryFormatter offsetControlRecordFormatter() {
        return new PulsarEntryFormatter(Collections.emptyList(), false);
    }
}