io.streamnative.pulsar.handlers.kop.format.PulsarEntryFormatter Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of pulsar-protocol-handler-kafka Show documentation
Show all versions of pulsar-protocol-handler-kafka Show documentation
Kafka on Pulsar implemented using Pulsar Protocol Handler
/**
* Copyright (c) 2019 - 2024 StreamNative, Inc.. All Rights Reserved.
*/
/**
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.streamnative.pulsar.handlers.kop.format;
import static io.streamnative.pulsar.handlers.kop.utils.KopLogValidator.validateKey;
import static java.nio.charset.StandardCharsets.UTF_8;
import com.google.common.collect.Lists;
import io.netty.buffer.ByteBuf;
import io.netty.util.concurrent.FastThreadLocal;
import io.streamnative.pulsar.handlers.kop.coordinator.group.TxnRecordsMetadata;
import io.streamnative.pulsar.handlers.kop.storage.PartitionLog;
import java.nio.ByteBuffer;
import java.util.Base64;
import java.util.Collections;
import java.util.List;
import lombok.extern.slf4j.Slf4j;
import org.apache.bookkeeper.common.util.MathUtils;
import org.apache.bookkeeper.mledger.Entry;
import org.apache.kafka.common.record.ControlRecordType;
import org.apache.kafka.common.record.MemoryRecords;
import org.apache.kafka.common.record.MutableRecordBatch;
import org.apache.kafka.common.record.Record;
import org.apache.kafka.common.record.RecordBatch;
import org.apache.pulsar.broker.service.plugin.EntryFilter;
import org.apache.pulsar.client.api.Message;
import org.apache.pulsar.client.api.Schema;
import org.apache.pulsar.client.impl.MessageImpl;
import org.apache.pulsar.common.allocator.PulsarByteBufAllocator;
import org.apache.pulsar.common.api.proto.CompressionType;
import org.apache.pulsar.common.api.proto.MarkerType;
import org.apache.pulsar.common.api.proto.MessageMetadata;
import org.apache.pulsar.common.compression.CompressionCodecProvider;
import org.apache.pulsar.common.protocol.Commands;
import org.apache.pulsar.common.protocol.Commands.ChecksumType;
/**
* The entry formatter that uses Pulsar's format.
*/
@Slf4j
public class PulsarEntryFormatter extends AbstractEntryFormatter {
private static final FastThreadLocal LOCAL_MESSAGE_METADATA = new FastThreadLocal<>() {
@Override
protected MessageMetadata initialValue() {
return new MessageMetadata();
}
};
private static final ByteBuffer EMPTY_CONTENT = ByteBuffer.allocate(0);
//// for Batch messages
private static final int INITIAL_BATCH_BUFFER_SIZE = 1024;
private static final int MAX_MESSAGE_BATCH_SIZE_BYTES = 128 * 1024;
private final boolean skipDispatch;
protected PulsarEntryFormatter(List entryFilters) {
this(entryFilters, true);
}
private PulsarEntryFormatter(List entryFilters, boolean skipDispatch) {
super(entryFilters);
this.skipDispatch = skipDispatch;
}
@Override
public EncodeResult encode(final EncodeRequest encodeRequest) {
final MemoryRecords records = encodeRequest.getRecords();
final PartitionLog.LogAppendInfo appendInfo = encodeRequest.getAppendInfo();
final int numMessages = appendInfo.numMessages();
long currentBatchSizeBytes = 0;
int numMessagesInBatch = 0;
long startConversionNanos = MathUtils.nowInNano();
long sequenceId = -1;
ByteBuf batchedMessageMetadataAndPayload = PulsarByteBufAllocator.DEFAULT
.buffer(Math.min(INITIAL_BATCH_BUFFER_SIZE, MAX_MESSAGE_BATCH_SIZE_BYTES));
List> messages = Lists.newArrayListWithExpectedSize(numMessages);
final MessageMetadata msgMetadata = new MessageMetadata();
var compressionType = org.apache.kafka.common.record.CompressionType.NONE;
for (MutableRecordBatch recordBatch : records.batches()) {
compressionType = recordBatch.compressionType();
if (recordBatch.isControlBatch()) {
messages.add(controlBatchToEntry(recordBatch));
} else {
if (recordBatch.isTransactional()) {
TxnRecordsMetadata.attachToMetadata(msgMetadata, recordBatch.producerId(),
recordBatch.producerEpoch(), ControlRecordType.UNKNOWN);
}
for (Record record : recordBatch) {
validateKey(record, encodeRequest.getAppendInfo().compactedTopic());
messages.add(recordToEntry(record));
}
}
}
final var pulsarCompressionType = switch (compressionType) {
case NONE -> CompressionType.NONE;
case SNAPPY -> CompressionType.SNAPPY;
case ZSTD -> CompressionType.ZSTD;
// Use LZ4 for the compression types that Pulsar does not support
default -> CompressionType.LZ4;
};
for (Message rawMessage: messages) {
var message = (MessageImpl) rawMessage;
if (++numMessagesInBatch == 1) {
// msgMetadata will set publish time here
final var singleMetadata = message.getMessageBuilder();
sequenceId = Commands.initBatchMessageMetadata(msgMetadata, singleMetadata);
if (singleMetadata.hasMarkerType()) { // Control record
msgMetadata.setMarkerType(singleMetadata.getMarkerType());
}
singleMetadata.getPropertiesList().forEach(__ -> msgMetadata.addProperty()
.setKey(__.getKey()).setValue(__.getValue()));
}
currentBatchSizeBytes += message.getDataBuffer().readableBytes();
if (log.isTraceEnabled()) {
log.trace("recordsToByteBuf , sequenceId: {}, numMessagesInBatch: {}, currentBatchSizeBytes: {} ",
sequenceId, numMessagesInBatch, currentBatchSizeBytes);
}
final MessageMetadata msgBuilder = message.getMessageBuilder();
batchedMessageMetadataAndPayload = Commands.serializeSingleMessageInBatchWithPayload(msgBuilder,
message.getDataBuffer(), batchedMessageMetadataAndPayload);
}
msgMetadata.setNumMessagesInBatch(numMessagesInBatch);
msgMetadata.setProducerName(appendInfo.producerName());
msgMetadata.setSequenceId(appendInfo.firstSequence());
msgMetadata.setHighestSequenceId(appendInfo.lastSequence());
final int uncompressedSize = batchedMessageMetadataAndPayload.readableBytes();
msgMetadata.setCompression(pulsarCompressionType);
msgMetadata.setUncompressedSize(uncompressedSize);
final var compressor = CompressionCodecProvider.getCompressionCodec(pulsarCompressionType);
final var compressedPayload = compressor.encode(batchedMessageMetadataAndPayload);
batchedMessageMetadataAndPayload.release();
ByteBuf buf = Commands.serializeMetadataAndPayload(ChecksumType.Crc32c,
msgMetadata,
compressedPayload);
compressedPayload.release();
return EncodeResult.get(records, buf, numMessages, numMessagesInBatch,
MathUtils.elapsedNanos(startConversionNanos));
}
@Override
public DecodeResult decode(final List entries, final byte magic) {
return super.decode(entries, magic);
}
private Message controlBatchToEntry(final RecordBatch batch) {
final var record = batch.iterator().next();
final var metadata = newMetadataFromKafkaRecord(record);
final var value = ByteBuffer.allocate(batch.sizeInBytes());
batch.writeTo(value);
value.position(0);
final var type = ControlRecordType.parse(record.key());
if (skipDispatch) {
// See AbstractBaseDispatcher#filterEntriesForConsumer, entries with the following fields will be skipped
// when dispatching to Pulsar consumer
metadata.setMarkerType(MarkerType.TXN_COMMIT_VALUE).setTxnidMostBits(0L).setTxnidLeastBits(0L);
}
TxnRecordsMetadata.attachToMetadata(metadata, batch.producerId(), batch.producerEpoch(), type);
return MessageImpl.create(metadata, value, Schema.BYTEBUFFER, null);
}
private static Message recordToEntry(Record record) {
final var metadata = newMetadataFromKafkaRecord(record);
if (record.hasKey()) {
byte[] key = new byte[record.keySize()];
record.key().get(key);
metadata.setPartitionKeyB64Encoded(true).setPartitionKey(Base64.getEncoder().encodeToString(key))
.setOrderingKey(key);
}
final ByteBuffer value;
if (record.hasValue()) {
value = record.value();
} else {
value = EMPTY_CONTENT;
metadata.setNullValue(true);
}
return MessageImpl.create(metadata, value, Schema.BYTEBUFFER, null);
}
private static MessageMetadata newMetadataFromKafkaRecord(final Record record) {
final var metadata = LOCAL_MESSAGE_METADATA.get();
metadata.clear();
final var sequenceId = record.sequence() >= 0 ? record.sequence() : 0L;
metadata.setSequenceId(sequenceId).setProducerName("");
if (record.timestamp() >= 0) {
metadata.setPublishTime(record.timestamp()).setEventTime(record.timestamp());
} else {
metadata.setPublishTime(System.currentTimeMillis());
}
for (var header : record.headers()) {
metadata.addProperty().setKey(header.key()).setValue(new String(header.value(), UTF_8));
}
return metadata;
}
public static PulsarEntryFormatter offsetControlRecordFormatter() {
return new PulsarEntryFormatter(Collections.emptyList(), false);
}
}