/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.raft.internals;

import org.apache.kafka.common.compress.Compression;
import org.apache.kafka.common.protocol.DataOutputStreamWritable;
import org.apache.kafka.common.protocol.ObjectSerializationCache;
import org.apache.kafka.common.protocol.Writable;
import org.apache.kafka.common.record.AbstractRecords;
import org.apache.kafka.common.record.DefaultRecord;
import org.apache.kafka.common.record.DefaultRecordBatch;
import org.apache.kafka.common.record.MemoryRecords;
import org.apache.kafka.common.record.RecordBatch;
import org.apache.kafka.common.record.TimestampType;
import org.apache.kafka.common.utils.ByteBufferOutputStream;
import org.apache.kafka.common.utils.ByteUtils;
import org.apache.kafka.server.common.serialization.RecordSerde;

import java.io.DataOutputStream;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.OptionalInt;

/**
* Collect a set of records into a single batch. New records are added
* through {@link #appendRecord(Object, ObjectSerializationCache)}, but the caller must first
* check whether there is room using {@link #bytesNeeded(Collection, ObjectSerializationCache)}. Once the
* batch is ready, then {@link #build()} should be used to get the resulting
* {@link MemoryRecords} instance.
*
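 * <p>A minimal usage sketch; the buffer, serde, cache, and numeric arguments
 * here are illustrative placeholders rather than values prescribed by this
 * class:
 * <pre>{@code
 * BatchBuilder<T> builder = new BatchBuilder<>(
 *     buffer, serde, compression, baseOffset, appendTime, false, leaderEpoch, maxBytes);
 * if (!builder.bytesNeeded(records, cache).isPresent()) {
 *     for (T record : records) {
 *         builder.appendRecord(record, cache);
 *     }
 * }
 * MemoryRecords batch = builder.build();
 * }</pre>
 *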
 * @param <T> the record type, as indicated by the {@link RecordSerde} passed to the constructor
*/
public class BatchBuilder<T> {
private final ByteBuffer initialBuffer;
private final Compression compression;
private final ByteBufferOutputStream batchOutput;
private final DataOutputStreamWritable recordOutput;
private final long baseOffset;
private final long appendTime;
private final boolean isControlBatch;
private final int leaderEpoch;
private final int initialPosition;
private final int maxBytes;
    private final RecordSerde<T> serde;
    private final List<T> records;
private long nextOffset;
private int unflushedBytes;
private boolean isOpenForAppends = true;

    public BatchBuilder(
        ByteBuffer buffer,
        RecordSerde<T> serde,
Compression compression,
long baseOffset,
long appendTime,
boolean isControlBatch,
int leaderEpoch,
int maxBytes
) {
this.initialBuffer = buffer;
this.batchOutput = new ByteBufferOutputStream(buffer);
this.serde = serde;
this.compression = compression;
this.baseOffset = baseOffset;
this.nextOffset = baseOffset;
this.appendTime = appendTime;
this.isControlBatch = isControlBatch;
this.initialPosition = batchOutput.position();
this.leaderEpoch = leaderEpoch;
this.maxBytes = maxBytes;
this.records = new ArrayList<>();
        // the compression field must be set before calculating the batch header size
int batchHeaderSizeInBytes = batchHeaderSizeInBytes();
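        // Reserve space for the batch header; the header itself is written in
        // build() via writeDefaultBatchHeader() once the record count and the
        // final size are known.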
batchOutput.position(initialPosition + batchHeaderSizeInBytes);
this.recordOutput = new DataOutputStreamWritable(new DataOutputStream(
compression.wrapForOutput(this.batchOutput, RecordBatch.MAGIC_VALUE_V2)));
}

    /**
* Append a record to this batch. The caller must first verify there is room for the batch
* using {@link #bytesNeeded(Collection, ObjectSerializationCache)}.
*
* @param record the record to append
* @param serializationCache serialization cache for use in {@link RecordSerde#write(Object, ObjectSerializationCache, Writable)}
     * @return the offset of the appended record
*/
public long appendRecord(T record, ObjectSerializationCache serializationCache) {
if (!isOpenForAppends) {
throw new IllegalStateException("Cannot append new records after the batch has been built");
}
if (nextOffset - baseOffset > Integer.MAX_VALUE) {
throw new IllegalArgumentException("Cannot include more than " + Integer.MAX_VALUE +
" records in a single batch");
}
long offset = nextOffset++;
int recordSizeInBytes = writeRecord(
offset,
record,
serializationCache
);
unflushedBytes += recordSizeInBytes;
records.add(record);
return offset;
}

    /**
* Check whether the batch has enough room for all the record values.
*
* Returns an empty {@link OptionalInt} if the batch builder has room for this list of records.
* Otherwise, it returns the expected number of bytes needed for a batch to contain these records.
*
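     * <p>A sketch of the intended call pattern; the surrounding drain loop is
     * illustrative:
     * <pre>{@code
     * OptionalInt needed = builder.bytesNeeded(records, cache);
     * if (needed.isPresent()) {
     *     // no room left: build() this batch and allocate at least
     *     // needed.getAsInt() bytes for a new builder to hold these records
     * }
     * }</pre>
     *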
* @param records the records to use when checking for room
* @param serializationCache serialization cache for computing sizes
* @return empty {@link OptionalInt} if there is room for the records to be appended, otherwise
* returns the number of bytes needed
*/
    public OptionalInt bytesNeeded(Collection<T> records, ObjectSerializationCache serializationCache) {
int bytesNeeded = bytesNeededForRecords(
records,
serializationCache
);
if (!isOpenForAppends) {
return OptionalInt.of(Math.addExact(batchHeaderSizeInBytes(), bytesNeeded));
}
int approxUnusedSizeInBytes = maxBytes - approximateSizeInBytes();
if (approxUnusedSizeInBytes >= bytesNeeded) {
return OptionalInt.empty();
} else if (unflushedBytes > 0) {
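            // approximateSizeInBytes() overestimates when compression is in
            // use, so flush the compressed stream and re-check against the
            // exact flushed size before giving up.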
recordOutput.flush();
unflushedBytes = 0;
int unusedSizeInBytes = maxBytes - flushedSizeInBytes();
if (unusedSizeInBytes >= bytesNeeded) {
return OptionalInt.empty();
}
}
return OptionalInt.of(Math.addExact(batchHeaderSizeInBytes(), bytesNeeded));
}

    private int flushedSizeInBytes() {
return batchOutput.position() - initialPosition;
}

    /**
* Get an estimate of the current size of the appended data. This estimate
* is precise if no compression is in use.
*
* @return estimated size in bytes of the appended records
*/
public int approximateSizeInBytes() {
return flushedSizeInBytes() + unflushedBytes;
}

    /**
* Get the base offset of this batch. This is constant upon constructing
* the builder instance.
*
* @return the base offset
*/
public long baseOffset() {
return baseOffset;
}

    /**
* Return the offset of the last appended record. This is updated after
* every append and can be used after the batch has been built to obtain
* the last offset.
*
* @return the offset of the last appended record
*/
public long lastOffset() {
return nextOffset - 1;
}

    /**
* Get the number of records appended to the batch. This is updated after
* each append.
*
* @return the number of appended records
*/
public int numRecords() {
return (int) (nextOffset - baseOffset);
}

    /**
* Check whether there has been at least one record appended to the batch.
*
* @return true if one or more records have been appended
*/
public boolean nonEmpty() {
return numRecords() > 0;
}

    /**
     * Return a reference to the initial buffer passed to the constructor.
* This is used in case the buffer needs to be returned to a pool (e.g.
* in {@link org.apache.kafka.common.memory.MemoryPool#release(ByteBuffer)}).
*
* @return the initial buffer passed to the constructor
*/
public ByteBuffer initialBuffer() {
return initialBuffer;
}

    /**
     * Get a list of the records appended to the batch.
     *
     * @return a list of records
     */
    public List<T> records() {
return records;
}
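
    // Rewinds to the header space reserved in the constructor, writes the v2
    // batch header over it (both the base and max timestamps are set to the
    // append time), and restores the buffer position afterwards.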
private void writeDefaultBatchHeader() {
ByteBuffer buffer = batchOutput.buffer();
int lastPosition = buffer.position();
buffer.position(initialPosition);
int size = lastPosition - initialPosition;
int lastOffsetDelta = (int) (lastOffset() - baseOffset);
DefaultRecordBatch.writeHeader(
buffer,
baseOffset,
lastOffsetDelta,
size,
RecordBatch.MAGIC_VALUE_V2,
compression.type(),
TimestampType.CREATE_TIME,
appendTime,
appendTime,
RecordBatch.NO_PRODUCER_ID,
RecordBatch.NO_PRODUCER_EPOCH,
RecordBatch.NO_SEQUENCE,
false,
isControlBatch,
false,
leaderEpoch,
numRecords()
);
buffer.position(lastPosition);
}
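
    /**
     * Close the record stream, write the batch header into the space reserved
     * in the constructor, and return the completed batch. The builder is no
     * longer open for appends once this method returns.
     *
     * @return the completed batch as {@link MemoryRecords}
     */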
public MemoryRecords build() {
recordOutput.close();
writeDefaultBatchHeader();
ByteBuffer buffer = batchOutput.buffer().duplicate();
buffer.flip();
buffer.position(initialPosition);
isOpenForAppends = false;
return MemoryRecords.readableRecords(buffer.slice());
}
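
    /**
     * Write a single record in the v2 record format: the body size as a
     * varint, followed by the attributes byte, the timestamp delta, the
     * offset delta, the key length (always -1 here, since keys are unused),
     * the value length and value, and the header count (always 0 here).
     *
     * @return the total number of bytes written, including the size prefix
     */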
public int writeRecord(
long offset,
T payload,
ObjectSerializationCache serializationCache
) {
int offsetDelta = (int) (offset - baseOffset);
long timestampDelta = 0;
int payloadSize = serde.recordSize(payload, serializationCache);
int sizeInBytes = DefaultRecord.sizeOfBodyInBytes(
offsetDelta,
timestampDelta,
-1,
payloadSize,
DefaultRecord.EMPTY_HEADERS
);
recordOutput.writeVarint(sizeInBytes);
// Write attributes (currently unused)
recordOutput.writeByte((byte) 0);
// Write timestamp and offset
recordOutput.writeVarlong(timestampDelta);
recordOutput.writeVarint(offsetDelta);
// Write key, which is always null for controller messages
recordOutput.writeVarint(-1);
// Write value
recordOutput.writeVarint(payloadSize);
serde.write(payload, serializationCache, recordOutput);
// Write headers (currently unused)
recordOutput.writeVarint(0);
return ByteUtils.sizeOfVarint(sizeInBytes) + sizeInBytes;
}

    private int batchHeaderSizeInBytes() {
return AbstractRecords.recordBatchHeaderSizeInBytes(
RecordBatch.MAGIC_VALUE_V2,
compression.type()
);
}
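
    // Sums, for each record, the size of its v2 record body plus the varint
    // size prefix, computed at the offset the record would be assigned.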
    private int bytesNeededForRecords(
        Collection<T> records,
ObjectSerializationCache serializationCache
) {
long expectedNextOffset = nextOffset;
int bytesNeeded = 0;
for (T record : records) {
if (expectedNextOffset - baseOffset >= Integer.MAX_VALUE) {
throw new IllegalArgumentException(
String.format(
"Adding %d records to a batch with base offset of %d and next offset of %d",
records.size(),
baseOffset,
expectedNextOffset
)
);
}
int recordSizeInBytes = DefaultRecord.sizeOfBodyInBytes(
(int) (expectedNextOffset - baseOffset),
0,
-1,
serde.recordSize(record, serializationCache),
DefaultRecord.EMPTY_HEADERS
);
bytesNeeded = Math.addExact(bytesNeeded, ByteUtils.sizeOfVarint(recordSizeInBytes));
bytesNeeded = Math.addExact(bytesNeeded, recordSizeInBytes);
expectedNextOffset += 1;
}
return bytesNeeded;
}
}