/*
* Copyright (C) 2023-2024 Hedera Hashgraph, LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.swirlds.merkledb.files;
import static com.swirlds.merkledb.files.DataFileCommon.FIELD_DATAFILE_ITEMS;
import static com.swirlds.merkledb.files.DataFileCommon.PAGE_SIZE;
import static com.swirlds.merkledb.files.DataFileCommon.createDataFilePath;
import com.hedera.pbj.runtime.ProtoWriterTools;
import com.hedera.pbj.runtime.io.buffer.BufferedData;
import com.swirlds.merkledb.serialize.BaseSerializer;
import java.io.IOException;
import java.nio.BufferOverflowException;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.channels.FileChannel.MapMode;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.time.Instant;
/**
 * Writer for creating a data file. A data file contains a number of data items. Each data item
 * can be variable or fixed size and is treated as a black box. All access to the contents of a
 * data item is done via the {@link BaseSerializer}.
 *
 * <p>This is designed to be used from a single thread.
 *
 * <p>At the end of the file it is padded till a 4096 byte page boundary, then a footer page is
 * written by DataFileMetadata.
 *
 * <p>Protobuf schema: see {@link DataFileReader} for details.
 *
 * @param <D> Data item type
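 *
 * <p>Minimal usage sketch; the serializer, directory, and item names below are hypothetical
 * placeholders for illustration, not part of this API:
 * <pre>{@code
 * // assumes an existing BaseSerializer<MyItem> implementation
 * DataFileWriter<MyItem> writer = new DataFileWriter<>(
 *         "store", dataDir, 1, myItemSerializer, Instant.now(), 0);
 * long location = writer.storeDataItem(myItem);
 * writer.finishWriting(); // seal the file and truncate it to its final size
 * }</pre>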
*/
public final class DataFileWriter<D> {
    /** Size of the mapped writing buffer: 64 * 1024 pages of {@link DataFileCommon#PAGE_SIZE} bytes each (256 MiB for 4096-byte pages) */
    private static final int MMAP_BUF_SIZE = PAGE_SIZE * 1024 * 64;
/**
* The current mapped byte buffer used for writing. When overflowed, it is released, and another
* buffer is mapped from the file channel.
*/
private MappedByteBuffer writingMmap;
/**
* Offset, in bytes, of the current mapped byte buffer in the file channel. After the file is
* completely written and closed, this field value is equal to the file size.
*/
private long mmapPositionInFile = 0;
    /** PBJ view of {@link #writingMmap}, used for all data item writes */
    private BufferedData writingPbjData;
    /** Mapped byte buffer over the file header region */
    private MappedByteBuffer writingHeaderMmap;
    /** PBJ view of {@link #writingHeaderMmap}, used to write and update the file metadata */
    private BufferedData writingHeaderPbjData;
/** Serializer for converting raw data to/from data items */
    private final BaseSerializer<D> dataItemSerializer;
/** The path to the data file we are writing */
private final Path path;
/** File metadata */
private final DataFileMetadata metadata;
    /**
     * Count of data items written so far, to be stored in the file metadata when writing
     * is finished
     */
private long dataItemCount = 0;
    /**
     * Create a new data file in the given directory, in append mode. Puts the object into
     * "writing" mode (i.e. creates a lock file), so you'd better start writing data and be
     * sure to finish it off.
     *
     * @param filePrefix string prefix for all files, must not contain "_" chars
     * @param dataFileDir the path to the directory to create the data file in
     * @param index the index number for this file
     * @param dataItemSerializer serializer for converting raw data to/from data items
     * @param creationTime the creation timestamp for this file
     * @param compactionLevel the compaction level for this file
     * @throws IOException if the data file cannot be created or the header cannot be written
     */
public DataFileWriter(
final String filePrefix,
final Path dataFileDir,
final int index,
            final BaseSerializer<D> dataItemSerializer,
final Instant creationTime,
final int compactionLevel)
throws IOException {
this.dataItemSerializer = dataItemSerializer;
this.path = createDataFilePath(filePrefix, dataFileDir, index, creationTime, DataFileCommon.FILE_EXTENSION);
metadata = new DataFileMetadata(
0, // data item count will be updated later in finishWriting()
index,
creationTime,
dataItemSerializer.getCurrentDataVersion(),
compactionLevel);
Files.createFile(path);
writeHeader();
}
/**
* Maps the writing byte buffer to the given position in the file. Byte buffer size is always
* {@link #MMAP_BUF_SIZE}. Previous mapped byte buffer, if not null, is released.
*
* @param newMmapPos new mapped byte buffer position in the file, in bytes
     * @throws IOException if an I/O error occurs
*/
private void moveWritingBuffer(final long newMmapPos) throws IOException {
try (final FileChannel channel = FileChannel.open(path, StandardOpenOption.READ, StandardOpenOption.WRITE)) {
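            // note: a READ_WRITE mapping past the current end of file grows the file; the
            // extra space is trimmed back by truncate() in finishWriting()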
final MappedByteBuffer newMmap = channel.map(MapMode.READ_WRITE, newMmapPos, MMAP_BUF_SIZE);
if (newMmap == null) {
throw new IOException("Failed to map file channel to memory");
}
if (writingMmap != null) {
DataFileCommon.closeMmapBuffer(writingMmap);
}
mmapPositionInFile = newMmapPos;
writingMmap = newMmap;
writingPbjData = BufferedData.wrap(writingMmap);
}
}
    /**
     * Writes the file header (metadata) at the start of the file and positions the writing
     * buffer immediately after it.
     *
     * @throws IOException if an I/O error occurs
     */
    private void writeHeader() throws IOException {
try (final FileChannel channel = FileChannel.open(path, StandardOpenOption.READ, StandardOpenOption.WRITE)) {
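            // map the first 1024 bytes of the file; the serialized metadata header is
            // expected to fit within this window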
writingHeaderMmap = channel.map(MapMode.READ_WRITE, 0, 1024);
writingHeaderPbjData = BufferedData.wrap(writingHeaderMmap);
metadata.writeTo(writingHeaderPbjData);
}
// prepare to write data items
moveWritingBuffer(writingHeaderPbjData.position());
}
/**
* Get the path for the file being written. Useful when needing to get a reader to the file.
*
* @return file path
*/
public Path getPath() {
return path;
}
/**
* Get file metadata for the written file.
*
* @return data file metadata
*/
public DataFileMetadata getMetadata() {
return metadata;
}
    /**
     * Write a data item copied from another file, e.g. during a merge. If this writer doesn't
     * support the provided raw data item type, the item has to be fully deserialized and then
     * serialized to the target file rather than copied as raw bytes.
     *
     * @param dataItemData a buffer containing the item's data
     * @return new data location in this file where the item was written
     * @throws IllegalArgumentException if this writer doesn't support the given raw item bytes type
     * @throws IOException if there was a problem writing the data item
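     *
     * <p>Illustrative compaction-loop sketch; the reader/iterator and index shown are
     * placeholders, not actual APIs of this class:
     * <pre>{@code
     * // for each raw item buffer read from a source file during compaction:
     * //     long newLocation = writer.writeCopiedDataItem(itemBuffer);
     * //     index.put(key, newLocation); // repoint the index at the new location
     * }</pre>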
*/
public synchronized long writeCopiedDataItem(final BufferedData dataItemData) throws IOException {
// capture the current write position for beginning of data item
final long currentWritingMmapPos = writingPbjData.position();
final long byteOffset = mmapPositionInFile + currentWritingMmapPos;
final int size = Math.toIntExact(dataItemData.remaining());
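        // if the delimited item would not fit into the remaining mapped window, remap the
        // window so it starts at this item's offset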
if (writingPbjData.remaining() < ProtoWriterTools.sizeOfDelimited(FIELD_DATAFILE_ITEMS, size)) {
moveWritingBuffer(byteOffset);
}
try {
ProtoWriterTools.writeDelimited(
writingPbjData, FIELD_DATAFILE_ITEMS, size, o -> o.writeBytes(dataItemData));
} catch (final BufferOverflowException e) {
// Buffer overflow here means the mapped buffer is smaller than even a single data item
throw new IOException(DataFileCommon.ERROR_DATAITEM_TOO_LARGE, e);
}
dataItemCount++;
        // return the packed data location (file index + byte offset) where we wrote the data
return DataFileCommon.dataLocation(metadata.getIndex(), byteOffset);
}
/**
* Store data item in file returning location it was stored at.
*
* @param dataItem the data item to write
     * @return the data location where the item was written
* @throws IOException if there was a problem appending data to file
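     *
     * <p>Illustrative call (names are placeholders):
     * <pre>{@code
     * long location = writer.storeDataItem(item);
     * // the returned location can later be used to look the item up via a data file reader
     * }</pre>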
*/
public synchronized long storeDataItem(final D dataItem) throws IOException {
        // find the offset for the start of this new data item
final long currentWritingMmapPos = writingPbjData.position();
final long byteOffset = mmapPositionInFile + currentWritingMmapPos;
// write serialized data
final int dataItemSize = dataItemSerializer.getSerializedSize(dataItem);
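        // remap the writing window if the serialized item would not fit into the remainder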
if (writingPbjData.remaining() < ProtoWriterTools.sizeOfDelimited(FIELD_DATAFILE_ITEMS, dataItemSize)) {
moveWritingBuffer(byteOffset);
}
try {
ProtoWriterTools.writeDelimited(
writingPbjData,
FIELD_DATAFILE_ITEMS,
dataItemSize,
out -> dataItemSerializer.serialize(dataItem, out));
} catch (final BufferOverflowException e) {
// Buffer overflow here means the mapped buffer is smaller than even a single data item
throw new IOException(DataFileCommon.ERROR_DATAITEM_TOO_LARGE, e);
}
// increment data item counter
dataItemCount++;
        // return the packed data location (file index + byte offset) where we wrote the data
return DataFileCommon.dataLocation(metadata.getIndex(), byteOffset);
}
    /**
     * When you have finished appending to a new file, call this to seal the file and make it
     * read-only.
*
     * @throws IOException if there was a problem sealing the file or opening it again as read only
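     *
     * <p>All mapped buffers are released here, so the writer must not be used after this call.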
*/
public synchronized void finishWriting() throws IOException {
        // the total file size is the current overall writing position
final long totalFileSize = mmapPositionInFile + writingPbjData.position();
        // update the data item count in the metadata and in the file header;
        // note that updateDataItemCount() disturbs the header buffer state (position), but
        // the buffer is closed right below anyway
metadata.updateDataItemCount(writingHeaderPbjData, dataItemCount);
// release all the resources
DataFileCommon.closeMmapBuffer(writingHeaderMmap);
DataFileCommon.closeMmapBuffer(writingMmap);
try (FileChannel channel = FileChannel.open(path, StandardOpenOption.READ, StandardOpenOption.WRITE)) {
channel.truncate(totalFileSize);
// after finishWriting(), mmapPositionInFile should be equal to the file size
mmapPositionInFile = totalFileSize;
}
}
}