/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.dinky.shaded.paimon.append;

import org.dinky.shaded.paimon.annotation.VisibleForTesting;
import org.dinky.shaded.paimon.compact.CompactManager;
import org.dinky.shaded.paimon.data.InternalRow;
import org.dinky.shaded.paimon.data.serializer.InternalRowSerializer;
import org.dinky.shaded.paimon.disk.IOManager;
import org.dinky.shaded.paimon.disk.RowBuffer;
import org.dinky.shaded.paimon.format.FileFormat;
import org.dinky.shaded.paimon.fs.FileIO;
import org.dinky.shaded.paimon.io.CompactIncrement;
import org.dinky.shaded.paimon.io.DataFileMeta;
import org.dinky.shaded.paimon.io.DataFilePathFactory;
import org.dinky.shaded.paimon.io.NewFilesIncrement;
import org.dinky.shaded.paimon.io.RowDataRollingFileWriter;
import org.dinky.shaded.paimon.memory.MemoryOwner;
import org.dinky.shaded.paimon.memory.MemorySegmentPool;
import org.dinky.shaded.paimon.operation.metrics.WriterMetrics;
import org.dinky.shaded.paimon.statistics.FieldStatsCollector;
import org.dinky.shaded.paimon.types.RowKind;
import org.dinky.shaded.paimon.types.RowType;
import org.dinky.shaded.paimon.utils.CommitIncrement;
import org.dinky.shaded.paimon.utils.LongCounter;
import org.dinky.shaded.paimon.utils.Preconditions;
import org.dinky.shaded.paimon.utils.RecordWriter;

import javax.annotation.Nullable;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.ExecutionException;

/**
 * A {@link RecordWriter} implementation that only accepts insert records and does not
 * maintain any unique keys or sort keys.
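 *
 * <p>A minimal usage sketch (how the writer is constructed is omitted here; it takes the
 * many dependencies listed in the constructor):
 *
 * <pre>{@code
 * AppendOnlyWriter writer = ...;                       // construction omitted
 * writer.write(row);                                   // row.getRowKind() must be INSERT
 * CommitIncrement increment = writer.prepareCommit(false);
 * // hand the increment to the committer, and finally:
 * writer.close();
 * }</pre>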
 */
public class AppendOnlyWriter implements RecordWriter<InternalRow>, MemoryOwner {

    private final FileIO fileIO;
    private final long schemaId;
    private final FileFormat fileFormat;
    private final long targetFileSize;
    private final RowType writeSchema;
    private final DataFilePathFactory pathFactory;
    private final CompactManager compactManager;
    private final boolean forceCompact;
    private final List<DataFileMeta> newFiles;
    private final List<DataFileMeta> compactBefore;
    private final List<DataFileMeta> compactAfter;
    private final LongCounter seqNumCounter;
    private final String fileCompression;
    private final SinkWriter sinkWriter;
    private final FieldStatsCollector.Factory[] statsCollectors;
    private final IOManager ioManager;

    private WriterMetrics writerMetrics;

    public AppendOnlyWriter(
            FileIO fileIO,
            IOManager ioManager,
            long schemaId,
            FileFormat fileFormat,
            long targetFileSize,
            RowType writeSchema,
            long maxSequenceNumber,
            CompactManager compactManager,
            boolean forceCompact,
            DataFilePathFactory pathFactory,
            @Nullable CommitIncrement increment,
            boolean useWriteBuffer,
            boolean spillable,
            String fileCompression,
            FieldStatsCollector.Factory[] statsCollectors,
            WriterMetrics writerMetrics) {
        this.fileIO = fileIO;
        this.schemaId = schemaId;
        this.fileFormat = fileFormat;
        this.targetFileSize = targetFileSize;
        this.writeSchema = writeSchema;
        this.pathFactory = pathFactory;
        this.compactManager = compactManager;
        this.forceCompact = forceCompact;
        this.newFiles = new ArrayList<>();
        this.compactBefore = new ArrayList<>();
        this.compactAfter = new ArrayList<>();
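        // Sequence numbers continue from the max sequence number of the existing data files.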
        this.seqNumCounter = new LongCounter(maxSequenceNumber + 1);
        this.fileCompression = fileCompression;
        this.ioManager = ioManager;
        this.statsCollectors = statsCollectors;

        this.sinkWriter =
                useWriteBuffer ? new BufferedSinkWriter(spillable) : new DirectSinkWriter();

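        // Restore state (files produced but not yet committed) from a previous writer instance.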
        if (increment != null) {
            newFiles.addAll(increment.newFilesIncrement().newFiles());
            compactBefore.addAll(increment.compactIncrement().compactBefore());
            compactAfter.addAll(increment.compactIncrement().compactAfter());
        }
        this.writerMetrics = writerMetrics;
    }

    @Override
    public void write(InternalRow rowData) throws Exception {
        Preconditions.checkArgument(
                rowData.getRowKind() == RowKind.INSERT,
                "Append-only writer can only accept insert row kind, but current row kind is: %s",
                rowData.getRowKind());
        boolean success = sinkWriter.write(rowData);
        if (!success) {
            flush(false, false);
            success = sinkWriter.write(rowData);
            if (!success) {
                // We should never get here, because the write buffer throws an exception when
                // a record is too large to buffer. We throw again in case something unexpected
                // happens (e.g. the code in SpillableBuffer is changed).
                throw new RuntimeException("Mem table is too small to hold a single element.");
            }
        }

        if (writerMetrics != null) {
            writerMetrics.incWriteRecordNum();
        }
    }

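    // Flushes buffered records, waits for any in-flight compaction, then triggers a new one
    // (a full compaction when fullCompaction is true).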
    @Override
    public void compact(boolean fullCompaction) throws Exception {
        flush(true, fullCompaction);
    }

    @Override
    public void addNewFiles(List<DataFileMeta> files) {
        files.forEach(compactManager::addNewFile);
    }

    @Override
    public Collection<DataFileMeta> dataFiles() {
        return compactManager.allFiles();
    }

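    // Flushes remaining records, optionally waits for the in-flight compaction, and drains
    // all accumulated file changes into a single CommitIncrement.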
    @Override
    public CommitIncrement prepareCommit(boolean waitCompaction) throws Exception {
        long start = System.currentTimeMillis();
        flush(false, false);
        trySyncLatestCompaction(waitCompaction || forceCompact);
        CommitIncrement increment = drainIncrement();
        if (writerMetrics != null) {
            writerMetrics.updatePrepareCommitCostMillis(System.currentTimeMillis() - start);
        }
        return increment;
    }

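    // Flush steps: write buffered rows to files, register the new files with the compact
    // manager, pick up any finished compaction result, and trigger the next compaction.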
    private void flush(boolean waitForLatestCompaction, boolean forcedFullCompaction)
            throws Exception {
        long start = System.currentTimeMillis();
        List<DataFileMeta> flushedFiles = sinkWriter.flush();

        // add the newly generated files to the compact manager
        flushedFiles.forEach(compactManager::addNewFile);
        trySyncLatestCompaction(waitForLatestCompaction);
        compactManager.triggerCompaction(forcedFullCompaction);
        newFiles.addAll(flushedFiles);
        if (writerMetrics != null) {
            writerMetrics.updateBufferFlushCostMillis(System.currentTimeMillis() - start);
        }
    }

    @Override
    public void sync() throws Exception {
        trySyncLatestCompaction(true);
    }

    @Override
    public void close() throws Exception {
        if (writerMetrics != null) {
            writerMetrics.close();
        }
        // cancel compaction so that it does not block job cancelling
        compactManager.cancelCompaction();
        sync();

        compactManager.close();
        for (DataFileMeta file : compactAfter) {
            // AppendOnlyCompactManager rewrites files rather than upgrading them, so the
            // files in compactAfter can be deleted directly.
            fileIO.deleteQuietly(pathFactory.toPath(file.fileName()));
        }

        sinkWriter.close();
    }

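    // Creates a rolling writer that starts a new file whenever the current one reaches
    // targetFileSize.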
    private RowDataRollingFileWriter createRollingRowWriter() {
        return new RowDataRollingFileWriter(
                fileIO,
                schemaId,
                fileFormat,
                targetFileSize,
                writeSchema,
                pathFactory,
                seqNumCounter,
                fileCompression,
                statsCollectors);
    }

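    // Collects the result of the latest compaction, blocking until it finishes when requested.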
    private void trySyncLatestCompaction(boolean blocking)
            throws ExecutionException, InterruptedException {
        compactManager
                .getCompactionResult(blocking)
                .ifPresent(
                        result -> {
                            compactBefore.addAll(result.before());
                            compactAfter.addAll(result.after());
                        });
    }

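    // Packages the accumulated new/compacted files into a CommitIncrement and clears the
    // internal lists so the next commit starts from a clean state.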
    private CommitIncrement drainIncrement() {
        NewFilesIncrement newFilesIncrement =
                new NewFilesIncrement(new ArrayList<>(newFiles), Collections.emptyList());
        CompactIncrement compactIncrement =
                new CompactIncrement(
                        new ArrayList<>(compactBefore),
                        new ArrayList<>(compactAfter),
                        Collections.emptyList());

        newFiles.clear();
        compactBefore.clear();
        compactAfter.clear();

        return new CommitIncrement(newFilesIncrement, compactIncrement);
    }

    @Override
    public void setMemoryPool(MemorySegmentPool memoryPool) {
        sinkWriter.setMemoryPool(memoryPool);
    }

    @Override
    public long memoryOccupancy() {
        return sinkWriter.memoryOccupancy();
    }

    @Override
    public void flushMemory() throws Exception {
        flush(false, false);
    }

    @VisibleForTesting
    RowBuffer getWriteBuffer() {
        if (sinkWriter instanceof BufferedSinkWriter) {
            return ((BufferedSinkWriter) sinkWriter).writeBuffer;
        } else {
            return null;
        }
    }

    @VisibleForTesting
    List<DataFileMeta> getNewFiles() {
        return newFiles;
    }

    /** Internal interface for sinking input data to files. */
    private interface SinkWriter {

        boolean write(InternalRow data) throws IOException;

        List<DataFileMeta> flush() throws IOException;

        long memoryOccupancy();

        void close();

        void setMemoryPool(MemorySegmentPool memoryPool);
    }

    /**
     * Sinks data directly to files with no in-memory cache, writing through format writers
     * (OrcWriter/ParquetWriter/etc.) immediately. May cause out-of-memory errors.
     */
    private class DirectSinkWriter implements SinkWriter {

        private RowDataRollingFileWriter writer;

        @Override
        public boolean write(InternalRow data) throws IOException {
            if (writer == null) {
                writer = createRollingRowWriter();
            }
            writer.write(data);
            return true;
        }

        @Override
        public List<DataFileMeta> flush() throws IOException {
            List<DataFileMeta> flushedFiles = new ArrayList<>();
            if (writer != null) {
                writer.close();
                flushedFiles.addAll(writer.result());
                writer = null;
            }
            return flushedFiles;
        }

        @Override
        public long memoryOccupancy() {
            return 0;
        }

        @Override
        public void close() {
            if (writer != null) {
                writer.abort();
                writer = null;
            }
        }

        @Override
        public void setMemoryPool(MemorySegmentPool memoryPool) {
            // do nothing
        }
    }

    /**
     * Buffers rows in memory segments taken from the segment pool before writing files. When
     * spillable, flushing the spilled data may delay checkpoint acknowledgement; when
     * non-spillable, a full buffer forces early flushes that may produce too many small files.
     */
    private class BufferedSinkWriter implements SinkWriter {

        private final boolean spillable;

        private RowBuffer writeBuffer;

        private BufferedSinkWriter(boolean spillable) {
            this.spillable = spillable;
        }

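        // Returns false when the buffer is full and cannot accept the row, signalling the
        // caller to flush and retry.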
        @Override
        public boolean write(InternalRow data) throws IOException {
            return writeBuffer.put(data);
        }

        @Override
        public List<DataFileMeta> flush() throws IOException {
            List<DataFileMeta> flushedFiles = new ArrayList<>();
            if (writeBuffer != null) {
                writeBuffer.complete();
                RowDataRollingFileWriter writer = createRollingRowWriter();
                try (RowBuffer.RowBufferIterator iterator = writeBuffer.newIterator()) {
                    while (iterator.advanceNext()) {
                        writer.write(iterator.getRow());
                    }
                } finally {
                    writer.close();
                }
                flushedFiles.addAll(writer.result());
                // reuse writeBuffer
                writeBuffer.reset();
            }
            return flushedFiles;
        }

        @Override
        public long memoryOccupancy() {
            return writeBuffer.memoryOccupancy();
        }

        @Override
        public void close() {
            if (writeBuffer != null) {
                writeBuffer.reset();
                writeBuffer = null;
            }
        }

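        // The buffer is created lazily here, once the memory pool is assigned by the memory
        // management layer.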
        @Override
        public void setMemoryPool(MemorySegmentPool memoryPool) {
            writeBuffer =
                    RowBuffer.getBuffer(
                            ioManager,
                            memoryPool,
                            new InternalRowSerializer(writeSchema),
                            spillable);
        }
    }
}