/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.dinky.shaded.paimon.operation;

import org.dinky.shaded.paimon.AppendOnlyFileStore;
import org.dinky.shaded.paimon.CoreOptions;
import org.dinky.shaded.paimon.append.AppendOnlyCompactManager;
import org.dinky.shaded.paimon.append.AppendOnlyWriter;
import org.dinky.shaded.paimon.compact.CompactManager;
import org.dinky.shaded.paimon.compact.NoopCompactManager;
import org.dinky.shaded.paimon.data.BinaryRow;
import org.dinky.shaded.paimon.data.InternalRow;
import org.dinky.shaded.paimon.format.FileFormat;
import org.dinky.shaded.paimon.fs.FileIO;
import org.dinky.shaded.paimon.io.DataFileMeta;
import org.dinky.shaded.paimon.io.DataFilePathFactory;
import org.dinky.shaded.paimon.io.RowDataRollingFileWriter;
import org.dinky.shaded.paimon.reader.RecordReaderIterator;
import org.dinky.shaded.paimon.statistics.FieldStatsCollector;
import org.dinky.shaded.paimon.table.BucketMode;
import org.dinky.shaded.paimon.table.source.DataSplit;
import org.dinky.shaded.paimon.types.RowType;
import org.dinky.shaded.paimon.utils.CommitIncrement;
import org.dinky.shaded.paimon.utils.FileStorePathFactory;
import org.dinky.shaded.paimon.utils.LongCounter;
import org.dinky.shaded.paimon.utils.RecordWriter;
import org.dinky.shaded.paimon.utils.SnapshotManager;
import org.dinky.shaded.paimon.utils.StatsCollectorFactories;

import javax.annotation.Nullable;

import java.util.Collections;
import java.util.List;
import java.util.concurrent.ExecutorService;

import static org.dinky.shaded.paimon.io.DataFileMeta.getMaxSequenceNumber;

/**
 * {@link FileStoreWrite} for {@link AppendOnlyFileStore}. Creates {@link AppendOnlyWriter}s
 * that append {@link InternalRow}s to per-bucket data files, optionally buffering (and
 * spilling) writes and scheduling small-file compaction.
 */
public class AppendOnlyFileStoreWrite extends MemoryFileStoreWrite<InternalRow> {

    private final FileIO fileIO;
    private final AppendOnlyFileStoreRead read;
    private final long schemaId;
    private final RowType rowType;
    private final FileFormat fileFormat;
    private final FileStorePathFactory pathFactory;
    private final long targetFileSize;
    private final int compactionMinFileNum;
    private final int compactionMaxFileNum;
    private final boolean commitForceCompact;
    private final String fileCompression;
    private final boolean useWriteBuffer;
    private final boolean spillable;
    private final FieldStatsCollector.Factory[] statsCollectors;

    private boolean skipCompaction;
    private BucketMode bucketMode = BucketMode.FIXED;

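    /**
     * Creates the write operation for an append-only file store. Target file size, compaction
     * file-count thresholds, write-buffer/spill behavior and per-field stats collection are
     * all derived once from the given {@link CoreOptions} and fixed for the lifetime of this
     * instance.
     */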
    public AppendOnlyFileStoreWrite(
            FileIO fileIO,
            AppendOnlyFileStoreRead read,
            long schemaId,
            String commitUser,
            RowType rowType,
            FileStorePathFactory pathFactory,
            SnapshotManager snapshotManager,
            FileStoreScan scan,
            CoreOptions options,
            String tableName) {
        super(commitUser, snapshotManager, scan, options, null, tableName, pathFactory);
        this.fileIO = fileIO;
        this.read = read;
        this.schemaId = schemaId;
        this.rowType = rowType;
        this.fileFormat = options.fileFormat();
        this.pathFactory = pathFactory;
        this.targetFileSize = options.targetFileSize();
        this.compactionMinFileNum = options.compactionMinFileNum();
        this.compactionMaxFileNum = options.compactionMaxFileNum();
        this.commitForceCompact = options.commitForceCompact();
        this.skipCompaction = options.writeOnly();
        this.fileCompression = options.fileCompression();
        this.useWriteBuffer = options.useWriteBufferForAppend();
        this.spillable = options.writeBufferSpillable(fileIO.isObjectStore(), isStreamingMode);
        this.statsCollectors =
                StatsCollectorFactories.createStatsFactories(options, rowType.getFieldNames());
    }

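    /**
     * Creates an {@link AppendOnlyWriter} for the given partition and bucket. The writer
     * resumes sequence numbers from the restored files, and the compact manager shares the
     * same (mutable) file list; when compaction is skipped ({@code write-only} tables or
     * unaware-bucket mode) a {@link NoopCompactManager} is installed instead.
     */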
    @Override
    protected RecordWriter<InternalRow> createWriter(
            BinaryRow partition,
            int bucket,
            List<DataFileMeta> restoredFiles,
            @Nullable CommitIncrement restoreIncrement,
            ExecutorService compactExecutor) {
        // Let the writer and the compact manager hold the same list reference, so the
        // restored files stay mutable and both sides see updates.
        long maxSequenceNumber = getMaxSequenceNumber(restoredFiles);
        DataFilePathFactory factory = pathFactory.createDataFilePathFactory(partition, bucket);
        CompactManager compactManager =
                skipCompaction
                        ? new NoopCompactManager()
                        : new AppendOnlyCompactManager(
                                compactExecutor,
                                restoredFiles,
                                compactionMinFileNum,
                                compactionMaxFileNum,
                                targetFileSize,
                                compactRewriter(partition, bucket),
                                getCompactionMetrics(partition, bucket));

        return new AppendOnlyWriter(
                fileIO,
                ioManager,
                schemaId,
                fileFormat,
                targetFileSize,
                rowType,
                maxSequenceNumber,
                compactManager,
                commitForceCompact,
                factory,
                restoreIncrement,
                useWriteBuffer,
                spillable,
                fileCompression,
                statsCollectors,
                getWriterMetrics(partition, bucket));
    }

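    /**
     * Returns a rewriter that reads the given data files back through the store's
     * {@link AppendOnlyFileStoreRead} and rewrites their rows into new rolling files capped
     * at the target file size, starting from the minimum sequence number of the input. A
     * minimal usage sketch, assuming the functional method of
     * {@link AppendOnlyCompactManager.CompactRewriter} is named {@code rewrite} and that a
     * {@code partition}, {@code bucket} and non-empty {@code toCompact} list are at hand:
     *
     * <pre>{@code
     * AppendOnlyCompactManager.CompactRewriter rewriter =
     *         write.compactRewriter(partition, bucket);
     * // Merges the small input files into fewer files of up to the target size.
     * List<DataFileMeta> compacted = rewriter.rewrite(toCompact);
     * }</pre>
     */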
    public AppendOnlyCompactManager.CompactRewriter compactRewriter(
            BinaryRow partition, int bucket) {
        return toCompact -> {
            if (toCompact.isEmpty()) {
                return Collections.emptyList();
            }
            RowDataRollingFileWriter rewriter =
                    new RowDataRollingFileWriter(
                            fileIO,
                            schemaId,
                            fileFormat,
                            targetFileSize,
                            rowType,
                            pathFactory.createDataFilePathFactory(partition, bucket),
                            new LongCounter(toCompact.get(0).minSequenceNumber()),
                            fileCompression,
                            statsCollectors);
            try {
                rewriter.write(
                        new RecordReaderIterator<>(
                                read.createReader(
                                        DataSplit.builder()
                                                .withPartition(partition)
                                                .withBucket(bucket)
                                                .withDataFiles(toCompact)
                                                .build())));
            } finally {
                rewriter.close();
            }
            return rewriter.result();
        };
    }

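    /**
     * Sets the bucket mode of the owning table; in unaware-bucket mode this disables
     * compaction inside the writers and forces them to ignore previous files. A hedged
     * sketch of the intended call pattern ({@code store.newWrite(...)} stands in for
     * however the surrounding code obtains this instance):
     *
     * <pre>{@code
     * AppendOnlyFileStoreWrite write =
     *         store.newWrite(commitUser).withBucketMode(BucketMode.UNAWARE);
     * }</pre>
     */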
    public AppendOnlyFileStoreWrite withBucketMode(BucketMode bucketMode) {
        // AppendOnlyFileStoreWrite is sensitive to the bucket mode. It behaves differently in
        // unaware-bucket mode (no compaction, and writers are forced to start empty).
        this.bucketMode = bucketMode;
        if (bucketMode == BucketMode.UNAWARE) {
            super.withIgnorePreviousFiles(true);
            skipCompaction = true;
        }
        return this;
    }

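    /**
     * {@inheritDoc}
     *
     * <p>In unaware-bucket mode the flag is forced to {@code true} regardless of the
     * argument, so every restored writer starts from an empty file list.
     */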
    @Override
    public void withIgnorePreviousFiles(boolean ignorePrevious) {
        // in unaware bucket mode, we need all writers to be empty
        super.withIgnorePreviousFiles(ignorePrevious || bucketMode == BucketMode.UNAWARE);
    }
}