/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.dinky.shaded.paimon.operation;

import org.dinky.shaded.paimon.CoreOptions;
import org.dinky.shaded.paimon.CoreOptions.ChangelogProducer;
import org.dinky.shaded.paimon.KeyValue;
import org.dinky.shaded.paimon.KeyValueFileStore;
import org.dinky.shaded.paimon.annotation.VisibleForTesting;
import org.dinky.shaded.paimon.codegen.RecordEqualiser;
import org.dinky.shaded.paimon.compact.CompactManager;
import org.dinky.shaded.paimon.compact.NoopCompactManager;
import org.dinky.shaded.paimon.data.BinaryRow;
import org.dinky.shaded.paimon.data.InternalRow;
import org.dinky.shaded.paimon.format.FileFormatDiscover;
import org.dinky.shaded.paimon.fs.FileIO;
import org.dinky.shaded.paimon.index.IndexMaintainer;
import org.dinky.shaded.paimon.io.DataFileMeta;
import org.dinky.shaded.paimon.io.KeyValueFileReaderFactory;
import org.dinky.shaded.paimon.io.KeyValueFileWriterFactory;
import org.dinky.shaded.paimon.lookup.hash.HashLookupStoreFactory;
import org.dinky.shaded.paimon.mergetree.ContainsLevels;
import org.dinky.shaded.paimon.mergetree.Levels;
import org.dinky.shaded.paimon.mergetree.LookupLevels;
import org.dinky.shaded.paimon.mergetree.MergeSorter;
import org.dinky.shaded.paimon.mergetree.MergeTreeWriter;
import org.dinky.shaded.paimon.mergetree.compact.CompactRewriter;
import org.dinky.shaded.paimon.mergetree.compact.CompactStrategy;
import org.dinky.shaded.paimon.mergetree.compact.FirstRowMergeTreeCompactRewriter;
import org.dinky.shaded.paimon.mergetree.compact.FullChangelogMergeTreeCompactRewriter;
import org.dinky.shaded.paimon.mergetree.compact.LookupCompaction;
import org.dinky.shaded.paimon.mergetree.compact.LookupMergeTreeCompactRewriter;
import org.dinky.shaded.paimon.mergetree.compact.MergeFunctionFactory;
import org.dinky.shaded.paimon.mergetree.compact.MergeTreeCompactManager;
import org.dinky.shaded.paimon.mergetree.compact.MergeTreeCompactRewriter;
import org.dinky.shaded.paimon.mergetree.compact.UniversalCompaction;
import org.dinky.shaded.paimon.schema.KeyValueFieldsExtractor;
import org.dinky.shaded.paimon.schema.SchemaManager;
import org.dinky.shaded.paimon.types.RowType;
import org.dinky.shaded.paimon.utils.CommitIncrement;
import org.dinky.shaded.paimon.utils.FileStorePathFactory;
import org.dinky.shaded.paimon.utils.SnapshotManager;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.annotation.Nullable;

import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ExecutorService;
import java.util.function.Supplier;

import static org.dinky.shaded.paimon.io.DataFileMeta.getMaxSequenceNumber;

/** {@link FileStoreWrite} for {@link KeyValueFileStore}. */
public class KeyValueFileStoreWrite extends MemoryFileStoreWrite<KeyValue> {

    private static final Logger LOG = LoggerFactory.getLogger(KeyValueFileStoreWrite.class);

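    // Reader/writer factories are kept as builders here: the concrete factories are
    // built per partition/bucket in createWriter and createRewriter below.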
    private final KeyValueFileReaderFactory.Builder readerFactoryBuilder;
    private final KeyValueFileWriterFactory.Builder writerFactoryBuilder;
    private final Supplier<Comparator<InternalRow>> keyComparatorSupplier;
    private final Supplier<RecordEqualiser> valueEqualiserSupplier;
    private final MergeFunctionFactory<KeyValue> mfFactory;
    private final CoreOptions options;
    private final FileIO fileIO;
    private final RowType keyType;
    private final RowType valueType;

    public KeyValueFileStoreWrite(
            FileIO fileIO,
            SchemaManager schemaManager,
            long schemaId,
            String commitUser,
            RowType keyType,
            RowType valueType,
            Supplier<Comparator<InternalRow>> keyComparatorSupplier,
            Supplier<RecordEqualiser> valueEqualiserSupplier,
            MergeFunctionFactory<KeyValue> mfFactory,
            FileStorePathFactory pathFactory,
            Map<String, FileStorePathFactory> format2PathFactory,
            SnapshotManager snapshotManager,
            FileStoreScan scan,
            @Nullable IndexMaintainer.Factory<KeyValue> indexFactory,
            CoreOptions options,
            KeyValueFieldsExtractor extractor,
            String tableName) {
        super(commitUser, snapshotManager, scan, options, indexFactory, tableName, pathFactory);
        this.fileIO = fileIO;
        this.keyType = keyType;
        this.valueType = valueType;
        this.readerFactoryBuilder =
                KeyValueFileReaderFactory.builder(
                        fileIO,
                        schemaManager,
                        schemaId,
                        keyType,
                        valueType,
                        FileFormatDiscover.of(options),
                        pathFactory,
                        extractor,
                        options);
        this.writerFactoryBuilder =
                KeyValueFileWriterFactory.builder(
                        fileIO,
                        schemaId,
                        keyType,
                        valueType,
                        options.fileFormat(),
                        format2PathFactory,
                        options.targetFileSize());
        this.keyComparatorSupplier = keyComparatorSupplier;
        this.valueEqualiserSupplier = valueEqualiserSupplier;
        this.mfFactory = mfFactory;
        this.options = options;
    }

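    /**
     * Creates a {@link MergeTreeWriter} for one partition/bucket. The LSM {@link Levels}
     * structure is restored from the given files, and new writes continue from the max
     * sequence number found in them.
     */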
    @Override
    protected MergeTreeWriter createWriter(
            BinaryRow partition,
            int bucket,
            List<DataFileMeta> restoreFiles,
            @Nullable CommitIncrement restoreIncrement,
            ExecutorService compactExecutor) {
        if (LOG.isDebugEnabled()) {
            LOG.debug(
                    "Creating merge tree writer for partition {} bucket {} from restored files {}",
                    partition,
                    bucket,
                    restoreFiles);
        }

        KeyValueFileWriterFactory writerFactory =
                writerFactoryBuilder.build(partition, bucket, options);
        Comparator<InternalRow> keyComparator = keyComparatorSupplier.get();
        Levels levels = new Levels(keyComparator, restoreFiles, options.numLevels());
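        // Universal compaction (in the RocksDB sense) is the base strategy; its triggers
        // come from maxSizeAmplificationPercent, sortedRunSizeRatio and
        // numSortedRunCompactionTrigger in CoreOptions.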
        UniversalCompaction universalCompaction =
                new UniversalCompaction(
                        options.maxSizeAmplificationPercent(),
                        options.sortedRunSizeRatio(),
                        options.numSortedRunCompactionTrigger());
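        // With changelog-producer = lookup, the strategy is wrapped in LookupCompaction,
        // which forces level-0 files to be picked so that compaction also drives
        // changelog generation via lookup.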
        CompactStrategy compactStrategy =
                options.changelogProducer() == ChangelogProducer.LOOKUP
                        ? new LookupCompaction(universalCompaction)
                        : universalCompaction;
        CompactManager compactManager =
                createCompactManager(partition, bucket, compactStrategy, compactExecutor, levels);

        return new MergeTreeWriter(
                bufferSpillable(),
                options.localSortMaxNumFileHandles(),
                ioManager,
                compactManager,
                getMaxSequenceNumber(restoreFiles),
                keyComparator,
                mfFactory.create(),
                writerFactory,
                options.commitForceCompact(),
                options.changelogProducer(),
                restoreIncrement,
                getWriterMetrics(partition, bucket));
    }

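    /**
     * Whether the write buffer may spill to disk, decided by {@link CoreOptions} from
     * whether the underlying {@link FileIO} is an object store and whether this write
     * is in streaming mode.
     */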
    @VisibleForTesting
    public boolean bufferSpillable() {
        return options.writeBufferSpillable(fileIO.isObjectStore(), isStreamingMode);
    }

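    /**
     * Write-only tables skip compaction entirely ({@link NoopCompactManager}); otherwise
     * a {@link MergeTreeCompactManager} runs compactions on the shared executor, with
     * numSortedRunStopTrigger as the threshold at which writes wait for compaction.
     */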
    private CompactManager createCompactManager(
            BinaryRow partition,
            int bucket,
            CompactStrategy compactStrategy,
            ExecutorService compactExecutor,
            Levels levels) {
        if (options.writeOnly()) {
            return new NoopCompactManager();
        } else {
            Comparator<InternalRow> keyComparator = keyComparatorSupplier.get();
            CompactRewriter rewriter = createRewriter(partition, bucket, keyComparator, levels);
            return new MergeTreeCompactManager(
                    compactExecutor,
                    levels,
                    compactStrategy,
                    keyComparator,
                    options.compactionFileSize(),
                    options.numSortedRunStopTrigger(),
                    rewriter,
                    getCompactionMetrics(partition, bucket));
        }
    }

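    /**
     * Chooses a rewriter by changelog producer: FULL_COMPACTION produces changelog while
     * rewriting to the max level; LOOKUP uses lookup-based rewriters (a key-only
     * "contains" lookup for the first-row merge engine); anything else gets the plain
     * {@link MergeTreeCompactRewriter}.
     */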
    private MergeTreeCompactRewriter createRewriter(
            BinaryRow partition, int bucket, Comparator<InternalRow> keyComparator, Levels levels) {
        KeyValueFileReaderFactory readerFactory = readerFactoryBuilder.build(partition, bucket);
        KeyValueFileWriterFactory writerFactory =
                writerFactoryBuilder.build(partition, bucket, options);
        MergeSorter mergeSorter = new MergeSorter(options, keyType, valueType, ioManager);
        switch (options.changelogProducer()) {
            case FULL_COMPACTION:
                return new FullChangelogMergeTreeCompactRewriter(
                        options.numLevels() - 1,
                        readerFactory,
                        writerFactory,
                        keyComparator,
                        mfFactory,
                        mergeSorter,
                        valueEqualiserSupplier.get(),
                        options.changelogRowDeduplicate());
            case LOOKUP:
                if (options.mergeEngine() == CoreOptions.MergeEngine.FIRST_ROW) {
                    KeyValueFileReaderFactory keyOnlyReader =
                            readerFactoryBuilder
                                    .copyWithoutProjection()
                                    .withValueProjection(new int[0][])
                                    .build(partition, bucket);
                    ContainsLevels containsLevels = createContainsLevels(levels, keyOnlyReader);
                    return new FirstRowMergeTreeCompactRewriter(
                            containsLevels,
                            readerFactory,
                            writerFactory,
                            keyComparator,
                            mfFactory,
                            mergeSorter,
                            valueEqualiserSupplier.get(),
                            options.changelogRowDeduplicate());
                }
                LookupLevels lookupLevels = createLookupLevels(levels, readerFactory);
                return new LookupMergeTreeCompactRewriter(
                        lookupLevels,
                        readerFactory,
                        writerFactory,
                        keyComparator,
                        mfFactory,
                        mergeSorter,
                        valueEqualiserSupplier.get(),
                        options.changelogRowDeduplicate());
            default:
                return new MergeTreeCompactRewriter(
                        readerFactory, writerFactory, keyComparator, mfFactory, mergeSorter);
        }
    }

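    /**
     * Lookup requires local disk: {@link LookupLevels} reads keys from higher levels and
     * caches them in local files built by {@link HashLookupStoreFactory}, subject to the
     * lookup cache file retention and max disk size options.
     */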
    private LookupLevels createLookupLevels(
            Levels levels, KeyValueFileReaderFactory readerFactory) {
        if (ioManager == null) {
            throw new RuntimeException(
                    "Can not use lookup, there is no temp disk directory to use.");
        }
        return new LookupLevels(
                levels,
                keyComparatorSupplier.get(),
                keyType,
                valueType,
                file ->
                        readerFactory.createRecordReader(
                                file.schemaId(), file.fileName(), file.fileSize(), file.level()),
                () -> ioManager.createChannel().getPathFile(),
                new HashLookupStoreFactory(
                        cacheManager,
                        options.toConfiguration().get(CoreOptions.LOOKUP_HASH_LOAD_FACTOR)),
                options.toConfiguration().get(CoreOptions.LOOKUP_CACHE_FILE_RETENTION),
                options.toConfiguration().get(CoreOptions.LOOKUP_CACHE_MAX_DISK_SIZE));
    }

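    /**
     * Key-only counterpart of {@link #createLookupLevels}: {@link ContainsLevels} only
     * answers whether a key exists in higher levels, which is sufficient for the
     * first-row merge engine.
     */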
    private ContainsLevels createContainsLevels(
            Levels levels, KeyValueFileReaderFactory readerFactory) {
        if (ioManager == null) {
            throw new RuntimeException(
                    "Can not use lookup, there is no temp disk directory to use.");
        }
        return new ContainsLevels(
                levels,
                keyComparatorSupplier.get(),
                keyType,
                file ->
                        readerFactory.createRecordReader(
                                file.schemaId(), file.fileName(), file.fileSize(), file.level()),
                () -> ioManager.createChannel().getPathFile(),
                new HashLookupStoreFactory(
                        cacheManager,
                        options.toConfiguration().get(CoreOptions.LOOKUP_HASH_LOAD_FACTOR)),
                options.toConfiguration().get(CoreOptions.LOOKUP_CACHE_FILE_RETENTION),
                options.toConfiguration().get(CoreOptions.LOOKUP_CACHE_MAX_DISK_SIZE));
    }
}