/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.dinky.shaded.paimon.table;

import org.dinky.shaded.paimon.CoreOptions;
import org.dinky.shaded.paimon.CoreOptions.ChangelogProducer;
import org.dinky.shaded.paimon.KeyValue;
import org.dinky.shaded.paimon.KeyValueFileStore;
import org.dinky.shaded.paimon.data.InternalRow;
import org.dinky.shaded.paimon.fs.FileIO;
import org.dinky.shaded.paimon.fs.Path;
import org.dinky.shaded.paimon.manifest.ManifestCacheFilter;
import org.dinky.shaded.paimon.mergetree.compact.LookupMergeFunction;
import org.dinky.shaded.paimon.mergetree.compact.MergeFunctionFactory;
import org.dinky.shaded.paimon.operation.FileStoreScan;
import org.dinky.shaded.paimon.operation.KeyValueFileStoreScan;
import org.dinky.shaded.paimon.operation.Lock;
import org.dinky.shaded.paimon.options.Options;
import org.dinky.shaded.paimon.predicate.Predicate;
import org.dinky.shaded.paimon.reader.RecordReader;
import org.dinky.shaded.paimon.schema.KeyValueFieldsExtractor;
import org.dinky.shaded.paimon.schema.TableSchema;
import org.dinky.shaded.paimon.table.sink.SequenceGenerator;
import org.dinky.shaded.paimon.table.sink.TableWriteImpl;
import org.dinky.shaded.paimon.table.source.InnerTableRead;
import org.dinky.shaded.paimon.table.source.KeyValueTableRead;
import org.dinky.shaded.paimon.table.source.MergeTreeSplitGenerator;
import org.dinky.shaded.paimon.table.source.SplitGenerator;
import org.dinky.shaded.paimon.table.source.ValueContentRowDataRecordIterator;
import org.dinky.shaded.paimon.types.RowType;

import java.util.List;
import java.util.function.BiConsumer;

import static org.dinky.shaded.paimon.predicate.PredicateBuilder.and;
import static org.dinky.shaded.paimon.predicate.PredicateBuilder.pickTransformFieldMapping;
import static org.dinky.shaded.paimon.predicate.PredicateBuilder.splitAnd;

/**
 * {@link FileStoreTable} for primary key tables.
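 *
 * <p>Instances are normally obtained through a catalog or a table factory rather than constructed
 * directly. A minimal read sketch, assuming the table already exists at {@code tablePath}
 * (identifiers here are illustrative, not part of this class):
 *
 * <pre>{@code
 * FileStoreTable table = FileStoreTableFactory.create(fileIO, tablePath);
 * ReadBuilder readBuilder = table.newReadBuilder();
 * List<Split> splits = readBuilder.newScan().plan().splits();
 * RecordReader<InternalRow> reader = readBuilder.newRead().createReader(splits);
 * }</pre>
 */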
public class PrimaryKeyFileStoreTable extends AbstractFileStoreTable {

    private static final long serialVersionUID = 1L;

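    /** Cached store instance; transient so it is rebuilt lazily after deserialization. */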
    private transient KeyValueFileStore lazyStore;

    PrimaryKeyFileStoreTable(FileIO fileIO, Path path, TableSchema tableSchema) {
        this(fileIO, path, tableSchema, new CatalogEnvironment(Lock.emptyFactory(), null, null));
    }

    PrimaryKeyFileStoreTable(
            FileIO fileIO,
            Path path,
            TableSchema tableSchema,
            CatalogEnvironment catalogEnvironment) {
        super(fileIO, path, tableSchema, catalogEnvironment);
    }

    @Override
    protected FileStoreTable copy(TableSchema newTableSchema) {
        return new PrimaryKeyFileStoreTable(fileIO, path, newTableSchema, catalogEnvironment);
    }

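    /**
     * Lazily creates the backing {@link KeyValueFileStore}. For the LOOKUP changelog producer the
     * merge function is wrapped in a {@link LookupMergeFunction} so that changelog can be produced
     * by looking up the previously merged value.
     */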
    @Override
    public KeyValueFileStore store() {
        if (lazyStore == null) {
            RowType rowType = tableSchema.logicalRowType();
            Options conf = Options.fromMap(tableSchema.options());
            CoreOptions options = new CoreOptions(conf);
            KeyValueFieldsExtractor extractor =
                    PrimaryKeyTableUtils.PrimaryKeyFieldsExtractor.EXTRACTOR;

            MergeFunctionFactory<KeyValue> mfFactory =
                    PrimaryKeyTableUtils.createMergeFunctionFactory(tableSchema, extractor);
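            // LOOKUP changelog producer: wrap the merge function so the pre-update value can be
            // looked up when generating changelog records.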
            if (options.changelogProducer() == ChangelogProducer.LOOKUP) {
                mfFactory =
                        LookupMergeFunction.wrap(
                                mfFactory, new RowType(extractor.keyFields(tableSchema)), rowType);
            }

            lazyStore =
                    new KeyValueFileStore(
                            fileIO(),
                            schemaManager(),
                            tableSchema.id(),
                            tableSchema.crossPartitionUpdate(),
                            options,
                            tableSchema.logicalPartitionType(),
                            PrimaryKeyTableUtils.addKeyNamePrefix(
                                    tableSchema.logicalBucketKeyType()),
                            new RowType(extractor.keyFields(tableSchema)),
                            rowType,
                            extractor,
                            mfFactory,
                            name());
        }
        return lazyStore;
    }

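    /**
     * Merge-tree split generation: files whose key ranges overlap must stay in the same split so
     * that merging remains correct.
     */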
    @Override
    public SplitGenerator splitGenerator() {
        return new MergeTreeSplitGenerator(
                store().newKeyComparator(),
                store().options().splitTargetSize(),
                store().options().splitOpenFileCost());
    }

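    /** Streaming reads can consume OVERWRITE snapshots only when explicitly enabled in options. */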
    @Override
    public boolean supportStreamingReadOverwrite() {
        return new CoreOptions(tableSchema.options()).streamingReadOverwrite();
    }

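    /**
     * Pushes non-partition predicates into the scan: primary-key predicates become key filters,
     * while the full predicate is also applied as a bucket-level value filter.
     */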
    @Override
    public BiConsumer<FileStoreScan, Predicate> nonPartitionFilterConsumer() {
        return (scan, predicate) -> {
            // currently we can only perform filter push down on keys
            // consider this case:
            //   data file 1: insert key = a, value = 1
            //   data file 2: update key = a, value = 2
            //   filter: value = 1
            // if we perform filter push down on values, data file 1 will be chosen, but data
            // file 2 will be ignored, and the final result will be key = a, value = 1 while the
            // correct result is an empty set
            List<Predicate> keyFilters =
                    pickTransformFieldMapping(
                            splitAnd(predicate),
                            tableSchema.fieldNames(),
                            tableSchema.trimmedPrimaryKeys());
            if (keyFilters.size() > 0) {
                ((KeyValueFileStoreScan) scan).withKeyFilter(and(keyFilters));
            }

            // support value filter in bucket level
            ((KeyValueFileStoreScan) scan).withValueFilter(predicate);
        };
    }

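    /** Reads merge the LSM files per key and expose only the value rows to callers. */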
    @Override
    public InnerTableRead newRead() {
        return new KeyValueTableRead(store().newRead(), schema()) {

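            // Projection applies to the value columns; key columns are handled internally.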
            @Override
            public void projection(int[][] projection) {
                read.withValueProjection(projection);
            }

            @Override
            protected RecordReader.RecordIterator<InternalRow> rowDataRecordIteratorFromKv(
                    RecordReader.RecordIterator<KeyValue> kvRecordIterator) {
                return new ValueContentRowDataRecordIterator(kvRecordIterator);
            }

            @Override
            public InnerTableRead forceKeepDelete() {
                read.forceKeepDelete();
                return this;
            }
        };
    }

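    /**
     * Creates the write implementation that turns incoming rows into {@link KeyValue}s. Writes are
     * normally driven through a write builder; a minimal batch sketch (assuming a {@code table}
     * handle obtained from a catalog):
     *
     * <pre>{@code
     * BatchWriteBuilder builder = table.newBatchWriteBuilder();
     * try (BatchTableWrite write = builder.newWrite();
     *         BatchTableCommit commit = builder.newCommit()) {
     *     write.write(row);
     *     commit.commit(write.prepareCommit());
     * }
     * }</pre>
     */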
    @Override
    public TableWriteImpl<KeyValue> newWrite(String commitUser) {
        return newWrite(commitUser, null);
    }

    @Override
    public TableWriteImpl<KeyValue> newWrite(
            String commitUser, ManifestCacheFilter manifestFilter) {
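        // If a sequence field is configured, it orders updates on the same key; otherwise
        // create(...) returns null and UNKNOWN_SEQUENCE lets the writer assign sequence numbers.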
        final SequenceGenerator sequenceGenerator =
                SequenceGenerator.create(schema(), store().options());
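        // One reusable KeyValue instance; replace(...) refills it for every record.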
        final KeyValue kv = new KeyValue();
        return new TableWriteImpl<>(
                store().newWrite(commitUser, manifestFilter),
                createRowKeyExtractor(),
                record -> {
                    long sequenceNumber =
                            sequenceGenerator == null
                                    ? KeyValue.UNKNOWN_SEQUENCE
                                    : sequenceGenerator.generate(record.row());
                    return kv.replace(
                            record.primaryKey(),
                            sequenceNumber,
                            record.row().getRowKind(),
                            record.row());
                },
                name());
    }
}