All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.paimon.flink.lookup.PrimaryKeyPartialLookupTable Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.paimon.flink.lookup;

import org.apache.paimon.annotation.VisibleForTesting;
import org.apache.paimon.data.BinaryRow;
import org.apache.paimon.data.InternalRow;
import org.apache.paimon.disk.IOManagerImpl;
import org.apache.paimon.flink.query.RemoteTableQuery;
import org.apache.paimon.io.DataFileMeta;
import org.apache.paimon.predicate.Predicate;
import org.apache.paimon.table.BucketMode;
import org.apache.paimon.table.FileStoreTable;
import org.apache.paimon.table.query.LocalTableQuery;
import org.apache.paimon.table.source.DataSplit;
import org.apache.paimon.table.source.Split;
import org.apache.paimon.table.source.StreamTableScan;
import org.apache.paimon.utils.Filter;
import org.apache.paimon.utils.ProjectedRow;

import javax.annotation.Nullable;

import java.io.Closeable;
import java.io.File;
import java.io.IOException;
import java.util.Collections;
import java.util.List;
import java.util.Set;

/** Lookup table for primary key which supports to read the LSM tree directly. */
public class PrimaryKeyPartialLookupTable implements LookupTable {

    private final QueryExecutorFactory executorFactory;
    private final FixedBucketFromPkExtractor extractor;
    @Nullable private final ProjectedRow keyRearrange;
    @Nullable private final ProjectedRow trimmedKeyRearrange;

    private Predicate specificPartition;
    @Nullable private Filter cacheRowFilter;
    private QueryExecutor queryExecutor;

    private PrimaryKeyPartialLookupTable(
            QueryExecutorFactory executorFactory, FileStoreTable table, List joinKey) {
        this.executorFactory = executorFactory;

        if (table.bucketMode() != BucketMode.HASH_FIXED) {
            throw new UnsupportedOperationException(
                    "Unsupported mode for partial lookup: " + table.bucketMode());
        }

        this.extractor = new FixedBucketFromPkExtractor(table.schema());

        ProjectedRow keyRearrange = null;
        if (!table.primaryKeys().equals(joinKey)) {
            keyRearrange =
                    ProjectedRow.from(
                            table.primaryKeys().stream()
                                    .map(joinKey::indexOf)
                                    .mapToInt(value -> value)
                                    .toArray());
        }
        this.keyRearrange = keyRearrange;

        List trimmedPrimaryKeys = table.schema().trimmedPrimaryKeys();
        ProjectedRow trimmedKeyRearrange = null;
        if (!trimmedPrimaryKeys.equals(joinKey)) {
            trimmedKeyRearrange =
                    ProjectedRow.from(
                            trimmedPrimaryKeys.stream()
                                    .map(joinKey::indexOf)
                                    .mapToInt(value -> value)
                                    .toArray());
        }
        this.trimmedKeyRearrange = trimmedKeyRearrange;
    }

    @VisibleForTesting
    QueryExecutor queryExecutor() {
        return queryExecutor;
    }

    @Override
    public void specificPartitionFilter(Predicate filter) {
        this.specificPartition = filter;
    }

    @Override
    public void open() throws Exception {
        this.queryExecutor = executorFactory.create(specificPartition, cacheRowFilter);
        refresh();
    }

    @Override
    public List get(InternalRow key) throws IOException {
        InternalRow adjustedKey = key;
        if (keyRearrange != null) {
            adjustedKey = keyRearrange.replaceRow(adjustedKey);
        }
        extractor.setRecord(adjustedKey);
        int bucket = extractor.bucket();
        BinaryRow partition = extractor.partition();

        InternalRow trimmedKey = key;
        if (trimmedKeyRearrange != null) {
            trimmedKey = trimmedKeyRearrange.replaceRow(trimmedKey);
        }

        InternalRow kv = queryExecutor.lookup(partition, bucket, trimmedKey);
        if (kv == null) {
            return Collections.emptyList();
        } else {
            return Collections.singletonList(kv);
        }
    }

    @Override
    public void refresh() {
        queryExecutor.refresh();
    }

    @Override
    public void specifyCacheRowFilter(Filter filter) {
        this.cacheRowFilter = filter;
    }

    @Override
    public void close() throws IOException {
        if (queryExecutor != null) {
            queryExecutor.close();
        }
    }

    public static PrimaryKeyPartialLookupTable createLocalTable(
            FileStoreTable table,
            int[] projection,
            File tempPath,
            List joinKey,
            Set requireCachedBucketIds) {
        return new PrimaryKeyPartialLookupTable(
                (filter, cacheRowFilter) ->
                        new LocalQueryExecutor(
                                new LookupFileStoreTable(table, joinKey),
                                projection,
                                tempPath,
                                filter,
                                requireCachedBucketIds,
                                cacheRowFilter),
                table,
                joinKey);
    }

    public static PrimaryKeyPartialLookupTable createRemoteTable(
            FileStoreTable table, int[] projection, List joinKey) {
        return new PrimaryKeyPartialLookupTable(
                (filter, cacheRowFilter) -> new RemoteQueryExecutor(table, projection),
                table,
                joinKey);
    }

    interface QueryExecutorFactory {
        QueryExecutor create(Predicate filter, @Nullable Filter cacheRowFilter);
    }

    interface QueryExecutor extends Closeable {

        InternalRow lookup(BinaryRow partition, int bucket, InternalRow key) throws IOException;

        void refresh();
    }

    static class LocalQueryExecutor implements QueryExecutor {

        private final LocalTableQuery tableQuery;
        private final StreamTableScan scan;

        private LocalQueryExecutor(
                FileStoreTable table,
                int[] projection,
                File tempPath,
                @Nullable Predicate filter,
                Set requireCachedBucketIds,
                @Nullable Filter cacheRowFilter) {
            this.tableQuery =
                    table.newLocalTableQuery()
                            .withValueProjection(projection)
                            .withIOManager(new IOManagerImpl(tempPath.toString()));

            if (cacheRowFilter != null) {
                this.tableQuery.withCacheRowFilter(cacheRowFilter);
            }

            this.scan =
                    table.newReadBuilder()
                            .dropStats()
                            .withFilter(filter)
                            .withBucketFilter(
                                    requireCachedBucketIds == null
                                            ? null
                                            : requireCachedBucketIds::contains)
                            .newStreamScan();
        }

        @Override
        public InternalRow lookup(BinaryRow partition, int bucket, InternalRow key)
                throws IOException {
            return tableQuery.lookup(partition, bucket, key);
        }

        @Override
        public void refresh() {
            while (true) {
                List splits = scan.plan().splits();
                if (splits.isEmpty()) {
                    return;
                }

                for (Split split : splits) {
                    if (!(split instanceof DataSplit)) {
                        throw new IllegalArgumentException(
                                "Unsupported split: " + split.getClass());
                    }
                    BinaryRow partition = ((DataSplit) split).partition();
                    int bucket = ((DataSplit) split).bucket();
                    List before = ((DataSplit) split).beforeFiles();
                    List after = ((DataSplit) split).dataFiles();

                    tableQuery.refreshFiles(partition, bucket, before, after);
                }
            }
        }

        @Override
        public void close() throws IOException {
            tableQuery.close();
        }
    }

    static class RemoteQueryExecutor implements QueryExecutor {

        private final RemoteTableQuery tableQuery;

        private RemoteQueryExecutor(FileStoreTable table, int[] projection) {
            this.tableQuery = new RemoteTableQuery(table).withValueProjection(projection);
        }

        @Override
        public InternalRow lookup(BinaryRow partition, int bucket, InternalRow key)
                throws IOException {
            return tableQuery.lookup(partition, bucket, key);
        }

        @Override
        public void refresh() {}

        @Override
        public void close() throws IOException {
            tableQuery.close();
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy