All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.paimon.flink.source.DataTableSource Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.paimon.flink.source;

import org.apache.paimon.flink.log.LogStoreTableFactory;
import org.apache.paimon.predicate.Predicate;
import org.apache.paimon.stats.ColStats;
import org.apache.paimon.stats.Statistics;
import org.apache.paimon.table.Table;

import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.table.catalog.ObjectIdentifier;
import org.apache.flink.table.connector.source.abilities.SupportsDynamicFiltering;
import org.apache.flink.table.connector.source.abilities.SupportsStatisticReport;
import org.apache.flink.table.data.RowData;
import org.apache.flink.table.factories.DynamicTableFactory;
import org.apache.flink.table.plan.stats.ColumnStats;
import org.apache.flink.table.plan.stats.TableStats;

import javax.annotation.Nullable;

import java.util.AbstractMap;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.stream.Collectors;

import static org.apache.paimon.utils.Preconditions.checkState;

/**
 * A {@link BaseDataTableSource} implements {@link SupportsStatisticReport} and {@link
 * SupportsDynamicFiltering}.
 */
public class DataTableSource extends BaseDataTableSource
        implements SupportsStatisticReport, SupportsDynamicFiltering {

    @Nullable private List dynamicPartitionFilteringFields;

    public DataTableSource(
            ObjectIdentifier tableIdentifier,
            Table table,
            boolean streaming,
            DynamicTableFactory.Context context,
            @Nullable LogStoreTableFactory logStoreTableFactory) {
        this(
                tableIdentifier,
                table,
                streaming,
                context,
                logStoreTableFactory,
                null,
                null,
                null,
                null,
                null,
                null);
    }

    public DataTableSource(
            ObjectIdentifier tableIdentifier,
            Table table,
            boolean streaming,
            DynamicTableFactory.Context context,
            @Nullable LogStoreTableFactory logStoreTableFactory,
            @Nullable Predicate predicate,
            @Nullable int[][] projectFields,
            @Nullable Long limit,
            @Nullable WatermarkStrategy watermarkStrategy,
            @Nullable List dynamicPartitionFilteringFields,
            @Nullable Long countPushed) {
        super(
                tableIdentifier,
                table,
                streaming,
                context,
                logStoreTableFactory,
                predicate,
                projectFields,
                limit,
                watermarkStrategy,
                countPushed);
        this.dynamicPartitionFilteringFields = dynamicPartitionFilteringFields;
    }

    @Override
    public DataTableSource copy() {
        return new DataTableSource(
                tableIdentifier,
                table,
                streaming,
                context,
                logStoreTableFactory,
                predicate,
                projectFields,
                limit,
                watermarkStrategy,
                dynamicPartitionFilteringFields,
                countPushed);
    }

    @Override
    public TableStats reportStatistics() {
        if (streaming) {
            return TableStats.UNKNOWN;
        }
        Optional optionStatistics = table.statistics();
        if (optionStatistics.isPresent()) {
            Statistics statistics = optionStatistics.get();
            if (statistics.mergedRecordCount().isPresent()) {
                Map flinkColStats =
                        statistics.colStats().entrySet().stream()
                                .map(
                                        entry ->
                                                new AbstractMap.SimpleEntry<>(
                                                        entry.getKey(),
                                                        toFlinkColumnStats(entry.getValue())))
                                .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
                return new TableStats(statistics.mergedRecordCount().getAsLong(), flinkColStats);
            }
        }
        scanSplitsForInference();
        return new TableStats(splitStatistics.totalRowCount());
    }

    @Override
    public List listAcceptedFilterFields() {
        // note that streaming query doesn't support dynamic filtering
        return streaming ? Collections.emptyList() : table.partitionKeys();
    }

    @Override
    public void applyDynamicFiltering(List candidateFilterFields) {
        checkState(
                !streaming,
                "Cannot apply dynamic filtering to Paimon table '%s' when streaming reading.",
                table.name());

        checkState(
                !table.partitionKeys().isEmpty(),
                "Cannot apply dynamic filtering to non-partitioned Paimon table '%s'.",
                table.name());

        this.dynamicPartitionFilteringFields = candidateFilterFields;
    }

    @Override
    protected List dynamicPartitionFilteringFields() {
        return dynamicPartitionFilteringFields;
    }

    private ColumnStats toFlinkColumnStats(ColStats colStats) {
        return ColumnStats.Builder.builder()
                .setNdv(
                        colStats.distinctCount().isPresent()
                                ? colStats.distinctCount().getAsLong()
                                : null)
                .setNullCount(
                        colStats.nullCount().isPresent() ? colStats.nullCount().getAsLong() : null)
                .setAvgLen(
                        colStats.avgLen().isPresent()
                                ? (double) colStats.avgLen().getAsLong()
                                : null)
                .setMaxLen(
                        colStats.maxLen().isPresent() ? (int) colStats.maxLen().getAsLong() : null)
                .setMax(colStats.max().isPresent() ? colStats.max().get() : null)
                .setMin(colStats.min().isPresent() ? colStats.min().get() : null)
                .build();
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy