org.apache.paimon.flink.sink.FlinkSinkBuilder Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.paimon.flink.sink;

import org.apache.paimon.annotation.Public;
import org.apache.paimon.data.InternalRow;
import org.apache.paimon.flink.FlinkConnectorOptions;
import org.apache.paimon.flink.FlinkRowWrapper;
import org.apache.paimon.flink.sink.index.GlobalDynamicBucketSink;
import org.apache.paimon.flink.sorter.TableSortInfo;
import org.apache.paimon.flink.sorter.TableSorter;
import org.apache.paimon.flink.sorter.TableSorter.OrderType;
import org.apache.paimon.table.BucketMode;
import org.apache.paimon.table.FileStoreTable;
import org.apache.paimon.table.Table;

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSink;
import org.apache.flink.table.data.RowData;
import org.apache.flink.table.data.util.DataFormatConverters;
import org.apache.flink.table.runtime.typeutils.InternalTypeInfo;
import org.apache.flink.table.types.DataType;
import org.apache.flink.table.types.logical.RowType;
import org.apache.flink.types.Row;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.annotation.Nullable;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;

import static org.apache.paimon.flink.FlinkConnectorOptions.CLUSTERING_SAMPLE_FACTOR;
import static org.apache.paimon.flink.FlinkConnectorOptions.CLUSTERING_STRATEGY;
import static org.apache.paimon.flink.FlinkConnectorOptions.MIN_CLUSTERING_SAMPLE_FACTOR;
import static org.apache.paimon.flink.sink.FlinkSink.isStreaming;
import static org.apache.paimon.flink.sink.FlinkStreamPartitioner.partition;
import static org.apache.paimon.flink.sorter.TableSorter.OrderType.HILBERT;
import static org.apache.paimon.flink.sorter.TableSorter.OrderType.ORDER;
import static org.apache.paimon.flink.sorter.TableSorter.OrderType.ZORDER;
import static org.apache.paimon.table.BucketMode.BUCKET_UNAWARE;
import static org.apache.paimon.utils.Preconditions.checkArgument;
import static org.apache.paimon.utils.Preconditions.checkState;

/**
 * DataStream API for building Flink Sink.
 *
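 * <p>Illustrative usage sketch, assuming a Paimon {@link FileStoreTable} {@code table} and an
 * upstream {@code DataStream<RowData>} {@code input} created elsewhere:
 *
 * <pre>{@code
 * DataStreamSink<?> sink =
 *         new FlinkSinkBuilder(table)
 *                 .forRowData(input)
 *                 .parallelism(2)
 *                 .build();
 * }</pre>
 *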
 * @since 0.8
 */
@Public
public class FlinkSinkBuilder {

    private static final Logger LOG = LoggerFactory.getLogger(FlinkSinkBuilder.class);

    protected final FileStoreTable table;

    private DataStream<RowData> input;
    @Nullable protected Map<String, String> overwritePartition;
    @Nullable private Integer parallelism;
    @Nullable private TableSortInfo tableSortInfo;

    // ============== for extension ==============

    protected boolean compactSink = false;
    @Nullable protected LogSinkFunction logSinkFunction;

    public FlinkSinkBuilder(Table table) {
        if (!(table instanceof FileStoreTable)) {
            throw new UnsupportedOperationException("Unsupported table type: " + table);
        }
        this.table = (FileStoreTable) table;
    }

    /**
     * From a {@link DataStream} of {@link Row}s; a {@link DataType} must be provided so the
     * builder can convert those {@link Row}s into a {@link RowData} DataStream.
     */
    public FlinkSinkBuilder forRow(DataStream<Row> input, DataType rowDataType) {
        RowType rowType = (RowType) rowDataType.getLogicalType();
        DataType[] fieldDataTypes = rowDataType.getChildren().toArray(new DataType[0]);

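        // Convert the external Row stream to an internal RowData stream, keeping the upstream
        // operator's parallelism.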
        DataFormatConverters.RowConverter converter =
                new DataFormatConverters.RowConverter(fieldDataTypes);
        this.input =
                input.transform(
                                "Map",
                                InternalTypeInfo.of(rowType),
                                new StreamMapWithForwardingRecordAttributes<>(
                                        (MapFunction<Row, RowData>) converter::toInternal))
                        .setParallelism(input.getParallelism());
        return this;
    }

    /** From {@link DataStream} with {@link RowData}. */
    public FlinkSinkBuilder forRowData(DataStream<RowData> input) {
        this.input = input;
        return this;
    }

    /** INSERT OVERWRITE. */
    public FlinkSinkBuilder overwrite() {
        return overwrite(new HashMap<>());
    }

    /** INSERT OVERWRITE PARTITION (...). */
    public FlinkSinkBuilder overwrite(Map<String, String> overwritePartition) {
        this.overwritePartition = overwritePartition;
        return this;
    }

    /** Set sink parallelism. */
    public FlinkSinkBuilder parallelism(int parallelism) {
        this.parallelism = parallelism;
        return this;
    }

    /** Cluster the input data if possible. */
    public FlinkSinkBuilder clusteringIfPossible(
            String clusteringColumns,
            String clusteringStrategy,
            boolean sortInCluster,
            int sampleFactor) {
        // Clustering is skipped if the clustering columns are empty, the execution mode is
        // STREAMING, or the table's bucket mode does not support it.
        if (clusteringColumns == null || clusteringColumns.isEmpty()) {
            return this;
        }
        checkState(input != null, "The input stream should be specified earlier.");
        if (isStreaming(input) || !table.bucketMode().equals(BUCKET_UNAWARE)) {
            LOG.warn(
                    "Clustering is enabled; however, it has been skipped as "
                            + "it only supports the bucket unaware table without primary keys and "
                            + "BATCH execution mode.");
            return this;
        }
        // If the clustering is not skipped, check the clustering column names and sample
        // factor value.
        List<String> columns = Arrays.asList(clusteringColumns.split(","));
        List<String> fieldNames = table.schema().fieldNames();
        checkState(
                new HashSet<>(fieldNames).containsAll(new HashSet<>(columns)),
                String.format(
                        "Field names %s should contains all clustering column names %s.",
                        fieldNames, columns));
        checkState(
                sampleFactor >= MIN_CLUSTERING_SAMPLE_FACTOR,
                "The minimum allowed "
                        + CLUSTERING_SAMPLE_FACTOR.key()
                        + " is "
                        + MIN_CLUSTERING_SAMPLE_FACTOR
                        + ".");
        TableSortInfo.Builder sortInfoBuilder = new TableSortInfo.Builder();
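        // With the default strategy, pick the sort order from the number of clustering columns:
        // one column uses a plain ORDER sort, two to four columns use ZORDER, and five or more
        // use HILBERT.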
        if (clusteringStrategy.equals(CLUSTERING_STRATEGY.defaultValue())) {
            if (columns.size() == 1) {
                sortInfoBuilder.setSortStrategy(ORDER);
            } else if (columns.size() < 5) {
                sortInfoBuilder.setSortStrategy(ZORDER);
            } else {
                sortInfoBuilder.setSortStrategy(HILBERT);
            }
        } else {
            sortInfoBuilder.setSortStrategy(OrderType.of(clusteringStrategy));
        }
        int upstreamParallelism = input.getParallelism();
        String sinkParallelismValue =
                table.options().get(FlinkConnectorOptions.SINK_PARALLELISM.key());
        int sinkParallelism =
                sinkParallelismValue == null
                        ? upstreamParallelism
                        : Integer.parseInt(sinkParallelismValue);
        sortInfoBuilder
                .setSortColumns(columns)
                .setSortInCluster(sortInCluster)
                .setSinkParallelism(sinkParallelism);
        int globalSampleSize = sinkParallelism * sampleFactor;
        // If the adaptive scheduler is not enabled, the local sample size is the global sample
        // size divided by the upstream parallelism, which bounds the total amount of data
        // received by the global sample node. If the adaptive scheduler is enabled, the local
        // sample size is sinkParallelism * the minimum sample factor.
        int localSampleSize =
                upstreamParallelism > 0
                        ? Math.max(sampleFactor, globalSampleSize / upstreamParallelism)
                        : sinkParallelism * MIN_CLUSTERING_SAMPLE_FACTOR;
        this.tableSortInfo =
                sortInfoBuilder
                        .setRangeNumber(sinkParallelism)
                        .setGlobalSampleSize(globalSampleSize)
                        .setLocalSampleSize(localSampleSize)
                        .build();
        return this;
    }

    /** Build {@link DataStreamSink}. */
    public DataStreamSink<?> build() {
        setParallelismIfAdaptiveConflict();
        input = trySortInput(input);
        DataStream<InternalRow> input = mapToInternalRow(this.input, table.rowType());
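        // For primary-key tables with local merge enabled, pre-merge records that share the same
        // primary key before they are shuffled to the writers.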
        if (table.coreOptions().localMergeEnabled() && table.schema().primaryKeys().size() > 0) {
            input =
                    input.forward()
                            .transform(
                                    "local merge",
                                    input.getType(),
                                    new LocalMergeOperator.Factory(table.schema()))
                            .setParallelism(input.getParallelism());
        }

        BucketMode bucketMode = table.bucketMode();
        switch (bucketMode) {
            case HASH_FIXED:
                return buildForFixedBucket(input);
            case HASH_DYNAMIC:
                return buildDynamicBucketSink(input, false);
            case CROSS_PARTITION:
                return buildDynamicBucketSink(input, true);
            case BUCKET_UNAWARE:
                return buildUnawareBucketSink(input);
            default:
                throw new UnsupportedOperationException("Unsupported bucket mode: " + bucketMode);
        }
    }

    protected DataStream<InternalRow> mapToInternalRow(
            DataStream<RowData> input, org.apache.paimon.types.RowType rowType) {
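        // Wrap each Flink RowData as a Paimon InternalRow; the map keeps the input parallelism.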
        return input.transform(
                        "Map",
                        org.apache.paimon.flink.utils.InternalTypeInfo.fromRowType(rowType),
                        new StreamMapWithForwardingRecordAttributes<>(
                                (MapFunction<RowData, InternalRow>) FlinkRowWrapper::new))
                .setParallelism(input.getParallelism());
    }

    protected DataStreamSink<?> buildDynamicBucketSink(
            DataStream<InternalRow> input, boolean globalIndex) {
        checkArgument(logSinkFunction == null, "Dynamic bucket mode cannot work with the log system.");
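        // Three variants: a compaction-only sink for dynamic buckets, a global-index sink for
        // cross-partition updates, and the regular row-based dynamic bucket sink.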
        return compactSink && !globalIndex
                // todo support global index sort compact
                ? new DynamicBucketCompactSink(table, overwritePartition).build(input, parallelism)
                : globalIndex
                        ? new GlobalDynamicBucketSink(table, overwritePartition)
                                .build(input, parallelism)
                        : new RowDynamicBucketSink(table, overwritePartition)
                                .build(input, parallelism);
    }

    protected DataStreamSink<?> buildForFixedBucket(DataStream<InternalRow> input) {
        int bucketNums = table.bucketSpec().getNumBuckets();
        if (parallelism == null
                && bucketNums < input.getParallelism()
                && table.partitionKeys().isEmpty()) {
            // For a non-partitioned table whose bucket count is less than the job parallelism,
            // cap the writer parallelism at bucketNums.
            LOG.warn(
                    "For a non-partitioned table, if bucketNums is less than the parallelism of the input operator,"
                            + " the parallelism of the writer operator will be set to bucketNums.");
            parallelism = bucketNums;
        }
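        // Shuffle records so that all records of the same partition and bucket are routed to the
        // same writer task.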
        DataStream<InternalRow> partitioned =
                partition(
                        input,
                        new RowDataChannelComputer(table.schema(), logSinkFunction != null),
                        parallelism);
        FixedBucketSink sink = new FixedBucketSink(table, overwritePartition, logSinkFunction);
        return sink.sinkFrom(partitioned);
    }

    private DataStreamSink<?> buildUnawareBucketSink(DataStream<InternalRow> input) {
        checkArgument(
                table.primaryKeys().isEmpty(),
                "Unaware bucket mode only works with append-only table for now.");
        return new RowUnawareBucketSink(table, overwritePartition, logSinkFunction, parallelism)
                .sinkFrom(input);
    }

    private DataStream<RowData> trySortInput(DataStream<RowData> input) {
        if (tableSortInfo != null) {
            TableSorter sorter =
                    TableSorter.getSorter(
                            input.getExecutionEnvironment(), input, table, tableSortInfo);
            return sorter.sort();
        }
        return input;
    }

    private void setParallelismIfAdaptiveConflict() {
        try {
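            // Flink's adaptive batch scheduler decides operator parallelism at runtime, which
            // conflicts with features that need a fixed writer parallelism (write manifest cache,
            // dynamic bucket mode), so pin the sink parallelism explicitly in that case.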
            boolean parallelismUndefined = parallelism == null || parallelism == -1;
            boolean isStreaming = isStreaming(input);
            boolean isAdaptiveParallelismEnabled =
                    AdaptiveParallelism.isEnabled(input.getExecutionEnvironment());
            boolean writeMCacheEnabled = table.coreOptions().writeManifestCache().getBytes() > 0;
            boolean hashDynamicMode = table.bucketMode() == BucketMode.HASH_DYNAMIC;
            if (parallelismUndefined
                    && !isStreaming
                    && isAdaptiveParallelismEnabled
                    && (writeMCacheEnabled || hashDynamicMode)) {
                List<String> messages = new ArrayList<>();
                if (writeMCacheEnabled) {
                    messages.add("Write Manifest Cache");
                }
                if (hashDynamicMode) {
                    messages.add("Dynamic Bucket Mode");
                }

                String parallelismSource;
                if (input.getParallelism() > 0) {
                    parallelismSource = "input parallelism";
                    parallelism = input.getParallelism();
                } else {
                    parallelismSource = "AdaptiveBatchScheduler's default max parallelism";
                    parallelism =
                            AdaptiveParallelism.getDefaultMaxParallelism(
                                    input.getExecutionEnvironment().getConfiguration(),
                                    input.getExecutionConfig());
                }
                String msg =
                        String.format(
                                "Paimon Sink with %s does not support Flink's Adaptive Parallelism mode. "
                                        + "Configuring sink parallelism to `%s` instead. You can also set Paimon "
                                        + "`sink.parallelism` manually to override this configuration.",
                                messages, parallelismSource);
                LOG.warn(msg);
            }
        } catch (NoClassDefFoundError ignored) {
            // before 1.17, there is no adaptive parallelism
        }
    }
}