All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.paimon.flink.action.CloneAction Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.paimon.flink.action;

import org.apache.paimon.flink.clone.CloneFileInfo;
import org.apache.paimon.flink.clone.CloneSourceBuilder;
import org.apache.paimon.flink.clone.CopyFileOperator;
import org.apache.paimon.flink.clone.PickFilesForCloneOperator;
import org.apache.paimon.flink.clone.SnapshotHintChannelComputer;
import org.apache.paimon.flink.clone.SnapshotHintOperator;
import org.apache.paimon.flink.sink.FlinkStreamPartitioner;

import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.sink.v2.DiscardingSink;

import java.util.HashMap;
import java.util.Map;

import static org.apache.paimon.utils.StringUtils.isNullOrWhitespaceOnly;

/**
 * The latest-snapshot clone action for Flink.
 *
 * <p>The job assembled by this action is a linear pipeline: a clone source created by {@link
 * CloneSourceBuilder}, a non-parallel "Pick Files" step, a "Copy Files" step, a "Recreate Snapshot
 * Hint" step, and a discarding sink named "end" that terminates the topology.
 */
public class CloneAction extends ActionBase {

    /** Parallelism applied to the "Copy Files" and "Recreate Snapshot Hint" operators. */
    private final int parallelism;

    private final Map<String, String> sourceCatalogConfig;
    private final String database;
    private final String tableName;

    private final Map<String, String> targetCatalogConfig;
    private final String targetDatabase;
    private final String targetTableName;

    /**
     * Creates the clone action.
     *
     * @param database source database name, forwarded to {@link CloneSourceBuilder}
     * @param tableName source table name, forwarded to {@link CloneSourceBuilder}
     * @param sourceCatalogConfig options of the source catalog; also handed unchanged to the
     *     {@link ActionBase} constructor. A copy is stored, and {@code null} is stored as an empty
     *     map
     * @param targetDatabase target database name, forwarded to {@link CloneSourceBuilder}
     * @param targetTableName target table name, forwarded to {@link CloneSourceBuilder}
     * @param targetCatalogConfig options of the target catalog. A copy is stored, and {@code
     *     null} is stored as an empty map
     * @param parallelismStr parallelism as a decimal string; when {@code null} or blank, the
     *     parallelism of the execution environment is used
     * @throws NumberFormatException if {@code parallelismStr} is non-blank but not a valid integer
     */
    public CloneAction(
            String database,
            String tableName,
            Map<String, String> sourceCatalogConfig,
            String targetDatabase,
            String targetTableName,
            Map<String, String> targetCatalogConfig,
            String parallelismStr) {
        // NOTE(review): the parent constructor receives the caller's map as-is; whether it accepts
        // null is not visible from this class.
        super(sourceCatalogConfig);

        // Trim so that padded values such as " 4 " parse instead of failing.
        this.parallelism =
                isNullOrWhitespaceOnly(parallelismStr)
                        ? env.getParallelism()
                        : Integer.parseInt(parallelismStr.trim());

        this.sourceCatalogConfig = copyOrEmpty(sourceCatalogConfig);
        this.database = database;
        this.tableName = tableName;

        this.targetCatalogConfig = copyOrEmpty(targetCatalogConfig);
        this.targetDatabase = targetDatabase;
        this.targetTableName = targetTableName;
    }

    /** Returns a mutable copy of {@code config}, or an empty map when it is {@code null}. */
    private static Map<String, String> copyOrEmpty(Map<String, String> config) {
        return config == null ? new HashMap<>() : new HashMap<>(config);
    }

    // ------------------------------------------------------------------------
    //  Java API
    // ------------------------------------------------------------------------

    /**
     * Adds the clone pipeline to the execution environment without executing it.
     *
     * @throws RuntimeException wrapping any exception raised while assembling the job
     */
    @Override
    public void build() {
        try {
            buildCloneFlinkJob(env);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Wires source, operators and sink of the clone job into {@code env}.
     *
     * @param env the environment the pipeline is added to
     * @throws Exception if building the clone source fails
     */
    private void buildCloneFlinkJob(StreamExecutionEnvironment env) throws Exception {
        // NOTE(review): element type restored as Tuple2<String, String>; confirm against the
        // return type of CloneSourceBuilder#build.
        DataStream<Tuple2<String, String>> cloneSource =
                new CloneSourceBuilder(
                                env,
                                sourceCatalogConfig,
                                database,
                                tableName,
                                targetDatabase,
                                targetTableName)
                        .build();

        // File picking is pinned to a single, non-parallel operator instance.
        SingleOutputStreamOperator<CloneFileInfo> pickFilesForClone =
                cloneSource
                        .transform(
                                "Pick Files",
                                TypeInformation.of(CloneFileInfo.class),
                                new PickFilesForCloneOperator(
                                        sourceCatalogConfig, targetCatalogConfig))
                        .forceNonParallel();

        // Rebalance the picked files across the copy operator's subtasks.
        SingleOutputStreamOperator<CloneFileInfo> copyFiles =
                pickFilesForClone
                        .rebalance()
                        .transform(
                                "Copy Files",
                                TypeInformation.of(CloneFileInfo.class),
                                new CopyFileOperator(sourceCatalogConfig, targetCatalogConfig))
                        .setParallelism(parallelism);

        // Route copied-file records by SnapshotHintChannelComputer before the hint step.
        SingleOutputStreamOperator<CloneFileInfo> snapshotHintOperator =
                FlinkStreamPartitioner.partition(
                                copyFiles, new SnapshotHintChannelComputer(), parallelism)
                        .transform(
                                "Recreate Snapshot Hint",
                                TypeInformation.of(CloneFileInfo.class),
                                new SnapshotHintOperator(targetCatalogConfig))
                        .setParallelism(parallelism);

        // The pipeline's output is not needed downstream; a discarding sink closes the topology.
        snapshotHintOperator.sinkTo(new DiscardingSink<>()).name("end").setParallelism(1);
    }

    /**
     * Builds the pipeline and executes it under the job name "Clone job".
     *
     * @throws Exception if building or executing the job fails
     */
    @Override
    public void run() throws Exception {
        build();
        execute("Clone job");
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy