org.apache.paimon.flink.action.CloneAction Maven / Gradle / Ivy
The newest version!
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.paimon.flink.action;
import org.apache.paimon.flink.clone.CloneFileInfo;
import org.apache.paimon.flink.clone.CloneSourceBuilder;
import org.apache.paimon.flink.clone.CopyFileOperator;
import org.apache.paimon.flink.clone.PickFilesForCloneOperator;
import org.apache.paimon.flink.clone.SnapshotHintChannelComputer;
import org.apache.paimon.flink.clone.SnapshotHintOperator;
import org.apache.paimon.flink.sink.FlinkStreamPartitioner;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.sink.v2.DiscardingSink;
import java.util.HashMap;
import java.util.Map;
import static org.apache.paimon.utils.StringUtils.isNullOrWhitespaceOnly;
/**
 * The Latest Snapshot clone action for Flink.
 *
 * <p>The action assembles a Flink streaming job with the following stages:
 *
 * <ol>
 *   <li>a source created by {@link CloneSourceBuilder} from the source/target identifiers,
 *   <li>a non-parallel "Pick Files" stage ({@link PickFilesForCloneOperator}),
 *   <li>a rebalanced "Copy Files" stage ({@link CopyFileOperator}) running with the configured
 *       parallelism,
 *   <li>a "Recreate Snapshot Hint" stage ({@link SnapshotHintOperator}) fed through a
 *       {@link SnapshotHintChannelComputer}-based partitioner,
 *   <li>a discarding sink that terminates the topology.
 * </ol>
 */
public class CloneAction extends ActionBase {

    private final int parallelism;

    private final Map<String, String> sourceCatalogConfig;
    private final String database;
    private final String tableName;

    private final Map<String, String> targetCatalogConfig;
    private final String targetDatabase;
    private final String targetTableName;

    /**
     * Creates a clone action.
     *
     * @param database source database name, handed to {@link CloneSourceBuilder}
     * @param tableName source table name, handed to {@link CloneSourceBuilder}
     * @param sourceCatalogConfig catalog options of the source catalog; also passed to the
     *     {@link ActionBase} constructor. NOTE(review): whether the super constructor accepts
     *     {@code null} is not visible from this class - confirm before relying on it.
     * @param targetDatabase target database name, handed to {@link CloneSourceBuilder}
     * @param targetTableName target table name, handed to {@link CloneSourceBuilder}
     * @param targetCatalogConfig catalog options of the target catalog; {@code null} is treated
     *     like an empty map
     * @param parallelismStr parallelism of the copy and snapshot-hint stages as a decimal string;
     *     when {@code null} or blank the environment's parallelism is used
     * @throws NumberFormatException if {@code parallelismStr} is non-blank but not an integer
     */
    public CloneAction(
            String database,
            String tableName,
            Map<String, String> sourceCatalogConfig,
            String targetDatabase,
            String targetTableName,
            Map<String, String> targetCatalogConfig,
            String parallelismStr) {
        super(sourceCatalogConfig);

        // Trim so that values such as " 4 " are accepted instead of failing in parseInt.
        this.parallelism =
                isNullOrWhitespaceOnly(parallelismStr)
                        ? env.getParallelism()
                        : Integer.parseInt(parallelismStr.trim());

        this.sourceCatalogConfig = emptyIfAbsent(sourceCatalogConfig);
        this.database = database;
        this.tableName = tableName;

        this.targetCatalogConfig = emptyIfAbsent(targetCatalogConfig);
        this.targetDatabase = targetDatabase;
        this.targetTableName = targetTableName;
    }

    /**
     * Returns the given map when it has entries, otherwise a fresh empty {@link HashMap}.
     *
     * <p>A non-empty map is returned as-is (not copied), matching the previous behaviour of this
     * class; only the {@code null} case is new.
     */
    private static Map<String, String> emptyIfAbsent(Map<String, String> config) {
        if (config == null || config.isEmpty()) {
            return new HashMap<>();
        }
        return config;
    }

    // ------------------------------------------------------------------------
    //  Java API
    // ------------------------------------------------------------------------

    /**
     * Builds the clone topology on the action's execution environment.
     *
     * @throws RuntimeException wrapping any exception raised while assembling the job
     */
    @Override
    public void build() {
        try {
            buildCloneFlinkJob(env);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Wires source, file picking, file copying and snapshot-hint stages into {@code env}.
     *
     * @param env the environment the operators are registered on
     * @throws Exception if the clone source cannot be built
     */
    private void buildCloneFlinkJob(StreamExecutionEnvironment env) throws Exception {
        DataStream<Tuple2<String, String>> cloneSource =
                new CloneSourceBuilder(
                                env,
                                sourceCatalogConfig,
                                database,
                                tableName,
                                targetDatabase,
                                targetTableName)
                        .build();

        // File picking is pinned to a single subtask.
        SingleOutputStreamOperator<CloneFileInfo> pickFilesForClone =
                cloneSource
                        .transform(
                                "Pick Files",
                                TypeInformation.of(CloneFileInfo.class),
                                new PickFilesForCloneOperator(
                                        sourceCatalogConfig, targetCatalogConfig))
                        .forceNonParallel();

        // Spread the picked files evenly over the copy subtasks.
        SingleOutputStreamOperator<CloneFileInfo> copyFiles =
                pickFilesForClone
                        .rebalance()
                        .transform(
                                "Copy Files",
                                TypeInformation.of(CloneFileInfo.class),
                                new CopyFileOperator(sourceCatalogConfig, targetCatalogConfig))
                        .setParallelism(parallelism);

        // Route copied files to the snapshot-hint stage via SnapshotHintChannelComputer.
        SingleOutputStreamOperator<CloneFileInfo> snapshotHintOperator =
                FlinkStreamPartitioner.partition(
                                copyFiles, new SnapshotHintChannelComputer(), parallelism)
                        .transform(
                                "Recreate Snapshot Hint",
                                TypeInformation.of(CloneFileInfo.class),
                                new SnapshotHintOperator(targetCatalogConfig))
                        .setParallelism(parallelism);

        // The job only has side effects; discard whatever the last stage emits.
        snapshotHintOperator.sinkTo(new DiscardingSink<>()).name("end").setParallelism(1);
    }

    /**
     * Builds the topology and submits it under the job name "Clone job".
     *
     * @throws Exception if building or executing the job fails
     */
    @Override
    public void run() throws Exception {
        build();
        execute("Clone job");
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy