/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.paimon.flink.action;

import org.apache.paimon.CoreOptions;
import org.apache.paimon.catalog.Catalog;
import org.apache.paimon.catalog.Identifier;
import org.apache.paimon.flink.FlinkConnectorOptions;
import org.apache.paimon.flink.compact.UnawareBucketCompactionTopoBuilder;
import org.apache.paimon.flink.sink.BucketsRowChannelComputer;
import org.apache.paimon.flink.sink.CompactorSinkBuilder;
import org.apache.paimon.flink.sink.MultiTablesCompactorSink;
import org.apache.paimon.flink.source.CompactorSourceBuilder;
import org.apache.paimon.flink.source.MultiTablesCompactorSourceBuilder;
import org.apache.paimon.options.Options;
import org.apache.paimon.table.FileStoreTable;
import org.apache.paimon.table.Table;
import org.apache.paimon.utils.Preconditions;

import org.apache.flink.api.common.RuntimeExecutionMode;
import org.apache.flink.configuration.ExecutionOptions;
import org.apache.flink.configuration.ReadableConfig;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.data.RowData;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.annotation.Nullable;

import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import static org.apache.paimon.flink.sink.FlinkStreamPartitioner.partition;
/** Database compact action for Flink. */
public class CompactDatabaseAction extends ActionBase {
private static final Logger LOG = LoggerFactory.getLogger(CompactDatabaseAction.class);
private Pattern includingPattern = Pattern.compile(".*");
@Nullable private Pattern excludingPattern;
private Pattern databasePattern = Pattern.compile(".*");
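// DIVIDED builds a separate compaction topology per matched table;
// COMBINED routes all matched tables through one shared source and sink (see buildForCombinedMode).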
private MultiTablesSinkMode databaseCompactMode = MultiTablesSinkMode.DIVIDED;
private final Map<String, FileStoreTable> tableMap = new HashMap<>();
private Options tableOptions = new Options();
public CompactDatabaseAction(String warehouse, Map<String, String> catalogConfig) {
super(warehouse, catalogConfig);
}
public CompactDatabaseAction includingDatabases(@Nullable String includingDatabases) {
if (includingDatabases != null) {
this.databasePattern = Pattern.compile(includingDatabases);
}
return this;
}
public CompactDatabaseAction includingTables(@Nullable String includingTables) {
if (includingTables != null) {
this.includingPattern = Pattern.compile(includingTables);
}
return this;
}
public CompactDatabaseAction excludingTables(@Nullable String excludingTables) {
this.excludingPattern = excludingTables == null ? null : Pattern.compile(excludingTables);
return this;
}
public CompactDatabaseAction withDatabaseCompactMode(@Nullable String mode) {
this.databaseCompactMode = MultiTablesSinkMode.fromString(mode);
return this;
}
public CompactDatabaseAction withTableOptions(Map<String, String> tableOptions) {
this.tableOptions = Options.fromMap(tableOptions);
return this;
}
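// A minimal usage sketch (warehouse path, database/table patterns, and mode value below
// are illustrative, not part of this class):
//
//   CompactDatabaseAction action =
//           new CompactDatabaseAction("/path/to/warehouse", Collections.emptyMap())
//                   .includingDatabases("my_db")
//                   .includingTables("orders|line_item")
//                   .excludingTables(".*_tmp")
//                   .withDatabaseCompactMode("divided");
//   action.run();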
private boolean shouldCompactTable(String paimonFullTableName) {
// A table is compacted only if it matches the including pattern and, when an
// excluding pattern is set, does not also match that pattern.
boolean shouldCompact = includingPattern.matcher(paimonFullTableName).matches();
if (excludingPattern != null) {
shouldCompact =
shouldCompact && !excludingPattern.matcher(paimonFullTableName).matches();
}
if (!shouldCompact) {
LOG.debug("Source table '{}' is excluded.", paimonFullTableName);
}
return shouldCompact;
}
@Override
public void build() {
if (databaseCompactMode == MultiTablesSinkMode.DIVIDED) {
buildForDividedMode();
} else {
buildForCombinedMode();
}
}
private void buildForDividedMode() {
try {
List<String> databases = catalog.listDatabases();
for (String databaseName : databases) {
Matcher databaseMatcher = databasePattern.matcher(databaseName);
if (databaseMatcher.matches()) {
List<String> tables = catalog.listTables(databaseName);
for (String tableName : tables) {
String fullTableName = String.format("%s.%s", databaseName, tableName);
if (shouldCompactTable(fullTableName)) {
Table table =
catalog.getTable(Identifier.create(databaseName, tableName));
if (!(table instanceof FileStoreTable)) {
LOG.error(
"Only FileStoreTable supports compact action. The table type is '{}'.",
table.getClass().getName());
continue;
}
Map<String, String> dynamicOptions =
new HashMap<>(tableOptions.toMap());
// Compaction must actually rewrite data files, so force write-only mode off
// for the copied table.
dynamicOptions.put(CoreOptions.WRITE_ONLY.key(), "false");
FileStoreTable fileStoreTable =
(FileStoreTable) table.copy(dynamicOptions);
tableMap.put(fullTableName, fileStoreTable);
} else {
LOG.debug("The table {} is excluded.", fullTableName);
}
}
}
}
} catch (Catalog.DatabaseNotExistException | Catalog.TableNotExistException e) {
throw new RuntimeException(e);
}
Preconditions.checkState(
!tableMap.isEmpty(),
"No tables to be compacted. A possible cause is that no tables were matched by the given database and table patterns.");
ReadableConfig conf = env.getConfiguration();
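// Streaming runtime mode yields a continuously running compaction job;
// batch mode compacts once and finishes.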
boolean isStreaming =
conf.get(ExecutionOptions.RUNTIME_MODE) == RuntimeExecutionMode.STREAMING;
for (Map.Entry<String, FileStoreTable> entry : tableMap.entrySet()) {
FileStoreTable fileStoreTable = entry.getValue();
switch (fileStoreTable.bucketMode()) {
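// Append-only tables without a bucket key (unaware-bucket mode) use a dedicated
// small-file compaction topology; all other bucket modes use the standard one.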
case UNAWARE:
{
buildForUnawareBucketCompaction(
env, entry.getKey(), fileStoreTable, isStreaming);
break;
}
case FIXED:
case DYNAMIC:
default:
{
buildForTraditionalCompaction(
env, entry.getKey(), fileStoreTable, isStreaming);
}
}
}
}
private void buildForCombinedMode() {
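// Combined mode multiplexes every matched table through a single shared source and sink,
// re-scanning for matching tables at the configured continuous discovery interval,
// so one job graph serves the whole database instead of one topology per table.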
ReadableConfig conf = env.getConfiguration();
boolean isStreaming =
conf.get(ExecutionOptions.RUNTIME_MODE) == RuntimeExecutionMode.STREAMING;
// TODO: Currently, multi-table compaction doesn't support tables whose bucket mode is UNAWARE.
MultiTablesCompactorSourceBuilder sourceBuilder =
new MultiTablesCompactorSourceBuilder(
catalogLoader(),
databasePattern,
includingPattern,
excludingPattern,
tableOptions.get(CoreOptions.CONTINUOUS_DISCOVERY_INTERVAL).toMillis());
DataStream<RowData> source =
sourceBuilder.withEnv(env).withContinuousMode(isStreaming).build();
// Shuffle rows so that all records for the same table bucket reach the same compactor subtask.
DataStream<RowData> partitioned =
partition(
source,
new BucketsRowChannelComputer(),
tableOptions.get(FlinkConnectorOptions.SINK_PARALLELISM));
new MultiTablesCompactorSink(catalogLoader(), tableOptions).sinkFrom(partitioned);
}
private void buildForTraditionalCompaction(
StreamExecutionEnvironment env,
String fullName,
FileStoreTable table,
boolean isStreaming) {
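// Standard compaction pipeline: a dedicated compactor source for this one table
// feeding a compactor sink that performs the rewrites.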
CompactorSourceBuilder sourceBuilder = new CompactorSourceBuilder(fullName, table);
CompactorSinkBuilder sinkBuilder = new CompactorSinkBuilder(table);
DataStreamSource<RowData> source =
sourceBuilder.withEnv(env).withContinuousMode(isStreaming).build();
sinkBuilder.withInput(source).build();
}
private void buildForUnawareBucketCompaction(
StreamExecutionEnvironment env,
String fullName,
FileStoreTable table,
boolean isStreaming) {
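// Delegates to UnawareBucketCompactionTopoBuilder, which builds the small-file
// compaction topology for append-only (unaware-bucket) tables.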
UnawareBucketCompactionTopoBuilder unawareBucketCompactionTopoBuilder =
new UnawareBucketCompactionTopoBuilder(env, fullName, table);
unawareBucketCompactionTopoBuilder.withContinuousMode(isStreaming);
unawareBucketCompactionTopoBuilder.build();
}
@Override
public void run() throws Exception {
build();
execute("Compact database job");
}
}
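// For reference, this action is normally launched through the Paimon Flink action jar.
// The flags below follow the documented `compact_database` action; the jar path,
// version, and argument values are illustrative:
//
//   <FLINK_HOME>/bin/flink run \
//       /path/to/paimon-flink-action-<version>.jar \
//       compact_database \
//       --warehouse <warehouse-path> \
//       --including_databases <database-name|name-regex> \
//       [--including_tables <table-name|name-regex>] \
//       [--excluding_tables <table-name|name-regex>] \
//       [--mode <divided|combined>]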