// org.apache.paimon.flink.sink.StoreMultiCommitter
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.paimon.flink.sink;
import org.apache.paimon.catalog.Catalog;
import org.apache.paimon.catalog.CatalogLoader;
import org.apache.paimon.catalog.Identifier;
import org.apache.paimon.manifest.ManifestCommittable;
import org.apache.paimon.manifest.WrappedManifestCommittable;
import org.apache.paimon.table.FileStoreTable;
import org.apache.paimon.table.sink.CommitMessage;
import org.apache.flink.api.java.tuple.Tuple2;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
/**
* {@link StoreMultiCommitter} for multiple dynamic store. During the commit process, it will group
* the WrappedManifestCommittables by their table identifier and use different committers to commit
* to different tables.
*/
public class StoreMultiCommitter
implements Committer {
private final Catalog catalog;
private final Context context;
// To make the commit behavior consistent with that of Committer,
// StoreMultiCommitter manages multiple committers which are
// referenced by table id.
private final Map tableCommitters;
// Currently, only compact_database job needs to ignore empty commit and set dynamic options
private final boolean ignoreEmptyCommit;
private final Map dynamicOptions;
public StoreMultiCommitter(CatalogLoader catalogLoader, Context context) {
this(catalogLoader, context, false, Collections.emptyMap());
}
public StoreMultiCommitter(
CatalogLoader catalogLoader,
Context context,
boolean ignoreEmptyCommit,
Map dynamicOptions) {
this.catalog = catalogLoader.load();
this.context = context;
this.ignoreEmptyCommit = ignoreEmptyCommit;
this.dynamicOptions = dynamicOptions;
this.tableCommitters = new HashMap<>();
}
@Override
public boolean forceCreatingSnapshot() {
return true;
}
@Override
public WrappedManifestCommittable combine(
long checkpointId, long watermark, List committables) {
WrappedManifestCommittable wrappedManifestCommittable =
new WrappedManifestCommittable(checkpointId, watermark);
return combine(checkpointId, watermark, wrappedManifestCommittable, committables);
}
@Override
public WrappedManifestCommittable combine(
long checkpointId,
long watermark,
WrappedManifestCommittable wrappedManifestCommittable,
List committables) {
for (MultiTableCommittable committable : committables) {
Identifier identifier =
Identifier.create(committable.getDatabase(), committable.getTable());
ManifestCommittable manifestCommittable =
wrappedManifestCommittable.computeCommittableIfAbsent(
identifier, checkpointId, watermark);
switch (committable.kind()) {
case FILE:
CommitMessage file = (CommitMessage) committable.wrappedCommittable();
manifestCommittable.addFileCommittable(file);
break;
case LOG_OFFSET:
LogOffsetCommittable offset =
(LogOffsetCommittable) committable.wrappedCommittable();
StoreCommitter committer = tableCommitters.get(identifier);
manifestCommittable.addLogOffset(
offset.bucket(), offset.offset(), committer.allowLogOffsetDuplicate());
break;
}
}
return wrappedManifestCommittable;
}
@Override
public void commit(List committables)
throws IOException, InterruptedException {
if (committables.isEmpty()) {
return;
}
// key by table id
Map> committableMap = groupByTable(committables);
committableMap.keySet().forEach(this::getStoreCommitter);
long checkpointId = committables.get(0).checkpointId();
long watermark = committables.get(0).watermark();
for (Map.Entry entry : tableCommitters.entrySet()) {
List committableList = committableMap.get(entry.getKey());
StoreCommitter committer = entry.getValue();
if (committableList != null) {
committer.commit(committableList);
} else {
// try best to commit empty snapshot, but tableCommitters may not contain all tables
if (committer.forceCreatingSnapshot()) {
ManifestCommittable combine =
committer.combine(checkpointId, watermark, Collections.emptyList());
committer.commit(Collections.singletonList(combine));
}
}
}
}
@Override
public int filterAndCommit(
List globalCommittables, boolean checkAppendFiles)
throws IOException {
int result = 0;
for (Map.Entry> entry :
groupByTable(globalCommittables).entrySet()) {
result +=
getStoreCommitter(entry.getKey())
.filterAndCommit(entry.getValue(), checkAppendFiles);
}
return result;
}
private Map> groupByTable(
List committables) {
return committables.stream()
.flatMap(
wrapped -> {
Map manifestCommittables =
wrapped.manifestCommittables();
return manifestCommittables.entrySet().stream()
.map(entry -> Tuple2.of(entry.getKey(), entry.getValue()));
})
.collect(
Collectors.groupingBy(
t -> t.f0, Collectors.mapping(t -> t.f1, Collectors.toList())));
}
@Override
public Map> groupByCheckpoint(
Collection committables) {
Map> grouped = new HashMap<>();
for (MultiTableCommittable c : committables) {
grouped.computeIfAbsent(c.checkpointId(), k -> new ArrayList<>()).add(c);
}
return grouped;
}
private StoreCommitter getStoreCommitter(Identifier tableId) {
StoreCommitter committer = tableCommitters.get(tableId);
if (committer == null) {
FileStoreTable table;
try {
table = (FileStoreTable) catalog.getTable(tableId).copy(dynamicOptions);
} catch (Catalog.TableNotExistException e) {
throw new RuntimeException(
String.format(
"Failed to get committer for table %s", tableId.getFullName()),
e);
}
committer =
new StoreCommitter(
table,
table.newCommit(context.commitUser())
.ignoreEmptyCommit(ignoreEmptyCommit),
context);
tableCommitters.put(tableId, committer);
}
return committer;
}
@Override
public void close() throws Exception {
for (StoreCommitter committer : tableCommitters.values()) {
committer.close();
}
if (catalog != null) {
catalog.close();
}
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy