
// org.dinky.shaded.paimon.table.sink.TableCommitImpl Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.dinky.shaded.paimon.table.sink;
import org.dinky.shaded.paimon.annotation.VisibleForTesting;
import org.dinky.shaded.paimon.consumer.ConsumerManager;
import org.dinky.shaded.paimon.data.BinaryRow;
import org.dinky.shaded.paimon.fs.Path;
import org.dinky.shaded.paimon.index.IndexFileMeta;
import org.dinky.shaded.paimon.io.DataFileMeta;
import org.dinky.shaded.paimon.io.DataFilePathFactory;
import org.dinky.shaded.paimon.manifest.ManifestCommittable;
import org.dinky.shaded.paimon.metrics.MetricRegistry;
import org.dinky.shaded.paimon.operation.FileStoreCommit;
import org.dinky.shaded.paimon.operation.FileStoreExpire;
import org.dinky.shaded.paimon.operation.Lock;
import org.dinky.shaded.paimon.operation.PartitionExpire;
import org.dinky.shaded.paimon.operation.metrics.CommitMetrics;
import org.dinky.shaded.paimon.tag.TagAutoCreation;
import org.dinky.shaded.paimon.utils.ExecutorThreadFactory;
import org.dinky.shaded.paimon.utils.FileUtils;
import org.dinky.shaded.paimon.utils.IOUtils;
import org.dinky.shaded.paimon.utils.Pair;
import org.dinky.shaded.paimon.shade.guava30.com.google.common.util.concurrent.MoreExecutors;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.annotation.Nullable;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.time.Duration;
import java.time.LocalDateTime;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.atomic.AtomicReference;
import java.util.function.Consumer;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import static org.dinky.shaded.paimon.CoreOptions.ExpireExecutionMode;
import static org.dinky.shaded.paimon.utils.Preconditions.checkState;
/**
* An abstraction layer above {@link FileStoreCommit} and {@link FileStoreExpire} to provide
* snapshot commit and expiration.
*/
public class TableCommitImpl implements InnerTableCommit {
private static final Logger LOG = LoggerFactory.getLogger(TableCommitImpl.class);
private final FileStoreCommit commit;
private final List commitCallbacks;
@Nullable private final FileStoreExpire expire;
@Nullable private final PartitionExpire partitionExpire;
@Nullable private final TagAutoCreation tagAutoCreation;
private final Lock lock;
@Nullable private final Duration consumerExpireTime;
private final ConsumerManager consumerManager;
private final ExecutorService expireMainExecutor;
private final AtomicReference expireError;
private final String tableName;
@Nullable private Map overwritePartition = null;
private boolean batchCommitted = false;
public TableCommitImpl(
FileStoreCommit commit,
List commitCallbacks,
@Nullable FileStoreExpire expire,
@Nullable PartitionExpire partitionExpire,
@Nullable TagAutoCreation tagAutoCreation,
Lock lock,
@Nullable Duration consumerExpireTime,
ConsumerManager consumerManager,
ExpireExecutionMode expireExecutionMode,
String tableName) {
commit.withLock(lock);
if (expire != null) {
expire.withLock(lock);
}
if (partitionExpire != null) {
partitionExpire.withLock(lock);
}
this.commit = commit;
this.commitCallbacks = commitCallbacks;
this.expire = expire;
this.partitionExpire = partitionExpire;
this.tagAutoCreation = tagAutoCreation;
this.lock = lock;
this.consumerExpireTime = consumerExpireTime;
this.consumerManager = consumerManager;
this.expireMainExecutor =
expireExecutionMode == ExpireExecutionMode.SYNC
? MoreExecutors.newDirectExecutorService()
: Executors.newSingleThreadExecutor(
new ExecutorThreadFactory(
Thread.currentThread().getName() + "expire-main-thread"));
this.expireError = new AtomicReference<>(null);
this.tableName = tableName;
}
public boolean forceCreatingSnapshot() {
return tagAutoCreation != null && tagAutoCreation.forceCreatingSnapshot();
}
@Override
public TableCommitImpl withOverwrite(@Nullable Map overwritePartitions) {
this.overwritePartition = overwritePartitions;
return this;
}
@Override
public TableCommitImpl ignoreEmptyCommit(boolean ignoreEmptyCommit) {
commit.ignoreEmptyCommit(ignoreEmptyCommit);
return this;
}
@Override
public InnerTableCommit withMetricRegistry(MetricRegistry registry) {
commit.withMetrics(new CommitMetrics(registry, tableName));
return this;
}
@Override
public Set filterCommitted(Set commitIdentifiers) {
return commit.filterCommitted(commitIdentifiers);
}
@Override
public void commit(List commitMessages) {
checkState(!batchCommitted, "BatchTableCommit only support one-time committing.");
batchCommitted = true;
commit(BatchWriteBuilder.COMMIT_IDENTIFIER, commitMessages);
}
@Override
public void commit(long identifier, List commitMessages) {
commit(createManifestCommittable(identifier, commitMessages));
}
@Override
public int filterAndCommit(Map> commitIdentifiersAndMessages) {
return filterAndCommitMultiple(
commitIdentifiersAndMessages.entrySet().stream()
.map(e -> createManifestCommittable(e.getKey(), e.getValue()))
.collect(Collectors.toList()));
}
private ManifestCommittable createManifestCommittable(
long identifier, List commitMessages) {
ManifestCommittable committable = new ManifestCommittable(identifier);
for (CommitMessage commitMessage : commitMessages) {
committable.addFileCommittable(commitMessage);
}
return committable;
}
public void commit(ManifestCommittable committable) {
commitMultiple(Collections.singletonList(committable));
}
public void commitMultiple(List committables) {
if (overwritePartition == null) {
for (ManifestCommittable committable : committables) {
commit.commit(committable, new HashMap<>());
}
if (!committables.isEmpty()) {
expire(committables.get(committables.size() - 1).identifier(), expireMainExecutor);
}
} else {
ManifestCommittable committable;
if (committables.size() > 1) {
throw new RuntimeException(
"Multiple committables appear in overwrite mode, this may be a bug, please report it: "
+ committables);
} else if (committables.size() == 1) {
committable = committables.get(0);
} else {
// create an empty committable
// identifier is Long.MAX_VALUE, come from batch job
// TODO maybe it can be produced by CommitterOperator
committable = new ManifestCommittable(Long.MAX_VALUE);
}
commit.overwrite(overwritePartition, committable, Collections.emptyMap());
expire(committable.identifier(), expireMainExecutor);
}
commitCallbacks.forEach(c -> c.call(committables));
}
public int filterAndCommitMultiple(List committables) {
Set retryIdentifiers =
commit.filterCommitted(
committables.stream()
.map(ManifestCommittable::identifier)
.collect(Collectors.toSet()));
// commitCallback may fail after the snapshot file is successfully created,
// so we have to try all of them again
List succeededCommittables =
committables.stream()
.filter(c -> !retryIdentifiers.contains(c.identifier()))
.collect(Collectors.toList());
commitCallbacks.forEach(c -> c.call(succeededCommittables));
List retryCommittables =
committables.stream()
.filter(c -> retryIdentifiers.contains(c.identifier()))
// identifier must be in increasing order
.sorted(Comparator.comparingLong(ManifestCommittable::identifier))
.collect(Collectors.toList());
if (retryCommittables.size() > 0) {
checkFilesExistence(retryCommittables);
commitMultiple(retryCommittables);
}
return retryCommittables.size();
}
private void checkFilesExistence(List committables) {
List files = new ArrayList<>();
Map, DataFilePathFactory> factoryMap = new HashMap<>();
for (ManifestCommittable committable : committables) {
for (CommitMessage message : committable.fileCommittables()) {
CommitMessageImpl msg = (CommitMessageImpl) message;
DataFilePathFactory pathFactory =
factoryMap.computeIfAbsent(
Pair.of(message.partition(), message.bucket()),
k ->
commit.pathFactory()
.createDataFilePathFactory(
k.getKey(), k.getValue()));
Consumer collector = f -> files.addAll(f.collectFiles(pathFactory));
msg.newFilesIncrement().newFiles().forEach(collector);
msg.newFilesIncrement().changelogFiles().forEach(collector);
msg.compactIncrement().compactBefore().forEach(collector);
msg.compactIncrement().compactAfter().forEach(collector);
msg.indexIncrement().newIndexFiles().stream()
.map(IndexFileMeta::fileName)
.map(pathFactory::toPath)
.forEach(files::add);
}
}
Predicate nonExists =
p -> {
try {
return !commit.fileIO().exists(p);
} catch (IOException e) {
throw new UncheckedIOException(e);
}
};
List nonExistFiles;
try {
nonExistFiles =
FileUtils.COMMON_IO_FORK_JOIN_POOL
.submit(
() ->
files.parallelStream()
.filter(nonExists)
.collect(Collectors.toList()))
.get();
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
throw new RuntimeException(e);
} catch (ExecutionException e) {
throw new RuntimeException(e.getCause());
}
if (nonExistFiles.size() > 0) {
String message =
String.join(
"\n",
"Cannot recover from this checkpoint because some files in the snapshot that"
+ " need to be resubmitted have been deleted:",
" "
+ nonExistFiles.stream()
.map(Object::toString)
.collect(Collectors.joining(",")),
" The most likely reason is because you are recovering from a very old savepoint that"
+ " contains some uncommitted files that have already been deleted.");
throw new RuntimeException(message);
}
}
private void expire(long partitionExpireIdentifier, ExecutorService executor) {
if (expireError.get() != null) {
throw new RuntimeException(expireError.get());
}
executor.execute(
() -> {
try {
expire(partitionExpireIdentifier);
} catch (Throwable t) {
LOG.error("Executing expire encountered an error.", t);
expireError.compareAndSet(null, t);
}
});
}
private void expire(long partitionExpireIdentifier) {
// expire consumer first to avoid preventing snapshot expiration
if (consumerExpireTime != null) {
consumerManager.expire(LocalDateTime.now().minus(consumerExpireTime));
}
if (expire != null) {
expire.expire();
}
if (partitionExpire != null) {
partitionExpire.expire(partitionExpireIdentifier);
}
if (tagAutoCreation != null) {
tagAutoCreation.run();
}
}
@Override
public void close() throws Exception {
for (CommitCallback commitCallback : commitCallbacks) {
IOUtils.closeQuietly(commitCallback);
}
IOUtils.closeQuietly(lock);
expireMainExecutor.shutdownNow();
}
@Override
public void abort(List commitMessages) {
commit.abort(commitMessages);
}
@VisibleForTesting
public ExecutorService getExpireMainExecutor() {
return expireMainExecutor;
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy