org.dinky.shaded.paimon.table.sink.TableCommitImpl

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.dinky.shaded.paimon.table.sink;

import org.dinky.shaded.paimon.annotation.VisibleForTesting;
import org.dinky.shaded.paimon.consumer.ConsumerManager;
import org.dinky.shaded.paimon.data.BinaryRow;
import org.dinky.shaded.paimon.fs.Path;
import org.dinky.shaded.paimon.index.IndexFileMeta;
import org.dinky.shaded.paimon.io.DataFileMeta;
import org.dinky.shaded.paimon.io.DataFilePathFactory;
import org.dinky.shaded.paimon.manifest.ManifestCommittable;
import org.dinky.shaded.paimon.metrics.MetricRegistry;
import org.dinky.shaded.paimon.operation.FileStoreCommit;
import org.dinky.shaded.paimon.operation.FileStoreExpire;
import org.dinky.shaded.paimon.operation.Lock;
import org.dinky.shaded.paimon.operation.PartitionExpire;
import org.dinky.shaded.paimon.operation.metrics.CommitMetrics;
import org.dinky.shaded.paimon.tag.TagAutoCreation;
import org.dinky.shaded.paimon.utils.ExecutorThreadFactory;
import org.dinky.shaded.paimon.utils.FileUtils;
import org.dinky.shaded.paimon.utils.IOUtils;
import org.dinky.shaded.paimon.utils.Pair;

import org.dinky.shaded.paimon.shade.guava30.com.google.common.util.concurrent.MoreExecutors;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.annotation.Nullable;

import java.io.IOException;
import java.io.UncheckedIOException;
import java.time.Duration;
import java.time.LocalDateTime;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.atomic.AtomicReference;
import java.util.function.Consumer;
import java.util.function.Predicate;
import java.util.stream.Collectors;

import static org.dinky.shaded.paimon.CoreOptions.ExpireExecutionMode;
import static org.dinky.shaded.paimon.utils.Preconditions.checkState;

/**
 * An abstraction layer above {@link FileStoreCommit} and {@link FileStoreExpire} to provide
 * snapshot commit and expiration.
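 *
 * <p>A minimal usage sketch through {@link BatchWriteBuilder} (the {@code table} and {@code record}
 * variables are hypothetical {@code Table} and {@code InternalRow} instances; only the builder calls
 * shown are assumed from the surrounding API):
 *
 * <pre>{@code
 * BatchWriteBuilder writeBuilder = table.newBatchWriteBuilder();
 * try (BatchTableWrite write = writeBuilder.newWrite();
 *         BatchTableCommit commit = writeBuilder.newCommit()) {
 *     write.write(record);
 *     commit.commit(write.prepareCommit());
 * }
 * }</pre>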
 */
public class TableCommitImpl implements InnerTableCommit {
    private static final Logger LOG = LoggerFactory.getLogger(TableCommitImpl.class);

    private final FileStoreCommit commit;
    private final List<CommitCallback> commitCallbacks;
    @Nullable private final FileStoreExpire expire;
    @Nullable private final PartitionExpire partitionExpire;
    @Nullable private final TagAutoCreation tagAutoCreation;
    private final Lock lock;

    @Nullable private final Duration consumerExpireTime;
    private final ConsumerManager consumerManager;

    private final ExecutorService expireMainExecutor;
    private final AtomicReference<Throwable> expireError;

    private final String tableName;

    @Nullable private Map<String, String> overwritePartition = null;
    private boolean batchCommitted = false;

    public TableCommitImpl(
            FileStoreCommit commit,
            List<CommitCallback> commitCallbacks,
            @Nullable FileStoreExpire expire,
            @Nullable PartitionExpire partitionExpire,
            @Nullable TagAutoCreation tagAutoCreation,
            Lock lock,
            @Nullable Duration consumerExpireTime,
            ConsumerManager consumerManager,
            ExpireExecutionMode expireExecutionMode,
            String tableName) {
        commit.withLock(lock);
        if (expire != null) {
            expire.withLock(lock);
        }
        if (partitionExpire != null) {
            partitionExpire.withLock(lock);
        }

        this.commit = commit;
        this.commitCallbacks = commitCallbacks;
        this.expire = expire;
        this.partitionExpire = partitionExpire;
        this.tagAutoCreation = tagAutoCreation;
        this.lock = lock;

        this.consumerExpireTime = consumerExpireTime;
        this.consumerManager = consumerManager;

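        // SYNC mode runs expiration directly on the committing thread via a direct executor;
        // ASYNC mode hands it off to a dedicated single-threaded executor.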
        this.expireMainExecutor =
                expireExecutionMode == ExpireExecutionMode.SYNC
                        ? MoreExecutors.newDirectExecutorService()
                        : Executors.newSingleThreadExecutor(
                                new ExecutorThreadFactory(
                                        Thread.currentThread().getName() + "-expire-main-thread"));
        this.expireError = new AtomicReference<>(null);

        this.tableName = tableName;
    }

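    // A snapshot is forced (even for an otherwise empty commit) only when tag auto-creation requires one.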
    public boolean forceCreatingSnapshot() {
        return tagAutoCreation != null && tagAutoCreation.forceCreatingSnapshot();
    }

    @Override
    public TableCommitImpl withOverwrite(@Nullable Map<String, String> overwritePartitions) {
        this.overwritePartition = overwritePartitions;
        return this;
    }

    @Override
    public TableCommitImpl ignoreEmptyCommit(boolean ignoreEmptyCommit) {
        commit.ignoreEmptyCommit(ignoreEmptyCommit);
        return this;
    }

    @Override
    public InnerTableCommit withMetricRegistry(MetricRegistry registry) {
        commit.withMetrics(new CommitMetrics(registry, tableName));
        return this;
    }

    @Override
    public Set<Long> filterCommitted(Set<Long> commitIdentifiers) {
        return commit.filterCommitted(commitIdentifiers);
    }

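    // Batch (one-shot) commit path: each TableCommitImpl instance may commit this way at most once.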
    @Override
    public void commit(List<CommitMessage> commitMessages) {
        checkState(!batchCommitted, "BatchTableCommit only supports one-time committing.");
        batchCommitted = true;
        commit(BatchWriteBuilder.COMMIT_IDENTIFIER, commitMessages);
    }

    @Override
    public void commit(long identifier, List<CommitMessage> commitMessages) {
        commit(createManifestCommittable(identifier, commitMessages));
    }

    @Override
    public int filterAndCommit(Map<Long, List<CommitMessage>> commitIdentifiersAndMessages) {
        return filterAndCommitMultiple(
                commitIdentifiersAndMessages.entrySet().stream()
                        .map(e -> createManifestCommittable(e.getKey(), e.getValue()))
                        .collect(Collectors.toList()));
    }

    private ManifestCommittable createManifestCommittable(
            long identifier, List<CommitMessage> commitMessages) {
        ManifestCommittable committable = new ManifestCommittable(identifier);
        for (CommitMessage commitMessage : commitMessages) {
            committable.addFileCommittable(commitMessage);
        }
        return committable;
    }

    public void commit(ManifestCommittable committable) {
        commitMultiple(Collections.singletonList(committable));
    }

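    // Commits the committables in order. Without an overwrite partition each one is committed as a
    // normal append; with one, at most a single committable is expected and the given partitions are
    // overwritten. Expiration runs after the last commit, and commit callbacks are invoked last.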
    public void commitMultiple(List<ManifestCommittable> committables) {
        if (overwritePartition == null) {
            for (ManifestCommittable committable : committables) {
                commit.commit(committable, new HashMap<>());
            }
            if (!committables.isEmpty()) {
                expire(committables.get(committables.size() - 1).identifier(), expireMainExecutor);
            }
        } else {
            ManifestCommittable committable;
            if (committables.size() > 1) {
                throw new RuntimeException(
                        "Multiple committables appear in overwrite mode, this may be a bug, please report it: "
                                + committables);
            } else if (committables.size() == 1) {
                committable = committables.get(0);
            } else {
                // create an empty committable
                // identifier is Long.MAX_VALUE, come from batch job
                // TODO maybe it can be produced by CommitterOperator
                committable = new ManifestCommittable(Long.MAX_VALUE);
            }
            commit.overwrite(overwritePartition, committable, Collections.emptyMap());
            expire(committable.identifier(), expireMainExecutor);
        }

        commitCallbacks.forEach(c -> c.call(committables));
    }

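    // Recovery path: committables whose identifiers are already committed only get their callbacks
    // re-invoked (a callback may have failed after the snapshot was written); the rest are checked
    // for file existence and re-committed. Returns the number of committables that were retried.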
    public int filterAndCommitMultiple(List<ManifestCommittable> committables) {
        Set<Long> retryIdentifiers =
                commit.filterCommitted(
                        committables.stream()
                                .map(ManifestCommittable::identifier)
                                .collect(Collectors.toSet()));

        // commitCallback may fail after the snapshot file is successfully created,
        // so we have to try all of them again
        List<ManifestCommittable> succeededCommittables =
                committables.stream()
                        .filter(c -> !retryIdentifiers.contains(c.identifier()))
                        .collect(Collectors.toList());
        commitCallbacks.forEach(c -> c.call(succeededCommittables));

        List<ManifestCommittable> retryCommittables =
                committables.stream()
                        .filter(c -> retryIdentifiers.contains(c.identifier()))
                        // identifier must be in increasing order
                        .sorted(Comparator.comparingLong(ManifestCommittable::identifier))
                        .collect(Collectors.toList());
        if (retryCommittables.size() > 0) {
            checkFilesExistence(retryCommittables);
            commitMultiple(retryCommittables);
        }
        return retryCommittables.size();
    }

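    // Collects every data, changelog, compaction and index file referenced by the committables and
    // verifies in parallel that they still exist; if any are missing, recovery cannot proceed.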
    private void checkFilesExistence(List<ManifestCommittable> committables) {
        List<Path> files = new ArrayList<>();
        Map<Pair<BinaryRow, Integer>, DataFilePathFactory> factoryMap = new HashMap<>();
        for (ManifestCommittable committable : committables) {
            for (CommitMessage message : committable.fileCommittables()) {
                CommitMessageImpl msg = (CommitMessageImpl) message;
                DataFilePathFactory pathFactory =
                        factoryMap.computeIfAbsent(
                                Pair.of(message.partition(), message.bucket()),
                                k ->
                                        commit.pathFactory()
                                                .createDataFilePathFactory(
                                                        k.getKey(), k.getValue()));

                Consumer<DataFileMeta> collector = f -> files.addAll(f.collectFiles(pathFactory));
                msg.newFilesIncrement().newFiles().forEach(collector);
                msg.newFilesIncrement().changelogFiles().forEach(collector);
                msg.compactIncrement().compactBefore().forEach(collector);
                msg.compactIncrement().compactAfter().forEach(collector);
                msg.indexIncrement().newIndexFiles().stream()
                        .map(IndexFileMeta::fileName)
                        .map(pathFactory::toPath)
                        .forEach(files::add);
            }
        }

        Predicate<Path> nonExists =
                p -> {
                    try {
                        return !commit.fileIO().exists(p);
                    } catch (IOException e) {
                        throw new UncheckedIOException(e);
                    }
                };
        List<Path> nonExistFiles;
        try {
            nonExistFiles =
                    FileUtils.COMMON_IO_FORK_JOIN_POOL
                            .submit(
                                    () ->
                                            files.parallelStream()
                                                    .filter(nonExists)
                                                    .collect(Collectors.toList()))
                            .get();
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            throw new RuntimeException(e);
        } catch (ExecutionException e) {
            throw new RuntimeException(e.getCause());
        }

        if (nonExistFiles.size() > 0) {
            String message =
                    String.join(
                            "\n",
                            "Cannot recover from this checkpoint because some files in the snapshot that"
                                    + " need to be resubmitted have been deleted:",
                            "    "
                                    + nonExistFiles.stream()
                                            .map(Object::toString)
                                            .collect(Collectors.joining(",")),
                            "    The most likely reason is because you are recovering from a very old savepoint that"
                                    + " contains some uncommitted files that have already been deleted.");
            throw new RuntimeException(message);
        }
    }

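    // Schedules expiration on the given executor (the committing thread itself in SYNC mode) and
    // rethrows any error recorded by a previous asynchronous run.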
    private void expire(long partitionExpireIdentifier, ExecutorService executor) {
        if (expireError.get() != null) {
            throw new RuntimeException(expireError.get());
        }

        executor.execute(
                () -> {
                    try {
                        expire(partitionExpireIdentifier);
                    } catch (Throwable t) {
                        LOG.error("Executing expire encountered an error.", t);
                        expireError.compareAndSet(null, t);
                    }
                });
    }

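    // Expires consumers, snapshots and partitions in that order, then runs tag auto-creation.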
    private void expire(long partitionExpireIdentifier) {
        // expire consumer first to avoid preventing snapshot expiration
        if (consumerExpireTime != null) {
            consumerManager.expire(LocalDateTime.now().minus(consumerExpireTime));
        }

        if (expire != null) {
            expire.expire();
        }

        if (partitionExpire != null) {
            partitionExpire.expire(partitionExpireIdentifier);
        }

        if (tagAutoCreation != null) {
            tagAutoCreation.run();
        }
    }

    @Override
    public void close() throws Exception {
        for (CommitCallback commitCallback : commitCallbacks) {
            IOUtils.closeQuietly(commitCallback);
        }
        IOUtils.closeQuietly(lock);
        expireMainExecutor.shutdownNow();
    }

    @Override
    public void abort(List<CommitMessage> commitMessages) {
        commit.abort(commitMessages);
    }

    @VisibleForTesting
    public ExecutorService getExpireMainExecutor() {
        return expireMainExecutor;
    }
}