/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.paimon.flink.sink;

import org.apache.paimon.Snapshot;
import org.apache.paimon.data.BinaryRow;
import org.apache.paimon.data.InternalRow;
import org.apache.paimon.memory.MemorySegmentPool;
import org.apache.paimon.table.FileStoreTable;
import org.apache.paimon.table.sink.SinkRecord;
import org.apache.paimon.utils.SnapshotManager;

import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.metrics.MetricGroup;
import org.apache.flink.runtime.io.disk.iomanager.IOManager;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.annotation.Nullable;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.NavigableMap;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;

import static org.apache.paimon.table.source.snapshot.FullCompactedStartingScanner.isFullCompactedIdentifier;

/**
 * {@link StoreSinkWrite} which executes full compaction globally. All writers perform full
 * compaction at the same time (at the specified checkpoints).
 */
public class GlobalFullCompactionSinkWrite extends StoreSinkWriteImpl {

    private static final Logger LOG = LoggerFactory.getLogger(GlobalFullCompactionSinkWrite.class);

    private final int deltaCommits;

    private final String tableName;
    private final SnapshotManager snapshotManager;

    private final Set<Tuple2<BinaryRow, Integer>> currentWrittenBuckets;
    private final NavigableMap<Long, Set<Tuple2<BinaryRow, Integer>>> writtenBuckets;
    private static final String WRITTEN_BUCKETS_STATE_NAME = "paimon_written_buckets";

    private final TreeSet<Long> commitIdentifiersToCheck;

    public GlobalFullCompactionSinkWrite(
            FileStoreTable table,
            String commitUser,
            StoreSinkWriteState state,
            IOManager ioManager,
            boolean ignorePreviousFiles,
            boolean waitCompaction,
            int deltaCommits,
            boolean isStreaming,
            @Nullable MemorySegmentPool memoryPool,
            MetricGroup metricGroup) {
        super(
                table,
                commitUser,
                state,
                ioManager,
                ignorePreviousFiles,
                waitCompaction,
                isStreaming,
                memoryPool,
                metricGroup);

        this.deltaCommits = deltaCommits;

        this.tableName = table.name();
        this.snapshotManager = table.snapshotManager();

        currentWrittenBuckets = new HashSet<>();
        writtenBuckets = new TreeMap<>();
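        // Recover the buckets written before the last successful checkpoint from operator
        // state, so a restarted job still knows which buckets the next global full
        // compaction must cover.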
        List<StoreSinkWriteState.StateValue> writtenBucketStateValues =
                state.get(tableName, WRITTEN_BUCKETS_STATE_NAME);
        if (writtenBucketStateValues != null) {
            for (StoreSinkWriteState.StateValue stateValue : writtenBucketStateValues) {
                writtenBuckets
                        .computeIfAbsent(bytesToLong(stateValue.value()), k -> new HashSet<>())
                        .add(Tuple2.of(stateValue.partition(), stateValue.bucket()));
            }
        }

        commitIdentifiersToCheck = new TreeSet<>();
    }

    @Override
    @Nullable
    public SinkRecord write(InternalRow rowData) throws Exception {
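        // Record the bucket of every written record so that this checkpoint interval's
        // buckets are included in the next global full compaction.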
        SinkRecord sinkRecord = super.write(rowData);
        if (sinkRecord != null) {
            touchBucket(sinkRecord.partition(), sinkRecord.bucket());
        }
        return sinkRecord;
    }

    @Override
    @Nullable
    public SinkRecord write(InternalRow rowData, int bucket) throws Exception {
        SinkRecord sinkRecord = super.write(rowData, bucket);
        if (sinkRecord != null) {
            touchBucket(sinkRecord.partition(), bucket);
        }
        return sinkRecord;
    }

    @Override
    public void compact(BinaryRow partition, int bucket, boolean fullCompaction) throws Exception {
        super.compact(partition, bucket, fullCompaction);
        touchBucket(partition, bucket);
    }

    private void touchBucket(BinaryRow partition, int bucket) {
        if (LOG.isDebugEnabled()) {
            LOG.debug("touch partition {}, bucket {}", partition, bucket);
        }

        // partition is a reused BinaryRow
        // we first check if the tuple exists to minimize copying
        if (!currentWrittenBuckets.contains(Tuple2.of(partition, bucket))) {
            currentWrittenBuckets.add(Tuple2.of(partition.copy(), bucket));
        }
    }

    @Override
    public List<Committable> prepareCommit(boolean waitCompaction, long checkpointId)
            throws IOException {
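        // First prune the bookkeeping for full compactions that have already produced a
        // committed COMPACT snapshot from this writer.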
        checkSuccessfulFullCompaction();

        // collects what buckets we've modified during this checkpoint interval
        if (!currentWrittenBuckets.isEmpty()) {
            writtenBuckets
                    .computeIfAbsent(checkpointId, k -> new HashSet<>())
                    .addAll(currentWrittenBuckets);
            currentWrittenBuckets.clear();
        }

        if (LOG.isDebugEnabled()) {
            for (Map.Entry<Long, Set<Tuple2<BinaryRow, Integer>>> checkpointIdAndBuckets :
                    writtenBuckets.entrySet()) {
                LOG.debug(
                        "Written buckets for checkpoint #{} are:", checkpointIdAndBuckets.getKey());
                for (Tuple2<BinaryRow, Integer> bucket : checkpointIdAndBuckets.getValue()) {
                    LOG.debug("  * partition {}, bucket {}", bucket.f0, bucket.f1);
                }
            }
        }

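        // `isFullCompactedIdentifier` marks the checkpoints (one in every `deltaCommits`
        // commits) at which a global full compaction is due.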
        if (!writtenBuckets.isEmpty() && isFullCompactedIdentifier(checkpointId, deltaCommits)) {
            waitCompaction = true;
        }

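        // Submit the global full compaction and remember the identifier; a later COMPACT
        // snapshot carrying this identifier proves the compaction was committed.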
        if (waitCompaction) {
            submitFullCompaction(checkpointId);
            commitIdentifiersToCheck.add(checkpointId);
        }

        return super.prepareCommit(waitCompaction, checkpointId);
    }

    private void checkSuccessfulFullCompaction() {
        if (commitIdentifiersToCheck.isEmpty()) {
            return;
        }

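        // Walk snapshots from the latest backwards; the predicate below stops the
        // traversal once it recognizes one of this writer's full compaction commits.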
        snapshotManager.traversalSnapshotsFromLatestSafely(this::checkSuccessfulFullCompaction);
    }

    private boolean checkSuccessfulFullCompaction(Snapshot snapshot) {
        if (snapshot.commitUser().equals(commitUser)
                && snapshot.commitKind() == Snapshot.CommitKind.COMPACT) {
            long commitIdentifier = snapshot.commitIdentifier();
            if (commitIdentifiersToCheck.contains(commitIdentifier)) {
                // We found a full compaction snapshot triggered by `submitFullCompaction` method.
                //
                // Because `submitFullCompaction` will compact all buckets in `writtenBuckets`, thus
                // a successful commit indicates that all previous buckets have been compacted.
                //
                // We must make sure that the compact snapshot is triggered by
                // `submitFullCompaction`, because normal compaction may also trigger full
                // compaction, but that only compacts a specific bucket, not all buckets recorded in
                // `writtenBuckets`.
                if (LOG.isDebugEnabled()) {
                    LOG.debug(
                            "Found full compaction snapshot #{} with identifier {}",
                            snapshot.id(),
                            commitIdentifier);
                }
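                // All buckets written up to and including this identifier have been fully
                // compacted, so the corresponding bookkeeping can be cleared.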
                writtenBuckets.headMap(commitIdentifier, true).clear();
                commitIdentifiersToCheck.headSet(commitIdentifier).clear();
                return true;
            }
        }
        return false;
    }

    private void submitFullCompaction(long currentCheckpointId) {
        if (LOG.isDebugEnabled()) {
            LOG.debug("Submit full compaction for checkpoint #{}", currentCheckpointId);
        }
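        // Deduplicate across checkpoints: each (partition, bucket) pair needs only one
        // full compaction even if several checkpoint intervals touched it.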
        Set<Tuple2<BinaryRow, Integer>> compactedBuckets = new HashSet<>();
        writtenBuckets.forEach(
                (checkpointId, buckets) -> {
                    for (Tuple2<BinaryRow, Integer> bucket : buckets) {
                        if (compactedBuckets.contains(bucket)) {
                            continue;
                        }
                        compactedBuckets.add(bucket);
                        try {
                            write.compact(bucket.f0, bucket.f1, true);
                        } catch (Exception e) {
                            throw new RuntimeException(e);
                        }
                    }
                });
    }

    @Override
    public void snapshotState() throws Exception {
        super.snapshotState();

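        // Flatten the (checkpoint id -> buckets) map into one StateValue per bucket, with
        // the owning checkpoint id big-endian encoded as the value.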
        List<StoreSinkWriteState.StateValue> writtenBucketList = new ArrayList<>();
        for (Map.Entry<Long, Set<Tuple2<BinaryRow, Integer>>> entry : writtenBuckets.entrySet()) {
            for (Tuple2<BinaryRow, Integer> bucket : entry.getValue()) {
                writtenBucketList.add(
                        new StoreSinkWriteState.StateValue(
                                bucket.f0, bucket.f1, longToBytes(entry.getKey())));
            }
        }
        state.put(tableName, WRITTEN_BUCKETS_STATE_NAME, writtenBucketList);
    }

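    // Encode / decode checkpoint ids as 8 big-endian bytes for the operator state.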
    private static byte[] longToBytes(long l) {
        byte[] result = new byte[8];
        for (int i = 7; i >= 0; i--) {
            result[i] = (byte) (l & 0xFF);
            l >>= 8;
        }
        return result;
    }

    private static long bytesToLong(final byte[] b) {
        long result = 0;
        for (int i = 0; i < 8; i++) {
            result <<= 8;
            result |= (b[i] & 0xFF);
        }
        return result;
    }
}