All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.dinky.shaded.paimon.append.AppendOnlyCompactManager Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.dinky.shaded.paimon.append;

import org.dinky.shaded.paimon.AppendOnlyFileStore;
import org.dinky.shaded.paimon.annotation.VisibleForTesting;
import org.dinky.shaded.paimon.compact.CompactFutureManager;
import org.dinky.shaded.paimon.compact.CompactResult;
import org.dinky.shaded.paimon.compact.CompactTask;
import org.dinky.shaded.paimon.io.DataFileMeta;
import org.dinky.shaded.paimon.operation.metrics.CompactionMetrics;
import org.dinky.shaded.paimon.utils.Preconditions;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.annotation.Nullable;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
import java.util.LinkedList;
import java.util.List;
import java.util.Optional;
import java.util.TreeSet;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;

/** Compact manager for {@link AppendOnlyFileStore}. */
public class AppendOnlyCompactManager extends CompactFutureManager {

    private static final Logger LOG = LoggerFactory.getLogger(AppendOnlyCompactManager.class);

    private static final int FULL_COMPACT_MIN_FILE = 3;

    private final ExecutorService executor;
    private final TreeSet toCompact;
    private final int minFileNum;
    private final int maxFileNum;
    private final long targetFileSize;
    private final CompactRewriter rewriter;

    private List compacting;

    @Nullable private final CompactionMetrics metrics;

    public AppendOnlyCompactManager(
            ExecutorService executor,
            List restored,
            int minFileNum,
            int maxFileNum,
            long targetFileSize,
            CompactRewriter rewriter,
            @Nullable CompactionMetrics metrics) {
        this.executor = executor;
        this.toCompact = new TreeSet<>(fileComparator(false));
        this.toCompact.addAll(restored);
        this.minFileNum = minFileNum;
        this.maxFileNum = maxFileNum;
        this.targetFileSize = targetFileSize;
        this.rewriter = rewriter;
        this.metrics = metrics;
    }

    @Override
    public void triggerCompaction(boolean fullCompaction) {
        if (fullCompaction) {
            triggerFullCompaction();
        } else {
            triggerCompactionWithBestEffort();
        }
    }

    private void triggerFullCompaction() {
        Preconditions.checkState(
                taskFuture == null,
                "A compaction task is still running while the user "
                        + "forces a new compaction. This is unexpected.");
        if (toCompact.size() < FULL_COMPACT_MIN_FILE) {
            return;
        }

        taskFuture =
                executor.submit(new FullCompactTask(toCompact, targetFileSize, rewriter, metrics));
        compacting = new ArrayList<>(toCompact);
        toCompact.clear();
    }

    private void triggerCompactionWithBestEffort() {
        if (taskFuture != null) {
            return;
        }
        Optional> picked = pickCompactBefore();
        if (picked.isPresent()) {
            compacting = picked.get();
            taskFuture = executor.submit(new AutoCompactTask(compacting, rewriter, metrics));
        }
    }

    @Override
    public boolean shouldWaitForLatestCompaction() {
        return false;
    }

    @Override
    public boolean shouldWaitForPreparingCheckpoint() {
        return false;
    }

    @Override
    public void addNewFile(DataFileMeta file) {
        toCompact.add(file);
    }

    @Override
    public List allFiles() {
        List allFiles = new ArrayList<>();
        if (compacting != null) {
            allFiles.addAll(compacting);
        }
        allFiles.addAll(toCompact);
        return allFiles;
    }

    /** Finish current task, and update result files to {@link #toCompact}. */
    @Override
    public Optional getCompactionResult(boolean blocking)
            throws ExecutionException, InterruptedException {
        Optional result = innerGetCompactionResult(blocking);
        if (result.isPresent()) {
            CompactResult compactResult = result.get();
            if (!compactResult.after().isEmpty()) {
                // if the last compacted file is still small,
                // add it back to the head
                DataFileMeta lastFile = compactResult.after().get(compactResult.after().size() - 1);
                if (lastFile.fileSize() < targetFileSize) {
                    toCompact.add(lastFile);
                }
            }
            compacting = null;
        }
        return result;
    }

    @VisibleForTesting
    Optional> pickCompactBefore() {
        if (toCompact.isEmpty()) {
            return Optional.empty();
        }

        long totalFileSize = 0L;
        int fileNum = 0;
        LinkedList candidates = new LinkedList<>();

        while (!toCompact.isEmpty()) {
            DataFileMeta file = toCompact.pollFirst();
            candidates.add(file);
            totalFileSize += file.fileSize();
            fileNum++;
            if ((totalFileSize >= targetFileSize && fileNum >= minFileNum)
                    || fileNum >= maxFileNum) {
                return Optional.of(candidates);
            } else if (totalFileSize >= targetFileSize) {
                // let pointer shift one pos to right
                DataFileMeta removed = candidates.pollFirst();
                assert removed != null;
                totalFileSize -= removed.fileSize();
                fileNum--;
            }
        }
        toCompact.addAll(candidates);
        return Optional.empty();
    }

    @VisibleForTesting
    TreeSet getToCompact() {
        return toCompact;
    }

    @Override
    public void close() throws IOException {
        if (metrics != null) {
            metrics.close();
        }
    }

    /** A {@link CompactTask} impl for full compaction of append-only table. */
    public static class FullCompactTask extends CompactTask {

        private final LinkedList inputs;
        private final long targetFileSize;
        private final CompactRewriter rewriter;

        public FullCompactTask(
                Collection inputs,
                long targetFileSize,
                CompactRewriter rewriter,
                @Nullable CompactionMetrics metrics) {
            super(metrics);
            this.inputs = new LinkedList<>(inputs);
            this.targetFileSize = targetFileSize;
            this.rewriter = rewriter;
        }

        @Override
        protected CompactResult doCompact() throws Exception {
            // remove large files
            while (!inputs.isEmpty()) {
                DataFileMeta file = inputs.peekFirst();
                if (file.fileSize() >= targetFileSize) {
                    inputs.poll();
                    continue;
                }
                break;
            }

            // compute small files
            int big = 0;
            int small = 0;
            for (DataFileMeta file : inputs) {
                if (file.fileSize() >= targetFileSize) {
                    big++;
                } else {
                    small++;
                }
            }

            // do compaction
            List compactBefore = new ArrayList<>();
            List compactAfter = new ArrayList<>();
            if (small > big && inputs.size() >= FULL_COMPACT_MIN_FILE) {
                compactBefore = new ArrayList<>(inputs);
                compactAfter = rewriter.rewrite(inputs);
            }
            return result(new ArrayList<>(compactBefore), compactAfter);
        }
    }

    /**
     * A {@link CompactTask} impl for append-only table auto-compaction.
     *
     * 

This task accepts an already-picked candidate to perform one-time rewrite. And for the * rest of input files, it is the duty of {@link AppendOnlyWriter} to invoke the next time * compaction. */ public static class AutoCompactTask extends CompactTask { private final List toCompact; private final CompactRewriter rewriter; public AutoCompactTask( List toCompact, CompactRewriter rewriter, @Nullable CompactionMetrics metrics) { super(metrics); this.toCompact = toCompact; this.rewriter = rewriter; } @Override protected CompactResult doCompact() throws Exception { return result(toCompact, rewriter.rewrite(toCompact)); } } private static CompactResult result(List before, List after) { return new CompactResult() { @Override public List before() { return before; } @Override public List after() { return after; } }; } /** Compact rewriter for append-only table. */ public interface CompactRewriter { List rewrite(List compactBefore) throws Exception; } /** * New files may be created during the compaction process, then the results of the compaction * may be put after the new files, and this order will be disrupted. We need to ensure this * order, so we force the order by sequence. */ public static Comparator fileComparator(boolean ignoreOverlap) { return (o1, o2) -> { if (o1 == o2) { return 0; } if (!ignoreOverlap && isOverlap(o1, o2)) { LOG.warn( String.format( "There should no overlap in append files, but Range1(%s, %s), Range2(%s, %s)," + " check if you have multiple write jobs.", o1.minSequenceNumber(), o1.maxSequenceNumber(), o2.minSequenceNumber(), o2.maxSequenceNumber())); } return Long.compare(o1.minSequenceNumber(), o2.minSequenceNumber()); }; } private static boolean isOverlap(DataFileMeta o1, DataFileMeta o2) { return o2.minSequenceNumber() <= o1.maxSequenceNumber() && o2.maxSequenceNumber() >= o1.minSequenceNumber(); } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy