![JAR search and dependency download from the Maven repository](/logo.png)
org.dinky.shaded.paimon.append.AppendOnlyCompactManager Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.dinky.shaded.paimon.append;
import org.dinky.shaded.paimon.AppendOnlyFileStore;
import org.dinky.shaded.paimon.annotation.VisibleForTesting;
import org.dinky.shaded.paimon.compact.CompactFutureManager;
import org.dinky.shaded.paimon.compact.CompactResult;
import org.dinky.shaded.paimon.compact.CompactTask;
import org.dinky.shaded.paimon.io.DataFileMeta;
import org.dinky.shaded.paimon.operation.metrics.CompactionMetrics;
import org.dinky.shaded.paimon.utils.Preconditions;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.annotation.Nullable;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
import java.util.LinkedList;
import java.util.List;
import java.util.Optional;
import java.util.TreeSet;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
/** Compact manager for {@link AppendOnlyFileStore}. */
public class AppendOnlyCompactManager extends CompactFutureManager {
private static final Logger LOG = LoggerFactory.getLogger(AppendOnlyCompactManager.class);
private static final int FULL_COMPACT_MIN_FILE = 3;
private final ExecutorService executor;
private final TreeSet toCompact;
private final int minFileNum;
private final int maxFileNum;
private final long targetFileSize;
private final CompactRewriter rewriter;
private List compacting;
@Nullable private final CompactionMetrics metrics;
public AppendOnlyCompactManager(
ExecutorService executor,
List restored,
int minFileNum,
int maxFileNum,
long targetFileSize,
CompactRewriter rewriter,
@Nullable CompactionMetrics metrics) {
this.executor = executor;
this.toCompact = new TreeSet<>(fileComparator(false));
this.toCompact.addAll(restored);
this.minFileNum = minFileNum;
this.maxFileNum = maxFileNum;
this.targetFileSize = targetFileSize;
this.rewriter = rewriter;
this.metrics = metrics;
}
@Override
public void triggerCompaction(boolean fullCompaction) {
if (fullCompaction) {
triggerFullCompaction();
} else {
triggerCompactionWithBestEffort();
}
}
private void triggerFullCompaction() {
Preconditions.checkState(
taskFuture == null,
"A compaction task is still running while the user "
+ "forces a new compaction. This is unexpected.");
if (toCompact.size() < FULL_COMPACT_MIN_FILE) {
return;
}
taskFuture =
executor.submit(new FullCompactTask(toCompact, targetFileSize, rewriter, metrics));
compacting = new ArrayList<>(toCompact);
toCompact.clear();
}
private void triggerCompactionWithBestEffort() {
if (taskFuture != null) {
return;
}
Optional> picked = pickCompactBefore();
if (picked.isPresent()) {
compacting = picked.get();
taskFuture = executor.submit(new AutoCompactTask(compacting, rewriter, metrics));
}
}
@Override
public boolean shouldWaitForLatestCompaction() {
return false;
}
@Override
public boolean shouldWaitForPreparingCheckpoint() {
return false;
}
@Override
public void addNewFile(DataFileMeta file) {
toCompact.add(file);
}
@Override
public List allFiles() {
List allFiles = new ArrayList<>();
if (compacting != null) {
allFiles.addAll(compacting);
}
allFiles.addAll(toCompact);
return allFiles;
}
/** Finish current task, and update result files to {@link #toCompact}. */
@Override
public Optional getCompactionResult(boolean blocking)
throws ExecutionException, InterruptedException {
Optional result = innerGetCompactionResult(blocking);
if (result.isPresent()) {
CompactResult compactResult = result.get();
if (!compactResult.after().isEmpty()) {
// if the last compacted file is still small,
// add it back to the head
DataFileMeta lastFile = compactResult.after().get(compactResult.after().size() - 1);
if (lastFile.fileSize() < targetFileSize) {
toCompact.add(lastFile);
}
}
compacting = null;
}
return result;
}
@VisibleForTesting
Optional> pickCompactBefore() {
if (toCompact.isEmpty()) {
return Optional.empty();
}
long totalFileSize = 0L;
int fileNum = 0;
LinkedList candidates = new LinkedList<>();
while (!toCompact.isEmpty()) {
DataFileMeta file = toCompact.pollFirst();
candidates.add(file);
totalFileSize += file.fileSize();
fileNum++;
if ((totalFileSize >= targetFileSize && fileNum >= minFileNum)
|| fileNum >= maxFileNum) {
return Optional.of(candidates);
} else if (totalFileSize >= targetFileSize) {
// let pointer shift one pos to right
DataFileMeta removed = candidates.pollFirst();
assert removed != null;
totalFileSize -= removed.fileSize();
fileNum--;
}
}
toCompact.addAll(candidates);
return Optional.empty();
}
@VisibleForTesting
TreeSet getToCompact() {
return toCompact;
}
@Override
public void close() throws IOException {
if (metrics != null) {
metrics.close();
}
}
/** A {@link CompactTask} impl for full compaction of append-only table. */
public static class FullCompactTask extends CompactTask {
private final LinkedList inputs;
private final long targetFileSize;
private final CompactRewriter rewriter;
public FullCompactTask(
Collection inputs,
long targetFileSize,
CompactRewriter rewriter,
@Nullable CompactionMetrics metrics) {
super(metrics);
this.inputs = new LinkedList<>(inputs);
this.targetFileSize = targetFileSize;
this.rewriter = rewriter;
}
@Override
protected CompactResult doCompact() throws Exception {
// remove large files
while (!inputs.isEmpty()) {
DataFileMeta file = inputs.peekFirst();
if (file.fileSize() >= targetFileSize) {
inputs.poll();
continue;
}
break;
}
// compute small files
int big = 0;
int small = 0;
for (DataFileMeta file : inputs) {
if (file.fileSize() >= targetFileSize) {
big++;
} else {
small++;
}
}
// do compaction
List compactBefore = new ArrayList<>();
List compactAfter = new ArrayList<>();
if (small > big && inputs.size() >= FULL_COMPACT_MIN_FILE) {
compactBefore = new ArrayList<>(inputs);
compactAfter = rewriter.rewrite(inputs);
}
return result(new ArrayList<>(compactBefore), compactAfter);
}
}
/**
* A {@link CompactTask} impl for append-only table auto-compaction.
*
* This task accepts an already-picked candidate to perform one-time rewrite. And for the
* rest of input files, it is the duty of {@link AppendOnlyWriter} to invoke the next time
* compaction.
*/
public static class AutoCompactTask extends CompactTask {
private final List toCompact;
private final CompactRewriter rewriter;
public AutoCompactTask(
List toCompact,
CompactRewriter rewriter,
@Nullable CompactionMetrics metrics) {
super(metrics);
this.toCompact = toCompact;
this.rewriter = rewriter;
}
@Override
protected CompactResult doCompact() throws Exception {
return result(toCompact, rewriter.rewrite(toCompact));
}
}
private static CompactResult result(List before, List after) {
return new CompactResult() {
@Override
public List before() {
return before;
}
@Override
public List after() {
return after;
}
};
}
/** Compact rewriter for append-only table. */
public interface CompactRewriter {
List rewrite(List compactBefore) throws Exception;
}
/**
* New files may be created during the compaction process, then the results of the compaction
* may be put after the new files, and this order will be disrupted. We need to ensure this
* order, so we force the order by sequence.
*/
public static Comparator fileComparator(boolean ignoreOverlap) {
return (o1, o2) -> {
if (o1 == o2) {
return 0;
}
if (!ignoreOverlap && isOverlap(o1, o2)) {
LOG.warn(
String.format(
"There should no overlap in append files, but Range1(%s, %s), Range2(%s, %s),"
+ " check if you have multiple write jobs.",
o1.minSequenceNumber(),
o1.maxSequenceNumber(),
o2.minSequenceNumber(),
o2.maxSequenceNumber()));
}
return Long.compare(o1.minSequenceNumber(), o2.minSequenceNumber());
};
}
private static boolean isOverlap(DataFileMeta o1, DataFileMeta o2) {
return o2.minSequenceNumber() <= o1.maxSequenceNumber()
&& o2.maxSequenceNumber() >= o1.minSequenceNumber();
}
}