org.dinky.shaded.paimon.mergetree.compact.MergeTreeCompactTask
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.dinky.shaded.paimon.mergetree.compact;

import org.dinky.shaded.paimon.compact.CompactResult;
import org.dinky.shaded.paimon.compact.CompactTask;
import org.dinky.shaded.paimon.compact.CompactUnit;
import org.dinky.shaded.paimon.data.InternalRow;
import org.dinky.shaded.paimon.io.DataFileMeta;
import org.dinky.shaded.paimon.mergetree.SortedRun;
import org.dinky.shaded.paimon.operation.metrics.CompactionMetrics;

import javax.annotation.Nullable;

import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;

import static java.util.Collections.singletonList;

/** Compact task for merge tree compaction. */
public class MergeTreeCompactTask extends CompactTask {

    private final long minFileSize;
    private final CompactRewriter rewriter;
    private final int outputLevel;

    private final List<List<SortedRun>> partitioned;

    private final boolean dropDelete;

    // metric
    private int upgradeFilesNum;

    public MergeTreeCompactTask(
            Comparator<InternalRow> keyComparator,
            long minFileSize,
            CompactRewriter rewriter,
            CompactUnit unit,
            boolean dropDelete,
            @Nullable CompactionMetrics metrics) {
        super(metrics);
        this.minFileSize = minFileSize;
        this.rewriter = rewriter;
        this.outputLevel = unit.outputLevel();
        this.partitioned = new IntervalPartition(unit.files(), keyComparator).partition();
        this.dropDelete = dropDelete;

        this.upgradeFilesNum = 0;
    }
    @Override
    protected CompactResult doCompact() throws Exception {
        List<List<SortedRun>> candidate = new ArrayList<>();
        CompactResult result = new CompactResult();

        // Check the order and compact adjacent, contiguous files.
        // Note: an intermediate file cannot be skipped, as that would destroy the overall
        // orderliness.
        for (List<SortedRun> section : partitioned) {
            if (section.size() > 1) {
                candidate.add(section);
            } else {
                SortedRun run = section.get(0);
                // No overlapping:
                // A large file can simply be upgraded by changing its level instead of
                // rewriting it, but small files are still compacted.
                for (DataFileMeta file : run.files()) {
                    if (file.fileSize() < minFileSize) {
                        // Smaller files are rewritten along with the previous files.
                        candidate.add(singletonList(SortedRun.fromSingle(file)));
                    } else {
                        // A large file appears: rewrite the previous candidates and upgrade it.
                        rewrite(candidate, result);
                        upgrade(file, result);
                    }
                }
            }
        }
        rewrite(candidate, result);
        return result;
    }
    @Override
    protected String logMetric(
            long startMillis, List<DataFileMeta> compactBefore, List<DataFileMeta> compactAfter) {
        return String.format(
                "%s, upgrade file num = %d",
                super.logMetric(startMillis, compactBefore, compactAfter), upgradeFilesNum);
    }
    /** Upgrades a single file to the output level; files already at that level are left as-is. */
    private void upgrade(DataFileMeta file, CompactResult toUpdate) throws Exception {
        if (file.level() != outputLevel) {
            CompactResult upgradeResult = rewriter.upgrade(outputLevel, file);
            toUpdate.merge(upgradeResult);
            upgradeFilesNum++;
        }
    }
    /**
     * Rewrites the accumulated candidate sections to the output level. A candidate that
     * degenerates to a single run is handled by upgrading its files instead.
     */
    private void rewrite(List<List<SortedRun>> candidate, CompactResult toUpdate) throws Exception {
        if (candidate.isEmpty()) {
            return;
        }
        if (candidate.size() == 1) {
            List<SortedRun> section = candidate.get(0);
            if (section.size() == 0) {
                return;
            } else if (section.size() == 1) {
                for (DataFileMeta file : section.get(0).files()) {
                    upgrade(file, toUpdate);
                }
                candidate.clear();
                return;
            }
        }
        CompactResult rewriteResult = rewriter.rewrite(outputLevel, dropDelete, candidate);
        toUpdate.merge(rewriteResult);
        candidate.clear();
    }
}
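To make the control flow of doCompact easier to follow in isolation: sections with more than one overlapping run always go into the rewrite candidate; within a single non-overlapping run, files below the size threshold are queued for rewriting together with their neighbours, while a sufficiently large file flushes the queue and is merely promoted to the output level. The sketch below mirrors only that decision flow. CompactDecisionSketch, FileMeta, Section, flatten and the threshold/level values are hypothetical stand-ins invented for illustration; they are not part of the Paimon or Dinky API, and the sketch prints decisions instead of performing real rewrites or upgrades.

// A minimal, self-contained sketch of the upgrade-vs-rewrite decision in doCompact.
// FileMeta and Section are hypothetical stand-ins, not Paimon classes.
import java.util.ArrayList;
import java.util.List;

public class CompactDecisionSketch {

    record FileMeta(String name, long fileSize, int level) {}

    // A "section" groups key-overlapping runs; here each run is just a list of files.
    record Section(List<List<FileMeta>> runs) {}

    public static void main(String[] args) {
        long minFileSize = 128L * 1024 * 1024; // assumed size threshold, analogous to minFileSize
        int outputLevel = 5;                   // assumed target level

        List<Section> partitioned =
                List.of(
                        // Two overlapping runs: must be rewritten together.
                        new Section(List.of(
                                List.of(new FileMeta("a", 10 << 20, 0)),
                                List.of(new FileMeta("b", 10 << 20, 1)))),
                        // One non-overlapping run: small file is queued, large file is upgraded.
                        new Section(List.of(
                                List.of(
                                        new FileMeta("c", 1 << 20, 4),
                                        new FileMeta("d", 300L << 20, 4)))));

        List<List<FileMeta>> candidate = new ArrayList<>();
        for (Section section : partitioned) {
            if (section.runs().size() > 1) {
                candidate.add(flatten(section));           // overlapping -> rewrite
            } else {
                for (FileMeta file : section.runs().get(0)) {
                    if (file.fileSize() < minFileSize) {
                        candidate.add(List.of(file));      // small -> rewrite with neighbours
                    } else {
                        System.out.println("rewrite " + candidate + ", then upgrade "
                                + file.name() + " to level " + outputLevel);
                        candidate.clear();
                    }
                }
            }
        }
        System.out.println("final rewrite of remaining candidate: " + candidate);
    }

    private static List<FileMeta> flatten(Section s) {
        List<FileMeta> all = new ArrayList<>();
        s.runs().forEach(all::addAll);
        return all;
    }
}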