Please wait. This can take a few minutes ...
Downloading a project consumes considerable server resources, so we ask for your understanding that we need to cover our server costs. Thank you in advance.
Project price: only $1
You can buy this project and then download or modify it as often as you want.
org.apache.paimon.flink.compact.changelog.ChangelogCompactTask Maven / Gradle / Ivy
package org.apache.paimon.flink.compact.changelog;
import org.apache.paimon.data.BinaryRow;
import org.apache.paimon.flink.compact.changelog.format.CompactedChangelogReadOnlyFormat;
import org.apache.paimon.flink.sink.Committable;
import org.apache.paimon.fs.Path;
import org.apache.paimon.fs.PositionOutputStream;
import org.apache.paimon.fs.SeekableInputStream;
import org.apache.paimon.io.CompactIncrement;
import org.apache.paimon.io.DataFileMeta;
import org.apache.paimon.io.DataFilePathFactory;
import org.apache.paimon.io.DataIncrement;
import org.apache.paimon.table.FileStoreTable;
import org.apache.paimon.table.sink.CommitMessageImpl;
import org.apache.paimon.utils.FileStorePathFactory;
import org.apache.paimon.utils.IOUtils;
import org.apache.paimon.utils.Preconditions;
import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.UUID;
public class ChangelogCompactTask implements Serializable {
private final long checkpointId;
private final BinaryRow partition;
private final Map> newFileChangelogFiles;
private final Map> compactChangelogFiles;
public ChangelogCompactTask (
long checkpointId,
BinaryRow partition,
Map > newFileChangelogFiles,
Map > compactChangelogFiles) {
this .checkpointId = checkpointId;
this .partition = partition;
this .newFileChangelogFiles = newFileChangelogFiles;
this .compactChangelogFiles = compactChangelogFiles;
}
public long checkpointId () {
return checkpointId;
}
public BinaryRow partition () {
return partition;
}
public Map> newFileChangelogFiles() {
return newFileChangelogFiles;
}
public Map> compactChangelogFiles() {
return compactChangelogFiles;
}
public List doCompact (FileStoreTable table) throws Exception {
FileStorePathFactory pathFactory = table.store().pathFactory();
OutputStream outputStream = new OutputStream();
List results = new ArrayList<>();
for (Map.Entry> entry : newFileChangelogFiles.entrySet()) {
int bucket = entry.getKey();
DataFilePathFactory dataFilePathFactory =
pathFactory.createDataFilePathFactory(partition, bucket);
for (DataFileMeta meta : entry.getValue()) {
copyFile(
outputStream,
results,
table,
dataFilePathFactory.toPath(meta),
bucket,
false ,
meta);
}
}
for (Map.Entry> entry : compactChangelogFiles.entrySet()) {
Integer bucket = entry.getKey();
DataFilePathFactory dataFilePathFactory =
pathFactory.createDataFilePathFactory(partition, bucket);
for (DataFileMeta meta : entry.getValue()) {
copyFile(
outputStream,
results,
table,
dataFilePathFactory.toPath(meta),
bucket,
true ,
meta);
}
}
outputStream.out.close();
return produceNewCommittables(results, table, pathFactory, outputStream.path);
}
private void copyFile (
OutputStream outputStream,
List results,
FileStoreTable table,
Path path,
int bucket,
boolean isCompactResult,
DataFileMeta meta)
throws Exception {
if (!outputStream.isInitialized) {
Path outputPath =
new Path(path.getParent(), "tmp-compacted-changelog-" + UUID.randomUUID());
outputStream.init(outputPath, table.fileIO().newOutputStream(outputPath, false ));
}
long offset = outputStream.out.getPos();
try (SeekableInputStream in = table.fileIO().newInputStream(path)) {
IOUtils.copyBytes(in, outputStream.out, IOUtils.BLOCKSIZE, false );
}
table.fileIO().deleteQuietly(path);
results.add(
new Result(
bucket, isCompactResult, meta, offset, outputStream.out.getPos() - offset));
}
private List produceNewCommittables (
List results,
FileStoreTable table,
FileStorePathFactory pathFactory,
Path changelogTempPath)
throws IOException {
Result baseResult = results.get(0 );
Preconditions.checkArgument(baseResult.offset == 0 );
DataFilePathFactory dataFilePathFactory =
pathFactory.createDataFilePathFactory(partition, baseResult.bucket);
String realName =
"compacted-changelog-"
+ UUID.randomUUID()
+ "$"
+ baseResult.bucket
+ "-"
+ baseResult.length;
table.fileIO()
.rename(
changelogTempPath,
dataFilePathFactory.toAlignedPath(
realName
+ "."
+ CompactedChangelogReadOnlyFormat.getIdentifier(
baseResult.meta.fileFormat()),
baseResult.meta));
List newCommittables = new ArrayList<>();
Map> bucketedResults = new HashMap<>();
for (Result result : results) {
bucketedResults.computeIfAbsent(result.bucket, b -> new ArrayList<>()).add(result);
}
for (Map.Entry> entry : bucketedResults.entrySet()) {
List newFilesChangelog = new ArrayList<>();
List compactChangelog = new ArrayList<>();
for (Result result : entry.getValue()) {
String name =
(result.offset == 0
? realName
: realName + "-" + result.offset + "-" + result.length)
+ "."
+ CompactedChangelogReadOnlyFormat.getIdentifier(
result.meta.fileFormat());
if (result.isCompactResult) {
compactChangelog.add(result.meta.rename(name));
} else {
newFilesChangelog.add(result.meta.rename(name));
}
}
CommitMessageImpl newMessage =
new CommitMessageImpl(
partition,
entry.getKey(),
new DataIncrement(
Collections.emptyList(),
Collections.emptyList(),
newFilesChangelog),
new CompactIncrement(
Collections.emptyList(),
Collections.emptyList(),
compactChangelog));
newCommittables.add(new Committable(checkpointId, Committable.Kind.FILE, newMessage));
}
return newCommittables;
}
public int hashCode () {
return Objects.hash(checkpointId, partition, newFileChangelogFiles, compactChangelogFiles);
}
@Override
public boolean equals (Object o) {
if (this == o) {
return true ;
}
if (o == null || getClass() != o.getClass()) {
return false ;
}
ChangelogCompactTask that = (ChangelogCompactTask) o;
return checkpointId == that.checkpointId
&& Objects.equals(partition, that.partition)
&& Objects.equals(newFileChangelogFiles, that.newFileChangelogFiles)
&& Objects.equals(compactChangelogFiles, that.compactChangelogFiles);
}
@Override
public String toString () {
return String.format(
"ChangelogCompactionTask {"
+ "partition = %s, "
+ "newFileChangelogFiles = %s, "
+ "compactChangelogFiles = %s}" ,
partition, newFileChangelogFiles, compactChangelogFiles);
}
private static class OutputStream {
private Path path;
private PositionOutputStream out;
private boolean isInitialized;
private OutputStream () {
this .isInitialized = false ;
}
private void init (Path path, PositionOutputStream out) {
this .path = path;
this .out = out;
this .isInitialized = true ;
}
}
private static class Result {
private final int bucket;
private final boolean isCompactResult;
private final DataFileMeta meta;
private final long offset;
private final long length;
private Result (
int bucket, boolean isCompactResult, DataFileMeta meta, long offset, long length) {
this .bucket = bucket;
this .isCompactResult = isCompactResult;
this .meta = meta;
this .offset = offset;
this .length = length;
}
}
}