/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.dinky.shaded.paimon.operation;

import org.dinky.shaded.paimon.AppendOnlyFileStore;
import org.dinky.shaded.paimon.CoreOptions;
import org.dinky.shaded.paimon.append.AppendOnlyCompactManager;
import org.dinky.shaded.paimon.append.AppendOnlyWriter;
import org.dinky.shaded.paimon.compact.CompactManager;
import org.dinky.shaded.paimon.compact.NoopCompactManager;
import org.dinky.shaded.paimon.data.BinaryRow;
import org.dinky.shaded.paimon.data.InternalRow;
import org.dinky.shaded.paimon.format.FileFormat;
import org.dinky.shaded.paimon.fs.FileIO;
import org.dinky.shaded.paimon.io.DataFileMeta;
import org.dinky.shaded.paimon.io.DataFilePathFactory;
import org.dinky.shaded.paimon.io.RowDataRollingFileWriter;
import org.dinky.shaded.paimon.reader.RecordReaderIterator;
import org.dinky.shaded.paimon.statistics.FieldStatsCollector;
import org.dinky.shaded.paimon.table.BucketMode;
import org.dinky.shaded.paimon.table.source.DataSplit;
import org.dinky.shaded.paimon.types.RowType;
import org.dinky.shaded.paimon.utils.CommitIncrement;
import org.dinky.shaded.paimon.utils.FileStorePathFactory;
import org.dinky.shaded.paimon.utils.LongCounter;
import org.dinky.shaded.paimon.utils.RecordWriter;
import org.dinky.shaded.paimon.utils.SnapshotManager;
import org.dinky.shaded.paimon.utils.StatsCollectorFactories;

import javax.annotation.Nullable;

import java.util.Collections;
import java.util.List;
import java.util.concurrent.ExecutorService;

import static org.dinky.shaded.paimon.io.DataFileMeta.getMaxSequenceNumber;

/** {@link FileStoreWrite} for {@link AppendOnlyFileStore}. */
public class AppendOnlyFileStoreWrite extends MemoryFileStoreWrite<InternalRow> {

private final FileIO fileIO;
private final AppendOnlyFileStoreRead read;
private final long schemaId;
private final RowType rowType;
private final FileFormat fileFormat;
private final FileStorePathFactory pathFactory;
private final long targetFileSize;
private final int compactionMinFileNum;
private final int compactionMaxFileNum;
private final boolean commitForceCompact;
private final String fileCompression;
private final boolean useWriteBuffer;
private final boolean spillable;
private final FieldStatsCollector.Factory[] statsCollectors;
private boolean skipCompaction;
private BucketMode bucketMode = BucketMode.FIXED;
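
    /**
     * Creates a write operation for an append-only file store. Compaction thresholds, target
     * file size, write-buffer behavior and stats collection are all derived from the given
     * {@link CoreOptions}; {@code options.writeOnly()} disables compaction entirely.
     */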
public AppendOnlyFileStoreWrite(
FileIO fileIO,
AppendOnlyFileStoreRead read,
long schemaId,
String commitUser,
RowType rowType,
FileStorePathFactory pathFactory,
SnapshotManager snapshotManager,
FileStoreScan scan,
CoreOptions options,
String tableName) {
super(commitUser, snapshotManager, scan, options, null, tableName, pathFactory);
this.fileIO = fileIO;
this.read = read;
this.schemaId = schemaId;
this.rowType = rowType;
this.fileFormat = options.fileFormat();
this.pathFactory = pathFactory;
this.targetFileSize = options.targetFileSize();
this.compactionMinFileNum = options.compactionMinFileNum();
this.compactionMaxFileNum = options.compactionMaxFileNum();
this.commitForceCompact = options.commitForceCompact();
this.skipCompaction = options.writeOnly();
this.fileCompression = options.fileCompression();
this.useWriteBuffer = options.useWriteBufferForAppend();
this.spillable = options.writeBufferSpillable(fileIO.isObjectStore(), isStreamingMode);
this.statsCollectors =
StatsCollectorFactories.createStatsFactories(options, rowType.getFieldNames());
}
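
    /**
     * Creates an {@link AppendOnlyWriter} for the given partition and bucket. The writer and the
     * compact manager share the same (mutable) list of restored files; when compaction is
     * disabled, a {@link NoopCompactManager} is used instead of an
     * {@link AppendOnlyCompactManager}.
     */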
@Override
    protected RecordWriter<InternalRow> createWriter(
BinaryRow partition,
int bucket,
            List<DataFileMeta> restoredFiles,
@Nullable CommitIncrement restoreIncrement,
ExecutorService compactExecutor) {
        // Let the writer and the compact manager hold the same reference,
        // and keep the restored file list mutable so compaction results can be applied.
long maxSequenceNumber = getMaxSequenceNumber(restoredFiles);
DataFilePathFactory factory = pathFactory.createDataFilePathFactory(partition, bucket);
CompactManager compactManager =
skipCompaction
? new NoopCompactManager()
: new AppendOnlyCompactManager(
compactExecutor,
restoredFiles,
compactionMinFileNum,
compactionMaxFileNum,
targetFileSize,
compactRewriter(partition, bucket),
getCompactionMetrics(partition, bucket));
return new AppendOnlyWriter(
fileIO,
ioManager,
schemaId,
fileFormat,
targetFileSize,
rowType,
maxSequenceNumber,
compactManager,
commitForceCompact,
factory,
restoreIncrement,
useWriteBuffer,
spillable,
fileCompression,
statsCollectors,
getWriterMetrics(partition, bucket));
}
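
    /**
     * Returns a rewriter that compacts small files: the input files are read back through a
     * {@link DataSplit} and their rows are re-written with a {@link RowDataRollingFileWriter}.
     * Sequence numbers are preserved by starting the counter at the first input file's minimum
     * sequence number.
     *
     * <p>A minimal sketch of how the rewriter might be invoked (this assumes the functional
     * method of {@link AppendOnlyCompactManager.CompactRewriter} is named {@code rewrite}, and
     * the {@code toCompact} list is made up for the example):
     *
     * <pre>{@code
     * List<DataFileMeta> compacted = compactRewriter(partition, bucket).rewrite(toCompact);
     * }</pre>
     */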
public AppendOnlyCompactManager.CompactRewriter compactRewriter(
BinaryRow partition, int bucket) {
return toCompact -> {
if (toCompact.isEmpty()) {
return Collections.emptyList();
}
RowDataRollingFileWriter rewriter =
new RowDataRollingFileWriter(
fileIO,
schemaId,
fileFormat,
targetFileSize,
rowType,
pathFactory.createDataFilePathFactory(partition, bucket),
new LongCounter(toCompact.get(0).minSequenceNumber()),
fileCompression,
statsCollectors);
try {
rewriter.write(
new RecordReaderIterator<>(
read.createReader(
DataSplit.builder()
.withPartition(partition)
.withBucket(bucket)
.withDataFiles(toCompact)
.build())));
} finally {
rewriter.close();
}
return rewriter.result();
};
}
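
    /**
     * Sets the bucket mode for this write. A minimal sketch, where {@code write} stands for an
     * instance of this class:
     *
     * <pre>{@code
     * write.withBucketMode(BucketMode.UNAWARE); // no compaction, writers start empty
     * }</pre>
     */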
public AppendOnlyFileStoreWrite withBucketMode(BucketMode bucketMode) {
        // AppendOnlyFileStoreWrite is sensitive to the bucket mode. It behaves differently in
        // unaware-bucket mode (no compaction, and writers are forced to start empty).
this.bucketMode = bucketMode;
if (bucketMode == BucketMode.UNAWARE) {
super.withIgnorePreviousFiles(true);
skipCompaction = true;
}
return this;
}
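
    /**
     * {@inheritDoc}
     *
     * <p>In unaware-bucket mode the flag is forced to {@code true}, regardless of the caller's
     * argument, so that writers never pick up previous files.
     */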
@Override
public void withIgnorePreviousFiles(boolean ignorePrevious) {
// in unaware bucket mode, we need all writers to be empty
super.withIgnorePreviousFiles(ignorePrevious || bucketMode == BucketMode.UNAWARE);
}
}
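
// Illustrative end-to-end sketch (assumptions: `store` is an AppendOnlyFileStore, and its
// newWrite(String) factory returns this class; the commit-user string is made up):
//
//   AppendOnlyFileStoreWrite write = store.newWrite("commit-user");
//   write.withBucketMode(BucketMode.UNAWARE);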