
// org.dinky.shaded.paimon.io.KeyValueDataFileWriter Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.dinky.shaded.paimon.io;
import org.dinky.shaded.paimon.CoreOptions;
import org.dinky.shaded.paimon.KeyValue;
import org.dinky.shaded.paimon.data.BinaryRow;
import org.dinky.shaded.paimon.data.InternalRow;
import org.dinky.shaded.paimon.data.serializer.InternalRowSerializer;
import org.dinky.shaded.paimon.format.FieldStats;
import org.dinky.shaded.paimon.format.FormatWriterFactory;
import org.dinky.shaded.paimon.format.TableStatsExtractor;
import org.dinky.shaded.paimon.fs.FileIO;
import org.dinky.shaded.paimon.fs.Path;
import org.dinky.shaded.paimon.stats.BinaryTableStats;
import org.dinky.shaded.paimon.stats.FieldStatsArraySerializer;
import org.dinky.shaded.paimon.types.RowType;
import org.dinky.shaded.paimon.utils.StatsCollectorFactories;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.annotation.Nullable;
import java.io.IOException;
import java.util.Arrays;
import java.util.function.Function;
/**
 * A {@link StatsCollectingSingleFileWriter} to write data files containing {@link KeyValue}s. Also
 * produces {@link DataFileMeta} after writing a file.
 *
 * <p>NOTE: records given to the writer must be sorted because it does not compare the min max keys
 * to produce {@link DataFileMeta}: the key of the first record written is reported as the minimum
 * key and the key of the last record written is reported as the maximum key.
 */
public class KeyValueDataFileWriter
        extends StatsCollectingSingleFileWriter<KeyValue, DataFileMeta> {

    private static final Logger LOG = LoggerFactory.getLogger(KeyValueDataFileWriter.class);

    private final RowType keyType;
    private final RowType valueType;
    private final long schemaId;
    private final int level;

    private final FieldStatsArraySerializer keyStatsConverter;
    private final FieldStatsArraySerializer valueStatsConverter;
    private final InternalRowSerializer keySerializer;

    // Binary copy of the key of the first record written; stays null until the first write.
    private BinaryRow minKey = null;
    // Reference to the key of the most recent record; only serialized and copied in result().
    private InternalRow maxKey = null;
    private long minSeqNumber = Long.MAX_VALUE;
    private long maxSeqNumber = Long.MIN_VALUE;

    /**
     * Creates a writer for a single key-value data file.
     *
     * @param fileIO file system access, forwarded to the parent writer
     * @param factory format writer factory, forwarded to the parent writer
     * @param path path of the file to write, forwarded to the parent writer
     * @param converter converts each {@link KeyValue} into the row handed to the parent writer
     * @param keyType row type of the key part of each record
     * @param valueType row type of the value part of each record
     * @param tableStatsExtractor optional stats extractor, forwarded to the parent writer
     * @param schemaId schema id recorded in the produced {@link DataFileMeta}
     * @param level level recorded in the produced {@link DataFileMeta}
     * @param compression compression setting, forwarded to the parent writer
     * @param options options used to create the per-field stats collector factories
     */
    public KeyValueDataFileWriter(
            FileIO fileIO,
            FormatWriterFactory factory,
            Path path,
            Function<KeyValue, InternalRow> converter,
            RowType keyType,
            RowType valueType,
            @Nullable TableStatsExtractor tableStatsExtractor,
            long schemaId,
            int level,
            String compression,
            CoreOptions options) {
        super(
                fileIO,
                factory,
                path,
                converter,
                KeyValue.schema(keyType, valueType),
                tableStatsExtractor,
                compression,
                StatsCollectorFactories.createStatsFactories(
                        options, KeyValue.schema(keyType, valueType).getFieldNames()));

        this.keyType = keyType;
        this.valueType = valueType;
        this.schemaId = schemaId;
        this.level = level;

        this.keyStatsConverter = new FieldStatsArraySerializer(keyType);
        this.valueStatsConverter = new FieldStatsArraySerializer(valueType);
        this.keySerializer = new InternalRowSerializer(keyType);
    }

    /**
     * Writes one record through the parent writer, then updates the tracked min/max key and
     * min/max sequence number.
     *
     * @param kv the record to write
     * @throws IOException if the parent writer fails to write the record
     */
    @Override
    public void write(KeyValue kv) throws IOException {
        super.write(kv);

        updateMinKey(kv);
        updateMaxKey(kv);

        updateMinSeqNumber(kv);
        updateMaxSeqNumber(kv);

        // Keep the guard: kv.toString(...) is evaluated eagerly even with a parameterized message.
        if (LOG.isDebugEnabled()) {
            LOG.debug("Write to Path {} key value {}", path, kv.toString(keyType, valueType));
        }
    }

    /** Records a binary copy of the first key seen; later keys are ignored (input is sorted). */
    private void updateMinKey(KeyValue kv) {
        if (minKey == null) {
            minKey = keySerializer.toBinaryRow(kv.key()).copy();
        }
    }

    /**
     * Remembers the latest key by reference; it is serialized and copied only in {@link #result()}.
     *
     * <p>NOTE(review): no copy is taken here, so this presumably relies on the last key object not
     * being mutated between the final write and the call to {@link #result()} - confirm against
     * callers.
     */
    private void updateMaxKey(KeyValue kv) {
        maxKey = kv.key();
    }

    /** Lowers the tracked minimum sequence number if this record's is smaller. */
    private void updateMinSeqNumber(KeyValue kv) {
        minSeqNumber = Math.min(minSeqNumber, kv.sequenceNumber());
    }

    /** Raises the tracked maximum sequence number if this record's is larger. */
    private void updateMaxSeqNumber(KeyValue kv) {
        maxSeqNumber = Math.max(maxSeqNumber, kv.sequenceNumber());
    }

    /**
     * Builds the metadata describing the written file.
     *
     * @return the file metadata, or {@code null} if no record has been written
     * @throws IOException if collecting the field stats or reading the file size fails
     */
    @Override
    @Nullable
    public DataFileMeta result() throws IOException {
        if (recordCount() == 0) {
            return null;
        }

        FieldStats[] rowStats = fieldStats();
        int numKeyFields = keyType.getFieldCount();

        // The leading stats entries belong to the key fields.
        FieldStats[] keyFieldStats = Arrays.copyOfRange(rowStats, 0, numKeyFields);
        BinaryTableStats keyStats = keyStatsConverter.toBinary(keyFieldStats);

        // Two entries after the key fields are skipped before the value fields start.
        // NOTE(review): presumably the extra columns KeyValue.schema places between key and value
        // (sequence number and value kind) - confirm against KeyValue.schema.
        int valueStatsStart = numKeyFields + 2;
        FieldStats[] valFieldStats =
                Arrays.copyOfRange(rowStats, valueStatsStart, rowStats.length);
        BinaryTableStats valueStats = valueStatsConverter.toBinary(valFieldStats);

        return new DataFileMeta(
                path.getName(),
                fileIO.getFileSize(path),
                recordCount(),
                minKey,
                keySerializer.toBinaryRow(maxKey).copy(),
                keyStats,
                valueStats,
                minSeqNumber,
                maxSeqNumber,
                schemaId,
                level);
    }
}