/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.dinky.shaded.paimon.table;

import org.dinky.shaded.paimon.CoreOptions;
import org.dinky.shaded.paimon.CoreOptions.ChangelogProducer;
import org.dinky.shaded.paimon.KeyValue;
import org.dinky.shaded.paimon.KeyValueFileStore;
import org.dinky.shaded.paimon.data.InternalRow;
import org.dinky.shaded.paimon.fs.FileIO;
import org.dinky.shaded.paimon.fs.Path;
import org.dinky.shaded.paimon.manifest.ManifestCacheFilter;
import org.dinky.shaded.paimon.mergetree.compact.LookupMergeFunction;
import org.dinky.shaded.paimon.mergetree.compact.MergeFunctionFactory;
import org.dinky.shaded.paimon.operation.FileStoreScan;
import org.dinky.shaded.paimon.operation.KeyValueFileStoreScan;
import org.dinky.shaded.paimon.operation.Lock;
import org.dinky.shaded.paimon.options.Options;
import org.dinky.shaded.paimon.predicate.Predicate;
import org.dinky.shaded.paimon.reader.RecordReader;
import org.dinky.shaded.paimon.schema.KeyValueFieldsExtractor;
import org.dinky.shaded.paimon.schema.TableSchema;
import org.dinky.shaded.paimon.table.sink.SequenceGenerator;
import org.dinky.shaded.paimon.table.sink.TableWriteImpl;
import org.dinky.shaded.paimon.table.source.InnerTableRead;
import org.dinky.shaded.paimon.table.source.KeyValueTableRead;
import org.dinky.shaded.paimon.table.source.MergeTreeSplitGenerator;
import org.dinky.shaded.paimon.table.source.SplitGenerator;
import org.dinky.shaded.paimon.table.source.ValueContentRowDataRecordIterator;
import org.dinky.shaded.paimon.types.RowType;

import java.util.List;
import java.util.function.BiConsumer;

import static org.dinky.shaded.paimon.predicate.PredicateBuilder.and;
import static org.dinky.shaded.paimon.predicate.PredicateBuilder.pickTransformFieldMapping;
import static org.dinky.shaded.paimon.predicate.PredicateBuilder.splitAnd;

/** {@link FileStoreTable} for primary key table. */
public class PrimaryKeyFileStoreTable extends AbstractFileStoreTable {
private static final long serialVersionUID = 1L;
private transient KeyValueFileStore lazyStore;
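
/** Convenience constructor using a {@link CatalogEnvironment} with an empty lock factory. */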
PrimaryKeyFileStoreTable(FileIO fileIO, Path path, TableSchema tableSchema) {
this(fileIO, path, tableSchema, new CatalogEnvironment(Lock.emptyFactory(), null, null));
}
PrimaryKeyFileStoreTable(
FileIO fileIO,
Path path,
TableSchema tableSchema,
CatalogEnvironment catalogEnvironment) {
super(fileIO, path, tableSchema, catalogEnvironment);
}
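
/** Creates a copy of this table bound to {@code newTableSchema}, reusing file IO, path and catalog environment. */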
@Override
protected FileStoreTable copy(TableSchema newTableSchema) {
return new PrimaryKeyFileStoreTable(fileIO, path, newTableSchema, catalogEnvironment);
}
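
/**
 * Lazily builds the {@link KeyValueFileStore} from the table schema and its options. When the
 * changelog producer is {@link ChangelogProducer#LOOKUP}, the merge function factory is wrapped
 * in a {@link LookupMergeFunction} so that changelog rows can be produced by looking up the
 * previous value of a key.
 */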
@Override
public KeyValueFileStore store() {
if (lazyStore == null) {
RowType rowType = tableSchema.logicalRowType();
Options conf = Options.fromMap(tableSchema.options());
CoreOptions options = new CoreOptions(conf);
KeyValueFieldsExtractor extractor =
PrimaryKeyTableUtils.PrimaryKeyFieldsExtractor.EXTRACTOR;
MergeFunctionFactory<KeyValue> mfFactory =
PrimaryKeyTableUtils.createMergeFunctionFactory(tableSchema, extractor);
if (options.changelogProducer() == ChangelogProducer.LOOKUP) {
mfFactory =
LookupMergeFunction.wrap(
mfFactory, new RowType(extractor.keyFields(tableSchema)), rowType);
}
lazyStore =
new KeyValueFileStore(
fileIO(),
schemaManager(),
tableSchema.id(),
tableSchema.crossPartitionUpdate(),
options,
tableSchema.logicalPartitionType(),
PrimaryKeyTableUtils.addKeyNamePrefix(
tableSchema.logicalBucketKeyType()),
new RowType(extractor.keyFields(tableSchema)),
rowType,
extractor,
mfFactory,
name());
}
return lazyStore;
}
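
/**
 * Splits are generated by a {@link MergeTreeSplitGenerator}, which uses the key comparator of
 * the store together with the configured split target size and split open file cost.
 */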
@Override
public SplitGenerator splitGenerator() {
return new MergeTreeSplitGenerator(
store().newKeyComparator(),
store().options().splitTargetSize(),
store().options().splitOpenFileCost());
}
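
/** Streaming reads may consume OVERWRITE snapshots only when the corresponding core option enables it. */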
@Override
public boolean supportStreamingReadOverwrite() {
return new CoreOptions(tableSchema.options()).streamingReadOverwrite();
}
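
/**
 * Pushes non-partition filters into the scan: only the parts of the predicate that reference
 * primary key fields are pushed down as key filters (see the comment below for why value
 * filters cannot safely prune files), while the full predicate is still applied as a
 * bucket-level value filter.
 */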
@Override
public BiConsumer<FileStoreScan, Predicate> nonPartitionFilterConsumer() {
return (scan, predicate) -> {
// currently we can only perform filter push down on keys
// consider this case:
// data file 1: insert key = a, value = 1
// data file 2: update key = a, value = 2
// filter: value = 1
// if we perform filter push down on values, data file 1 will be chosen, but data
// file 2 will be ignored, and the final result will be key = a, value = 1 while the
// correct result is an empty set
List<Predicate> keyFilters =
pickTransformFieldMapping(
splitAnd(predicate),
tableSchema.fieldNames(),
tableSchema.trimmedPrimaryKeys());
if (keyFilters.size() > 0) {
((KeyValueFileStoreScan) scan).withKeyFilter(and(keyFilters));
}
// support value filter in bucket level
((KeyValueFileStoreScan) scan).withValueFilter(predicate);
};
}
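
/**
 * Reads return the value part of each {@link KeyValue} as the row content; projections are
 * applied to the value fields of the underlying merge-tree read.
 */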
@Override
public InnerTableRead newRead() {
return new KeyValueTableRead(store().newRead(), schema()) {
@Override
public void projection(int[][] projection) {
read.withValueProjection(projection);
}
@Override
protected RecordReader.RecordIterator<InternalRow> rowDataRecordIteratorFromKv(
RecordReader.RecordIterator<KeyValue> kvRecordIterator) {
return new ValueContentRowDataRecordIterator(kvRecordIterator);
}
@Override
public InnerTableRead forceKeepDelete() {
read.forceKeepDelete();
return this;
}
};
}
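
/** Creates a write without a manifest cache filter. */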
@Override
public TableWriteImpl<KeyValue> newWrite(String commitUser) {
return newWrite(commitUser, null);
}
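
/**
 * Creates a write that turns every incoming record into a {@link KeyValue}: the sequence number
 * is taken from the {@link SequenceGenerator} when one is configured for the table, otherwise
 * {@link KeyValue#UNKNOWN_SEQUENCE} is used, and a single {@link KeyValue} instance is reused
 * across records.
 */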
@Override
public TableWriteImpl<KeyValue> newWrite(
String commitUser, ManifestCacheFilter manifestFilter) {
final SequenceGenerator sequenceGenerator =
SequenceGenerator.create(schema(), store().options());
final KeyValue kv = new KeyValue();
return new TableWriteImpl<>(
store().newWrite(commitUser, manifestFilter),
createRowKeyExtractor(),
record -> {
long sequenceNumber =
sequenceGenerator == null
? KeyValue.UNKNOWN_SEQUENCE
: sequenceGenerator.generate(record.row());
return kv.replace(
record.primaryKey(),
sequenceNumber,
record.row().getRowKind(),
record.row());
},
name());
}
}
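
/*
 * Illustrative usage sketch (comment only, not compiled as part of this class). It assumes the
 * shaded distribution keeps the upstream Apache Paimon entry point FileStoreTableFactory; that
 * name and the paths below are assumptions for illustration, not something defined in this file:
 *
 *   FileIO fileIO = ...;                                        // some FileIO implementation
 *   Path tablePath = new Path("/warehouse/default.db/orders");  // hypothetical table location
 *   FileStoreTable table = FileStoreTableFactory.create(fileIO, tablePath);
 *   // For a schema with a primary key, the returned table is a PrimaryKeyFileStoreTable, so
 *   // newWrite()/newRead() behave as implemented above.
 */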