// Origin note: org.dinky.shaded.paimon.operation.AppendOnlyFileStoreRead,
// obtained from a Maven repository mirror (newest published version).
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.dinky.shaded.paimon.operation;
import org.dinky.shaded.paimon.AppendOnlyFileStore;
import org.dinky.shaded.paimon.data.BinaryRow;
import org.dinky.shaded.paimon.data.InternalRow;
import org.dinky.shaded.paimon.format.FileFormatDiscover;
import org.dinky.shaded.paimon.format.FormatKey;
import org.dinky.shaded.paimon.fs.FileIO;
import org.dinky.shaded.paimon.io.DataFileMeta;
import org.dinky.shaded.paimon.io.DataFilePathFactory;
import org.dinky.shaded.paimon.io.RowDataFileRecordReader;
import org.dinky.shaded.paimon.mergetree.compact.ConcatRecordReader;
import org.dinky.shaded.paimon.partition.PartitionUtils;
import org.dinky.shaded.paimon.predicate.Predicate;
import org.dinky.shaded.paimon.reader.RecordReader;
import org.dinky.shaded.paimon.schema.IndexCastMapping;
import org.dinky.shaded.paimon.schema.SchemaEvolutionUtil;
import org.dinky.shaded.paimon.schema.SchemaManager;
import org.dinky.shaded.paimon.schema.TableSchema;
import org.dinky.shaded.paimon.table.source.DataSplit;
import org.dinky.shaded.paimon.types.RowType;
import org.dinky.shaded.paimon.utils.BulkFormatMapping;
import org.dinky.shaded.paimon.utils.FileStorePathFactory;
import org.dinky.shaded.paimon.utils.Pair;
import org.dinky.shaded.paimon.utils.Projection;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.annotation.Nullable;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import static org.dinky.shaded.paimon.predicate.PredicateBuilder.splitAnd;
/** {@link FileStoreRead} for {@link AppendOnlyFileStore}. */
public class AppendOnlyFileStoreRead implements FileStoreRead {
private static final Logger LOG = LoggerFactory.getLogger(AppendOnlyFileStoreRead.class);
private final FileIO fileIO;
private final SchemaManager schemaManager;
private final long schemaId;
private final FileFormatDiscover formatDiscover;
private final FileStorePathFactory pathFactory;
private final Map bulkFormatMappings;
private int[][] projection;
@Nullable private List filters;
public AppendOnlyFileStoreRead(
FileIO fileIO,
SchemaManager schemaManager,
long schemaId,
RowType rowType,
FileFormatDiscover formatDiscover,
FileStorePathFactory pathFactory) {
this.fileIO = fileIO;
this.schemaManager = schemaManager;
this.schemaId = schemaId;
this.formatDiscover = formatDiscover;
this.pathFactory = pathFactory;
this.bulkFormatMappings = new HashMap<>();
this.projection = Projection.range(0, rowType.getFieldCount()).toNestedIndexes();
}
public FileStoreRead withProjection(int[][] projectedFields) {
projection = projectedFields;
return this;
}
@Override
public FileStoreRead withFilter(Predicate predicate) {
this.filters = splitAnd(predicate);
return this;
}
@Override
public RecordReader createReader(DataSplit split) throws IOException {
DataFilePathFactory dataFilePathFactory =
pathFactory.createDataFilePathFactory(split.partition(), split.bucket());
List> suppliers = new ArrayList<>();
if (split.beforeFiles().size() > 0) {
LOG.info("Ignore split before files: " + split.beforeFiles());
}
for (DataFileMeta file : split.dataFiles()) {
String formatIdentifier = DataFilePathFactory.formatIdentifier(file.fileName());
BulkFormatMapping bulkFormatMapping =
bulkFormatMappings.computeIfAbsent(
new FormatKey(file.schemaId(), formatIdentifier),
key -> {
TableSchema tableSchema = schemaManager.schema(this.schemaId);
TableSchema dataSchema = schemaManager.schema(key.schemaId);
// projection to data schema
int[][] dataProjection =
SchemaEvolutionUtil.createDataProjection(
tableSchema.fields(),
dataSchema.fields(),
projection);
IndexCastMapping indexCastMapping =
SchemaEvolutionUtil.createIndexCastMapping(
Projection.of(projection).toTopLevelIndexes(),
tableSchema.fields(),
Projection.of(dataProjection).toTopLevelIndexes(),
dataSchema.fields());
List dataFilters =
this.schemaId == key.schemaId
? filters
: SchemaEvolutionUtil.createDataFilters(
tableSchema.fields(),
dataSchema.fields(),
filters);
Pair partitionPair = null;
if (!dataSchema.partitionKeys().isEmpty()) {
Pair partitionMappping =
PartitionUtils.constructPartitionMapping(
dataSchema, dataProjection);
// if partition fields are not selected, we just do nothing
if (partitionMappping != null) {
dataProjection = partitionMappping.getRight();
partitionPair =
Pair.of(
partitionMappping.getLeft(),
dataSchema.projectedLogicalRowType(
dataSchema.partitionKeys()));
}
}
RowType projectedRowType =
Projection.of(dataProjection)
.project(dataSchema.logicalRowType());
return new BulkFormatMapping(
indexCastMapping.getIndexMapping(),
indexCastMapping.getCastMapping(),
partitionPair,
formatDiscover
.discover(formatIdentifier)
.createReaderFactory(
projectedRowType, dataFilters));
});
final BinaryRow partition = split.partition();
suppliers.add(
() ->
new RowDataFileRecordReader(
fileIO,
dataFilePathFactory.toPath(file.fileName()),
bulkFormatMapping.getReaderFactory(),
bulkFormatMapping.getIndexMapping(),
bulkFormatMapping.getCastMapping(),
PartitionUtils.create(
bulkFormatMapping.getPartitionPair(), partition)));
}
return ConcatRecordReader.create(suppliers);
}
}
// © 2015 - 2025 Weber Informatics LLC (site footer retained from the scraped page).