org.dinky.shaded.paimon.table.system.FilesTable
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.dinky.shaded.paimon.table.system;

import org.dinky.shaded.paimon.data.BinaryString;
import org.dinky.shaded.paimon.data.InternalRow;
import org.dinky.shaded.paimon.data.LazyGenericRow;
import org.dinky.shaded.paimon.disk.IOManager;
import org.dinky.shaded.paimon.format.FieldStats;
import org.dinky.shaded.paimon.io.DataFileMeta;
import org.dinky.shaded.paimon.io.DataFilePathFactory;
import org.dinky.shaded.paimon.predicate.Predicate;
import org.dinky.shaded.paimon.reader.RecordReader;
import org.dinky.shaded.paimon.schema.SchemaManager;
import org.dinky.shaded.paimon.schema.TableSchema;
import org.dinky.shaded.paimon.stats.BinaryTableStats;
import org.dinky.shaded.paimon.stats.FieldStatsArraySerializer;
import org.dinky.shaded.paimon.stats.FieldStatsConverters;
import org.dinky.shaded.paimon.table.FileStoreTable;
import org.dinky.shaded.paimon.table.ReadonlyTable;
import org.dinky.shaded.paimon.table.Table;
import org.dinky.shaded.paimon.table.source.DataSplit;
import org.dinky.shaded.paimon.table.source.InnerTableRead;
import org.dinky.shaded.paimon.table.source.InnerTableScan;
import org.dinky.shaded.paimon.table.source.ReadOnceTableScan;
import org.dinky.shaded.paimon.table.source.Split;
import org.dinky.shaded.paimon.table.source.TableRead;
import org.dinky.shaded.paimon.table.source.TableScan;
import org.dinky.shaded.paimon.types.BigIntType;
import org.dinky.shaded.paimon.types.DataField;
import org.dinky.shaded.paimon.types.DataTypes;
import org.dinky.shaded.paimon.types.IntType;
import org.dinky.shaded.paimon.types.RowType;
import org.dinky.shaded.paimon.utils.IteratorRecordReader;
import org.dinky.shaded.paimon.utils.ProjectedRow;
import org.dinky.shaded.paimon.utils.RowDataToObjectArrayConverter;
import org.dinky.shaded.paimon.utils.SerializationUtils;
import org.dinky.shaded.paimon.shade.guava30.com.google.common.collect.Iterators;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.TreeMap;
import java.util.function.Function;
import java.util.function.Supplier;
import static org.dinky.shaded.paimon.catalog.Catalog.SYSTEM_TABLE_SPLITTER;

/** A {@link Table} for showing the files of a snapshot in a specific table. */
public class FilesTable implements ReadonlyTable {
private static final long serialVersionUID = 1L;
public static final String FILES = "files";
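    // Row type of the "files" system table: one row per data file in the current
    // snapshot. partition, min_key, max_key and the sequence-number columns are
    // nullable; the stats columns are rendered as strings.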
public static final RowType TABLE_TYPE =
new RowType(
Arrays.asList(
new DataField(0, "partition", SerializationUtils.newStringType(true)),
new DataField(1, "bucket", new IntType(false)),
new DataField(2, "file_path", SerializationUtils.newStringType(false)),
new DataField(
3, "file_format", SerializationUtils.newStringType(false)),
new DataField(4, "schema_id", new BigIntType(false)),
new DataField(5, "level", new IntType(false)),
new DataField(6, "record_count", new BigIntType(false)),
new DataField(7, "file_size_in_bytes", new BigIntType(false)),
new DataField(8, "min_key", SerializationUtils.newStringType(true)),
new DataField(9, "max_key", SerializationUtils.newStringType(true)),
new DataField(
10,
"null_value_counts",
SerializationUtils.newStringType(false)),
new DataField(
11, "min_value_stats", SerializationUtils.newStringType(false)),
new DataField(
12, "max_value_stats", SerializationUtils.newStringType(false)),
new DataField(13, "min_sequence_number", new BigIntType(true)),
new DataField(14, "max_sequence_number", new BigIntType(true)),
new DataField(15, "creation_time", DataTypes.TIMESTAMP_MILLIS())));
private final FileStoreTable storeTable;
public FilesTable(FileStoreTable storeTable) {
this.storeTable = storeTable;
}
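    // The system table name is derived from the base table, e.g. "t" becomes
    // "t$files" (SYSTEM_TABLE_SPLITTER is assumed to be "$" here).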
@Override
public String name() {
return storeTable.name() + SYSTEM_TABLE_SPLITTER + FILES;
}
@Override
public RowType rowType() {
return TABLE_TYPE;
}
@Override
    public List<String> primaryKeys() {
return Collections.singletonList("file_path");
}
@Override
public InnerTableScan newScan() {
return new FilesScan(storeTable);
}
@Override
public InnerTableRead newRead() {
return new FilesRead(new SchemaManager(storeTable.fileIO(), storeTable.location()));
}
@Override
    public Table copy(Map<String, String> dynamicOptions) {
return new FilesTable(storeTable.copy(dynamicOptions));
}
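    // Usage sketch (assumption: an already-loaded FileStoreTable `storeTable`):
    //   FilesTable files = new FilesTable(storeTable);
    //   RecordReader<InternalRow> reader =
    //           files.newRead().createReader(files.newScan().plan().splits().get(0));
    // Each row produced by the reader describes one data file of the snapshot.

    /** Scan that plans a single {@link FilesSplit} covering the whole table. */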
private static class FilesScan extends ReadOnceTableScan {
private final FileStoreTable storeTable;
private FilesScan(FileStoreTable storeTable) {
this.storeTable = storeTable;
}
@Override
public InnerTableScan withFilter(Predicate predicate) {
// TODO
return this;
}
@Override
public Plan innerPlan() {
return () -> Collections.singletonList(new FilesSplit(storeTable));
}
}
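    /**
     * A {@link Split} that wraps the base table and defers planning to read time;
     * its row count equals the total number of data files across all data splits.
     */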
private static class FilesSplit implements Split {
private static final long serialVersionUID = 1L;
private final FileStoreTable storeTable;
private FilesSplit(FileStoreTable storeTable) {
this.storeTable = storeTable;
}
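        // One output row per data file, so the row count is the number of data
        // files over all planned splits.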
@Override
public long rowCount() {
TableScan.Plan plan = plan();
return plan.splits().stream()
.map(s -> (DataSplit) s)
.mapToLong(s -> s.dataFiles().size())
.sum();
}
private TableScan.Plan plan() {
return storeTable.newScan().plan();
}
@Override
public boolean equals(Object o) {
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}
FilesSplit that = (FilesSplit) o;
return Objects.equals(storeTable, that.storeTable);
}
@Override
public int hashCode() {
return Objects.hash(storeTable);
}
}
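    /**
     * Read that maps every {@link DataFileMeta} of the planned splits to one row of
     * {@link FilesTable#TABLE_TYPE}, applying the remembered projection at the end.
     */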
private static class FilesRead implements InnerTableRead {
private final SchemaManager schemaManager;
private int[][] projection;
private FilesRead(SchemaManager schemaManager) {
this.schemaManager = schemaManager;
}
@Override
public InnerTableRead withFilter(Predicate predicate) {
// TODO
return this;
}
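        // The projection is only recorded here; it is applied in createReader() by
        // wrapping each produced row in a ProjectedRow.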
@Override
public InnerTableRead withProjection(int[][] projection) {
this.projection = projection;
return this;
}
@Override
public TableRead withIOManager(IOManager ioManager) {
return this;
}
@Override
        public RecordReader<InternalRow> createReader(Split split) throws IOException {
if (!(split instanceof FilesSplit)) {
throw new IllegalArgumentException("Unsupported split: " + split.getClass());
}
FilesSplit filesSplit = (FilesSplit) split;
FileStoreTable table = filesSplit.storeTable;
TableScan.Plan plan = filesSplit.plan();
if (plan.splits().isEmpty()) {
return new IteratorRecordReader<>(Collections.emptyIterator());
}
            List<Iterator<InternalRow>> iteratorList = new ArrayList<>();
            // A null dataFilePlan.snapshotId indicates there are no files in the
            // table; in that case fall back to the newest schema id directly
FieldStatsConverters fieldStatsConverters =
new FieldStatsConverters(
sid -> schemaManager.schema(sid).fields(), table.schema().id());
RowDataToObjectArrayConverter partitionConverter =
new RowDataToObjectArrayConverter(table.schema().logicalPartitionType());
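            // Memoize one key converter per schema id, so files written under older
            // schemas are decoded with their own (trimmed) primary-key row type; for
            // schemas without primary keys the full row type is used instead.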
            Function<Long, RowDataToObjectArrayConverter> keyConverters =
                    new Function<Long, RowDataToObjectArrayConverter>() {
                        final Map<Long, RowDataToObjectArrayConverter> keyConverterMap =
new HashMap<>();
@Override
public RowDataToObjectArrayConverter apply(Long schemaId) {
return keyConverterMap.computeIfAbsent(
schemaId,
k -> {
TableSchema dataSchema = schemaManager.schema(schemaId);
RowType keysType =
dataSchema.logicalTrimmedPrimaryKeysType();
return keysType.getFieldCount() > 0
? new RowDataToObjectArrayConverter(
dataSchema.logicalTrimmedPrimaryKeysType())
: new RowDataToObjectArrayConverter(
dataSchema.logicalRowType());
});
}
};
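            // Build one lazy iterator per data split; each DataFileMeta is converted
            // to a row only when the reader actually consumes it.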
for (Split dataSplit : plan.splits()) {
iteratorList.add(
Iterators.transform(
((DataSplit) dataSplit).dataFiles().iterator(),
file ->
toRow(
(DataSplit) dataSplit,
partitionConverter,
keyConverters,
file,
table.getSchemaFieldStats(file),
fieldStatsConverters)));
}
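            // Concatenate the per-split iterators and, if a projection was pushed
            // down, narrow each row via ProjectedRow.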
            Iterator<InternalRow> rows = Iterators.concat(iteratorList.iterator());
if (projection != null) {
rows =
Iterators.transform(
rows, row -> ProjectedRow.from(projection).replaceRow(row));
}
return new IteratorRecordReader<>(rows);
}
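        /**
         * Builds one row of the files table for a single data file. The fields are
         * wrapped in lazy suppliers so that stats deserialization (via the
         * {@code StatsLazyGetter}) only runs when the corresponding columns are read.
         */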
private LazyGenericRow toRow(
DataSplit dataSplit,
RowDataToObjectArrayConverter partitionConverter,
                Function<Long, RowDataToObjectArrayConverter> keyConverters,
DataFileMeta dataFileMeta,
BinaryTableStats tableStats,
FieldStatsConverters fieldStatsConverters) {
StatsLazyGetter statsGetter =
new StatsLazyGetter(tableStats, dataFileMeta, fieldStatsConverters);
@SuppressWarnings("unchecked")
            Supplier<Object>[] fields =