com.facebook.presto.iceberg.PartitionTable Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of presto-iceberg Show documentation
Show all versions of presto-iceberg Show documentation
Presto - Iceberg Connector
The newest version!
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.iceberg;
import com.facebook.presto.common.block.Block;
import com.facebook.presto.common.block.BlockBuilder;
import com.facebook.presto.common.predicate.TupleDomain;
import com.facebook.presto.common.type.RowType;
import com.facebook.presto.common.type.TimestampType;
import com.facebook.presto.common.type.TypeManager;
import com.facebook.presto.common.type.TypeUtils;
import com.facebook.presto.spi.ColumnMetadata;
import com.facebook.presto.spi.ConnectorSession;
import com.facebook.presto.spi.ConnectorTableMetadata;
import com.facebook.presto.spi.InMemoryRecordSet;
import com.facebook.presto.spi.RecordCursor;
import com.facebook.presto.spi.SchemaTableName;
import com.facebook.presto.spi.SystemTable;
import com.facebook.presto.spi.connector.ConnectorTransactionHandle;
import com.google.common.collect.ImmutableList;
import org.apache.iceberg.DataFile;
import org.apache.iceberg.FileScanTask;
import org.apache.iceberg.PartitionField;
import org.apache.iceberg.Schema;
import org.apache.iceberg.StructLike;
import org.apache.iceberg.Table;
import org.apache.iceberg.TableScan;
import org.apache.iceberg.io.CloseableIterable;
import org.apache.iceberg.types.Type;
import org.apache.iceberg.types.Types;
import org.apache.iceberg.util.StructLikeWrapper;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;
import static com.facebook.presto.common.type.BigintType.BIGINT;
import static com.facebook.presto.iceberg.IcebergUtil.getIdentityPartitions;
import static com.facebook.presto.iceberg.TypeConverter.toPrestoType;
import static com.google.common.collect.ImmutableList.toImmutableList;
import static java.util.Objects.requireNonNull;
import static java.util.concurrent.TimeUnit.MICROSECONDS;
import static java.util.concurrent.TimeUnit.MILLISECONDS;
import static java.util.stream.Collectors.toSet;
public class PartitionTable
implements SystemTable
{
// Passed to toPrestoType(...) whenever an Iceberg type is converted to a Presto type.
private final TypeManager typeManager;
// Iceberg table whose schema, partition spec and scan back this system table.
private final Table icebergTable;
// Snapshot to scan; when empty, cursor() returns no rows.
private final Optional<Long> snapshotId;
// Field id -> Iceberg primitive type, for the top-level primitive columns of the schema.
private final Map<Integer, Type.PrimitiveType> idToTypeMapping;
// Top-level primitive columns that are not the source of an identity partition field.
private final List<Types.NestedField> nonPartitionPrimitiveColumns;
// Presto types of the partition columns, in partition-spec order.
private final List<com.facebook.presto.common.type.Type> partitionColumnTypes;
// Presto types of every output column: partition columns, the three BIGINT counters, then column metrics.
private final List<com.facebook.presto.common.type.Type> resultTypes;
// row(min, max, null_count) type of each column-metrics output column.
private final List<RowType> columnMetricTypes;
// Table metadata (name and columns) returned by getTableMetadata().
private final ConnectorTableMetadata connectorTableMetadata;
/**
 * Builds the column layout of the partitions system table for an Iceberg table.
 *
 * <p>The output columns are, in order: one column per field of the table's partition spec,
 * the BIGINT counters {@code row_count}, {@code file_count} and {@code total_size}, and one
 * {@code row(min, max, null_count)} column per top-level primitive column that is not the
 * source of an identity partition field.
 *
 * @param tableName name under which this system table is exposed
 * @param typeManager used to convert Iceberg types to Presto types
 * @param icebergTable table whose partitions are reported
 * @param snapshotId snapshot to scan; when empty, {@code cursor} returns no rows
 * @throws NullPointerException if any argument is null
 */
public PartitionTable(SchemaTableName tableName, TypeManager typeManager, Table icebergTable, Optional<Long> snapshotId)
{
    requireNonNull(tableName, "tableName is null");
    // typeManager must be assigned before the metadata helpers below are called: they read it.
    this.typeManager = requireNonNull(typeManager, "typeManager is null");
    this.icebergTable = requireNonNull(icebergTable, "icebergTable is null");
    this.snapshotId = requireNonNull(snapshotId, "snapshotId is null");

    List<Types.NestedField> columns = icebergTable.schema().columns();
    this.idToTypeMapping = columns.stream()
            .filter(column -> column.type().isPrimitiveType())
            .collect(Collectors.toMap(Types.NestedField::fieldId, column -> column.type().asPrimitiveType()));

    ImmutableList.Builder<ColumnMetadata> columnMetadataBuilder = ImmutableList.builder();

    // 1. Partition columns, one per field of the current partition spec.
    List<PartitionField> partitionFields = icebergTable.spec().fields();
    List<ColumnMetadata> partitionColumnsMetadata = getPartitionColumnsMetadata(partitionFields, icebergTable.schema());
    this.partitionColumnTypes = partitionColumnsMetadata.stream()
            .map(ColumnMetadata::getType)
            .collect(toImmutableList());
    columnMetadataBuilder.addAll(partitionColumnsMetadata);

    // 2. Fixed per-partition counters.
    for (String metric : ImmutableList.of("row_count", "file_count", "total_size")) {
        columnMetadataBuilder.add(new ColumnMetadata(metric, BIGINT));
    }

    // 3. Column metrics for primitive columns that are not identity-partition sources.
    Set<Integer> identityPartitionIds = getIdentityPartitions(icebergTable.spec()).keySet().stream()
            .map(PartitionField::sourceId)
            .collect(toSet());
    List<Types.NestedField> metricColumns = columns.stream()
            .filter(column -> !identityPartitionIds.contains(column.fieldId()) && column.type().isPrimitiveType())
            .collect(toImmutableList());
    this.nonPartitionPrimitiveColumns = metricColumns;
    List<ColumnMetadata> columnMetricsMetadata = getColumnMetadata(metricColumns);
    columnMetadataBuilder.addAll(columnMetricsMetadata);
    this.columnMetricTypes = columnMetricsMetadata.stream()
            .map(metadata -> (RowType) metadata.getType())
            .collect(toImmutableList());

    ImmutableList<ColumnMetadata> columnMetadata = columnMetadataBuilder.build();
    this.resultTypes = columnMetadata.stream()
            .map(ColumnMetadata::getType)
            .collect(toImmutableList());
    this.connectorTableMetadata = new ConnectorTableMetadata(tableName, columnMetadata);
}
/**
 * Reports the distribution of this system table.
 *
 * @return always {@code Distribution.SINGLE_COORDINATOR}
 */
@Override
public Distribution getDistribution()
{
    return Distribution.SINGLE_COORDINATOR;
}
/**
 * Returns the table metadata (name and output columns) assembled in the constructor.
 *
 * @return the metadata of this system table
 */
@Override
public ConnectorTableMetadata getTableMetadata()
{
    return connectorTableMetadata;
}
/**
 * Describes one output column per partition field.
 *
 * <p>Each column is named after the partition field and typed with the Presto equivalent of
 * the field transform's result type, computed from the type of the field's source column.
 *
 * @param fields partition fields, in the order the columns should appear
 * @param schema schema used to look up the type of each field's source column
 * @return column metadata in the same order as {@code fields}
 */
private List<ColumnMetadata> getPartitionColumnsMetadata(List<PartitionField> fields, Schema schema)
{
    return fields.stream()
            .map(field -> new ColumnMetadata(
                    field.name(),
                    toPrestoType(field.transform().getResultType(schema.findType(field.sourceId())), typeManager)))
            .collect(toImmutableList());
}
/**
 * Describes one column-metrics output column per given Iceberg column.
 *
 * <p>Each output column keeps the Iceberg column's name and has the row type
 * {@code row(min T, max T, null_count bigint)}, where {@code T} is the Presto type the
 * Iceberg column type converts to.
 *
 * @param columns Iceberg columns to report metrics for
 * @return column metadata in the same order as {@code columns}
 */
private List<ColumnMetadata> getColumnMetadata(List<Types.NestedField> columns)
{
    return columns.stream()
            .map(column -> {
                // min and max share the column's type, so convert it once.
                com.facebook.presto.common.type.Type prestoType = toPrestoType(column.type(), typeManager);
                return new ColumnMetadata(
                        column.name(),
                        RowType.from(ImmutableList.of(
                                new RowType.Field(Optional.of("min"), prestoType),
                                new RowType.Field(Optional.of("max"), prestoType),
                                new RowType.Field(Optional.of("null_count"), BIGINT))));
            })
            .collect(toImmutableList());
}
/**
 * Produces the rows of the partitions table for the configured snapshot.
 *
 * <p>When no snapshot id was supplied, an empty cursor with the result column types is
 * returned. Otherwise the table is scanned at that snapshot with column statistics included
 * and the per-partition aggregates are turned into a cursor.
 *
 * @param transactionHandle not used by this implementation
 * @param session not used by this implementation
 * @param constraint not used by this implementation; no rows are filtered here
 * @return a cursor over the per-partition rows
 */
@Override
public RecordCursor cursor(ConnectorTransactionHandle transactionHandle, ConnectorSession session, TupleDomain<Integer> constraint)
{
    // TODO instead of cursor use pageSource method.
    if (!snapshotId.isPresent()) {
        return new InMemoryRecordSet(resultTypes, ImmutableList.of()).cursor();
    }

    TableScan tableScan = icebergTable.newScan()
            .useSnapshot(snapshotId.get())
            .includeColumnStats();
    // NOTE(review): rows are laid out using the table's current spec, while getPartitions keys
    // files by each scan task's own spec — confirm this is intended for evolved partition specs.
    return buildRecordCursor(getPartitions(tableScan), icebergTable.spec().fields());
}
/**
 * Plans the scan and aggregates its data files into one {@code Partition} per partition value.
 *
 * <p>The first file seen for a partition value creates the {@code Partition} from that file's
 * record count, size, bounds, null counts and column sizes. Each later file with the same
 * partition value increments the file count, record count and size and updates the min, max
 * and null-count statistics.
 *
 * @param tableScan scan whose planned files are aggregated
 * @return aggregated partitions keyed by their wrapped partition struct
 * @throws UncheckedIOException if closing the planned-files iterable fails
 */
private Map<StructLikeWrapper, Partition> getPartitions(TableScan tableScan)
{
    try (CloseableIterable<FileScanTask> fileScanTasks = tableScan.planFiles()) {
        Map<StructLikeWrapper, Partition> partitions = new HashMap<>();
        for (FileScanTask fileScanTask : fileScanTasks) {
            DataFile dataFile = fileScanTask.file();
            Types.StructType structType = fileScanTask.spec().partitionType();
            StructLike partitionStruct = dataFile.partition();
            // The partition struct is wrapped so it can serve as the hash-map key.
            StructLikeWrapper partitionWrapper = StructLikeWrapper.forType(structType).set(partitionStruct);

            // The map never stores null values, so a null lookup means "not seen yet".
            Partition partition = partitions.get(partitionWrapper);
            if (partition == null) {
                // First file of this partition; presumably the constructor already counts
                // this file, since incrementFileCount() is only called for later files.
                partitions.put(partitionWrapper, new Partition(
                        idToTypeMapping,
                        nonPartitionPrimitiveColumns,
                        partitionStruct,
                        dataFile.recordCount(),
                        dataFile.fileSizeInBytes(),
                        toMap(dataFile.lowerBounds()),
                        toMap(dataFile.upperBounds()),
                        dataFile.nullValueCounts(),
                        dataFile.columnSizes()));
                continue;
            }

            partition.incrementFileCount();
            partition.incrementRecordCount(dataFile.recordCount());
            partition.incrementSize(dataFile.fileSizeInBytes());
            partition.updateMin(toMap(dataFile.lowerBounds()), dataFile.nullValueCounts(), dataFile.recordCount());
            partition.updateMax(toMap(dataFile.upperBounds()), dataFile.nullValueCounts(), dataFile.recordCount());
            partition.updateNullCount(dataFile.nullValueCounts());
        }
        return partitions;
    }
    catch (IOException e) {
        throw new UncheckedIOException(e);
    }
}
private RecordCursor buildRecordCursor(Map partitions, List partitionFields)
{
List partitionTypes = partitionTypes(partitionFields);
List extends Class>> partitionColumnClass = partitionTypes.stream()
.map(type -> type.typeId().javaClass())
.collect(toImmutableList());
int columnCounts = partitionColumnTypes.size() + 3 + columnMetricTypes.size();
ImmutableList.Builder> records = ImmutableList.builder();
for (Partition partition : partitions.values()) {
List
© 2015 - 2025 Weber Informatics LLC | Privacy Policy