com.facebook.presto.iceberg.FilesTable Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of presto-iceberg Show documentation
Presto - Iceberg Connector
The newest version!
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.iceberg;
import com.facebook.presto.common.Page;
import com.facebook.presto.common.predicate.TupleDomain;
import com.facebook.presto.common.type.ArrayType;
import com.facebook.presto.common.type.StandardTypes;
import com.facebook.presto.common.type.TypeManager;
import com.facebook.presto.common.type.TypeSignatureParameter;
import com.facebook.presto.iceberg.util.PageListBuilder;
import com.facebook.presto.spi.ColumnMetadata;
import com.facebook.presto.spi.ConnectorPageSource;
import com.facebook.presto.spi.ConnectorSession;
import com.facebook.presto.spi.ConnectorTableMetadata;
import com.facebook.presto.spi.FixedPageSource;
import com.facebook.presto.spi.PrestoException;
import com.facebook.presto.spi.SchemaTableName;
import com.facebook.presto.spi.SystemTable;
import com.facebook.presto.spi.connector.ConnectorTransactionHandle;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import io.airlift.slice.Slices;
import org.apache.iceberg.DataFile;
import org.apache.iceberg.FileScanTask;
import org.apache.iceberg.Schema;
import org.apache.iceberg.Table;
import org.apache.iceberg.TableScan;
import org.apache.iceberg.io.CloseableIterable;
import org.apache.iceberg.transforms.Transforms;
import org.apache.iceberg.types.Conversions;
import org.apache.iceberg.types.Type;
import org.apache.iceberg.types.Types;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import static com.facebook.presto.common.type.BigintType.BIGINT;
import static com.facebook.presto.common.type.IntegerType.INTEGER;
import static com.facebook.presto.common.type.VarbinaryType.VARBINARY;
import static com.facebook.presto.common.type.VarcharType.VARCHAR;
import static com.facebook.presto.iceberg.IcebergErrorCode.ICEBERG_FILESYSTEM_ERROR;
import static com.facebook.presto.iceberg.IcebergUtil.getTableScan;
import static com.facebook.presto.iceberg.util.PageListBuilder.forTable;
import static com.google.common.collect.ImmutableMap.toImmutableMap;
import static java.util.Objects.requireNonNull;
/**
 * Implementation of the Iceberg {@code "$files"} system table: exposes one row of
 * metadata per data file in the scanned snapshot (path, format, record count,
 * per-column statistics, split offsets, etc.).
 *
 * <p>NOTE(review): the published artifact this was extracted from had all generic
 * type parameters stripped (HTML-escaping artifact); they are restored here to the
 * types required by the Presto SPI, Guava, and Iceberg APIs in use.
 */
public class FilesTable
        implements SystemTable
{
    private final ConnectorTableMetadata tableMetadata;
    private final Table icebergTable;
    // Snapshot to read file metadata from; empty means the table's current snapshot
    // (resolution happens inside IcebergUtil.getTableScan).
    private final Optional<Long> snapshotId;

    /**
     * @param tableName    Presto-visible name of this system table
     * @param icebergTable underlying Iceberg table whose manifests are scanned
     * @param snapshotId   snapshot to scan, or empty for the current snapshot
     * @param typeManager  used to construct the parameterized MAP column types
     */
    public FilesTable(SchemaTableName tableName, Table icebergTable, Optional<Long> snapshotId, TypeManager typeManager)
    {
        this.icebergTable = requireNonNull(icebergTable, "icebergTable is null");
        tableMetadata = new ConnectorTableMetadata(requireNonNull(tableName, "tableName is null"),
                ImmutableList.<ColumnMetadata>builder()
                        .add(new ColumnMetadata("content", INTEGER))
                        .add(new ColumnMetadata("file_path", VARCHAR))
                        .add(new ColumnMetadata("file_format", VARCHAR))
                        .add(new ColumnMetadata("record_count", BIGINT))
                        .add(new ColumnMetadata("file_size_in_bytes", BIGINT))
                        // The four statistics maps are keyed by Iceberg field id (INTEGER) with BIGINT counts/sizes.
                        .add(new ColumnMetadata("column_sizes", typeManager.getParameterizedType(StandardTypes.MAP, ImmutableList.of(
                                TypeSignatureParameter.of(INTEGER.getTypeSignature()),
                                TypeSignatureParameter.of(BIGINT.getTypeSignature())))))
                        .add(new ColumnMetadata("value_counts", typeManager.getParameterizedType(StandardTypes.MAP, ImmutableList.of(
                                TypeSignatureParameter.of(INTEGER.getTypeSignature()),
                                TypeSignatureParameter.of(BIGINT.getTypeSignature())))))
                        .add(new ColumnMetadata("null_value_counts", typeManager.getParameterizedType(StandardTypes.MAP, ImmutableList.of(
                                TypeSignatureParameter.of(INTEGER.getTypeSignature()),
                                TypeSignatureParameter.of(BIGINT.getTypeSignature())))))
                        .add(new ColumnMetadata("nan_value_counts", typeManager.getParameterizedType(StandardTypes.MAP, ImmutableList.of(
                                TypeSignatureParameter.of(INTEGER.getTypeSignature()),
                                TypeSignatureParameter.of(BIGINT.getTypeSignature())))))
                        // Bounds are rendered human-readable, so the value type is VARCHAR rather than the column's native type.
                        .add(new ColumnMetadata("lower_bounds", typeManager.getParameterizedType(StandardTypes.MAP, ImmutableList.of(
                                TypeSignatureParameter.of(INTEGER.getTypeSignature()),
                                TypeSignatureParameter.of(VARCHAR.getTypeSignature())))))
                        .add(new ColumnMetadata("upper_bounds", typeManager.getParameterizedType(StandardTypes.MAP, ImmutableList.of(
                                TypeSignatureParameter.of(INTEGER.getTypeSignature()),
                                TypeSignatureParameter.of(VARCHAR.getTypeSignature())))))
                        .add(new ColumnMetadata("key_metadata", VARBINARY))
                        .add(new ColumnMetadata("split_offsets", new ArrayType(BIGINT)))
                        .add(new ColumnMetadata("equality_ids", new ArrayType(INTEGER)))
                        .build());
        this.snapshotId = requireNonNull(snapshotId, "snapshotId is null");
    }

    @Override
    public Distribution getDistribution()
    {
        // Metadata-only scan over manifests; a single coordinator produces all rows.
        return Distribution.SINGLE_COORDINATOR;
    }

    @Override
    public ConnectorTableMetadata getTableMetadata()
    {
        return tableMetadata;
    }

    @Override
    public ConnectorPageSource pageSource(ConnectorTransactionHandle transactionHandle, ConnectorSession session, TupleDomain<Integer> constraint)
    {
        // All rows are materialized eagerly; the constraint is not used for pruning here.
        return new FixedPageSource(buildPages(tableMetadata, icebergTable, snapshotId));
    }

    /**
     * Scans the table's manifests and builds one output row per data file.
     *
     * @throws PrestoException with {@code ICEBERG_FILESYSTEM_ERROR} if reading manifests fails
     */
    private static List<Page> buildPages(ConnectorTableMetadata tableMetadata, Table icebergTable, Optional<Long> snapshotId)
    {
        PageListBuilder pagesBuilder = forTable(tableMetadata);
        // includeColumnStats() is required so DataFile exposes the per-column statistics maps below.
        TableScan tableScan = getTableScan(TupleDomain.all(), snapshotId, icebergTable).includeColumnStats();
        Map<Integer, Type> idToTypeMap = getIdToTypeMap(icebergTable.schema());

        try (CloseableIterable<FileScanTask> fileScanTasks = tableScan.planFiles()) {
            for (FileScanTask fileScanTask : fileScanTasks) {
                DataFile dataFile = fileScanTask.file();
                pagesBuilder.beginRow();
                pagesBuilder.appendInteger(dataFile.content().id());
                pagesBuilder.appendVarchar(dataFile.path().toString());
                pagesBuilder.appendVarchar(dataFile.format().name());
                pagesBuilder.appendBigint(dataFile.recordCount());
                pagesBuilder.appendBigint(dataFile.fileSizeInBytes());
                // Each nullable column: checkNonNull appends a NULL cell itself when absent,
                // keeping the row aligned with the column list declared in the constructor.
                if (checkNonNull(dataFile.columnSizes(), pagesBuilder)) {
                    pagesBuilder.appendIntegerBigintMap(dataFile.columnSizes());
                }
                if (checkNonNull(dataFile.valueCounts(), pagesBuilder)) {
                    pagesBuilder.appendIntegerBigintMap(dataFile.valueCounts());
                }
                if (checkNonNull(dataFile.nullValueCounts(), pagesBuilder)) {
                    pagesBuilder.appendIntegerBigintMap(dataFile.nullValueCounts());
                }
                if (checkNonNull(dataFile.nanValueCounts(), pagesBuilder)) {
                    pagesBuilder.appendIntegerBigintMap(dataFile.nanValueCounts());
                }
                if (checkNonNull(dataFile.lowerBounds(), pagesBuilder)) {
                    // Decode each serialized bound with its field's type, then render it human-readable.
                    // Fields missing from the schema map (e.g. dropped columns) are skipped.
                    pagesBuilder.appendIntegerVarcharMap(dataFile.lowerBounds().entrySet().stream()
                            .filter(entry -> idToTypeMap.containsKey(entry.getKey()))
                            .collect(toImmutableMap(
                                    Map.Entry::getKey,
                                    entry -> Transforms.identity().toHumanString(idToTypeMap.get(entry.getKey()),
                                            Conversions.fromByteBuffer(idToTypeMap.get(entry.getKey()), entry.getValue())))));
                }
                if (checkNonNull(dataFile.upperBounds(), pagesBuilder)) {
                    pagesBuilder.appendIntegerVarcharMap(dataFile.upperBounds().entrySet().stream()
                            .filter(entry -> idToTypeMap.containsKey(entry.getKey()))
                            .collect(toImmutableMap(
                                    Map.Entry::getKey,
                                    entry -> Transforms.identity().toHumanString(idToTypeMap.get(entry.getKey()),
                                            Conversions.fromByteBuffer(idToTypeMap.get(entry.getKey()), entry.getValue())))));
                }
                if (checkNonNull(dataFile.keyMetadata(), pagesBuilder)) {
                    pagesBuilder.appendVarbinary(Slices.wrappedBuffer(dataFile.keyMetadata()));
                }
                if (checkNonNull(dataFile.splitOffsets(), pagesBuilder)) {
                    pagesBuilder.appendBigintArray(dataFile.splitOffsets());
                }
                if (checkNonNull(dataFile.equalityFieldIds(), pagesBuilder)) {
                    pagesBuilder.appendIntegerArray(dataFile.equalityFieldIds());
                }
                pagesBuilder.endRow();
            }
        }
        catch (IOException e) {
            throw new PrestoException(ICEBERG_FILESYSTEM_ERROR, "failed to read table files", e);
        }
        return pagesBuilder.build();
    }

    /**
     * Flattens the schema into a map from Iceberg field id to field type,
     * including fields nested inside structs/lists/maps.
     */
    private static Map<Integer, Type> getIdToTypeMap(Schema schema)
    {
        ImmutableMap.Builder<Integer, Type> idToTypeMap = ImmutableMap.builder();
        for (Types.NestedField field : schema.columns()) {
            populateIdToTypeMap(field, idToTypeMap);
        }
        return idToTypeMap.build();
    }

    // Recursively registers a field and all of its nested children by field id.
    private static void populateIdToTypeMap(Types.NestedField field, ImmutableMap.Builder<Integer, Type> idToTypeMap)
    {
        Type type = field.type();
        idToTypeMap.put(field.fieldId(), type);
        if (type instanceof Type.NestedType) {
            type.asNestedType().fields().forEach(child -> populateIdToTypeMap(child, idToTypeMap));
        }
    }

    /**
     * Returns true if {@code object} is non-null; otherwise appends a NULL cell
     * to the current row and returns false, so callers can skip the real append.
     */
    private static boolean checkNonNull(Object object, PageListBuilder pagesBuilder)
    {
        if (object == null) {
            pagesBuilder.appendNull();
            return false;
        }
        return true;
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy