// com.facebook.presto.tpch.TpchMetadata - Maven / Gradle / Ivy
// The newest version!
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.tpch;
import com.facebook.presto.common.block.SortOrder;
import com.facebook.presto.common.predicate.Domain;
import com.facebook.presto.common.predicate.NullableValue;
import com.facebook.presto.common.predicate.TupleDomain;
import com.facebook.presto.common.type.Type;
import com.facebook.presto.common.type.VarcharType;
import com.facebook.presto.spi.ColumnHandle;
import com.facebook.presto.spi.ColumnMetadata;
import com.facebook.presto.spi.ConnectorSession;
import com.facebook.presto.spi.ConnectorTableHandle;
import com.facebook.presto.spi.ConnectorTableLayout;
import com.facebook.presto.spi.ConnectorTableLayoutHandle;
import com.facebook.presto.spi.ConnectorTableLayoutResult;
import com.facebook.presto.spi.ConnectorTableMetadata;
import com.facebook.presto.spi.ConnectorTablePartitioning;
import com.facebook.presto.spi.Constraint;
import com.facebook.presto.spi.LocalProperty;
import com.facebook.presto.spi.SchemaTableName;
import com.facebook.presto.spi.SchemaTablePrefix;
import com.facebook.presto.spi.SortingProperty;
import com.facebook.presto.spi.connector.ConnectorMetadata;
import com.facebook.presto.spi.statistics.ColumnStatistics;
import com.facebook.presto.spi.statistics.ComputedStatistics;
import com.facebook.presto.spi.statistics.DoubleRange;
import com.facebook.presto.spi.statistics.Estimate;
import com.facebook.presto.spi.statistics.TableStatistics;
import com.facebook.presto.spi.statistics.TableStatisticsMetadata;
import com.facebook.presto.tpch.statistics.ColumnStatisticsData;
import com.facebook.presto.tpch.statistics.StatisticsEstimator;
import com.facebook.presto.tpch.statistics.TableStatisticsData;
import com.facebook.presto.tpch.statistics.TableStatisticsDataRepository;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.datatype.jdk8.Jdk8Module;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import io.airlift.slice.Slice;
import io.airlift.slice.Slices;
import io.airlift.tpch.Distributions;
import io.airlift.tpch.LineItemColumn;
import io.airlift.tpch.OrderColumn;
import io.airlift.tpch.OrderGenerator;
import io.airlift.tpch.PartColumn;
import io.airlift.tpch.TpchColumn;
import io.airlift.tpch.TpchColumnType;
import io.airlift.tpch.TpchEntity;
import io.airlift.tpch.TpchTable;
import java.time.LocalDate;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.function.Function;
import java.util.stream.Collectors;
import static com.facebook.presto.common.type.BigintType.BIGINT;
import static com.facebook.presto.common.type.DateType.DATE;
import static com.facebook.presto.common.type.DoubleType.DOUBLE;
import static com.facebook.presto.common.type.IntegerType.INTEGER;
import static com.facebook.presto.common.type.VarcharType.createVarcharType;
import static com.facebook.presto.spi.statistics.SourceInfo.ConfidenceLevel.HIGH;
import static com.facebook.presto.spi.statistics.TableStatisticType.ROW_COUNT;
import static com.facebook.presto.tpch.util.PredicateUtils.convertToPredicate;
import static com.facebook.presto.tpch.util.PredicateUtils.filterOutColumnFromPredicate;
import static com.google.common.collect.ImmutableMap.toImmutableMap;
import static com.google.common.collect.ImmutableSet.toImmutableSet;
import static com.google.common.collect.Maps.asMap;
import static io.airlift.tpch.OrderColumn.ORDER_STATUS;
import static java.util.Collections.emptyList;
import static java.util.Collections.emptyMap;
import static java.util.Objects.requireNonNull;
import static java.util.stream.Collectors.toList;
import static java.util.stream.Collectors.toSet;
public class TpchMetadata
implements ConnectorMetadata
{
public static final String TINY_SCHEMA_NAME = "tiny";
public static final double TINY_SCALE_FACTOR = 0.01;
public static final List SCHEMA_NAMES = ImmutableList.of(
TINY_SCHEMA_NAME, "sf1", "sf100", "sf300", "sf1000", "sf3000", "sf10000", "sf30000", "sf100000");
public static final String ROW_NUMBER_COLUMN_NAME = "row_number";
private static final Set ORDER_STATUS_VALUES = ImmutableSet.of("F", "O", "P").stream()
.map(Slices::utf8Slice)
.collect(toImmutableSet());
private static final Set ORDER_STATUS_NULLABLE_VALUES = ORDER_STATUS_VALUES.stream()
.map(value -> new NullableValue(getPrestoType(OrderColumn.ORDER_STATUS), value))
.collect(toSet());
private static final Set PART_TYPE_VALUES = Distributions.getDefaultDistributions().getPartTypes().getValues().stream()
.map(Slices::utf8Slice)
.collect(toImmutableSet());
private static final Set PART_TYPE_NULLABLE_VALUES = PART_TYPE_VALUES.stream()
.map(value -> new NullableValue(getPrestoType(PartColumn.TYPE), value))
.collect(toSet());
private static final Set PART_CONTAINER_VALUES = Distributions.getDefaultDistributions().getPartContainers().getValues().stream()
.map(Slices::utf8Slice)
.collect(toImmutableSet());
private static final Set PART_CONTAINER_NULLABLE_VALUES = PART_CONTAINER_VALUES.stream()
.map(value -> new NullableValue(getPrestoType(PartColumn.CONTAINER), value))
.collect(toSet());
private final String connectorId;
private final Set tableNames;
private final ColumnNaming columnNaming;
private final StatisticsEstimator statisticsEstimator;
private final boolean predicatePushdownEnabled;
private final boolean partitioningEnabled;
public TpchMetadata(String connectorId)
{
this(connectorId, ColumnNaming.SIMPLIFIED, true, true);
}
public TpchMetadata(String connectorId, ColumnNaming columnNaming, boolean predicatePushdownEnabled, boolean partitioningEnabled)
{
ImmutableSet.Builder tableNames = ImmutableSet.builder();
for (TpchTable> tpchTable : TpchTable.getTables()) {
tableNames.add(tpchTable.getTableName());
}
this.tableNames = tableNames.build();
this.connectorId = connectorId;
this.columnNaming = columnNaming;
this.predicatePushdownEnabled = predicatePushdownEnabled;
this.partitioningEnabled = partitioningEnabled;
this.statisticsEstimator = createStatisticsEstimator();
}
private static StatisticsEstimator createStatisticsEstimator()
{
ObjectMapper objectMapper = new ObjectMapper()
.registerModule(new Jdk8Module());
TableStatisticsDataRepository tableStatisticsDataRepository = new TableStatisticsDataRepository(objectMapper);
return new StatisticsEstimator(tableStatisticsDataRepository);
}
@Override
public List listSchemaNames(ConnectorSession session)
{
return SCHEMA_NAMES;
}
@Override
public TpchTableHandle getTableHandle(ConnectorSession session, SchemaTableName tableName)
{
requireNonNull(tableName, "tableName is null");
if (!tableNames.contains(tableName.getTableName())) {
return null;
}
// parse the scale factor
double scaleFactor = schemaNameToScaleFactor(tableName.getSchemaName());
if (scaleFactor < 0) {
return null;
}
return new TpchTableHandle(tableName.getTableName(), scaleFactor);
}
@Override
public ConnectorTableHandle getTableHandleForStatisticsCollection(ConnectorSession session, SchemaTableName tableName, Map analyzeProperties)
{
return getTableHandle(session, tableName);
}
@Override
public List getTableLayouts(
ConnectorSession session,
ConnectorTableHandle table,
Constraint constraint,
Optional> desiredColumns)
{
TpchTableHandle tableHandle = (TpchTableHandle) table;
Optional tablePartitioning = Optional.empty();
Optional> partitioningColumns = Optional.empty();
List> localProperties = ImmutableList.of();
TupleDomain predicate = TupleDomain.all();
TupleDomain unenforcedConstraint = constraint.getSummary();
Map columns = getColumnHandles(session, tableHandle);
if (tableHandle.getTableName().equals(TpchTable.ORDERS.getTableName())) {
if (partitioningEnabled) {
ColumnHandle orderKeyColumn = columns.get(columnNaming.getName(OrderColumn.ORDER_KEY));
tablePartitioning = Optional.of(new ConnectorTablePartitioning(
new TpchPartitioningHandle(
TpchTable.ORDERS.getTableName(),
calculateTotalRows(OrderGenerator.SCALE_BASE, tableHandle.getScaleFactor())),
ImmutableList.of(orderKeyColumn)));
partitioningColumns = Optional.of(ImmutableSet.of(orderKeyColumn));
localProperties = ImmutableList.of(new SortingProperty<>(orderKeyColumn, SortOrder.ASC_NULLS_FIRST));
}
if (predicatePushdownEnabled) {
predicate = toTupleDomain(ImmutableMap.of(
toColumnHandle(OrderColumn.ORDER_STATUS),
filterValues(ORDER_STATUS_NULLABLE_VALUES, OrderColumn.ORDER_STATUS, constraint)));
unenforcedConstraint = filterOutColumnFromPredicate(constraint.getSummary(), toColumnHandle(OrderColumn.ORDER_STATUS));
}
}
else if (predicatePushdownEnabled && tableHandle.getTableName().equals(TpchTable.PART.getTableName())) {
predicate = toTupleDomain(ImmutableMap.of(
toColumnHandle(PartColumn.CONTAINER),
filterValues(PART_CONTAINER_NULLABLE_VALUES, PartColumn.CONTAINER, constraint),
toColumnHandle(PartColumn.TYPE),
filterValues(PART_TYPE_NULLABLE_VALUES, PartColumn.TYPE, constraint)));
unenforcedConstraint = filterOutColumnFromPredicate(constraint.getSummary(), toColumnHandle(PartColumn.CONTAINER));
unenforcedConstraint = filterOutColumnFromPredicate(unenforcedConstraint, toColumnHandle(PartColumn.TYPE));
}
else if (tableHandle.getTableName().equals(TpchTable.LINE_ITEM.getTableName())) {
if (partitioningEnabled) {
ColumnHandle orderKeyColumn = columns.get(columnNaming.getName(LineItemColumn.ORDER_KEY));
tablePartitioning = Optional.of(new ConnectorTablePartitioning(
new TpchPartitioningHandle(
TpchTable.ORDERS.getTableName(),
calculateTotalRows(OrderGenerator.SCALE_BASE, tableHandle.getScaleFactor())),
ImmutableList.of(orderKeyColumn)));
partitioningColumns = Optional.of(ImmutableSet.of(orderKeyColumn));
localProperties = ImmutableList.of(
new SortingProperty<>(orderKeyColumn, SortOrder.ASC_NULLS_FIRST),
new SortingProperty<>(columns.get(columnNaming.getName(LineItemColumn.LINE_NUMBER)), SortOrder.ASC_NULLS_FIRST));
}
}
ConnectorTableLayout layout = new ConnectorTableLayout(
new TpchTableLayoutHandle(tableHandle, predicate),
Optional.empty(),
predicate, // TODO: conditionally return well-known properties (e.g., orderkey > 0, etc)
tablePartitioning,
partitioningColumns,
Optional.empty(),
localProperties);
return ImmutableList.of(new ConnectorTableLayoutResult(layout, unenforcedConstraint));
}
private Set filterValues(Set nullableValues, TpchColumn> column, Constraint constraint)
{
return nullableValues.stream()
.filter(convertToPredicate(constraint.getSummary(), toColumnHandle(column)))
.filter(value -> !constraint.predicate().isPresent() || constraint.predicate().get().test(ImmutableMap.of(toColumnHandle(column), value)))
.collect(toSet());
}
@Override
public ConnectorTableLayout getTableLayout(ConnectorSession session, ConnectorTableLayoutHandle handle)
{
TpchTableLayoutHandle layout = (TpchTableLayoutHandle) handle;
// tables in this connector have a single layout
return getTableLayouts(session, layout.getTable(), Constraint.alwaysTrue(), Optional.empty())
.get(0)
.getTableLayout();
}
@Override
public ConnectorTableMetadata getTableMetadata(ConnectorSession session, ConnectorTableHandle tableHandle)
{
TpchTableHandle tpchTableHandle = (TpchTableHandle) tableHandle;
TpchTable> tpchTable = TpchTable.getTable(tpchTableHandle.getTableName());
String schemaName = scaleFactorSchemaName(tpchTableHandle.getScaleFactor());
return getTableMetadata(schemaName, tpchTable, columnNaming);
}
private static ConnectorTableMetadata getTableMetadata(String schemaName, TpchTable> tpchTable, ColumnNaming columnNaming)
{
ImmutableList.Builder columns = ImmutableList.builder();
for (TpchColumn extends TpchEntity> column : tpchTable.getColumns()) {
columns.add(new ColumnMetadata(columnNaming.getName(column), getPrestoType(column), false, null, null, false, emptyMap()));
}
columns.add(new ColumnMetadata(ROW_NUMBER_COLUMN_NAME, BIGINT, null, true));
SchemaTableName tableName = new SchemaTableName(schemaName, tpchTable.getTableName());
return new ConnectorTableMetadata(tableName, columns.build());
}
@Override
public Map getColumnHandles(ConnectorSession session, ConnectorTableHandle tableHandle)
{
ImmutableMap.Builder builder = ImmutableMap.builder();
for (ColumnMetadata columnMetadata : getTableMetadata(session, tableHandle).getColumns()) {
builder.put(columnMetadata.getName(), new TpchColumnHandle(columnMetadata.getName(), columnMetadata.getType()));
}
return builder.build();
}
@Override
public Map> listTableColumns(ConnectorSession session, SchemaTablePrefix prefix)
{
ImmutableMap.Builder> tableColumns = ImmutableMap.builder();
for (String schemaName : getSchemaNames(session, Optional.ofNullable(prefix.getSchemaName()))) {
for (TpchTable> tpchTable : TpchTable.getTables()) {
if (prefix.getTableName() == null || tpchTable.getTableName().equals(prefix.getTableName())) {
ConnectorTableMetadata tableMetadata = getTableMetadata(schemaName, tpchTable, columnNaming);
tableColumns.put(new SchemaTableName(schemaName, tpchTable.getTableName()), tableMetadata.getColumns());
}
}
}
return tableColumns.build();
}
@Override
public TableStatistics getTableStatistics(ConnectorSession session, ConnectorTableHandle tableHandle, Optional tableLayoutHandle, List columnHandles, Constraint constraint)
{
TpchTableHandle tpchTableHandle = (TpchTableHandle) tableHandle;
String tableName = tpchTableHandle.getTableName();
TpchTable> tpchTable = TpchTable.getTable(tableName);
Map, List