com.facebook.presto.iceberg.optimizer.IcebergPlanOptimizer Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of presto-iceberg Show documentation
Show all versions of presto-iceberg Show documentation
Presto - Iceberg Connector
The newest version!
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.iceberg.optimizer;
import com.facebook.presto.common.Subfield;
import com.facebook.presto.common.predicate.Domain;
import com.facebook.presto.common.predicate.Range;
import com.facebook.presto.common.predicate.TupleDomain;
import com.facebook.presto.common.predicate.ValueSet;
import com.facebook.presto.common.type.TimestampType;
import com.facebook.presto.common.type.Type;
import com.facebook.presto.expressions.LogicalRowExpressions;
import com.facebook.presto.hive.SubfieldExtractor;
import com.facebook.presto.iceberg.IcebergAbstractMetadata;
import com.facebook.presto.iceberg.IcebergColumnHandle;
import com.facebook.presto.iceberg.IcebergTableHandle;
import com.facebook.presto.iceberg.IcebergTableLayoutHandle;
import com.facebook.presto.iceberg.IcebergTransactionManager;
import com.facebook.presto.iceberg.PartitionTransforms;
import com.facebook.presto.iceberg.PartitionTransforms.ColumnTransform;
import com.facebook.presto.spi.ColumnHandle;
import com.facebook.presto.spi.ConnectorPlanOptimizer;
import com.facebook.presto.spi.ConnectorPlanRewriter;
import com.facebook.presto.spi.ConnectorSession;
import com.facebook.presto.spi.ConnectorTableLayoutHandle;
import com.facebook.presto.spi.TableHandle;
import com.facebook.presto.spi.VariableAllocator;
import com.facebook.presto.spi.function.FunctionMetadataManager;
import com.facebook.presto.spi.function.StandardFunctionResolution;
import com.facebook.presto.spi.plan.FilterNode;
import com.facebook.presto.spi.plan.PlanNode;
import com.facebook.presto.spi.plan.PlanNodeIdAllocator;
import com.facebook.presto.spi.plan.TableScanNode;
import com.facebook.presto.spi.relation.DomainTranslator;
import com.facebook.presto.spi.relation.RowExpression;
import com.facebook.presto.spi.relation.RowExpressionService;
import com.google.common.base.Predicates;
import com.google.common.base.VerifyException;
import com.google.common.collect.ImmutableBiMap;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Maps;
import org.apache.iceberg.PartitionField;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.Table;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;
import static com.facebook.presto.common.Utils.nativeValueToBlock;
import static com.facebook.presto.expressions.LogicalRowExpressions.FALSE_CONSTANT;
import static com.facebook.presto.expressions.LogicalRowExpressions.TRUE_CONSTANT;
import static com.facebook.presto.iceberg.IcebergAbstractMetadata.toSubfield;
import static com.facebook.presto.iceberg.IcebergPageSink.adjustTimestampForPartitionTransform;
import static com.facebook.presto.iceberg.IcebergSessionProperties.isPushdownFilterEnabled;
import static com.facebook.presto.iceberg.IcebergTableType.DATA;
import static com.facebook.presto.iceberg.IcebergUtil.getAdjacentValue;
import static com.facebook.presto.iceberg.IcebergUtil.getIcebergTable;
import static com.facebook.presto.iceberg.IcebergUtil.getPartitionSpecsIncludingValidData;
import static com.facebook.presto.spi.ConnectorPlanRewriter.rewriteWith;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Verify.verify;
import static com.google.common.collect.ImmutableMap.toImmutableMap;
import static java.util.Objects.requireNonNull;
import static java.util.stream.Collectors.toList;
public class IcebergPlanOptimizer
implements ConnectorPlanOptimizer
{
private final StandardFunctionResolution functionResolution;
private final RowExpressionService rowExpressionService;
private final FunctionMetadataManager functionMetadataManager;
private final IcebergTransactionManager transactionManager;

/**
 * Creates the optimizer with the services it later hands to the filter pushdown rewriter.
 *
 * @param functionResolution resolver for standard functions; must not be null
 * @param rowExpressionService source of the domain translator, expression optimizer and determinism evaluator; must not be null
 * @param functionMetadataManager function metadata lookup; must not be null
 * @param transactionManager used to look up the connector metadata of a table handle's transaction; must not be null
 * @throws NullPointerException if any argument is null
 */
IcebergPlanOptimizer(
        StandardFunctionResolution functionResolution,
        RowExpressionService rowExpressionService,
        FunctionMetadataManager functionMetadataManager,
        IcebergTransactionManager transactionManager)
{
    // The checks run in the same order as before, so the first reported null argument is unchanged.
    this.functionResolution = Objects.requireNonNull(functionResolution, "functionResolution is null");
    this.rowExpressionService = Objects.requireNonNull(rowExpressionService, "rowExpressionService is null");
    this.functionMetadataManager = Objects.requireNonNull(functionMetadataManager, "functionMetadataManager is null");
    this.transactionManager = Objects.requireNonNull(transactionManager, "transactionManager is null");
}
/**
 * Rewrites the given sub-plan with {@code FilterPushdownRewriter}, unless the
 * pushdown-filter session property is enabled, in which case the plan is returned untouched.
 *
 * @param maxSubplan the sub-plan handed to this connector optimizer
 * @param session the session whose properties decide whether the rewrite runs
 * @param variableAllocator not used by this optimizer
 * @param idAllocator allocator passed on to the rewriter for any plan nodes it creates
 * @return the rewritten plan, or {@code maxSubplan} itself when pushdown filter is enabled
 */
@Override
public PlanNode optimize(PlanNode maxSubplan, ConnectorSession session, VariableAllocator variableAllocator, PlanNodeIdAllocator idAllocator)
{
    if (!isPushdownFilterEnabled(session)) {
        FilterPushdownRewriter rewriter = new FilterPushdownRewriter(
                functionResolution,
                rowExpressionService,
                functionMetadataManager,
                transactionManager,
                idAllocator,
                session);
        return rewriteWith(rewriter, maxSubplan);
    }
    return maxSubplan;
}
/**
 * Plan rewriter that looks for a {@link FilterNode} sitting directly on top of a
 * {@link TableScanNode} of an Iceberg data table and moves as much of the filter as
 * possible into the table scan (its layout handle and its constraints).
 *
 * <p>The type arguments on the rewriter, its context and the intermediate collections are
 * required: with raw types the lambdas below would receive {@code Object} parameters and
 * calls such as {@code subfield.getPath()} would not compile.
 */
private static class FilterPushdownRewriter
        extends ConnectorPlanRewriter<Void>
{
    private final ConnectorSession session;
    private final RowExpressionService rowExpressionService;
    private final StandardFunctionResolution functionResolution;
    private final FunctionMetadataManager functionMetadataManager;
    private final PlanNodeIdAllocator idAllocator;
    private final IcebergTransactionManager transactionManager;

    /**
     * @throws NullPointerException if any argument is null
     */
    public FilterPushdownRewriter(
            StandardFunctionResolution functionResolution,
            RowExpressionService rowExpressionService,
            FunctionMetadataManager functionMetadataManager,
            IcebergTransactionManager transactionManager,
            PlanNodeIdAllocator idAllocator,
            ConnectorSession session)
    {
        // Fail fast on null collaborators, consistent with the enclosing optimizer's constructor.
        this.functionResolution = requireNonNull(functionResolution, "functionResolution is null");
        this.rowExpressionService = requireNonNull(rowExpressionService, "rowExpressionService is null");
        this.functionMetadataManager = requireNonNull(functionMetadataManager, "functionMetadataManager is null");
        this.transactionManager = requireNonNull(transactionManager, "transactionManager is null");
        this.idAllocator = requireNonNull(idAllocator, "idAllocator is null");
        this.session = requireNonNull(session, "session is null");
    }

    /**
     * Pushes the filter predicate into the underlying table scan when the filter's source is a
     * scan of an Iceberg {@code DATA} table; any other filter is delegated to {@code visitPlan}.
     *
     * <p>The predicate is decomposed into a tuple domain plus a remaining expression. Domains on
     * whole columns are intersected into the scan's current constraint and into the layout
     * handle. Domains on subfields, domains on columns the partitioning cannot enforce and the
     * remaining expression are recombined into a residual filter.
     *
     * @return <ul>
     * <li>the new table scan alone, if simplifying the column domains did not change them and the
     * residual filter is {@code TRUE};</li>
     * <li>a new filter with the residual expression over the new scan, if simplification did not
     * change the domains;</li>
     * <li>a new filter with the <em>original</em> predicate over the new scan otherwise, because
     * the simplified domains are not equal to the extracted ones.</li>
     * </ul>
     * @throws IllegalArgumentException if the filter predicate is the constant {@code FALSE}
     */
    @Override
    public PlanNode visitFilter(FilterNode filter, RewriteContext<Void> context)
    {
        if (!(filter.getSource() instanceof TableScanNode)) {
            return visitPlan(filter, context);
        }

        TableScanNode tableScan = (TableScanNode) filter.getSource();
        TableHandle handle = tableScan.getTable();
        IcebergTableHandle tableHandle = (IcebergTableHandle) handle.getConnectorHandle();
        if (tableHandle.getIcebergTableName().getTableType() != DATA) {
            return visitPlan(filter, context);
        }

        Map<String, IcebergColumnHandle> nameToColumnHandlesMapping = tableScan.getAssignments().entrySet().stream()
                .collect(Collectors.toMap(e -> e.getKey().getName(), e -> (IcebergColumnHandle) e.getValue()));
        // ImmutableBiMap requires unique values, so this assumes every column handle is assigned to exactly one variable.
        Map<IcebergColumnHandle, String> columnHandleToNameMapping = ImmutableBiMap.copyOf(nameToColumnHandlesMapping).inverse();

        RowExpression filterPredicate = filter.getPredicate();
        checkArgument(!filterPredicate.equals(FALSE_CONSTANT), "Filter expression 'FALSE' should not be left to handle here");

        // One extractor serves both the decomposition and the translation back to row expressions.
        SubfieldExtractor subfieldExtractor = new SubfieldExtractor(functionResolution, rowExpressionService.getExpressionOptimizer(), session);
        DomainTranslator domainTranslator = rowExpressionService.getDomainTranslator();

        // TODO: we should optimize the filter expression
        DomainTranslator.ExtractionResult<Subfield> decomposedFilter = domainTranslator
                .fromPredicate(session, filterPredicate, subfieldExtractor.toColumnExtractor());

        // Only pushdown the range filters which apply to entire columns, because iceberg does not accept the filters on the subfields in nested structures.
        // NOTE(review): this relies on TupleDomain.transform skipping keys that map to null - confirm.
        TupleDomain<IcebergColumnHandle> entireColumnDomain = decomposedFilter.getTupleDomain()
                .transform(subfield -> subfield.getPath().isEmpty() ? subfield.getRootName() : null)
                .transform(nameToColumnHandlesMapping::get);

        IcebergAbstractMetadata metadata = (IcebergAbstractMetadata) transactionManager.get(handle.getTransaction());
        Table icebergTable = getIcebergTable(metadata, session, tableHandle.getSchemaTableName());

        // Get predicate expression on subfield
        Map<String, Type> columnTypes = nameToColumnHandlesMapping.entrySet().stream()
                .collect(toImmutableMap(entry -> entry.getKey(), entry -> entry.getValue().getType()));
        TupleDomain<RowExpression> subfieldTupleDomain = decomposedFilter.getTupleDomain()
                .transform(subfield -> subfield.getPath().isEmpty() ? null : subfield)
                .transform(subfield -> subfieldExtractor.toRowExpression(subfield, columnTypes.get(subfield.getRootName())));
        RowExpression subfieldPredicate = domainTranslator.toPredicate(subfieldTupleDomain);

        // Get partition specs that really need to be checked
        Set<Integer> partitionSpecIds = getPartitionSpecsIncludingValidData(icebergTable, tableHandle.getIcebergTableName().getSnapshotId());
        // getEnforcedColumns throws a VerifyException when the tuple domain has no domains,
        // so the Optional.get() on getDomains() below is only reached when domains are present.
        Set<IcebergColumnHandle> enforcedColumns = getEnforcedColumns(
                icebergTable,
                partitionSpecIds,
                entireColumnDomain,
                session);

        Map<ColumnHandle, Domain> columnDomains = entireColumnDomain
                .transform(icebergColumnHandle -> (ColumnHandle) icebergColumnHandle)
                .getDomains().get();

        // Get predicate tuple domain on the columns that could be enforced by iceberg table itself
        TupleDomain<ColumnHandle> identityPartitionColumnPredicate = TupleDomain.withColumnDomains(
                Maps.filterKeys(columnDomains, Predicates.in(enforcedColumns)));

        // Get predicate expression on entire columns that could not be enforced by iceberg table
        TupleDomain<RowExpression> nonPartitionColumnPredicate = TupleDomain.withColumnDomains(
                Maps.filterKeys(columnDomains, Predicates.not(Predicates.in(enforcedColumns))))
                .transform(columnHandle -> new Subfield(columnHandleToNameMapping.get(columnHandle), ImmutableList.of()))
                .transform(subfield -> subfieldExtractor.toRowExpression(subfield, columnTypes.get(subfield.getRootName())));
        RowExpression nonPartitionColumn = domainTranslator.toPredicate(nonPartitionColumnPredicate);

        // Combine all the rest predicate expressions except predicate on identity partition columns
        LogicalRowExpressions logicalRowExpressions = new LogicalRowExpressions(
                rowExpressionService.getDeterminismEvaluator(),
                functionResolution,
                functionMetadataManager);
        RowExpression remainingFilterExpression = logicalRowExpressions.combineConjuncts(
                ImmutableList.of(
                        decomposedFilter.getRemainingExpression(),
                        subfieldPredicate,
                        nonPartitionColumn));

        // Simplify call is required because iceberg does not support a large value list for IN predicate
        TupleDomain<IcebergColumnHandle> simplifiedColumnDomain = entireColumnDomain.simplify();
        boolean predicateNotChangedBySimplification = simplifiedColumnDomain.equals(entireColumnDomain);

        // Rebuild the layout handle (if the scan has one) with the pushed-down domains intersected in;
        // every other layout property is copied from the existing handle.
        Optional<ConnectorTableLayoutHandle> newConnectorTableLayoutHandle = handle.getLayout().map(IcebergTableLayoutHandle.class::cast)
                .map(icebergTableLayoutHandle -> new IcebergTableLayoutHandle(
                        icebergTableLayoutHandle.getPartitionColumns().stream()
                                .map(IcebergColumnHandle.class::cast).collect(toList()),
                        icebergTableLayoutHandle.getDataColumns(),
                        simplifiedColumnDomain.transform(columnHandle -> toSubfield(columnHandle))
                                .intersect(icebergTableLayoutHandle.getDomainPredicate()),
                        icebergTableLayoutHandle.getRemainingPredicate(),
                        icebergTableLayoutHandle.getPredicateColumns(),
                        icebergTableLayoutHandle.getRequestedColumns(),
                        icebergTableLayoutHandle.isPushdownFilterEnabled(),
                        identityPartitionColumnPredicate.simplify()
                                .intersect(icebergTableLayoutHandle.getPartitionColumnPredicate()),
                        icebergTableLayoutHandle.getPartitions(),
                        icebergTableLayoutHandle.getTable()));

        // The new scan keeps the original node id. Its enforced constraint is only tightened when
        // simplification left the domains untouched; otherwise the original predicate is re-applied below.
        TableScanNode newTableScan = new TableScanNode(
                tableScan.getSourceLocation(),
                tableScan.getId(),
                new TableHandle(handle.getConnectorId(), handle.getConnectorHandle(), handle.getTransaction(), newConnectorTableLayoutHandle),
                tableScan.getOutputVariables(),
                tableScan.getAssignments(),
                simplifiedColumnDomain.transform(ColumnHandle.class::cast)
                        .intersect(tableScan.getCurrentConstraint()),
                predicateNotChangedBySimplification ?
                        identityPartitionColumnPredicate.intersect(tableScan.getEnforcedConstraint()) :
                        tableScan.getEnforcedConstraint());

        if (!predicateNotChangedBySimplification) {
            // Simplification changed the domains, so keep the complete original predicate on top of the scan.
            return new FilterNode(filter.getSourceLocation(), idAllocator.getNextId(), newTableScan, filterPredicate);
        }
        if (TRUE_CONSTANT.equals(remainingFilterExpression)) {
            return newTableScan;
        }
        return new FilterNode(filter.getSourceLocation(), idAllocator.getNextId(), newTableScan, remainingFilterExpression);
    }
}
/**
 * Returns the columns of {@code entireColumnDomain} whose domain can be enforced in every one
 * of the selected partition specs, as decided by {@code canEnforceColumnConstraintInSpecs}.
 *
 * @param table the Iceberg table whose partition specs are inspected
 * @param partitionSpecIds ids of the partition specs to take into account
 * @param entireColumnDomain per-column domains to test
 * @param session the connector session, passed through to the per-column check
 * @return the enforceable columns, in the iteration order of the domain map
 * @throws VerifyException if {@code entireColumnDomain} carries no domains
 */
public static Set<IcebergColumnHandle> getEnforcedColumns(
        Table table,
        Set<Integer> partitionSpecIds,
        TupleDomain<IcebergColumnHandle> entireColumnDomain,
        ConnectorSession session)
{
    Map<IcebergColumnHandle, Domain> domains = entireColumnDomain.getDomains()
            .orElseThrow(() -> new VerifyException("No domains"));

    ImmutableSet.Builder<IcebergColumnHandle> result = ImmutableSet.builder();
    for (Map.Entry<IcebergColumnHandle, Domain> entry : domains.entrySet()) {
        if (canEnforceColumnConstraintInSpecs(table, partitionSpecIds, entry.getKey(), entry.getValue(), session)) {
            result.add(entry.getKey());
        }
    }
    return result.build();
}
/**
 * Tells whether the constraint on a column can be enforced by every partition spec of the
 * table whose id is contained in {@code partitionSpecIds}.
 *
 * <p>Because the check is an {@code allMatch}, the result is {@code true} when none of the
 * table's specs is selected by {@code partitionSpecIds}.
 *
 * @param table the Iceberg table providing the partition specs
 * @param partitionSpecIds ids of the partition specs that must all be able to enforce the constraint
 * @param columnHandle the constrained column
 * @param domain the domain the column is constrained to
 * @param session the connector session, passed through to the per-spec check
 * @return {@code true} if every selected spec can enforce the constraint
 */
public static boolean canEnforceColumnConstraintInSpecs(
        Table table,
        Set<Integer> partitionSpecIds,
        IcebergColumnHandle columnHandle,
        Domain domain,
        ConnectorSession session)
{
    return table.specs().values().stream()
            .filter(partitionSpec -> partitionSpecIds.contains(partitionSpec.specId()))
            .allMatch(spec -> canEnforceConstraintWithinPartitioningSpec(spec, columnHandle, domain, session));
}
/**
 * Tells whether at least one partition field of {@code spec} that is sourced from the given
 * column can enforce the domain. Returns {@code false} when the spec has no field for the column.
 */
private static boolean canEnforceConstraintWithinPartitioningSpec(
        PartitionSpec spec,
        IcebergColumnHandle column,
        Domain domain,
        ConnectorSession session)
{
    // anyMatch stops at the first field that can enforce the constraint, like the early return it replaces.
    return spec.getFieldsBySourceId(column.getId()).stream()
            .anyMatch(partitionField -> canEnforceConstraintWithPartitionField(partitionField, column, domain, session));
}
/**
 * Tells whether a single partition field can enforce the domain placed on its source column.
 *
 * <ul>
 * <li>void transform: never;</li>
 * <li>identity transform: always;</li>
 * <li>any other transform: only if the domain is expressed as ranges and every range passes
 * {@code canEnforceRangeWithPartitioningField}, or if the domain is of the all-or-none kind.
 * A domain made of discrete values is never considered enforceable.</li>
 * </ul>
 */
private static boolean canEnforceConstraintWithPartitionField(
        PartitionField field,
        IcebergColumnHandle column,
        Domain domain,
        ConnectorSession session)
{
    // Useless for filtering.
    if (field.transform().isVoid()) {
        return false;
    }
    // A predicate on an identity partitioning column can always be enforced.
    if (field.transform().isIdentity()) {
        return true;
    }

    ColumnTransform columnTransform = PartitionTransforms.getColumnTransform(field, column.getType());
    return domain.getValues().getValuesProcessor().transform(
            ranges -> ranges.getOrderedRanges().stream()
                    .allMatch(candidate -> canEnforceRangeWithPartitioningField(field, columnTransform, candidate, session)),
            discreteValues -> false,
            allOrNone -> true);
}
private static boolean canEnforceRangeWithPartitioningField(
PartitionField field,
ColumnTransform transform,
Range range,
ConnectorSession session)
{
if (transform.getTransformName().startsWith("bucket")) {
// bucketing transform could not be enforced
return false;
}
Type type = range.getType();
if (!type.isOrderable()) {
return false;
}
if (!range.isLowUnbounded()) {
Object boundedValue = range.getLowBoundedValue();
Optional
© 2015 - 2025 Weber Informatics LLC | Privacy Policy