/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.trino.sql.planner.optimizations;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Lists;
import io.trino.Session;
import io.trino.cost.TableStatsProvider;
import io.trino.execution.querystats.PlanOptimizersStatsCollector;
import io.trino.execution.warnings.WarningCollector;
import io.trino.spi.connector.ConstantProperty;
import io.trino.spi.connector.GroupingProperty;
import io.trino.spi.connector.LocalProperty;
import io.trino.spi.connector.WriterScalingOptions;
import io.trino.sql.PlannerContext;
import io.trino.sql.planner.Partitioning;
import io.trino.sql.planner.PartitioningHandle;
import io.trino.sql.planner.PartitioningScheme;
import io.trino.sql.planner.PlanNodeIdAllocator;
import io.trino.sql.planner.Symbol;
import io.trino.sql.planner.SymbolAllocator;
import io.trino.sql.planner.SystemPartitioningHandle;
import io.trino.sql.planner.TypeAnalyzer;
import io.trino.sql.planner.TypeProvider;
import io.trino.sql.planner.optimizations.StreamPropertyDerivations.StreamProperties;
import io.trino.sql.planner.plan.AggregationNode;
import io.trino.sql.planner.plan.ApplyNode;
import io.trino.sql.planner.plan.CorrelatedJoinNode;
import io.trino.sql.planner.plan.DataOrganizationSpecification;
import io.trino.sql.planner.plan.DistinctLimitNode;
import io.trino.sql.planner.plan.EnforceSingleRowNode;
import io.trino.sql.planner.plan.ExchangeNode;
import io.trino.sql.planner.plan.ExplainAnalyzeNode;
import io.trino.sql.planner.plan.IndexJoinNode;
import io.trino.sql.planner.plan.JoinNode;
import io.trino.sql.planner.plan.LimitNode;
import io.trino.sql.planner.plan.MarkDistinctNode;
import io.trino.sql.planner.plan.MergeWriterNode;
import io.trino.sql.planner.plan.OutputNode;
import io.trino.sql.planner.plan.PatternRecognitionNode;
import io.trino.sql.planner.plan.PlanNode;
import io.trino.sql.planner.plan.PlanVisitor;
import io.trino.sql.planner.plan.ProjectNode;
import io.trino.sql.planner.plan.RowNumberNode;
import io.trino.sql.planner.plan.SemiJoinNode;
import io.trino.sql.planner.plan.SimpleTableExecuteNode;
import io.trino.sql.planner.plan.SortNode;
import io.trino.sql.planner.plan.SpatialJoinNode;
import io.trino.sql.planner.plan.StatisticsWriterNode;
import io.trino.sql.planner.plan.TableExecuteNode;
import io.trino.sql.planner.plan.TableFinishNode;
import io.trino.sql.planner.plan.TableFunctionNode;
import io.trino.sql.planner.plan.TableFunctionProcessorNode;
import io.trino.sql.planner.plan.TableWriterNode;
import io.trino.sql.planner.plan.TopNNode;
import io.trino.sql.planner.plan.TopNRankingNode;
import io.trino.sql.planner.plan.UnionNode;
import io.trino.sql.planner.plan.WindowNode;
import io.trino.sql.tree.SymbolReference;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Optional;
import java.util.Set;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkState;
import static com.google.common.base.Verify.verify;
import static com.google.common.collect.ImmutableList.toImmutableList;
import static com.google.common.collect.ImmutableSet.toImmutableSet;
import static io.trino.SystemSessionProperties.getTaskConcurrency;
import static io.trino.SystemSessionProperties.getTaskMaxWriterCount;
import static io.trino.SystemSessionProperties.getTaskMinWriterCount;
import static io.trino.SystemSessionProperties.isDistributedSortEnabled;
import static io.trino.SystemSessionProperties.isSpillEnabled;
import static io.trino.SystemSessionProperties.isTaskScaleWritersEnabled;
import static io.trino.sql.ExpressionUtils.isEffectivelyLiteral;
import static io.trino.sql.planner.SystemPartitioningHandle.FIXED_ARBITRARY_DISTRIBUTION;
import static io.trino.sql.planner.SystemPartitioningHandle.FIXED_HASH_DISTRIBUTION;
import static io.trino.sql.planner.SystemPartitioningHandle.SCALED_WRITER_HASH_DISTRIBUTION;
import static io.trino.sql.planner.SystemPartitioningHandle.SCALED_WRITER_ROUND_ROBIN_DISTRIBUTION;
import static io.trino.sql.planner.SystemPartitioningHandle.SINGLE_DISTRIBUTION;
import static io.trino.sql.planner.optimizations.StreamPreferredProperties.any;
import static io.trino.sql.planner.optimizations.StreamPreferredProperties.defaultParallelism;
import static io.trino.sql.planner.optimizations.StreamPreferredProperties.exactlyPartitionedOn;
import static io.trino.sql.planner.optimizations.StreamPreferredProperties.fixedParallelism;
import static io.trino.sql.planner.optimizations.StreamPreferredProperties.partitionedOn;
import static io.trino.sql.planner.optimizations.StreamPreferredProperties.singleStream;
import static io.trino.sql.planner.optimizations.StreamPropertyDerivations.StreamProperties.StreamDistribution.FIXED;
import static io.trino.sql.planner.optimizations.StreamPropertyDerivations.StreamProperties.StreamDistribution.SINGLE;
import static io.trino.sql.planner.optimizations.StreamPropertyDerivations.derivePropertiesRecursively;
import static io.trino.sql.planner.plan.ChildReplacer.replaceChildren;
import static io.trino.sql.planner.plan.ExchangeNode.Scope.LOCAL;
import static io.trino.sql.planner.plan.ExchangeNode.Type.GATHER;
import static io.trino.sql.planner.plan.ExchangeNode.Type.REPARTITION;
import static io.trino.sql.planner.plan.ExchangeNode.gatheringExchange;
import static io.trino.sql.planner.plan.ExchangeNode.mergingExchange;
import static io.trino.sql.planner.plan.ExchangeNode.partitionedExchange;
import static io.trino.sql.planner.plan.TableWriterNode.WriterTarget;
import static java.lang.String.format;
import static java.util.Objects.requireNonNull;
import static java.util.stream.Collectors.toList;
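/**
* Plans intra-task data flow. The plan is walked with parent stream preferences flowing down and
* derived stream properties flowing back up; wherever a node's required stream properties
* (single stream, parallelism, partitioning, grouping, ordering) are not already satisfied by its
* source, a LOCAL exchange node is inserted to enforce them.
*/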
public class AddLocalExchanges
implements PlanOptimizer
{
private final PlannerContext plannerContext;
private final TypeAnalyzer typeAnalyzer;
public AddLocalExchanges(PlannerContext plannerContext, TypeAnalyzer typeAnalyzer)
{
this.plannerContext = requireNonNull(plannerContext, "plannerContext is null");
this.typeAnalyzer = requireNonNull(typeAnalyzer, "typeAnalyzer is null");
}
@Override
public PlanNode optimize(
PlanNode plan,
Session session,
TypeProvider types,
SymbolAllocator symbolAllocator,
PlanNodeIdAllocator idAllocator,
WarningCollector warningCollector,
PlanOptimizersStatsCollector planOptimizersStatsCollector,
TableStatsProvider tableStatsProvider)
{
PlanWithProperties result = plan.accept(new Rewriter(symbolAllocator, idAllocator, session), any());
return result.getNode();
}
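/**
* Visitor that performs the rewrite. The context of each visit is the StreamPreferredProperties
* requested by the parent node; the result is the rewritten subtree paired with the
* StreamProperties it actually produces.
*/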
private class Rewriter
extends PlanVisitor<PlanWithProperties, StreamPreferredProperties>
{
private final PlanNodeIdAllocator idAllocator;
private final Session session;
private final TypeProvider types;
public Rewriter(SymbolAllocator symbolAllocator, PlanNodeIdAllocator idAllocator, Session session)
{
this.types = symbolAllocator.getTypes();
this.idAllocator = idAllocator;
this.session = session;
}
@Override
protected PlanWithProperties visitPlan(PlanNode node, StreamPreferredProperties parentPreferences)
{
return planAndEnforceChildren(
node,
parentPreferences.withoutPreference().withDefaultParallelism(session),
parentPreferences.withDefaultParallelism(session));
}
@Override
public PlanWithProperties visitApply(ApplyNode node, StreamPreferredProperties parentPreferences)
{
throw new IllegalStateException("Unexpected node: " + node.getClass().getName());
}
@Override
public PlanWithProperties visitCorrelatedJoin(CorrelatedJoinNode node, StreamPreferredProperties parentPreferences)
{
throw new IllegalStateException("Unexpected node: " + node.getClass().getName());
}
@Override
public PlanWithProperties visitOutput(OutputNode node, StreamPreferredProperties parentPreferences)
{
return planAndEnforceChildren(
node,
any().withOrderSensitivity(),
any().withOrderSensitivity());
}
@Override
public PlanWithProperties visitExplainAnalyze(ExplainAnalyzeNode node, StreamPreferredProperties parentPreferences)
{
// Although explain analyze discards all output, we want to maintain the behavior
// of a normal output node, so declare the node to be order sensitive
return planAndEnforceChildren(
node,
singleStream().withOrderSensitivity(),
singleStream().withOrderSensitivity());
}
@Override
public PlanWithProperties visitProject(ProjectNode node, StreamPreferredProperties parentPreferences)
{
// Special handling for trivial projections: identity projections, renaming projections, and constants.
// This could be extended to handle other low-cost projections.
if (node.getAssignments().getExpressions().stream().allMatch(expression -> expression instanceof SymbolReference || isEffectivelyLiteral(plannerContext, session, expression))) {
if (parentPreferences.isSingleStreamPreferred()) {
// Do not enforce gathering exchange below project:
// - if project's source is single stream, no exchanges will be added around project,
// - if project's source is distributed, gather will be added on top of project.
return planAndEnforceChildren(
node,
parentPreferences.withoutPreference(),
parentPreferences.withDefaultParallelism(session));
}
// Do not enforce hashed repartition below project. Execute project with the same distribution as its source:
// - if project's source is single stream, hash partitioned exchange will be added on top of project,
// - if project's source is distributed, and the distribution does not satisfy parent partitioning requirements, hash partitioned exchange will be added on top of project.
if (parentPreferences.getPartitioningColumns().isPresent() && !parentPreferences.getPartitioningColumns().get().isEmpty()) {
return planAndEnforceChildren(
node,
parentPreferences.withoutPreference(),
parentPreferences.withDefaultParallelism(session));
}
// If round-robin exchange is required by the parent, enforce it below project:
// - if project's source is single stream, round robin exchange will be added below project,
// - if project's source is distributed, no exchanges will be added around project.
return planAndEnforceChildren(
node,
parentPreferences,
parentPreferences.withDefaultParallelism(session));
}
return planAndEnforceChildren(
node,
parentPreferences.withoutPreference().withDefaultParallelism(session),
parentPreferences.withDefaultParallelism(session));
}
//
// Nodes that always require a single stream
//
@Override
public PlanWithProperties visitSort(SortNode node, StreamPreferredProperties parentPreferences)
{
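// When distributed sort is enabled, each local driver sorts its own stream in parallel and a
// local merging exchange combines the sorted streams into a single fully sorted stream.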
if (isDistributedSortEnabled(session)) {
PlanWithProperties sortPlan = planAndEnforceChildren(node, fixedParallelism(), fixedParallelism());
if (!sortPlan.getProperties().isSingleStream()) {
return deriveProperties(
mergingExchange(
idAllocator.getNextId(),
LOCAL,
sortPlan.getNode(),
node.getOrderingScheme()),
sortPlan.getProperties());
}
return sortPlan;
}
// sort requires that all data be in one stream
// this node changes the input organization completely, so we do not pass through parent preferences
return planAndEnforceChildren(node, singleStream(), defaultParallelism(session));
}
@Override
public PlanWithProperties visitStatisticsWriterNode(StatisticsWriterNode node, StreamPreferredProperties context)
{
// analyze finish requires that all data be in one stream
// this node changes the input organization completely, so we do not pass through parent preferences
return planAndEnforceChildren(node, singleStream(), defaultParallelism(session));
}
@Override
public PlanWithProperties visitTableFinish(TableFinishNode node, StreamPreferredProperties parentPreferences)
{
// table commit requires that all data be in one stream
// this node changes the input organization completely, so we do not pass through parent preferences
return planAndEnforceChildren(node, singleStream(), defaultParallelism(session));
}
@Override
public PlanWithProperties visitTopN(TopNNode node, StreamPreferredProperties parentPreferences)
{
if (node.getStep() == TopNNode.Step.PARTIAL) {
return planAndEnforceChildren(
node,
parentPreferences.withoutPreference().withDefaultParallelism(session),
parentPreferences.withDefaultParallelism(session));
}
// final topN requires that all data be in one stream
// also, the final step changes the input organization completely, so we do not pass through parent preferences
return planAndEnforceChildren(
node,
singleStream(),
defaultParallelism(session));
}
@Override
public PlanWithProperties visitLimit(LimitNode node, StreamPreferredProperties parentPreferences)
{
if (node.isWithTies()) {
throw new IllegalStateException("Unexpected node: LimitNode with ties");
}
if (node.isPartial()) {
StreamPreferredProperties requiredProperties = parentPreferences.withoutPreference().withDefaultParallelism(session);
StreamPreferredProperties preferredProperties = parentPreferences.withDefaultParallelism(session);
if (node.requiresPreSortedInputs()) {
requiredProperties = requiredProperties.withOrderSensitivity();
preferredProperties = preferredProperties.withOrderSensitivity();
}
return planAndEnforceChildren(node, requiredProperties, preferredProperties);
}
// final limit requires that all data be in one stream
// also, the final step changes the input organization completely, so we do not pass through parent preferences
return planAndEnforceChildren(
node,
singleStream(),
defaultParallelism(session));
}
@Override
public PlanWithProperties visitDistinctLimit(DistinctLimitNode node, StreamPreferredProperties parentPreferences)
{
// final limit requires that all data be in one stream
StreamPreferredProperties requiredProperties;
StreamPreferredProperties preferredProperties;
if (node.isPartial()) {
requiredProperties = parentPreferences.withoutPreference().withDefaultParallelism(session);
preferredProperties = parentPreferences.withDefaultParallelism(session);
}
else {
// the final step changes the input organization completely, so we do not pass through parent preferences
requiredProperties = singleStream();
preferredProperties = defaultParallelism(session);
}
return planAndEnforceChildren(node, requiredProperties, preferredProperties);
}
@Override
public PlanWithProperties visitEnforceSingleRow(EnforceSingleRowNode node, StreamPreferredProperties parentPreferences)
{
return planAndEnforceChildren(node, singleStream(), defaultParallelism(session));
}
//
// Nodes that require parallel streams to be partitioned
//
@Override
public PlanWithProperties visitAggregation(AggregationNode node, StreamPreferredProperties parentPreferences)
{
checkState(node.getStep() == AggregationNode.Step.SINGLE, "step of aggregation is expected to be SINGLE, but it is %s", node.getStep());
if (node.hasSingleNodeExecutionPreference(session, plannerContext.getMetadata())) {
return planAndEnforceChildren(node, singleStream(), defaultParallelism(session));
}
List<Symbol> groupingKeys = node.getGroupingKeys();
if (node.hasDefaultOutput()) {
checkState(node.isDecomposable(session, plannerContext.getMetadata()));
// Put fixed local exchange directly below final aggregation to ensure that final and partial aggregations are separated by exchange (in a local runner mode)
// This is required so that default outputs from multiple instances of partial aggregations are passed to a single final aggregation.
PlanWithProperties child = planAndEnforce(node.getSource(), any(), defaultParallelism(session));
PlanWithProperties exchange = deriveProperties(
partitionedExchange(
idAllocator.getNextId(),
LOCAL,
child.getNode(),
groupingKeys,
Optional.empty()),
child.getProperties());
return rebaseAndDeriveProperties(node, ImmutableList.of(exchange));
}
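// Otherwise, require the source to be partitioned on the grouping keys. If the source is
// already locally grouped on those keys, mark them as pre-grouped so the aggregation can
// process each group as a contiguous run instead of hashing all groups at once.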
StreamPreferredProperties childRequirements = parentPreferences
.constrainTo(node.getSource().getOutputSymbols())
.withDefaultParallelism(session)
.withPartitioning(groupingKeys);
PlanWithProperties child = planAndEnforce(node.getSource(), childRequirements, childRequirements);
List<Symbol> preGroupedSymbols = ImmutableList.of();
if (LocalProperties.match(child.getProperties().getLocalProperties(), LocalProperties.grouped(groupingKeys)).get(0).isEmpty()) {
// !isPresent() indicates the property was satisfied completely
preGroupedSymbols = groupingKeys;
}
AggregationNode result = AggregationNode.builderFrom(node)
.setSource(child.getNode())
.setPreGroupedSymbols(preGroupedSymbols)
.build();
return deriveProperties(result, child.getProperties());
}
@Override
public PlanWithProperties visitWindow(WindowNode node, StreamPreferredProperties parentPreferences)
{
StreamPreferredProperties childRequirements = parentPreferences
.constrainTo(node.getSource().getOutputSymbols())
.withDefaultParallelism(session)
.withPartitioning(node.getPartitionBy());
PlanWithProperties child = planAndEnforce(node.getSource(), childRequirements, childRequirements);
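// Compute which of the desired local properties (grouping on the partition columns, then the
// ordering) the child already provides: partition columns that are already grouped become
// prePartitionedInputs, and when all partition columns are pre-partitioned, the leading sort
// columns that are already satisfied become the preSortedOrderPrefix.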
List<LocalProperty<Symbol>> desiredProperties = new ArrayList<>();
if (!node.getPartitionBy().isEmpty()) {
desiredProperties.add(new GroupingProperty<>(node.getPartitionBy()));
}
node.getOrderingScheme().ifPresent(orderingScheme -> desiredProperties.addAll(orderingScheme.toLocalProperties()));
Iterator<Optional<LocalProperty<Symbol>>> matchIterator = LocalProperties.match(child.getProperties().getLocalProperties(), desiredProperties).iterator();
Set<Symbol> prePartitionedInputs = ImmutableSet.of();
if (!node.getPartitionBy().isEmpty()) {
Optional<LocalProperty<Symbol>> groupingRequirement = matchIterator.next();
Set<Symbol> unPartitionedInputs = groupingRequirement.map(LocalProperty::getColumns).orElse(ImmutableSet.of());
prePartitionedInputs = node.getPartitionBy().stream()
.filter(symbol -> !unPartitionedInputs.contains(symbol))
.collect(toImmutableSet());
}
int preSortedOrderPrefix = 0;
if (prePartitionedInputs.equals(ImmutableSet.copyOf(node.getPartitionBy()))) {
while (matchIterator.hasNext() && matchIterator.next().isEmpty()) {
preSortedOrderPrefix++;
}
}
WindowNode result = new WindowNode(
node.getId(),
child.getNode(),
node.getSpecification(),
node.getWindowFunctions(),
node.getHashSymbol(),
prePartitionedInputs,
preSortedOrderPrefix);
return deriveProperties(result, child.getProperties());
}
@Override
public PlanWithProperties visitPatternRecognition(PatternRecognitionNode node, StreamPreferredProperties parentPreferences)
{
StreamPreferredProperties childRequirements = parentPreferences
.constrainTo(node.getSource().getOutputSymbols())
.withDefaultParallelism(session)
.withPartitioning(node.getPartitionBy());
PlanWithProperties child = planAndEnforce(node.getSource(), childRequirements, childRequirements);
List<LocalProperty<Symbol>> desiredProperties = new ArrayList<>();
if (!node.getPartitionBy().isEmpty()) {
desiredProperties.add(new GroupingProperty<>(node.getPartitionBy()));
}
node.getOrderingScheme().ifPresent(orderingScheme -> desiredProperties.addAll(orderingScheme.toLocalProperties()));
Iterator<Optional<LocalProperty<Symbol>>> matchIterator = LocalProperties.match(child.getProperties().getLocalProperties(), desiredProperties).iterator();
Set<Symbol> prePartitionedInputs = ImmutableSet.of();
if (!node.getPartitionBy().isEmpty()) {
Optional<LocalProperty<Symbol>> groupingRequirement = matchIterator.next();
Set<Symbol> unPartitionedInputs = groupingRequirement.map(LocalProperty::getColumns).orElse(ImmutableSet.of());
prePartitionedInputs = node.getPartitionBy().stream()
.filter(symbol -> !unPartitionedInputs.contains(symbol))
.collect(toImmutableSet());
}
int preSortedOrderPrefix = 0;
if (prePartitionedInputs.equals(ImmutableSet.copyOf(node.getPartitionBy()))) {
while (matchIterator.hasNext() && matchIterator.next().isEmpty()) {
preSortedOrderPrefix++;
}
}
PatternRecognitionNode result = new PatternRecognitionNode(
node.getId(),
child.getNode(),
node.getSpecification(),
node.getHashSymbol(),
prePartitionedInputs,
preSortedOrderPrefix,
node.getWindowFunctions(),
node.getMeasures(),
node.getCommonBaseFrame(),
node.getRowsPerMatch(),
node.getSkipToLabel(),
node.getSkipToPosition(),
node.isInitial(),
node.getPattern(),
node.getSubsets(),
node.getVariableDefinitions());
return deriveProperties(result, child.getProperties());
}
@Override
public PlanWithProperties visitTableFunction(TableFunctionNode node, StreamPreferredProperties parentPreferences)
{
throw new IllegalStateException(format("Unexpected node: TableFunctionNode (%s)", node.getName()));
}
@Override
public PlanWithProperties visitTableFunctionProcessor(TableFunctionProcessorNode node, StreamPreferredProperties parentPreferences)
{
if (node.getSource().isEmpty()) {
return deriveProperties(node, ImmutableList.of());
}
if (node.getSpecification().isEmpty()) {
// node.getSpecification().isEmpty() indicates that there were either no sources, or a single source with row semantics.
// The case of no sources was addressed above.
// The case of a single source with row semantics is addressed here. The source's properties do not hold after the TableFunctionProcessorNode.
PlanWithProperties child = planAndEnforce(node.getSource().orElseThrow(), StreamPreferredProperties.any(), StreamPreferredProperties.any());
return rebaseAndDeriveProperties(node, ImmutableList.of(child));
}
List<Symbol> partitionBy = node.getSpecification().orElseThrow().getPartitionBy();
StreamPreferredProperties childRequirements;
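// When the function must execute even if the input is empty (prune when empty is false), all
// input is gathered into a single stream so the empty case can be handled by a single operator
// instance; otherwise the source may stay parallel, partitioned on the specification's
// partition columns.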
if (!node.isPruneWhenEmpty()) {
childRequirements = singleStream();
}
else {
childRequirements = parentPreferences
.constrainTo(node.getSource().orElseThrow().getOutputSymbols())
.withDefaultParallelism(session)
.withPartitioning(partitionBy);
}
PlanWithProperties child = planAndEnforce(node.getSource().orElseThrow(), childRequirements, childRequirements);
List<LocalProperty<Symbol>> desiredProperties = new ArrayList<>();
if (!partitionBy.isEmpty()) {
desiredProperties.add(new GroupingProperty<>(partitionBy));
}
node.getSpecification().flatMap(DataOrganizationSpecification::getOrderingScheme).ifPresent(orderingScheme -> desiredProperties.addAll(orderingScheme.toLocalProperties()));
Iterator<Optional<LocalProperty<Symbol>>> matchIterator = LocalProperties.match(child.getProperties().getLocalProperties(), desiredProperties).iterator();
Set<Symbol> prePartitionedInputs = ImmutableSet.of();
if (!partitionBy.isEmpty()) {
Optional<LocalProperty<Symbol>> groupingRequirement = matchIterator.next();
Set<Symbol> unPartitionedInputs = groupingRequirement.map(LocalProperty::getColumns).orElse(ImmutableSet.of());
prePartitionedInputs = partitionBy.stream()
.filter(symbol -> !unPartitionedInputs.contains(symbol))
.collect(toImmutableSet());
}
int preSortedOrderPrefix = 0;
if (prePartitionedInputs.equals(ImmutableSet.copyOf(partitionBy))) {
while (matchIterator.hasNext() && matchIterator.next().isEmpty()) {
preSortedOrderPrefix++;
}
}
TableFunctionProcessorNode result = new TableFunctionProcessorNode(
node.getId(),
node.getName(),
node.getProperOutputs(),
Optional.of(child.getNode()),
node.isPruneWhenEmpty(),
node.getPassThroughSpecifications(),
node.getRequiredSymbols(),
node.getMarkerSymbols(),
node.getSpecification(),
prePartitionedInputs,
preSortedOrderPrefix,
node.getHashSymbol(),
node.getHandle());
return deriveProperties(result, child.getProperties());
}
@Override
public PlanWithProperties visitMarkDistinct(MarkDistinctNode node, StreamPreferredProperties parentPreferences)
{
// mark distinct requires that all data be partitioned
StreamPreferredProperties childRequirements = parentPreferences
.constrainTo(node.getSource().getOutputSymbols())
.withDefaultParallelism(session)
.withPartitioning(node.getDistinctSymbols());
PlanWithProperties child = planAndEnforce(node.getSource(), childRequirements, childRequirements);
MarkDistinctNode result = new MarkDistinctNode(
node.getId(),
child.getNode(),
node.getMarkerSymbol(),
pruneMarkDistinctSymbols(node, child.getProperties().getLocalProperties()),
node.getHashSymbol());
return deriveProperties(result, child.getProperties());
}
/**
* Prune redundant distinct symbols to reduce CPU cost of hashing corresponding values and amount of memory
* needed to store all the distinct values.
*
* Consider the following plan:
* <pre>
* - MarkDistinctNode (unique, c1, c2)
*     - Join
*         - AssignUniqueId (unique)
*             - probe (c1, c2)
*         - build
* </pre>
*
* In this case MarkDistinctNode (unique, c1, c2) is equivalent to MarkDistinctNode (unique),
* because if two rows match on `unique`, they must match on `c1` and `c2` as well.
*
* More generally, any distinct symbol that is functionally dependent on a subset of
* other distinct symbols can be dropped.
*
* Ideally, this logic would be encapsulated in a separate rule, but currently no rule other
* than AddLocalExchanges can reason about local properties.
*/
private List<Symbol> pruneMarkDistinctSymbols(MarkDistinctNode node, List<LocalProperty<Symbol>> localProperties)
{
if (localProperties.isEmpty()) {
return node.getDistinctSymbols();
}
// Identify functional dependencies between distinct symbols: in the list of local properties any constant
// symbol is functionally dependent on the set of symbols that appears earlier.
ImmutableSet.Builder<Symbol> redundantSymbolsBuilder = ImmutableSet.builder();
for (LocalProperty<Symbol> property : localProperties) {
if (property instanceof ConstantProperty) {
redundantSymbolsBuilder.add(((ConstantProperty<Symbol>) property).getColumn());
}
else if (!node.getDistinctSymbols().containsAll(property.getColumns())) {
// Ran into a non-distinct symbol. There will be no more symbols that are functionally dependent on distinct symbols exclusively.
break;
}
}
Set<Symbol> redundantSymbols = redundantSymbolsBuilder.build();
List<Symbol> remainingSymbols = node.getDistinctSymbols().stream()
.filter(symbol -> !redundantSymbols.contains(symbol))
.collect(toImmutableList());
if (remainingSymbols.isEmpty()) {
// This happens when all distinct symbols are constants.
// In that case, keep the first symbol (don't drop them all).
return ImmutableList.of(node.getDistinctSymbols().get(0));
}
return remainingSymbols;
}
@Override
public PlanWithProperties visitRowNumber(RowNumberNode node, StreamPreferredProperties parentPreferences)
{
StreamPreferredProperties requiredProperties;
if (node.isOrderSensitive()) {
// for an order sensitive RowNumberNode pass the orderSensitive context
verify(node.getPartitionBy().isEmpty(), "unexpected partitioning");
requiredProperties = singleStream().withOrderSensitivity();
}
else {
requiredProperties = parentPreferences.withDefaultParallelism(session).withPartitioning(node.getPartitionBy());
}
return planAndEnforceChildren(node, requiredProperties, requiredProperties);
}
@Override
public PlanWithProperties visitTopNRanking(TopNRankingNode node, StreamPreferredProperties parentPreferences)
{
StreamPreferredProperties requiredProperties = parentPreferences.withDefaultParallelism(session);
// final topN ranking requires that all data be partitioned
if (!node.isPartial()) {
requiredProperties = requiredProperties.withPartitioning(node.getPartitionBy());
}
return planAndEnforceChildren(node, requiredProperties, requiredProperties);
}
@Override
public PlanWithProperties visitSimpleTableExecuteNode(SimpleTableExecuteNode node, StreamPreferredProperties context)
{
return planAndEnforceChildren(node, singleStream(), singleStream());
}
//
// Table Writer and Table Execute
//
@Override
public PlanWithProperties visitTableWriter(TableWriterNode node, StreamPreferredProperties parentPreferences)
{
WriterScalingOptions scalingOptions = node.getTarget().getWriterScalingOptions(plannerContext.getMetadata(), session);
return visitTableWriter(
node,
node.getPartitioningScheme(),
node.getSource(),
parentPreferences,
node.getTarget(),
isTaskScaleWritersEnabled(session) && scalingOptions.isPerTaskWriterScalingEnabled());
}
@Override
public PlanWithProperties visitTableExecute(TableExecuteNode node, StreamPreferredProperties parentPreferences)
{
return visitTableWriter(
node,
node.getPartitioningScheme(),
node.getSource(),
parentPreferences,
node.getTarget(),
// Disable task writer scaling for TableExecute since it can result in smaller files than
// file_size_threshold, which can be undesirable behaviour.
false);
}
private PlanWithProperties visitTableWriter(
PlanNode node,
Optional<PartitioningScheme> partitioningScheme,
PlanNode source,
StreamPreferredProperties parentPreferences,
WriterTarget writerTarget,
boolean isTaskScaleWritersEnabled)
{
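// Writer planning strategy: with per-task writer scaling and a partitioning scheme the writer
// gets a scaled partitioned local exchange; a partitioning scheme without scaling gets a plain
// partitioned exchange; an unpartitioned write gets either a scaled round-robin exchange or a
// fixed number of parallel writers.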
if (isTaskScaleWritersEnabled
&& writerTarget.supportsMultipleWritersPerPartition(plannerContext.getMetadata(), session)
&& partitioningScheme.isPresent()) {
return visitScalePartitionedWriter(node, partitioningScheme.get(), source);
}
return partitioningScheme
.map(scheme -> visitPartitionedWriter(node, scheme, source, parentPreferences))
.orElseGet(() -> visitUnpartitionedWriter(node, source, isTaskScaleWritersEnabled));
}
private PlanWithProperties visitUnpartitionedWriter(PlanNode node, PlanNode source, boolean isTaskScaleWritersEnabled)
{
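// With per-task writer scaling, a scaled round-robin local exchange lets the engine raise the
// number of local writers as the amount of produced data grows; without scaling, write either
// through a single stream or with a fixed number of parallel writer drivers.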
if (isTaskScaleWritersEnabled) {
PlanWithProperties newSource = source.accept(this, defaultParallelism(session));
PlanWithProperties exchange = deriveProperties(
partitionedExchange(
idAllocator.getNextId(),
LOCAL,
newSource.getNode(),
new PartitioningScheme(
Partitioning.create(SCALED_WRITER_ROUND_ROBIN_DISTRIBUTION, ImmutableList.of()),
newSource.getNode().getOutputSymbols())),
newSource.getProperties());
return rebaseAndDeriveProperties(node, ImmutableList.of(exchange));
}
if (getTaskMinWriterCount(session) == 1) {
return planAndEnforceChildren(node, singleStream(), defaultParallelism(session));
}
return planAndEnforceChildren(node, fixedParallelism(), fixedParallelism());
}
private PlanWithProperties visitPartitionedWriter(PlanNode node, PartitioningScheme partitioningScheme, PlanNode source, StreamPreferredProperties parentPreferences)
{
if (getTaskMaxWriterCount(session) == 1) {
return planAndEnforceChildren(node, singleStream(), defaultParallelism(session));
}
if (partitioningScheme.getPartitioning().getHandle().equals(FIXED_HASH_DISTRIBUTION)) {
// arbitrary hash function on predefined set of partition columns
StreamPreferredProperties preference = partitionedOn(partitioningScheme.getPartitioning().getColumns());
return planAndEnforceChildren(node, preference, preference);
}
// connector provided hash function
verify(!(partitioningScheme.getPartitioning().getHandle().getConnectorHandle() instanceof SystemPartitioningHandle));
verify(
partitioningScheme.getPartitioning().getArguments().stream().noneMatch(Partitioning.ArgumentBinding::isConstant),
"Table writer partitioning has constant arguments");
PlanWithProperties newSource = source.accept(this, parentPreferences);
PlanWithProperties exchange = deriveProperties(
partitionedExchange(
idAllocator.getNextId(),
LOCAL,
newSource.getNode(),
partitioningScheme),
newSource.getProperties());
return rebaseAndDeriveProperties(node, ImmutableList.of(exchange));
}
private PlanWithProperties visitScalePartitionedWriter(PlanNode node, PartitioningScheme partitioningScheme, PlanNode source)
{
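// Same structure as visitPartitionedWriter, but the partitioning handle is switched to its
// scaling variant so more writers can be assigned to a partition that receives a lot of data.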
if (getTaskMaxWriterCount(session) == 1) {
return planAndEnforceChildren(node, singleStream(), defaultParallelism(session));
}
if (partitioningScheme.getPartitioning().getHandle().equals(FIXED_HASH_DISTRIBUTION)) {
// arbitrary hash function on predefined set of partition columns
PlanWithProperties newSource = source.accept(this, defaultParallelism(session));
PlanWithProperties exchange = deriveProperties(
partitionedExchange(
idAllocator.getNextId(),
LOCAL,
newSource.getNode(),
partitioningScheme.withPartitioningHandle(SCALED_WRITER_HASH_DISTRIBUTION)),
newSource.getProperties());
return rebaseAndDeriveProperties(node, ImmutableList.of(exchange));
}
// connector provided hash function
verify(!(partitioningScheme.getPartitioning().getHandle().getConnectorHandle() instanceof SystemPartitioningHandle));
verify(
partitioningScheme.getPartitioning().getArguments().stream().noneMatch(Partitioning.ArgumentBinding::isConstant),
"Table writer partitioning has constant arguments");
PlanWithProperties newSource = source.accept(this, defaultParallelism(session));
PartitioningHandle partitioningHandle = partitioningScheme.getPartitioning().getHandle();
PlanWithProperties exchange = deriveProperties(
partitionedExchange(
idAllocator.getNextId(),
LOCAL,
newSource.getNode(),
partitioningScheme
.withPartitioningHandle(
new PartitioningHandle(
partitioningHandle.getCatalogHandle(),
partitioningHandle.getTransactionHandle(),
partitioningHandle.getConnectorHandle(),
true))),
newSource.getProperties());
return rebaseAndDeriveProperties(node, ImmutableList.of(exchange));
}
//
// Merge
//
@Override
public PlanWithProperties visitMergeWriter(MergeWriterNode node, StreamPreferredProperties parentPreferences)
{
return visitTableWriter(node, node.getPartitioningScheme(), node.getSource(), parentPreferences, node.getTarget(), false);
}
//
// Exchanges
//
@Override
public PlanWithProperties visitExchange(ExchangeNode node, StreamPreferredProperties parentPreferences)
{
checkArgument(node.getScope() != LOCAL, "AddLocalExchanges cannot process a plan containing a local exchange");
// this node changes the input organization completely, so we do not pass through parent preferences
if (node.getOrderingScheme().isPresent()) {
return planAndEnforceChildren(
node,
any().withOrderSensitivity(),
any().withOrderSensitivity());
}
return planAndEnforceChildren(node, any(), defaultParallelism(session));
}
@Override
public PlanWithProperties visitUnion(UnionNode node, StreamPreferredProperties preferredProperties)
{
// Union is replaced with an exchange which does not retain streaming properties from the children
List<PlanWithProperties> sourcesWithProperties = node.getSources().stream()
.map(source -> source.accept(this, any()))
.collect(toImmutableList());
List<PlanNode> sources = sourcesWithProperties.stream()
.map(PlanWithProperties::getNode)
.collect(toImmutableList());
List<StreamProperties> inputProperties = sourcesWithProperties.stream()
.map(PlanWithProperties::getProperties)
.collect(toImmutableList());
List<List<Symbol>> inputLayouts = new ArrayList<>(sources.size());
for (int i = 0; i < sources.size(); i++) {
inputLayouts.add(node.sourceOutputLayout(i));
}
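// The union is implemented directly as a local exchange over its sources: a gather when the
// parent needs a single stream, a hash repartition when the parent needs specific partitioning
// columns, and a round-robin repartition otherwise.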
if (preferredProperties.isSingleStreamPreferred()) {
ExchangeNode exchangeNode = new ExchangeNode(
idAllocator.getNextId(),
GATHER,
LOCAL,
new PartitioningScheme(Partitioning.create(SINGLE_DISTRIBUTION, ImmutableList.of()), node.getOutputSymbols()),
sources,
inputLayouts,
Optional.empty());
return deriveProperties(exchangeNode, inputProperties);
}
Optional<List<Symbol>> preferredPartitionColumns = preferredProperties.getPartitioningColumns();
if (preferredPartitionColumns.isPresent()) {
ExchangeNode exchangeNode = new ExchangeNode(
idAllocator.getNextId(),
REPARTITION,
LOCAL,
new PartitioningScheme(
Partitioning.create(FIXED_HASH_DISTRIBUTION, preferredPartitionColumns.get()),
node.getOutputSymbols(),
Optional.empty()),
sources,
inputLayouts,
Optional.empty());
return deriveProperties(exchangeNode, inputProperties);
}
// multiple streams preferred
ExchangeNode exchangeNode = new ExchangeNode(
idAllocator.getNextId(),
REPARTITION,
LOCAL,
new PartitioningScheme(Partitioning.create(FIXED_ARBITRARY_DISTRIBUTION, ImmutableList.of()), node.getOutputSymbols()),
sources,
inputLayouts,
Optional.empty());
return deriveProperties(exchangeNode, inputProperties);
}
//
// Joins
//
@Override
public PlanWithProperties visitJoin(JoinNode node, StreamPreferredProperties parentPreferences)
{
PlanWithProperties probe = planAndEnforce(
node.getLeft(),
defaultParallelism(session),
parentPreferences.constrainTo(node.getLeft().getOutputSymbols()).withDefaultParallelism(session));
if (isSpillEnabled(session)) {
if (probe.getProperties().getDistribution() != FIXED) {
// Disable spill for joins over non-fixed streams, as spilling them would otherwise require inserting a local exchange.
// Such local exchanges hurt performance whenever spill does not actually trigger, so it is better not to make the join spillable here.
node = node.withSpillable(false);
}
else {
node = node.withSpillable(true);
}
}
// this build consumes the input completely, so we do not pass through parent preferences
List<Symbol> buildHashSymbols = Lists.transform(node.getCriteria(), JoinNode.EquiJoinClause::getRight);
StreamPreferredProperties buildPreference;
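// With task concurrency above one, require the build side to be partitioned on the join keys
// so each driver builds a disjoint part of the hash table; otherwise gather the build side
// into a single stream and build one shared hash table.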
if (getTaskConcurrency(session) > 1) {
buildPreference = exactlyPartitionedOn(buildHashSymbols);
}
else {
buildPreference = singleStream();
}
PlanWithProperties build = planAndEnforce(node.getRight(), buildPreference, buildPreference);
return rebaseAndDeriveProperties(node, ImmutableList.of(probe, build));
}
@Override
public PlanWithProperties visitSemiJoin(SemiJoinNode node, StreamPreferredProperties parentPreferences)
{
PlanWithProperties source = planAndEnforce(
node.getSource(),
defaultParallelism(session),
parentPreferences.constrainTo(node.getSource().getOutputSymbols()).withDefaultParallelism(session));
// this filter source consumes the input completely, so we do not pass through parent preferences
PlanWithProperties filteringSource = planAndEnforce(node.getFilteringSource(), singleStream(), singleStream());
return rebaseAndDeriveProperties(node, ImmutableList.of(source, filteringSource));
}
@Override
public PlanWithProperties visitSpatialJoin(SpatialJoinNode node, StreamPreferredProperties parentPreferences)
{
PlanWithProperties probe = planAndEnforce(
node.getLeft(),
defaultParallelism(session),
parentPreferences.constrainTo(node.getLeft().getOutputSymbols())
.withDefaultParallelism(session));
PlanWithProperties build = planAndEnforce(node.getRight(), singleStream(), singleStream());
return rebaseAndDeriveProperties(node, ImmutableList.of(probe, build));
}
@Override
public PlanWithProperties visitIndexJoin(IndexJoinNode node, StreamPreferredProperties parentPreferences)
{
PlanWithProperties probe = planAndEnforce(
node.getProbeSource(),
defaultParallelism(session),
parentPreferences.constrainTo(node.getProbeSource().getOutputSymbols()).withDefaultParallelism(session));
// index source does not support local parallelism and must produce a single stream
StreamProperties indexStreamProperties = derivePropertiesRecursively(node.getIndexSource(), plannerContext, session, types, typeAnalyzer);
checkArgument(indexStreamProperties.getDistribution() == SINGLE, "index source must be single stream");
PlanWithProperties index = new PlanWithProperties(node.getIndexSource(), indexStreamProperties);
return rebaseAndDeriveProperties(node, ImmutableList.of(probe, index));
}
//
// Helpers
//
private PlanWithProperties planAndEnforceChildren(PlanNode node, StreamPreferredProperties requiredProperties, StreamPreferredProperties preferredProperties)
{
// plan and enforce each child, but strip any requirement not in terms of symbols produced from the child
// Note: this assumes the child uses the same symbols as the parent
List<PlanWithProperties> children = node.getSources().stream()
.map(source -> planAndEnforce(
source,
requiredProperties.constrainTo(source.getOutputSymbols()),
preferredProperties.constrainTo(source.getOutputSymbols())))
.collect(toImmutableList());
return rebaseAndDeriveProperties(node, children);
}
private PlanWithProperties planAndEnforce(PlanNode node, StreamPreferredProperties requiredProperties, StreamPreferredProperties preferredProperties)
{
// verify properties are in terms of symbols produced by the node
List<Symbol> outputSymbols = node.getOutputSymbols();
checkArgument(requiredProperties.getPartitioningColumns().map(outputSymbols::containsAll).orElse(true));
checkArgument(preferredProperties.getPartitioningColumns().map(outputSymbols::containsAll).orElse(true));
// plan the node using the preferred properties
PlanWithProperties result = node.accept(this, preferredProperties);
// enforce the required properties
result = enforce(result, requiredProperties);
checkState(requiredProperties.isSatisfiedBy(result.getProperties()), "required properties not enforced");
return result;
}
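// Adds a local exchange on top of the given plan when its stream properties do not satisfy the
// requirement: a gathering exchange for a single-stream requirement, a round-robin repartition
// for unpartitioned parallelism, a hash repartition for partitioned parallelism, and a gather
// when no parallelism is explicitly required.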
private PlanWithProperties enforce(PlanWithProperties planWithProperties, StreamPreferredProperties requiredProperties)
{
if (requiredProperties.isSatisfiedBy(planWithProperties.getProperties())) {
return planWithProperties;
}
if (requiredProperties.isSingleStreamPreferred()) {
ExchangeNode exchangeNode = gatheringExchange(idAllocator.getNextId(), LOCAL, planWithProperties.getNode());
return deriveProperties(exchangeNode, planWithProperties.getProperties());
}
Optional<List<Symbol>> requiredPartitionColumns = requiredProperties.getPartitioningColumns();
if (requiredPartitionColumns.isEmpty()) {
// unpartitioned parallel streams required
ExchangeNode exchangeNode = partitionedExchange(
idAllocator.getNextId(),
LOCAL,
planWithProperties.getNode(),
new PartitioningScheme(Partitioning.create(FIXED_ARBITRARY_DISTRIBUTION, ImmutableList.of()), planWithProperties.getNode().getOutputSymbols()));
return deriveProperties(exchangeNode, planWithProperties.getProperties());
}
if (requiredProperties.isParallelPreferred()) {
// partitioned parallel streams required
ExchangeNode exchangeNode = partitionedExchange(
idAllocator.getNextId(),
LOCAL,
planWithProperties.getNode(),
requiredPartitionColumns.get(),
Optional.empty());
return deriveProperties(exchangeNode, planWithProperties.getProperties());
}
// no explicit parallel requirement, so gather to a single stream
ExchangeNode exchangeNode = gatheringExchange(
idAllocator.getNextId(),
LOCAL,
planWithProperties.getNode());
return deriveProperties(exchangeNode, planWithProperties.getProperties());
}
private PlanWithProperties rebaseAndDeriveProperties(PlanNode node, List<PlanWithProperties> children)
{
PlanNode result = replaceChildren(
node,
children.stream()
.map(PlanWithProperties::getNode)
.collect(toList()));
List<StreamProperties> inputProperties = children.stream()
.map(PlanWithProperties::getProperties)
.collect(toImmutableList());
return deriveProperties(result, inputProperties);
}
private PlanWithProperties deriveProperties(PlanNode result, StreamProperties inputProperties)
{
return new PlanWithProperties(result, StreamPropertyDerivations.deriveProperties(result, inputProperties, plannerContext, session, types, typeAnalyzer));
}
private PlanWithProperties deriveProperties(PlanNode result, List<StreamProperties> inputProperties)
{
return new PlanWithProperties(result, StreamPropertyDerivations.deriveProperties(result, inputProperties, plannerContext, session, types, typeAnalyzer));
}
}
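/**
* A rewritten plan subtree together with the stream properties derived for its root node.
*/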
private static class PlanWithProperties
{
private final PlanNode node;
private final StreamProperties properties;
public PlanWithProperties(PlanNode node, StreamProperties properties)
{
this.node = requireNonNull(node, "node is null");
this.properties = requireNonNull(properties, "properties is null");
}
public PlanNode getNode()
{
return node;
}
public StreamProperties getProperties()
{
return properties;
}
}
}