/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package io.trino.sql.planner;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.graph.Traverser;
import io.trino.Session;
import io.trino.cost.CachingTableStatsProvider;
import io.trino.cost.RuntimeInfoProvider;
import io.trino.cost.StatsAndCosts;
import io.trino.execution.querystats.PlanOptimizersStatsCollector;
import io.trino.execution.warnings.WarningCollector;
import io.trino.sql.PlannerContext;
import io.trino.sql.planner.optimizations.AdaptivePlanOptimizer;
import io.trino.sql.planner.optimizations.PlanNodeSearcher;
import io.trino.sql.planner.optimizations.PlanOptimizer;
import io.trino.sql.planner.plan.AdaptivePlanNode;
import io.trino.sql.planner.plan.ExchangeNode;
import io.trino.sql.planner.plan.PlanNode;
import io.trino.sql.planner.plan.PlanNodeId;
import io.trino.sql.planner.plan.RemoteSourceNode;
import io.trino.sql.planner.plan.SimplePlanRewriter;
import io.trino.sql.planner.plan.SimplePlanVisitor;
import io.trino.sql.planner.sanity.PlanSanityChecker;

import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;

import static com.google.common.base.Verify.verify;
import static com.google.common.collect.ImmutableList.toImmutableList;
import static com.google.common.collect.ImmutableMap.toImmutableMap;
import static com.google.common.collect.ImmutableSet.toImmutableSet;
import static io.trino.sql.planner.SystemPartitioningHandle.SINGLE_DISTRIBUTION;
import static io.trino.sql.planner.plan.ExchangeNode.Scope.REMOTE;
import static io.trino.sql.planner.plan.SimplePlanRewriter.rewriteWith;
import static io.trino.tracing.ScopedSpan.scopedSpan;
import static java.util.Objects.requireNonNull;

/**
 * This class is responsible for re-optimizing the plan using exchange statistics in fault-tolerant
 * execution (FTE), for example by reordering joins or mitigating skew. It can significantly improve
 * cost and performance when the plan chosen by the static optimizer isn't the best due to
 * underestimated or missing statistics.
 * <p>
 * Re-planning Steps:
 * 1. It first merges all SubPlans into a single PlanNode where RemoteSourceNodes for stages
 *    that haven't finished get replaced with remote exchanges. On the other hand,
 *    RemoteSourceNodes for finished stages remain as they are in the plan.
 * 2. Once we have a merged plan which contains all the unfinished parts, we re-optimize it using
 *    a set of PlanOptimizers.
 * 3. During re-optimization, it is possible that some new exchanges need to be added due to a change
 *    in partitioning strategy, for instance when a rule changes the distribution type of a join
 *    from BROADCAST to PARTITIONED. It is also possible that some remote exchanges get removed,
 *    for example while changing the order of a join.
 * 4. Ultimately, the planner fragments the optimized PlanNode again and generates SubPlans with
 *    new PlanFragmentIds. The re-fragmentation only happens if the old plan and the new plan
 *    differ. To detect these differences, we rely on the PlanOptimizer#optimizeAndMarkPlanChanges
 *    API, which also returns the ids of the changed plan nodes.
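 * <p>
 * For example (illustrative): if a finished stage's actual output turns out to be much larger than
 * the static estimate, an adaptive rule may switch a BROADCAST join above it to PARTITIONED. Only
 * the fragments containing the changed join and its downstream consumers are re-created with new
 * ids; the finished stage's fragment is reused as is.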
 * <p>
 * Note: We do not change the ids of fragments which have no changes and are not downstream of the
 * changed plan nodes. This optimization is done to avoid unnecessary stage restarts due to
 * speculative execution.
 */
public class AdaptivePlanner
{
    private final Session session;
    private final PlannerContext plannerContext;
    private final List<AdaptivePlanOptimizer> planOptimizers;
    private final PlanFragmenter planFragmenter;
    private final PlanSanityChecker planSanityChecker;
    private final WarningCollector warningCollector;
    private final PlanOptimizersStatsCollector planOptimizersStatsCollector;
    private final CachingTableStatsProvider tableStatsProvider;

    public AdaptivePlanner(
            Session session,
            PlannerContext plannerContext,
            List<AdaptivePlanOptimizer> planOptimizers,
            PlanFragmenter planFragmenter,
            PlanSanityChecker planSanityChecker,
            WarningCollector warningCollector,
            PlanOptimizersStatsCollector planOptimizersStatsCollector,
            CachingTableStatsProvider tableStatsProvider)
    {
        this.session = requireNonNull(session, "session is null");
        this.plannerContext = requireNonNull(plannerContext, "plannerContext is null");
        this.planOptimizers = requireNonNull(planOptimizers, "planOptimizers is null");
        this.planFragmenter = requireNonNull(planFragmenter, "planFragmenter is null");
        this.planSanityChecker = requireNonNull(planSanityChecker, "planSanityChecker is null");
        this.warningCollector = requireNonNull(warningCollector, "warningCollector is null");
        this.planOptimizersStatsCollector = requireNonNull(planOptimizersStatsCollector, "planOptimizersStatsCollector is null");
        this.tableStatsProvider = requireNonNull(tableStatsProvider, "tableStatsProvider is null");
    }

    public SubPlan optimize(SubPlan root, RuntimeInfoProvider runtimeInfoProvider)
    {
        // No need to run the optimizer since the root is already finished or its stats are almost accurate
        // based on estimate-by-progress.
        // TODO: We need to add an ability to re-plan fragments whose stats are estimated by progress.
        if (runtimeInfoProvider.getRuntimeOutputStats(root.getFragment().getId()).isAccurate()) {
            return root;
        }

        List<SubPlan> subPlans = traverse(root).collect(toImmutableList());

        // create a new fragment id allocator and symbol allocator
        PlanFragmentIdAllocator fragmentIdAllocator = new PlanFragmentIdAllocator(getMaxPlanFragmentId(subPlans) + 1);
        SymbolAllocator symbolAllocator = createSymbolAllocator(subPlans);

        // Rewrite remote source nodes to exchange nodes, except for fragments which are finished or whose
        // stats are estimated by progress.
        ReplaceUnchangedFragmentsWithRemoteSourcesRewriter rewriter = new ReplaceUnchangedFragmentsWithRemoteSourcesRewriter(runtimeInfoProvider);
        PlanNode currentAdaptivePlan = rewriteWith(rewriter, root.getFragment().getRoot(), root.getChildren());

        // Remove the adaptive plan node and replace it with the initial plan
        PlanNode initialPlan = getInitialPlan(currentAdaptivePlan);
        // Remove the adaptive plan node and replace it with the current plan
        PlanNode currentPlan = getCurrentPlan(currentAdaptivePlan);

        // Collect the sub plans for each remote exchange and remote source node. We will use this map during
        // re-fragmentation as a cache for all unchanged sub plans.
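        // The cache is keyed by (exchange id, source id) pairs (see ExchangeSourceId below), so a cached
        // sub plan is reused only when neither the consuming exchange nor the fragment root feeding it
        // has been changed by the adaptive optimization.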
        ExchangeSourceIdToSubPlanCollector exchangeSourceIdToSubPlanCollector = new ExchangeSourceIdToSubPlanCollector();
        currentPlan.accept(exchangeSourceIdToSubPlanCollector, subPlans);
        Map<ExchangeSourceId, SubPlan> exchangeSourceIdToSubPlan = exchangeSourceIdToSubPlanCollector.getExchangeSourceIdToSubPlan();

        // optimize the current plan
        PlanNodeIdAllocator idAllocator = new PlanNodeIdAllocator(getMaxPlanId(currentPlan) + 1);
        AdaptivePlanOptimizer.Result optimizationResult = optimizePlan(currentPlan, symbolAllocator, runtimeInfoProvider, idAllocator);

        // Check whether there are some changes in the plan after optimization
        if (optimizationResult.changedPlanNodes().isEmpty()) {
            return root;
        }

        // Add the adaptive plan node recursively, where initialPlan remains as it is and the optimized
        // plan becomes the new currentPlan
        PlanNode adaptivePlan = addAdaptivePlanNode(idAllocator, initialPlan, optimizationResult.plan(), optimizationResult.changedPlanNodes());

        // validate the adaptive plan
        try (var _ = scopedSpan(plannerContext.getTracer(), "validate-adaptive-plan")) {
            planSanityChecker.validateAdaptivePlan(adaptivePlan, session, plannerContext, warningCollector);
        }

        // Fragment the adaptive plan
        return planFragmenter.createSubPlans(
                session,
                new Plan(adaptivePlan, StatsAndCosts.empty()),
                false,
                warningCollector,
                fragmentIdAllocator,
                new PartitioningScheme(Partitioning.create(SINGLE_DISTRIBUTION, ImmutableList.of()), adaptivePlan.getOutputSymbols()),
                // We do not change the subPlans which have no changes and are not downstream of the
                // changed plan nodes. This optimization is done to avoid unnecessary stage restarts due to
                // speculative execution.
                getUnchangedSubPlans(adaptivePlan, optimizationResult.changedPlanNodes(), exchangeSourceIdToSubPlan));
    }

    private AdaptivePlanOptimizer.Result optimizePlan(
            PlanNode plan,
            SymbolAllocator symbolAllocator,
            RuntimeInfoProvider runtimeInfoProvider,
            PlanNodeIdAllocator idAllocator)
    {
        AdaptivePlanOptimizer.Result result = new AdaptivePlanOptimizer.Result(plan, Set.of());
        ImmutableSet.Builder<PlanNodeId> changedPlanNodes = ImmutableSet.builder();
        for (AdaptivePlanOptimizer optimizer : planOptimizers) {
            result = optimizer.optimizeAndMarkPlanChanges(
                    result.plan(),
                    new PlanOptimizer.Context(
                            session,
                            symbolAllocator,
                            idAllocator,
                            warningCollector,
                            planOptimizersStatsCollector,
                            tableStatsProvider,
                            runtimeInfoProvider));
            changedPlanNodes.addAll(result.changedPlanNodes());
        }
        return new AdaptivePlanOptimizer.Result(result.plan(), changedPlanNodes.build());
    }

    private PlanNode addAdaptivePlanNode(
            PlanNodeIdAllocator idAllocator,
            PlanNode initialPlan,
            PlanNode optimizedPlanNode,
            Set<PlanNodeId> changedPlanNodes)
    {
        // We should check optimizedPlanNode here instead of initialPlan since it is possible that new
        // nodes have been added, and they aren't part of initialPlan. However, we should put the adaptive
        // plan node above them.
        if (changedPlanNodes.contains(optimizedPlanNode.getId())) {
            return new AdaptivePlanNode(
                    idAllocator.getNextId(),
                    initialPlan,
                    SymbolsExtractor.extractOutputSymbols(initialPlan),
                    optimizedPlanNode);
        }

        // This condition should always be true because if a plan node is changed, then it should be
        // captured in the changedPlanNodes set based on the semantics of
        // PlanOptimizer#optimizeAndMarkPlanChanges.
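        // Consequently, an unchanged optimized node must still have the same number of sources as the
        // corresponding initial node, which is what the check below asserts.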
        verify(initialPlan.getSources().size() == optimizedPlanNode.getSources().size());
        ImmutableList.Builder<PlanNode> sources = ImmutableList.builder();
        for (int i = 0; i < initialPlan.getSources().size(); i++) {
            PlanNode initialSource = initialPlan.getSources().get(i);
            PlanNode optimizedSource = optimizedPlanNode.getSources().get(i);
            sources.add(addAdaptivePlanNode(idAllocator, initialSource, optimizedSource, changedPlanNodes));
        }
        return optimizedPlanNode.replaceChildren(sources.build());
    }

    private Map<ExchangeSourceId, SubPlan> getUnchangedSubPlans(
            PlanNode adaptivePlan,
            Set<PlanNodeId> changedPlanIds,
            Map<ExchangeSourceId, SubPlan> exchangeSourceIdToSubPlan)
    {
        Set<PlanNodeId> changedPlanIdsWithDownstream = new HashSet<>();
        for (PlanNodeId changedId : changedPlanIds) {
            changedPlanIdsWithDownstream.addAll(getDownstreamPlanNodeIds(adaptivePlan, changedId));
        }
        return exchangeSourceIdToSubPlan.entrySet().stream()
                .filter(entry -> !changedPlanIdsWithDownstream.contains(entry.getKey().exchangeId())
                        && !changedPlanIdsWithDownstream.contains(entry.getKey().sourceId()))
                .collect(toImmutableMap(Map.Entry::getKey, Map.Entry::getValue));
    }

    // Returns the id of the given node together with the ids of all plan nodes that (transitively)
    // consume its output, i.e. its ancestors in the plan tree, which are downstream in terms of data flow.
    private Set<PlanNodeId> getDownstreamPlanNodeIds(PlanNode root, PlanNodeId id)
    {
        if (root.getId().equals(id)) {
            return ImmutableSet.of(id);
        }
        Set<PlanNodeId> upstreamNodes = new HashSet<>();
        root.getSources().stream()
                .map(source -> getDownstreamPlanNodeIds(source, id))
                .forEach(upstreamNodes::addAll);
        if (!upstreamNodes.isEmpty()) {
            upstreamNodes.add(root.getId());
        }
        return upstreamNodes;
    }

    private PlanNode getCurrentPlan(PlanNode node)
    {
        return rewriteWith(new CurrentPlanRewriter(), node);
    }

    private PlanNode getInitialPlan(PlanNode node)
    {
        return rewriteWith(new InitialPlanRewriter(), node);
    }

    private SymbolAllocator createSymbolAllocator(List<SubPlan> subPlans)
    {
        return new SymbolAllocator(
                subPlans.stream()
                        .map(SubPlan::getFragment)
                        .map(PlanFragment::getSymbols)
                        .flatMap(Set::stream)
                        .collect(toImmutableSet()));
    }

    private int getMaxPlanFragmentId(List<SubPlan> subPlans)
    {
        return subPlans.stream()
                .map(SubPlan::getFragment)
                .map(PlanFragment::getId)
                .mapToInt(fragmentId -> Integer.parseInt(fragmentId.toString()))
                .max()
                .orElseThrow();
    }

    private int getMaxPlanId(PlanNode node)
    {
        return traverse(node)
                .map(PlanNode::getId)
                .mapToInt(planNodeId -> Integer.parseInt(planNodeId.toString()))
                .max()
                .orElseThrow();
    }

    private Stream<PlanNode> traverse(PlanNode node)
    {
        Iterable<PlanNode> iterable = Traverser.forTree(PlanNode::getSources).depthFirstPreOrder(node);
        return StreamSupport.stream(iterable.spliterator(), false);
    }

    private Stream<SubPlan> traverse(SubPlan subPlan)
    {
        Iterable<SubPlan> iterable = Traverser.forTree(SubPlan::getChildren).depthFirstPreOrder(subPlan);
        return StreamSupport.stream(iterable.spliterator(), false);
    }

    private static class ReplaceUnchangedFragmentsWithRemoteSourcesRewriter
            extends SimplePlanRewriter<List<SubPlan>>
    {
        private final RuntimeInfoProvider runtimeInfoProvider;

        private ReplaceUnchangedFragmentsWithRemoteSourcesRewriter(RuntimeInfoProvider runtimeInfoProvider)
        {
            this.runtimeInfoProvider = requireNonNull(runtimeInfoProvider, "runtimeInfoProvider is null");
        }

        @Override
        public PlanNode visitAdaptivePlanNode(AdaptivePlanNode node, RewriteContext<List<SubPlan>> context)
        {
            // It is possible that the initial plan also contains remote source nodes, therefore we need to
            // rewrite them as well.
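            // Rewrite both sides of the adaptive plan node so that remote sources are replaced
            // consistently in the initial plan and in the current plan.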
            PlanNode initialPlan = context.rewrite(node.getInitialPlan(), context.get());
            PlanNode currentPlan = context.rewrite(node.getCurrentPlan(), context.get());
            return new AdaptivePlanNode(node.getId(), initialPlan, SymbolsExtractor.extractOutputSymbols(initialPlan), currentPlan);
        }

        @Override
        public PlanNode visitRemoteSource(RemoteSourceNode node, RewriteContext<List<SubPlan>> context)
        {
            // We won't run optimizer rules on sub plans which are either finished or whose stats are
            // almost accurate because they are estimated by progress.
            // TODO: We need to add an ability to re-plan fragments whose stats are estimated by progress.
            if (node.getSourceFragmentIds().stream()
                    .anyMatch(planFragmentId -> runtimeInfoProvider.getRuntimeOutputStats(planFragmentId).isAccurate())) {
                return node;
            }

            List<SubPlan> sourceSubPlans = context.get().stream()
                    .filter(subPlan -> node.getSourceFragmentIds().contains(subPlan.getFragment().getId()))
                    .collect(toImmutableList());
            ImmutableList.Builder<PlanNode> sourceNodesBuilder = ImmutableList.builder();
            for (SubPlan sourceSubPlan : sourceSubPlans) {
                PlanNode sourceNode = context.rewrite(sourceSubPlan.getFragment().getRoot(), sourceSubPlan.getChildren());
                sourceNodesBuilder.add(sourceNode);
            }

            List<PlanNode> sourceNodes = sourceNodesBuilder.build();
            List<List<Symbol>> inputs = sourceNodes.stream().map(PlanNode::getOutputSymbols).collect(toImmutableList());
            PartitioningScheme partitioningScheme = node.getSourceFragmentIds().stream()
                    .map(runtimeInfoProvider::getPlanFragment)
                    .map(PlanFragment::getOutputPartitioningScheme)
                    .findFirst()
                    .orElseThrow();

            return new ExchangeNode(
                    node.getId(),
                    node.getExchangeType(),
                    REMOTE,
                    partitioningScheme,
                    sourceNodes,
                    inputs,
                    node.getOrderingScheme());
        }
    }

    private static class ExchangeSourceIdToSubPlanCollector
            extends SimplePlanVisitor<List<SubPlan>>
    {
        private final Map<ExchangeSourceId, SubPlan> exchangeSourceIdToSubPlan = new HashMap<>();

        @Override
        public Void visitExchange(ExchangeNode node, List<SubPlan> context)
        {
            // Process the source nodes first
            visitPlan(node, context);

            // No need to process the exchange node if it is not a remote exchange
            if (node.getScope() != REMOTE) {
                return null;
            }

            // Find the sub plans for this exchange node
            List<PlanNodeId> sourceIds = node.getSources().stream().map(PlanNode::getId).collect(toImmutableList());
            List<SubPlan> sourceSubPlans = context.stream()
                    .filter(subPlan -> sourceIds.contains(subPlan.getFragment().getRoot().getId()))
                    .collect(toImmutableList());
            verify(
                    sourceSubPlans.size() == sourceIds.size(),
                    "Source subPlans not found for exchange node");

            for (SubPlan sourceSubPlan : sourceSubPlans) {
                PlanNodeId sourceId = sourceSubPlan.getFragment().getRoot().getId();
                exchangeSourceIdToSubPlan.put(new ExchangeSourceId(node.getId(), sourceId), sourceSubPlan);
            }
            return null;
        }

        @Override
        public Void visitRemoteSource(RemoteSourceNode node, List<SubPlan> context)
        {
            List<SubPlan> sourceSubPlans = context.stream()
                    .filter(subPlan -> node.getSourceFragmentIds().contains(subPlan.getFragment().getId()))
                    .collect(toImmutableList());
            for (SubPlan sourceSubPlan : sourceSubPlans) {
                PlanNodeId sourceId = sourceSubPlan.getFragment().getRoot().getId();
                exchangeSourceIdToSubPlan.put(new ExchangeSourceId(node.getId(), sourceId), sourceSubPlan);
            }
            return null;
        }

        public Map<ExchangeSourceId, SubPlan> getExchangeSourceIdToSubPlan()
        {
            return ImmutableMap.copyOf(exchangeSourceIdToSubPlan);
        }
    }

    private static class CurrentPlanRewriter
            extends SimplePlanRewriter<List<SubPlan>>
    {
        @Override
        public PlanNode visitAdaptivePlanNode(AdaptivePlanNode node, RewriteContext<List<SubPlan>> context)
        {
            verify(
                    !containsAdaptivePlanNode(node.getCurrentPlan()),
                    "Adaptive plan node cannot have a nested adaptive plan node");
            return node.getCurrentPlan();
        }
    }
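    // InitialPlanRewriter mirrors CurrentPlanRewriter: it removes each AdaptivePlanNode by keeping its
    // initial (pre-adaptation) plan instead of its current plan.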
    private static class InitialPlanRewriter
            extends SimplePlanRewriter<List<SubPlan>>
    {
        @Override
        public PlanNode visitAdaptivePlanNode(AdaptivePlanNode node, RewriteContext<List<SubPlan>> context)
        {
            verify(
                    !containsAdaptivePlanNode(node.getInitialPlan()),
                    "Adaptive plan node cannot have a nested adaptive plan node");
            return node.getInitialPlan();
        }
    }

    private static boolean containsAdaptivePlanNode(PlanNode node)
    {
        return PlanNodeSearcher.searchFrom(node)
                .whereIsInstanceOfAny(AdaptivePlanNode.class)
                .matches();
    }

    public record ExchangeSourceId(PlanNodeId exchangeId, PlanNodeId sourceId)
    {
        public ExchangeSourceId
        {
            requireNonNull(exchangeId, "exchangeId is null");
            requireNonNull(sourceId, "sourceId is null");
        }
    }
}
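
/*
 * Illustrative usage sketch: roughly how a fault-tolerant scheduler might drive this planner each time
 * fresh exchange statistics become available. The variable names here (adaptiveOptimizers, fragmenter,
 * sanityChecker, statsCollector, currentRoot) are placeholders for this sketch, not Trino APIs:
 *
 *   AdaptivePlanner planner = new AdaptivePlanner(
 *           session, plannerContext, adaptiveOptimizers, fragmenter,
 *           sanityChecker, warningCollector, statsCollector, tableStatsProvider);
 *   SubPlan newRoot = planner.optimize(currentRoot, runtimeInfoProvider);
 *   // optimize() returns the same SubPlan instance when nothing changed; otherwise the returned tree
 *   // reuses unchanged fragments and assigns new fragment ids to the re-planned ones.
 */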