Please wait. This can take some minutes ...
Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance.
Project price only 1 $
You can buy this project and download/modify it how often you want.
org.apache.flink.optimizer.dag.OptimizerNode Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.optimizer.dag;
import org.apache.flink.api.common.ExecutionConfig;
import org.apache.flink.api.common.ExecutionMode;
import org.apache.flink.api.common.operators.AbstractUdfOperator;
import org.apache.flink.api.common.operators.CompilerHints;
import org.apache.flink.api.common.operators.Operator;
import org.apache.flink.api.common.operators.SemanticProperties;
import org.apache.flink.api.common.operators.util.FieldSet;
import org.apache.flink.optimizer.CompilerException;
import org.apache.flink.optimizer.DataStatistics;
import org.apache.flink.optimizer.costs.CostEstimator;
import org.apache.flink.optimizer.dataproperties.InterestingProperties;
import org.apache.flink.optimizer.dataproperties.RequestedGlobalProperties;
import org.apache.flink.optimizer.dataproperties.RequestedLocalProperties;
import org.apache.flink.optimizer.plan.PlanNode;
import org.apache.flink.optimizer.plandump.DumpableConnection;
import org.apache.flink.optimizer.plandump.DumpableNode;
import org.apache.flink.runtime.operators.shipping.ShipStrategyType;
import org.apache.flink.util.Visitable;
import org.apache.flink.util.Visitor;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
/**
 * The OptimizerNode is the base class of all nodes in the optimizer DAG. The optimizer DAG is the
 * optimizer's representation of a program, created before the actual optimization (which creates
 * different candidate plans and computes their cost).
 *
 * <p>Nodes in the DAG correspond (almost) one-to-one to the operators in a program. The optimizer
 * DAG is constructed to hold the additional information that the optimizer needs:
 *
 * <ul>
 *   <li>Estimates of the data size processed by each operator
 *   <li>Helper structures to track where the data flow "splits" and "joins", to support flows that
 *       are DAGs but not trees.
 *   <li>Tags and weights to differentiate between loop-variant and -invariant parts of an iteration
 *   <li>Interesting properties to be used during the enumeration of candidate plans
 * </ul>
 */
public abstract class OptimizerNode
implements Visitable, EstimateProvider, DumpableNode {
public static final int MAX_DYNAMIC_PATH_COST_WEIGHT = 100;
// --------------------------------------------------------------------------------------------
// Members
// --------------------------------------------------------------------------------------------
private final Operator> operator; // The operator (Reduce / Join / DataSource / ...)
private List broadcastConnectionNames =
new ArrayList(); // the broadcast inputs names of this node
private List broadcastConnections =
new ArrayList(); // the broadcast inputs of this node
private List outgoingConnections; // The links to succeeding nodes
private InterestingProperties intProps; // the interesting properties of this node
// --------------------------------- Branch Handling ------------------------------------------
protected List
openBranches; // stack of branches in the sub-graph that are not joined
protected Set
closedBranchingNodes; // stack of branching nodes which have already been closed
protected List
hereJoinedBranches; // the branching nodes (node with multiple outputs)
// that are partially joined (through multiple inputs or broadcast vars)
// ---------------------------- Estimates and Annotations -------------------------------------
protected long estimatedOutputSize = -1; // the estimated size of the output (bytes)
protected long estimatedNumRecords =
-1; // the estimated number of key/value pairs in the output
protected Set
uniqueFields; // set of attributes that will always be unique after this node
// --------------------------------- General Parameters ---------------------------------------
private int parallelism =
ExecutionConfig.PARALLELISM_DEFAULT; // the number of parallel instances of this node
private long minimalMemoryPerSubTask = -1;
protected int id = -1; // the id for this node.
protected int costWeight = 1; // factor to weight the costs for dynamic paths
protected boolean onDynamicPath;
protected List
cachedPlans; // cache candidates, because the may be accessed repeatedly
// ------------------------------------------------------------------------
// Constructor / Setup
// ------------------------------------------------------------------------
/**
 * Creates a new optimizer node that represents the given program operator.
 *
 * <p>As part of construction the operator's annotations are read via {@link
 * #readStubAnnotations()}. NOTE(review): that method is overridable and is invoked before any
 * subclass constructor body has run, so overrides must not depend on subclass state.
 *
 * @param op The operator that the node represents. The annotation reading in this class
 *     dereferences it, so it is expected to be non-null.
 */
public OptimizerNode(Operator<?> op) {
    this.operator = op;
    readStubAnnotations();
}
/**
 * Copy constructor. Takes over the operator, interesting properties, branch bookkeeping
 * (open branches and closed branching nodes), size estimates, parallelism, memory setting, id,
 * cost weight and dynamic-path flag of the given node.
 *
 * <p>The referenced objects are shared with {@code toCopy}, not cloned. Connections (broadcast
 * and outgoing), the branches joined here, the unique fields and the cached plans are not
 * copied.
 *
 * @param toCopy The node whose state is taken over.
 */
protected OptimizerNode(OptimizerNode toCopy) {
    // identity
    operator = toCopy.operator;
    id = toCopy.id;

    // properties and branch tracking (shared references)
    intProps = toCopy.intProps;
    openBranches = toCopy.openBranches;
    closedBranchingNodes = toCopy.closedBranchingNodes;

    // estimates
    estimatedOutputSize = toCopy.estimatedOutputSize;
    estimatedNumRecords = toCopy.estimatedNumRecords;

    // general parameters
    parallelism = toCopy.parallelism;
    minimalMemoryPerSubTask = toCopy.minimalMemoryPerSubTask;
    costWeight = toCopy.costWeight;
    onDynamicPath = toCopy.onDynamicPath;
}
// ------------------------------------------------------------------------
// Methods specific to unary- / binary- / special nodes
// ------------------------------------------------------------------------
/**
 * Gets the name of this node, which is the name of the function/operator, or data source / data
 * sink.
 *
 * <p>Implemented by the concrete node types.
 *
 * @return The node name.
 */
public abstract String getOperatorName();
/**
* This function connects the predecessors to this operator.
*
* @param operatorToNode The map from program operators to optimizer nodes.
* @param defaultExchangeMode The data exchange mode to use, if the operator does not specify
* one.
*/
public abstract void setInput(
Map, OptimizerNode> operatorToNode, ExecutionMode defaultExchangeMode);
/**
* This function connects the operators that produce the broadcast inputs to this operator.
*
* @param operatorToNode The map from program operators to optimizer nodes.
* @param defaultExchangeMode The data exchange mode to use, if the operator does not specify
* one.
* @throws CompilerException
*/
public void setBroadcastInputs(
Map, OptimizerNode> operatorToNode, ExecutionMode defaultExchangeMode) {
// skip for Operators that don't support broadcast variables
if (!(getOperator() instanceof AbstractUdfOperator, ?>)) {
return;
}
// get all broadcast inputs
AbstractUdfOperator, ?> operator = ((AbstractUdfOperator, ?>) getOperator());
// create connections and add them
for (Map.Entry> input : operator.getBroadcastInputs().entrySet()) {
OptimizerNode predecessor = operatorToNode.get(input.getValue());
DagConnection connection =
new DagConnection(
predecessor, this, ShipStrategyType.BROADCAST, defaultExchangeMode);
addBroadcastConnection(input.getKey(), connection);
predecessor.addOutgoingConnection(connection);
}
}
/**
 * Gets all incoming connections of this node. This method needs to be overridden by subclasses
 * to return the children. Broadcast inputs are not part of this list; they are available via
 * {@link #getBroadcastConnections()}.
 *
 * @return The list of incoming connections.
 */
public abstract List<DagConnection> getIncomingConnections();
/**
 * Tells the node to compute the interesting properties for its inputs. The interesting
 * properties for the node itself must have been computed before. The node must then see how
 * many of the interesting properties it preserves and add its own.
 *
 * @param estimator The {@code CostEstimator} instance to use for plan cost estimation.
 */
public abstract void computeInterestingPropertiesForInputs(CostEstimator estimator);
/**
 * This method causes the node to compute the description of open branches in its sub-plan. An
 * open branch describes, that a (transitive) child node had multiple outputs, which have not
 * all been re-joined in the sub-plan. This method needs to set the <code>openBranches</code>
 * field to a stack of unclosed branches, the latest one top. A branch is considered closed, if
 * some later node sees all of the branching node's outputs, no matter if there have been more
 * branches to different paths in the meantime.
 */
public abstract void computeUnclosedBranchStack();
/**
 * Folds the open branches contributed by the broadcast inputs into the given branch stack.
 *
 * <p>For each broadcast connection, the closed branching nodes of its source are added to this
 * node's closed set, and the source's branches towards this node are merged with the branches
 * collected so far via {@code mergeLists} (with joined branches marked as pipeline breaking).
 *
 * @param branchesSoFar The open branches collected from the regular inputs.
 * @return The merged open branches; the passed list itself when this node has no broadcast
 *     inputs, an empty list when the last merge produced nothing.
 */
protected List<UnclosedBranchDescriptor> computeUnclosedBranchStackForBroadcastInputs(
        List<UnclosedBranchDescriptor> branchesSoFar) {
    // handle the data flow branching for the broadcast inputs
    for (DagConnection broadcastInput : getBroadcastConnections()) {
        OptimizerNode bcSource = broadcastInput.getSource();
        addClosedBranches(bcSource.closedBranchingNodes);
        List<UnclosedBranchDescriptor> bcBranches =
                bcSource.getBranchesForParent(broadcastInput);
        ArrayList<UnclosedBranchDescriptor> mergedBranches =
                new ArrayList<UnclosedBranchDescriptor>();
        mergeLists(branchesSoFar, bcBranches, mergedBranches, true);
        branchesSoFar =
                mergedBranches.isEmpty()
                        ? Collections.<UnclosedBranchDescriptor>emptyList()
                        : mergedBranches;
    }
    return branchesSoFar;
}
/**
 * Computes the plan alternatives for this node, and implicitly for all nodes that are children
 * of this node. This method must determine for each alternative the global and local properties
 * and the costs. This method may recursively call <code>getAlternativePlans()</code> on its
 * children to get their plan alternatives, and build its own alternatives on top of those.
 *
 * @param estimator The cost estimator used to estimate the costs of each plan alternative.
 * @return A list containing all plan alternatives.
 */
public abstract List<PlanNode> getAlternativePlans(CostEstimator estimator);
/**
 * This method implements the visit of a depth-first graph traversing visitor. Implementers must
 * first call the <code>preVisit()</code> method, then hand the visitor to their children, and
 * finally call the <code>postVisit()</code> method.
 *
 * @param visitor The graph traversing visitor.
 * @see org.apache.flink.util.Visitable#accept(org.apache.flink.util.Visitor)
 */
// NOTE(review): the visitor parameter is a raw type here; its type argument was presumably
// lost (likely Visitor<OptimizerNode>) -- confirm against the Visitable declaration this
// class implements before parameterizing it.
@Override
public abstract void accept(Visitor visitor);
/**
 * Gets the semantic properties of this node.
 *
 * <p>NOTE(review): presumably these are the semantic properties of the operator's user
 * function; the contract is defined by the implementing subclasses -- confirm there.
 *
 * @return The semantic properties.
 */
public abstract SemanticProperties getSemanticProperties();
// ------------------------------------------------------------------------
// Getters / Setters
// ------------------------------------------------------------------------
/**
 * Collects the source nodes of all inputs of this node: first those of the regular incoming
 * connections, then those of the broadcast connections.
 *
 * @return A freshly created list of the predecessor nodes, in the order described above.
 */
@Override
public Iterable<OptimizerNode> getPredecessors() {
    List<OptimizerNode> allPredecessors = new ArrayList<OptimizerNode>();
    for (DagConnection dagConnection : getIncomingConnections()) {
        allPredecessors.add(dagConnection.getSource());
    }
    for (DagConnection conn : getBroadcastConnections()) {
        allPredecessors.add(conn.getSource());
    }
    return allPredecessors;
}
/**
 * Returns the identifier assigned to this node.
 *
 * @return The node id, or -1 as long as no id has been assigned through {@link #initId(int)}.
 */
public int getId() {
    return id;
}
/**
 * Sets the ID of this node. The id can be assigned exactly once.
 *
 * @param id The id for this node; must be greater than zero.
 * @throws IllegalArgumentException If the given id is zero or negative.
 * @throws IllegalStateException If an id has already been assigned to this node.
 */
public void initId(int id) {
    if (id <= 0) {
        throw new IllegalArgumentException(
                "The node id must be greater than zero, but was " + id + ".");
    }
    // -1 is the marker for "not yet assigned"
    if (this.id != -1) {
        throw new IllegalStateException("Id has already been initialized.");
    }
    this.id = id;
}
/**
 * Registers a broadcast input on this node. The name and the connection are appended to two
 * parallel lists, so that both end up at the same index.
 *
 * @param name The name under which the broadcast input is registered.
 * @param broadcastConnection The connection to add.
 */
public void addBroadcastConnection(String name, DagConnection broadcastConnection) {
    broadcastConnectionNames.add(name);
    broadcastConnections.add(broadcastConnection);
}
/**
 * Return the list of names associated with broadcast inputs for this node.
 *
 * @return The internal (live, mutable) list of broadcast input names; never a copy.
 */
public List<String> getBroadcastConnectionNames() {
    return this.broadcastConnectionNames;
}
/**
 * Return the list of inputs associated with broadcast variables for this node.
 *
 * @return The internal (live, mutable) list of broadcast connections; never a copy.
 */
public List<DagConnection> getBroadcastConnections() {
    return this.broadcastConnections;
}
/**
 * Adds a new outgoing connection to this node. The list of outgoing connections is created
 * lazily on the first call.
 *
 * @param connection The connection to add.
 * @throws CompilerException If the node already has 64 outgoing connections. The limit exists
 *     because the branch handling encodes the output index as a single bit of a {@code long}.
 */
public void addOutgoingConnection(DagConnection connection) {
    if (this.outgoingConnections == null) {
        this.outgoingConnections = new ArrayList<DagConnection>();
    } else if (this.outgoingConnections.size() == 64) {
        throw new CompilerException("Cannot currently handle nodes with more than 64 outputs.");
    }
    this.outgoingConnections.add(connection);
}
/**
 * The list of outgoing connections from this node to succeeding tasks.
 *
 * @return The internal list of outgoing connections, or {@code null} if no outgoing connection
 *     has been added to this node yet.
 */
public List<DagConnection> getOutgoingConnections() {
    return this.outgoingConnections;
}
/**
* Gets the operator represented by this optimizer node.
*
* @return This node's operator.
*/
public Operator> getOperator() {
return this.operator;
}
/**
 * Returns the parallelism of the operator represented by this optimizer node, i.e. the number
 * of parallel instances of the operator that will be spawned during the execution. A value of
 * {@link ExecutionConfig#PARALLELISM_DEFAULT} means that the system takes the default number
 * of parallel instances.
 *
 * @return The parallelism of the operator.
 */
public int getParallelism() {
    return parallelism;
}
/**
 * Defines how many parallel instances of the operator will be spawned during the execution.
 *
 * @param parallelism The parallelism to set. {@link ExecutionConfig#PARALLELISM_DEFAULT} is
 *     accepted and means that the system takes the default number of parallel instances.
 * @throws IllegalArgumentException If the parallelism is smaller than one and is not the
 *     default marker.
 */
public void setParallelism(int parallelism) {
    final boolean isDefaultMarker = parallelism == ExecutionConfig.PARALLELISM_DEFAULT;
    if (!isDefaultMarker && parallelism < 1) {
        throw new IllegalArgumentException("Parallelism of " + parallelism + " is invalid.");
    }
    this.parallelism = parallelism;
}
/**
 * Computes the memory that all subtasks of this task have jointly available, as the minimal
 * memory per subtask multiplied by the parallelism.
 *
 * <p>NOTE(review): the product is taken with whatever the parallelism currently is, including
 * the default marker -- callers presumably resolve the parallelism first; confirm.
 *
 * @return The total amount of memory across all subtasks, or -1 if the per-subtask memory is
 *     not set.
 */
public long getMinimalMemoryAcrossAllSubTasks() {
    if (minimalMemoryPerSubTask == -1) {
        return -1;
    }
    return minimalMemoryPerSubTask * parallelism;
}
/**
 * Tells whether this node has been marked as lying on a dynamic path.
 *
 * @return The current value of the dynamic-path flag.
 */
public boolean isOnDynamicPath() {
    return onDynamicPath;
}
/**
 * Determines whether this node lies on a dynamic path, based on its regular and broadcast
 * inputs. If at least one input connection is on a dynamic path, the node is flagged as dynamic
 * and takes over the given cost weight. If the node additionally has inputs that are not
 * dynamic, the regular input connections whose source is not on a dynamic path are switched to
 * a cached materialization mode.
 *
 * @param costWeight The cost weight to assign when the node is on a dynamic path.
 */
public void identifyDynamicPath(int costWeight) {
    boolean sawDynamicInput = false;
    boolean sawOnlyDynamicInputs = true;

    for (DagConnection input : getIncomingConnections()) {
        if (input.isOnDynamicPath()) {
            sawDynamicInput = true;
        } else {
            sawOnlyDynamicInputs = false;
        }
    }
    for (DagConnection broadcastInput : getBroadcastConnections()) {
        if (broadcastInput.isOnDynamicPath()) {
            sawDynamicInput = true;
        } else {
            sawOnlyDynamicInputs = false;
        }
    }

    if (!sawDynamicInput) {
        // purely static node: nothing to mark
        return;
    }

    this.onDynamicPath = true;
    this.costWeight = costWeight;

    if (sawOnlyDynamicInputs) {
        return;
    }

    // this node joins static and dynamic path.
    // mark the connections where the source is not dynamic as cached
    for (DagConnection input : getIncomingConnections()) {
        if (input.getSource().isOnDynamicPath()) {
            continue;
        }
        input.setMaterializationMode(input.getMaterializationMode().makeCached());
    }
    // broadcast variables are always cached, because they stay unchanged available in
    // the runtime context of the functions
}
/**
 * Returns the factor by which the costs of this node are weighted on dynamic paths.
 *
 * @return The cost weight; 1 unless changed by {@link #identifyDynamicPath(int)}.
 */
public int getCostWeight() {
    return costWeight;
}
/**
 * Returns the largest depth reported by any regular or broadcast input connection.
 *
 * @return The maximum of the connections' max depths, or 0 if the node has no inputs (or none
 *     reports a positive depth).
 */
public int getMaxDepth() {
    int deepest = 0;
    for (DagConnection input : getIncomingConnections()) {
        final int depth = input.getMaxDepth();
        if (depth > deepest) {
            deepest = depth;
        }
    }
    for (DagConnection broadcastInput : getBroadcastConnections()) {
        final int depth = broadcastInput.getMaxDepth();
        if (depth > deepest) {
            deepest = depth;
        }
    }
    return deepest;
}
/**
 * Returns the properties that are interesting for this node to produce.
 *
 * @return The interesting properties for this node, or null if they have not been computed
 *     yet or have been cleared.
 */
public InterestingProperties getInterestingProperties() {
    return intProps;
}
/**
 * Returns the estimated size of this node's output in bytes.
 *
 * @return The estimated output size, or -1 if unknown.
 */
@Override
public long getEstimatedOutputSize() {
    return estimatedOutputSize;
}
/**
 * Returns the estimated number of records in this node's output.
 *
 * @return The estimated record count, or -1 if unknown.
 */
@Override
public long getEstimatedNumRecords() {
    return estimatedNumRecords;
}
/**
 * Overrides the estimated output size of this node. The value is stored without validation.
 *
 * @param estimatedOutputSize The estimated output size in bytes.
 */
public void setEstimatedOutputSize(long estimatedOutputSize) {
    this.estimatedOutputSize = estimatedOutputSize;
}
/**
 * Overrides the estimated number of output records of this node. The value is stored without
 * validation.
 *
 * @param estimatedNumRecords The estimated number of records.
 */
public void setEstimatedNumRecords(long estimatedNumRecords) {
    this.estimatedNumRecords = estimatedNumRecords;
}
/**
 * Derives the average width of an output record from the size and cardinality estimates.
 *
 * @return The estimated output size divided by the estimated number of records, or -1.0 if
 *     either estimate is not strictly positive.
 */
@Override
public float getEstimatedAvgWidthPerOutputRecord() {
    if (estimatedOutputSize <= 0 || estimatedNumRecords <= 0) {
        return -1.0f;
    }
    return ((float) estimatedOutputSize) / estimatedNumRecords;
}
/**
 * Tells whether the output of this node branches, i.e. whether the node has more than one
 * outgoing connection. A node without any outgoing connections (list not yet created) does not
 * branch.
 *
 * @return True, if the node's output branches. False otherwise.
 */
public boolean isBranching() {
    if (getOutgoingConnections() == null) {
        return false;
    }
    return getOutgoingConnections().size() > 1;
}
/**
 * Marks every outgoing connection of this node as breaking the pipeline.
 *
 * @throws IllegalStateException If no outgoing connection has been added to this node yet.
 */
public void markAllOutgoingConnectionsAsPipelineBreaking() {
    if (outgoingConnections == null) {
        throw new IllegalStateException("The outgoing connections have not yet been initialized.");
    }
    for (DagConnection outgoing : getOutgoingConnections()) {
        outgoing.markBreaksPipeline();
    }
}
// ------------------------------------------------------------------------
// Miscellaneous
// ------------------------------------------------------------------------
/**
 * Checks whether the target nodes have already set the interesting properties on every
 * outgoing connection of this node.
 *
 * @return False as soon as one outgoing connection has no interesting properties; true
 *     otherwise (also when there are no outgoing connections in the list).
 */
public boolean haveAllOutputConnectionInterestingProperties() {
    for (DagConnection outgoing : getOutgoingConnections()) {
        final boolean propertiesMissing = outgoing.getInterestingProperties() == null;
        if (propertiesMissing) {
            return false;
        }
    }
    return true;
}
/**
 * Computes all the interesting properties that are relevant to this node. The interesting
 * properties are a union of the interesting properties on each outgoing connection. However, if
 * two interesting properties on the outgoing connections overlap, the interesting properties
 * will occur only once in this set. For that, this method deduplicates and merges the
 * interesting properties. This method works on a copy of the first connection's interesting
 * properties and leaves the original objects, contained by the connections, unchanged.
 *
 * <p>Trivial properties are dropped from the result. The outgoing connections and their
 * interesting properties must have been set before this method is called.
 */
public void computeUnionOfInterestingPropertiesFromSuccessors() {
    List<DagConnection> conns = getOutgoingConnections();
    if (conns.isEmpty()) {
        // no outgoing connections, so nobody requests anything: we have none ourselves
        this.intProps = new InterestingProperties();
    } else {
        // start from a clone, so that the first connection's properties stay untouched
        this.intProps = conns.get(0).getInterestingProperties().clone();
        for (int i = 1; i < conns.size(); i++) {
            this.intProps.addInterestingProperties(conns.get(i).getInterestingProperties());
        }
    }
    this.intProps.dropTrivials();
}
/**
 * Discards the interesting properties of this node and clears those of all its regular and
 * broadcast input connections.
 */
public void clearInterestingProperties() {
    intProps = null;
    for (DagConnection input : getIncomingConnections()) {
        input.clearInterestingProperties();
    }
    for (DagConnection broadcastInput : getBroadcastConnections()) {
        broadcastInput.clearInterestingProperties();
    }
}
/**
 * Computes the output estimates of this node (number of records, size in bytes).
 *
 * <p>The estimation runs in stages: the operator-specific defaults are computed first, negative
 * results are normalized to -1 (unknown), and afterwards the compiler hints of the operator, if
 * present, take precedence: explicit output size and cardinality, then the filter factor, and
 * finally the average record width, which is used to derive a missing size from the
 * cardinality or vice versa.
 *
 * @param statistics The statistics object which may be accessed to get statistical information.
 *     The parameter may be null, if no statistics are available.
 * @throws CompilerException If an incoming connection has no source yet.
 */
public void computeOutputEstimates(DataStatistics statistics) {
    // sanity checking
    for (DagConnection input : getIncomingConnections()) {
        if (input.getSource() == null) {
            throw new CompilerException(
                    "Bug: Estimate computation called before inputs have been set.");
        }
    }

    // let every operator do its computation
    computeOperatorSpecificDefaultEstimates(statistics);

    // any negative value means "unknown" and is normalized to -1
    this.estimatedOutputSize = Math.max(this.estimatedOutputSize, -1L);
    this.estimatedNumRecords = Math.max(this.estimatedNumRecords, -1L);

    // overwrite default estimates with hints, if given
    if (getOperator() == null || getOperator().getCompilerHints() == null) {
        return;
    }
    final CompilerHints hints = getOperator().getCompilerHints();

    if (hints.getOutputSize() >= 0) {
        this.estimatedOutputSize = hints.getOutputSize();
    }
    if (hints.getOutputCardinality() >= 0) {
        this.estimatedNumRecords = hints.getOutputCardinality();
    }

    if (hints.getFilterFactor() >= 0.0f) {
        if (this.estimatedNumRecords >= 0) {
            // scale the known cardinality (and the size, if known as well)
            this.estimatedNumRecords =
                    (long) (this.estimatedNumRecords * hints.getFilterFactor());
            if (this.estimatedOutputSize >= 0) {
                this.estimatedOutputSize =
                        (long) (this.estimatedOutputSize * hints.getFilterFactor());
            }
        } else if (this instanceof SingleInputNode) {
            // own cardinality unknown: fall back to the predecessor's cardinality
            final OptimizerNode predecessor = ((SingleInputNode) this).getPredecessorNode();
            if (predecessor != null && predecessor.getEstimatedNumRecords() >= 0) {
                this.estimatedNumRecords =
                        (long) (predecessor.getEstimatedNumRecords() * hints.getFilterFactor());
            }
        }
    }

    // use the width to infer the cardinality (given size) and vice versa
    if (!(hints.getAvgOutputRecordSize() >= 1)) {
        return;
    }
    final boolean recordsUnknown = this.estimatedNumRecords == -1;
    final boolean sizeUnknown = this.estimatedOutputSize == -1;
    if (recordsUnknown && this.estimatedOutputSize >= 0) {
        // the estimated number of rows based on size
        this.estimatedNumRecords =
                (long) (this.estimatedOutputSize / hints.getAvgOutputRecordSize());
    } else if (sizeUnknown && this.estimatedNumRecords >= 0) {
        this.estimatedOutputSize =
                (long) (this.estimatedNumRecords * hints.getAvgOutputRecordSize());
    }
}
/**
 * Hook through which each concrete node type computes its default output estimates. It is
 * invoked by {@code computeOutputEstimates} before the compiler hints are applied.
 *
 * @param statistics The statistics handed to {@code computeOutputEstimates}; may be null.
 */
protected abstract void computeOperatorSpecificDefaultEstimates(DataStatistics statistics);
// ------------------------------------------------------------------------
// Reading of stub annotations
// ------------------------------------------------------------------------
/**
 * Entry point for reading the stub annotations of the operator. In this class, only the
 * annotation describing which fields remain unique is read.
 */
protected void readStubAnnotations() {
    this.readUniqueFieldsAnnotation();
}
/**
 * Adds the unique field sets declared in the operator's compiler hints to this node's set of
 * unique fields. Does nothing if the operator has no compiler hints or the hints declare no
 * unique fields; the set is created lazily on the first addition.
 */
protected void readUniqueFieldsAnnotation() {
    final CompilerHints hints = this.operator.getCompilerHints();
    if (hints == null) {
        return;
    }
    final Set<FieldSet> uniqueFieldSets = hints.getUniqueFields();
    if (uniqueFieldSets == null) {
        return;
    }
    if (this.uniqueFields == null) {
        this.uniqueFields = new HashSet<FieldSet>();
    }
    this.uniqueFields.addAll(uniqueFieldSets);
}
// ------------------------------------------------------------------------
// Access of stub annotations
// ------------------------------------------------------------------------
/**
 * Gets the FieldSets which are unique in the output of the node.
 *
 * @return The internal set of unique field sets, or an immutable empty set if none are known.
 */
public Set<FieldSet> getUniqueFields() {
    return this.uniqueFields == null ? Collections.<FieldSet>emptySet() : this.uniqueFields;
}
// --------------------------------------------------------------------------------------------
// Pruning
// --------------------------------------------------------------------------------------------
/**
 * Prunes the given candidate plans in place, keeping only the candidates that are cheapest
 * overall or cheapest for some interesting property.
 *
 * <p>If this node has open branches, candidates are only comparable when they made the same
 * sub-plan choice at every open branching node. In that case the candidates are grouped by
 * these choices and each group is pruned separately.
 *
 * @param plans The candidate plans; the list is modified to contain only the surviving plans.
 * @throws CompilerException If the list of candidates is empty.
 */
protected void prunePlanAlternatives(List<PlanNode> plans) {
    if (plans.isEmpty()) {
        throw new CompilerException(
                "No plan meeting the requirements could be created @ "
                        + this
                        + ". Most likely reason: Too restrictive plan hints.");
    }
    // shortcut for the simple case
    if (plans.size() == 1) {
        return;
    }
    // we can only compare plan candidates that made equal choices
    // at the branching points. for each choice at a branching point,
    // we need to keep the cheapest (wrt. interesting properties).
    // if we do not keep candidates for each branch choice, we might not
    // find branch compatible candidates when joining the branches back.
    // for pruning, we are quasi AFTER the node, so in the presence of
    // branches, we need form the per-branch-choice groups by the choice
    // they made at the latest un-joined branching node. Note that this is
    // different from the check for branch compatibility of candidates, as
    // this happens on the input sub-plans and hence BEFORE the node (therefore
    // it is relevant to find the latest (partially) joined branch point.
    if (this.openBranches == null || this.openBranches.isEmpty()) {
        prunePlanAlternativesWithCommonBranching(plans);
    } else {
        // partition the candidates into groups that made the same sub-plan candidate
        // choice at the latest unclosed branch point
        final OptimizerNode[] branchDeterminers = new OptimizerNode[this.openBranches.size()];
        for (int i = 0; i < branchDeterminers.length; i++) {
            // the open branches form a stack: walk it from the top (latest) downwards
            branchDeterminers[i] =
                    this.openBranches.get(this.openBranches.size() - 1 - i).getBranchingNode();
        }
        // this sorter sorts by the candidate choice at the branch point
        Comparator<PlanNode> sorter =
                new Comparator<PlanNode>() {
                    @Override
                    public int compare(PlanNode o1, PlanNode o2) {
                        for (OptimizerNode branchDeterminer : branchDeterminers) {
                            PlanNode n1 = o1.getCandidateAtBranchPoint(branchDeterminer);
                            PlanNode n2 = o2.getCandidateAtBranchPoint(branchDeterminer);
                            int hash1 = System.identityHashCode(n1);
                            int hash2 = System.identityHashCode(n2);
                            if (hash1 != hash2) {
                                // explicit comparison: subtracting the hashes could overflow
                                // and yield an inconsistent ordering
                                return hash1 < hash2 ? -1 : 1;
                            }
                        }
                        return 0;
                    }
                };
        Collections.sort(plans, sorter);
        List<PlanNode> result = new ArrayList<PlanNode>();
        List<PlanNode> turn = new ArrayList<PlanNode>();
        final PlanNode[] determinerChoice = new PlanNode[branchDeterminers.length];
        while (!plans.isEmpty()) {
            // take one as the determiner
            turn.clear();
            PlanNode determiner = plans.remove(plans.size() - 1);
            turn.add(determiner);
            for (int i = 0; i < determinerChoice.length; i++) {
                determinerChoice[i] =
                        determiner.getCandidateAtBranchPoint(branchDeterminers[i]);
            }
            // go backwards through the plans and find all that are equal
            boolean stillEqual = true;
            for (int k = plans.size() - 1; k >= 0 && stillEqual; k--) {
                PlanNode toCheck = plans.get(k);
                for (int i = 0; i < branchDeterminers.length; i++) {
                    PlanNode checkerChoice =
                            toCheck.getCandidateAtBranchPoint(branchDeterminers[i]);
                    if (checkerChoice != determinerChoice[i]) {
                        // not the same anymore
                        stillEqual = false;
                        break;
                    }
                }
                if (stillEqual) {
                    // the same
                    plans.remove(k);
                    turn.add(toCheck);
                }
            }
            // now that we have only plans with the same branch alternatives, prune!
            if (turn.size() > 1) {
                prunePlanAlternativesWithCommonBranching(turn);
            }
            result.addAll(turn);
        }
        // after all turns are complete
        plans.clear();
        plans.addAll(result);
    }
}
/**
 * Prunes candidates that are mutually comparable (i.e. made the same choices at the open branch
 * points). The list is rewritten in place to contain:
 *
 * <ul>
 *   <li>the overall cheapest candidate,
 *   <li>for each interesting global property, the cheapest candidate meeting it, and
 *   <li>for each combination of interesting global and local property, the cheapest candidate
 *       meeting both.
 * </ul>
 *
 * <p>The pruning marker of the plan nodes is used to avoid adding a candidate twice. The
 * interesting properties of this node must have been computed before this method is called.
 *
 * @param plans The comparable candidate plans; modified to hold only the surviving plans.
 */
protected void prunePlanAlternativesWithCommonBranching(List<PlanNode> plans) {
    // for each interesting property, which plans are cheapest
    final RequestedGlobalProperties[] gps =
            this.intProps
                    .getGlobalProperties()
                    .toArray(
                            new RequestedGlobalProperties
                                    [this.intProps.getGlobalProperties().size()]);
    final RequestedLocalProperties[] lps =
            this.intProps
                    .getLocalProperties()
                    .toArray(
                            new RequestedLocalProperties
                                    [this.intProps.getLocalProperties().size()]);
    // toKeep[i][k]: cheapest candidate meeting global property i and local property k
    final PlanNode[][] toKeep = new PlanNode[gps.length][];
    final PlanNode[] cheapestForGlobal = new PlanNode[gps.length];
    PlanNode cheapest = null; // the overall cheapest plan
    // go over all plans from the list
    for (PlanNode candidate : plans) {
        // check if that plan is the overall cheapest
        if (cheapest == null
                || (cheapest.getCumulativeCosts().compareTo(candidate.getCumulativeCosts())
                        > 0)) {
            cheapest = candidate;
        }
        // find the interesting global properties that this plan matches
        for (int i = 0; i < gps.length; i++) {
            if (gps[i].isMetBy(candidate.getGlobalProperties())) {
                // the candidate meets the global property requirements. That means
                // it has a chance that its local properties are re-used (they would be
                // destroyed if global properties need to be established)
                if (cheapestForGlobal[i] == null
                        || (cheapestForGlobal[i]
                                        .getCumulativeCosts()
                                        .compareTo(candidate.getCumulativeCosts())
                                > 0)) {
                    cheapestForGlobal[i] = candidate;
                }
                final PlanNode[] localMatches;
                if (toKeep[i] == null) {
                    localMatches = new PlanNode[lps.length];
                    toKeep[i] = localMatches;
                } else {
                    localMatches = toKeep[i];
                }
                for (int k = 0; k < lps.length; k++) {
                    if (lps[k].isMetBy(candidate.getLocalProperties())) {
                        final PlanNode previous = localMatches[k];
                        if (previous == null
                                || previous.getCumulativeCosts()
                                                .compareTo(candidate.getCumulativeCosts())
                                        > 0) {
                            // this one is cheaper!
                            localMatches[k] = candidate;
                        }
                    }
                }
            }
        }
    }
    // all plans are set now
    plans.clear();
    // add the cheapest plan
    if (cheapest != null) {
        plans.add(cheapest);
        cheapest.setPruningMarker(); // remember that the plan is in the set
    }
    // add all others, which are optimal for some interesting properties
    for (int i = 0; i < gps.length; i++) {
        if (toKeep[i] != null) {
            final PlanNode[] localMatches = toKeep[i];
            for (final PlanNode n : localMatches) {
                if (n != null && !n.isPruneMarkerSet()) {
                    n.setPruningMarker();
                    plans.add(n);
                }
            }
        }
        if (cheapestForGlobal[i] != null) {
            final PlanNode n = cheapestForGlobal[i];
            if (!n.isPruneMarkerSet()) {
                n.setPruningMarker();
                plans.add(n);
            }
        }
    }
}
// --------------------------------------------------------------------------------------------
// Handling of branches
// --------------------------------------------------------------------------------------------
/**
 * Tells whether the sub-plan of this node contains branches that have not been joined yet.
 *
 * @return True, if the stack of open branches exists and is not empty. False otherwise.
 */
public boolean hasUnclosedBranches() {
    if (openBranches == null) {
        return false;
    }
    return !openBranches.isEmpty();
}
/**
 * Gets the branching nodes that have already been closed in the sub-plan of this node.
 *
 * @return The internal set of closed branching nodes; may be null if none were recorded.
 */
public Set<OptimizerNode> getClosedBranchingNodes() {
    return this.closedBranchingNodes;
}
/**
 * Gets the stack of branches in the sub-plan of this node that have not been joined yet.
 *
 * @return The internal list of open branches; may be null if it has not been computed.
 */
public List<UnclosedBranchDescriptor> getOpenBranches() {
    return this.openBranches;
}
/**
 * Computes the open branches as seen by the successor at the other end of the given outgoing
 * connection.
 *
 * <p>With a single outgoing connection, this is a copy of this node's own open branches. With
 * several outgoing connections, this node is itself a branching node: a descriptor for this
 * node is pushed on top of the copy, carrying a bit vector in which the bit at the index of
 * the given connection is set.
 *
 * @param toParent The outgoing connection leading to the successor.
 * @return A new list (or an immutable empty list) of the open branches for that successor.
 * @throws CompilerException If this node has no outgoing connections, or if the given
 *     connection is not among several outgoing connections of this node.
 */
protected List<UnclosedBranchDescriptor> getBranchesForParent(DagConnection toParent) {
    // the list is created lazily, so "not created" means "no successors"
    final int numOutputs =
            this.outgoingConnections == null ? 0 : this.outgoingConnections.size();
    if (numOutputs == 1) {
        // return our own stack of open branches, because nothing is added
        if (this.openBranches == null || this.openBranches.isEmpty()) {
            return Collections.emptyList();
        } else {
            return new ArrayList<UnclosedBranchDescriptor>(this.openBranches);
        }
    } else if (numOutputs > 1) {
        // we branch: add a branch info to the stack
        List<UnclosedBranchDescriptor> branches = new ArrayList<UnclosedBranchDescriptor>(4);
        if (this.openBranches != null) {
            branches.addAll(this.openBranches);
        }
        // find out which output number the connection to the parent has (identity comparison)
        int num;
        for (num = 0; num < numOutputs; num++) {
            if (this.outgoingConnections.get(num) == toParent) {
                break;
            }
        }
        if (num >= numOutputs) {
            throw new CompilerException(
                    "Error in compiler: "
                            + "Parent to get branch info for is not contained in the outgoing connections.");
        }
        // create the description and add it; one bit per output index
        long bitvector = 0x1L << num;
        branches.add(new UnclosedBranchDescriptor(this, bitvector));
        return branches;
    } else {
        throw new CompilerException(
                "Error in compiler: Cannot get branch info for successor in a node with no successors.");
    }
}
/**
 * Removes from the given list all branch descriptors whose branching node is already contained
 * in this node's set of closed branching nodes. The list is modified in place; nothing happens
 * if the list or the closed set is null or empty.
 *
 * @param openList The list of open branches to filter; must support removal via its iterator
 *     when a removal is necessary.
 */
protected void removeClosedBranches(List<UnclosedBranchDescriptor> openList) {
    if (openList == null
            || openList.isEmpty()
            || this.closedBranchingNodes == null
            || this.closedBranchingNodes.isEmpty()) {
        return;
    }
    Iterator<UnclosedBranchDescriptor> it = openList.iterator();
    while (it.hasNext()) {
        if (this.closedBranchingNodes.contains(it.next().getBranchingNode())) {
            // this branch was already closed --> remove it from the list
            it.remove();
        }
    }
}
/**
 * Adds all given branching nodes to this node's set of closed branching nodes. The given set
 * is copied, never stored directly; a null or empty set is ignored.
 *
 * @param alreadyClosed The branching nodes that are already closed; may be null.
 */
protected void addClosedBranches(Set<OptimizerNode> alreadyClosed) {
    if (alreadyClosed == null || alreadyClosed.isEmpty()) {
        return;
    }
    if (this.closedBranchingNodes == null) {
        this.closedBranchingNodes = new HashSet<OptimizerNode>(alreadyClosed);
    } else {
        this.closedBranchingNodes.addAll(alreadyClosed);
    }
}
/**
 * Adds a single branching node to this node's set of closed branching nodes, creating the set
 * if it does not exist yet.
 *
 * @param alreadyClosed The branching node that has been closed.
 */
protected void addClosedBranch(OptimizerNode alreadyClosed) {
    if (this.closedBranchingNodes == null) {
        this.closedBranchingNodes = new HashSet<OptimizerNode>();
    }
    this.closedBranchingNodes.add(alreadyClosed);
}
/**
 * Checks whether two candidate plans for the sub-plan of this node are comparable. The two
 * alternative plans are comparable, if
 *
 * <p>a) no branches are joined at this node, or b) for every branching node joined here, both
 * candidates use the same candidate at that branch point (a branch point for which one of the
 * plans has no candidate does not make them incompatible).
 *
 * @param plan1 The root node of the first candidate plan.
 * @param plan2 The root node of the second candidate plan.
 * @return True if the nodes are branch compatible in the inputs.
 * @throws NullPointerException If one of the plans is null.
 */
protected boolean areBranchCompatible(PlanNode plan1, PlanNode plan2) {
    if (plan1 == null || plan2 == null) {
        throw new NullPointerException();
    }

    // without branches joined here, the children are always compatible.
    // in most plans, that will be the dominant case
    final boolean nothingJoinedHere =
            this.hereJoinedBranches == null || this.hereJoinedBranches.isEmpty();
    if (nothingJoinedHere) {
        return true;
    }

    for (OptimizerNode brancher : this.hereJoinedBranches) {
        final PlanNode choiceOfPlan1 = plan1.getCandidateAtBranchPoint(brancher);
        final PlanNode choiceOfPlan2 = plan2.getCandidateAtBranchPoint(brancher);
        final boolean bothChose = choiceOfPlan1 != null && choiceOfPlan2 != null;
        if (bothChose && choiceOfPlan1 != choiceOfPlan2) {
            return false;
        }
    }
    return true;
}
/**
 * Merges the open-branch lists of two children into {@code result}.
 *
 * <p>The node IDs are assigned in graph-traversal order (pre-order), hence, each list is sorted
 * by ID in ascending order and all consecutive lists start with IDs in ascending order. The
 * merge walks both lists from the back, so the result is built in descending order and reversed
 * at the end.
 *
 * <p>Side effects: branches already closed at this node are removed from both input lists,
 * {@code result} is cleared before it is filled, branching nodes whose paths are re-joined here
 * are appended to {@code hereJoinedBranches}, and branching nodes whose paths are now all
 * joined are recorded as closed.
 *
 * @param child1open The open branches of the first child; may be null.
 * @param child2open The open branches of the second child; may be null.
 * @param result The list that receives the merged open branches; must not be null.
 * @param markJoinedBranchesAsPipelineBreaking True, if all outgoing connections of a branching
 *     node whose paths are re-joined here should be marked as pipeline breaking.
 * @return True if at least one branching node occurs in both input lists, false otherwise
 *     (including the case that one of the lists is null or empty).
 */
protected final boolean mergeLists(
        List<UnclosedBranchDescriptor> child1open,
        List<UnclosedBranchDescriptor> child2open,
        List<UnclosedBranchDescriptor> result,
        boolean markJoinedBranchesAsPipelineBreaking) {
    // remove branches which have already been closed
    removeClosedBranches(child1open);
    removeClosedBranches(child2open);
    result.clear();

    // check how many open branches we have. the cases:
    // 1) if both are null or empty, the result stays empty
    // 2) if one side is null (or empty), the result is the other side.
    // 3) both are set, then we need to merge.
    if (child1open == null || child1open.isEmpty()) {
        if (child2open != null && !child2open.isEmpty()) {
            result.addAll(child2open);
        }
        return false;
    }
    if (child2open == null || child2open.isEmpty()) {
        result.addAll(child1open);
        return false;
    }

    int index1 = child1open.size() - 1;
    int index2 = child2open.size() - 1;
    boolean didCloseABranch = false;

    // as both lists (child1open and child2open) are sorted in ascending ID order
    // we can do a merge-join-like loop which preserves the order in the result list
    // and eliminates duplicates
    while (index1 >= 0 || index2 >= 0) {
        int id1 = -1;
        int id2 = index2 >= 0 ? child2open.get(index2).getBranchingNode().getId() : -1;

        // take everything from the first list that has a larger ID than the second list's head
        while (index1 >= 0
                && (id1 = child1open.get(index1).getBranchingNode().getId()) > id2) {
            result.add(child1open.get(index1));
            index1--;
        }
        // and vice versa
        while (index2 >= 0
                && (id2 = child2open.get(index2).getBranchingNode().getId()) > id1) {
            result.add(child2open.get(index2));
            index2--;
        }

        // match: they share a common branching child
        if (id1 == id2) {
            didCloseABranch = true;

            final UnclosedBranchDescriptor descriptor1 = child1open.get(index1);
            final UnclosedBranchDescriptor descriptor2 = child2open.get(index2);
            final OptimizerNode currBranchingNode = descriptor1.getBranchingNode();
            final long vector1 = descriptor1.getJoinedPathsVector();
            final long vector2 = descriptor2.getJoinedPathsVector();

            // check if this is the same descriptor, (meaning that it contains the same paths)
            // if it is the same, add it only once, otherwise process the join of the paths
            if (vector1 == vector2) {
                result.add(descriptor1);
            } else {
                // we merge (re-join) a branch
                // mark the branch as a point where we break the pipeline
                if (markJoinedBranchesAsPipelineBreaking) {
                    currBranchingNode.markAllOutgoingConnectionsAsPipelineBreaking();
                }
                if (this.hereJoinedBranches == null) {
                    this.hereJoinedBranches = new ArrayList<OptimizerNode>(2);
                }
                this.hereJoinedBranches.add(currBranchingNode);

                // see, if this node closes the branch
                final long joinedInputs = vector1 | vector2;
                // this is 2^size - 1, which is all bits set at positions 0..size-1
                final long allInputs =
                        (0x1L << currBranchingNode.getOutgoingConnections().size()) - 1;
                if (joinedInputs == allInputs) {
                    // closed - we can remove it from the stack
                    addClosedBranch(currBranchingNode);
                } else {
                    // not quite closed
                    result.add(new UnclosedBranchDescriptor(currBranchingNode, joinedInputs));
                }
            }
            index1--;
            index2--;
        }
    }

    // merged. now we need to reverse the list, because we added the elements in reverse order
    Collections.reverse(result);
    return didCloseABranch;
}
/**
 * Returns the optimizer node represented by this object, which is this node itself.
 *
 * @return This node.
 */
@Override
public OptimizerNode getOptimizerNode() {
return this;
}
/**
 * Returns the plan node associated with this object. This implementation always returns null.
 *
 * @return Always null.
 */
@Override
public PlanNode getPlanNode() {
return null;
}
@Override
public Iterable> getDumpableInputs() {
List> allInputs =
new ArrayList>();
allInputs.addAll(getIncomingConnections());
allInputs.addAll(getBroadcastConnections());
return allInputs;
}
/**
 * Builds a short textual description of this node: the operator name, the name of the
 * underlying operator in parentheses, and for each incoming connection its 1-based position
 * together with the name of its ship strategy (or "null" if none is set).
 *
 * @return The textual description of this node.
 */
@Override
public String toString() {
    final StringBuilder text = new StringBuilder();
    text.append(getOperatorName()).append(" (").append(getOperator().getName()).append(") ");

    int position = 0;
    for (DagConnection connection : getIncomingConnections()) {
        position++;

        final String strategyName;
        if (connection.getShipStrategy() != null) {
            strategyName = connection.getShipStrategy().name();
        } else {
            strategyName = "null";
        }

        text.append('(');
        text.append(position);
        text.append(":");
        text.append(strategyName);
        text.append(')');
    }
    return text.toString();
}
// --------------------------------------------------------------------------------------------
/**
 * Describes a branch in the data flow that is still open. A branch is open when the result of
 * one operator is consumed by multiple targets and the paths leading to these targets have not
 * (all) been joined together again.
 */
public static final class UnclosedBranchDescriptor {

    /** The node at which the data flow branches. */
    protected OptimizerNode branchingNode;

    /** Bit vector of the branch paths covered by this descriptor. */
    protected long joinedPathsVector;

    /**
     * Creates a new branching descriptor.
     *
     * @param branchingNode The node where the branch occurred (the node with multiple
     *     outputs).
     * @param joinedPathsVector A bit vector describing which branches are tracked by this
     *     descriptor. A bit is one where the branch is tracked, zero otherwise.
     */
    protected UnclosedBranchDescriptor(OptimizerNode branchingNode, long joinedPathsVector) {
        this.joinedPathsVector = joinedPathsVector;
        this.branchingNode = branchingNode;
    }

    /**
     * Gets the node at which the data flow branches.
     *
     * @return The branching node.
     */
    public OptimizerNode getBranchingNode() {
        return branchingNode;
    }

    /**
     * Gets the bit vector of the branch paths tracked by this descriptor.
     *
     * @return The joined paths bit vector.
     */
    public long getJoinedPathsVector() {
        return joinedPathsVector;
    }

    /**
     * Renders the descriptor as the branching node's operator in parentheses followed by the
     * numeric value of the joined paths vector in square brackets.
     */
    @Override
    public String toString() {
        final StringBuilder text = new StringBuilder();
        text.append("(").append(this.branchingNode.getOperator()).append(") [");
        text.append(this.joinedPathsVector).append("]");
        return text.toString();
    }
}
}