
org.apache.flink.optimizer.dag.OptimizerNode

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.optimizer.dag;

import org.apache.flink.api.common.ExecutionConfig;
import org.apache.flink.api.common.ExecutionMode;
import org.apache.flink.api.common.operators.AbstractUdfOperator;
import org.apache.flink.api.common.operators.CompilerHints;
import org.apache.flink.api.common.operators.Operator;
import org.apache.flink.api.common.operators.SemanticProperties;
import org.apache.flink.api.common.operators.util.FieldSet;
import org.apache.flink.optimizer.CompilerException;
import org.apache.flink.optimizer.DataStatistics;
import org.apache.flink.optimizer.costs.CostEstimator;
import org.apache.flink.optimizer.dataproperties.InterestingProperties;
import org.apache.flink.optimizer.dataproperties.RequestedGlobalProperties;
import org.apache.flink.optimizer.dataproperties.RequestedLocalProperties;
import org.apache.flink.optimizer.plan.PlanNode;
import org.apache.flink.optimizer.plandump.DumpableConnection;
import org.apache.flink.optimizer.plandump.DumpableNode;
import org.apache.flink.runtime.operators.shipping.ShipStrategyType;
import org.apache.flink.util.Visitable;
import org.apache.flink.util.Visitor;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;

/**
 * The OptimizerNode is the base class of all nodes in the optimizer DAG. The optimizer DAG is the
 * optimizer's representation of a program, created before the actual optimization (which creates
 * different candidate plans and computes their cost).
 *
 *
 * <p>Nodes in the DAG correspond (almost) one-to-one to the operators in a program. The optimizer
 * DAG is constructed to hold the additional information that the optimizer needs:
 *
 * <ul>
 *   <li>Estimates of the data size processed by each operator
 *   <li>Helper structures to track where the data flow "splits" and "joins", to support flows that
 *       are DAGs but not trees.
 *   <li>Tags and weights to differentiate between loop-variant and -invariant parts of an
 *       iteration
 *   <li>Interesting properties to be used during the enumeration of candidate plans
 * </ul>
 */
public abstract class OptimizerNode
        implements Visitable<OptimizerNode>, EstimateProvider, DumpableNode<OptimizerNode> {

    public static final int MAX_DYNAMIC_PATH_COST_WEIGHT = 100;

    // --------------------------------------------------------------------------------------------
    // Members
    // --------------------------------------------------------------------------------------------

    private final Operator<?> operator; // The operator (Reduce / Join / DataSource / ...)

    private List<String> broadcastConnectionNames =
            new ArrayList<String>(); // the broadcast inputs names of this node

    private List<DagConnection> broadcastConnections =
            new ArrayList<DagConnection>(); // the broadcast inputs of this node

    private List<DagConnection> outgoingConnections; // The links to succeeding nodes

    private InterestingProperties intProps; // the interesting properties of this node

    // --------------------------------- Branch Handling ------------------------------------------

    protected List<UnclosedBranchDescriptor>
            openBranches; // stack of branches in the sub-graph that are not joined

    protected Set<OptimizerNode>
            closedBranchingNodes; // stack of branching nodes which have already been closed

    protected List<OptimizerNode> hereJoinedBranches; // the branching nodes (node with multiple outputs)
    // that are partially joined (through multiple inputs or broadcast vars)

    // ---------------------------- Estimates and Annotations -------------------------------------

    protected long estimatedOutputSize = -1; // the estimated size of the output (bytes)

    protected long estimatedNumRecords = -1; // the estimated number of key/value pairs in the output

    protected Set<FieldSet> uniqueFields; // set of attributes that will always be unique after this node

    // --------------------------------- General Parameters ---------------------------------------

    private int parallelism =
            ExecutionConfig.PARALLELISM_DEFAULT; // the number of parallel instances of this node

    private long minimalMemoryPerSubTask = -1;

    protected int id = -1; // the id for this node.

    protected int costWeight = 1; // factor to weight the costs for dynamic paths

    protected boolean onDynamicPath;

    protected List<PlanNode> cachedPlans; // cache candidates, because they may be accessed repeatedly

    // ------------------------------------------------------------------------
    // Constructor / Setup
    // ------------------------------------------------------------------------

    /**
     * Creates a new optimizer node that represents the given program operator.
     *
     * @param op The operator that the node represents.
     */
    public OptimizerNode(Operator<?> op) {
        this.operator = op;
        readStubAnnotations();
    }

    protected OptimizerNode(OptimizerNode toCopy) {
        this.operator = toCopy.operator;
        this.intProps = toCopy.intProps;

        this.openBranches = toCopy.openBranches;
        this.closedBranchingNodes = toCopy.closedBranchingNodes;

        this.estimatedOutputSize = toCopy.estimatedOutputSize;
        this.estimatedNumRecords = toCopy.estimatedNumRecords;

        this.parallelism = toCopy.parallelism;
        this.minimalMemoryPerSubTask = toCopy.minimalMemoryPerSubTask;

        this.id = toCopy.id;
        this.costWeight = toCopy.costWeight;
        this.onDynamicPath = toCopy.onDynamicPath;
    }

    // ------------------------------------------------------------------------
    // Methods specific to unary- / binary- / special nodes
    // ------------------------------------------------------------------------

    /**
     * Gets the name of this node, which is the name of the function/operator, or data source /
     * data sink.
     *
     * @return The node name.
     */
    public abstract String getOperatorName();

    /**
     * This function connects the predecessors to this operator.
     *
     * @param operatorToNode The map from program operators to optimizer nodes.
     * @param defaultExchangeMode The data exchange mode to use, if the operator does not specify
     *     one.
     */
    public abstract void setInput(
            Map<Operator<?>, OptimizerNode> operatorToNode, ExecutionMode defaultExchangeMode);

    /**
     * This function connects the operators that produce the broadcast inputs to this operator.
     *
     * @param operatorToNode The map from program operators to optimizer nodes.
     * @param defaultExchangeMode The data exchange mode to use, if the operator does not specify
     *     one.
     * @throws CompilerException
     */
    public void setBroadcastInputs(
            Map<Operator<?>, OptimizerNode> operatorToNode, ExecutionMode defaultExchangeMode) {

        // skip for Operators that don't support broadcast variables
        if (!(getOperator() instanceof AbstractUdfOperator)) {
            return;
        }

        // get all broadcast inputs
        AbstractUdfOperator<?, ?> operator = ((AbstractUdfOperator<?, ?>) getOperator());

        // create connections and add them
        for (Map.Entry<String, Operator<?>> input : operator.getBroadcastInputs().entrySet()) {
            OptimizerNode predecessor = operatorToNode.get(input.getValue());
            DagConnection connection =
                    new DagConnection(
                            predecessor, this, ShipStrategyType.BROADCAST, defaultExchangeMode);
            addBroadcastConnection(input.getKey(), connection);
            predecessor.addOutgoingConnection(connection);
        }
    }

    /**
     * Gets all incoming connections of this node. This method needs to be overridden by subclasses
     * to return the children.
     *
     * @return The list of incoming connections.
     */
    public abstract List<DagConnection> getIncomingConnections();

    /**
     * Tells the node to compute the interesting properties for its inputs.
     *
     * <p>The interesting properties for the node itself must have been computed before. The node
     * must then see how many of the interesting properties it preserves and add its own.
     *
     * @param estimator The {@code CostEstimator} instance to use for plan cost estimation.
     */
    public abstract void computeInterestingPropertiesForInputs(CostEstimator estimator);

    /**
     * This method causes the node to compute the description of open branches in its sub-plan. An
     * open branch means that a (transitive) child node had multiple outputs, which have not all
     * been re-joined in the sub-plan. This method needs to set the {@code openBranches} field to a
     * stack of unclosed branches, the latest one on top. A branch is considered closed, if some
     * later node sees all of the branching node's outputs, no matter if there have been more
     * branches to different paths in the meantime.
     */
    public abstract void computeUnclosedBranchStack();

    protected List<UnclosedBranchDescriptor> computeUnclosedBranchStackForBroadcastInputs(
            List<UnclosedBranchDescriptor> branchesSoFar) {
        // handle the data flow branching for the broadcast inputs
        for (DagConnection broadcastInput : getBroadcastConnections()) {
            OptimizerNode bcSource = broadcastInput.getSource();
            addClosedBranches(bcSource.closedBranchingNodes);

            List<UnclosedBranchDescriptor> bcBranches =
                    bcSource.getBranchesForParent(broadcastInput);

            ArrayList<UnclosedBranchDescriptor> mergedBranches =
                    new ArrayList<UnclosedBranchDescriptor>();
            mergeLists(branchesSoFar, bcBranches, mergedBranches, true);
            branchesSoFar = mergedBranches.isEmpty() ? Collections.emptyList() : mergedBranches;
        }
        return branchesSoFar;
    }

    /**
     * Computes the plan alternatives for this node, and implicitly for all nodes that are children
     * of this node. This method must determine for each alternative the global and local
     * properties and the costs. This method may recursively call {@code getAlternatives()} on its
     * children to get their plan alternatives, and build its own alternatives on top of those.
     *
     * @param estimator The cost estimator used to estimate the costs of each plan alternative.
     * @return A list containing all plan alternatives.
     */
    public abstract List<PlanNode> getAlternativePlans(CostEstimator estimator);

    /**
     * This method implements the visit of a depth-first graph traversing visitor. Implementers
     * must first call the {@code preVisit()} method, then hand the visitor to their children, and
     * finally call the {@code postVisit()} method.
     *
     * @param visitor The graph traversing visitor.
     * @see org.apache.flink.util.Visitable#accept(org.apache.flink.util.Visitor)
     */
    @Override
    public abstract void accept(Visitor<OptimizerNode> visitor);

    public abstract SemanticProperties getSemanticProperties();

    // ------------------------------------------------------------------------
    // Getters / Setters
    // ------------------------------------------------------------------------

    @Override
    public Iterable<OptimizerNode> getPredecessors() {
        List<OptimizerNode> allPredecessors = new ArrayList<>();

        for (DagConnection dagConnection : getIncomingConnections()) {
            allPredecessors.add(dagConnection.getSource());
        }

        for (DagConnection conn : getBroadcastConnections()) {
            allPredecessors.add(conn.getSource());
        }

        return allPredecessors;
    }

    /**
     * Gets the ID of this node. If the id has not yet been set, this method returns -1.
     *
     * @return This node's id, or -1, if not yet set.
     */
    public int getId() {
        return this.id;
    }

    /**
     * Sets the ID of this node.
     *
     * @param id The id for this node.
     */
    public void initId(int id) {
        if (id <= 0) {
            throw new IllegalArgumentException();
        }

        if (this.id == -1) {
            this.id = id;
        } else {
            throw new IllegalStateException("Id has already been initialized.");
        }
    }

    /**
     * Adds the broadcast connection identified by the given {@code name} to this node.
     *
     * @param broadcastConnection The connection to add.
     */
    public void addBroadcastConnection(String name, DagConnection broadcastConnection) {
        this.broadcastConnectionNames.add(name);
        this.broadcastConnections.add(broadcastConnection);
    }

    /** Return the list of names associated with broadcast inputs for this node. */
    public List<String> getBroadcastConnectionNames() {
        return this.broadcastConnectionNames;
    }

    /** Return the list of inputs associated with broadcast variables for this node. */
    public List<DagConnection> getBroadcastConnections() {
        return this.broadcastConnections;
    }

    /**
     * Adds a new outgoing connection to this node.
     *
     * @param connection The connection to add.
     */
    public void addOutgoingConnection(DagConnection connection) {
        if (this.outgoingConnections == null) {
            this.outgoingConnections = new ArrayList<DagConnection>();
        } else {
            if (this.outgoingConnections.size() == 64) {
                throw new CompilerException(
                        "Cannot currently handle nodes with more than 64 outputs.");
            }
        }

        this.outgoingConnections.add(connection);
    }

    /**
     * The list of outgoing connections from this node to succeeding tasks.
     *
     * @return The list of outgoing connections.
     */
    public List<DagConnection> getOutgoingConnections() {
        return this.outgoingConnections;
    }

    /**
     * Gets the operator represented by this optimizer node.
     *
     * @return This node's operator.
     */
    public Operator<?> getOperator() {
        return this.operator;
    }

    /**
     * Gets the parallelism for the operator represented by this optimizer node. The parallelism
     * denotes how many parallel instances of the operator will be spawned during the execution. If
     * this value is {@link ExecutionConfig#PARALLELISM_DEFAULT} then the system will take the
     * default number of parallel instances.
     *
     * @return The parallelism of the operator.
     */
    public int getParallelism() {
        return this.parallelism;
    }

    /**
     * Sets the parallelism for this optimizer node. The parallelism denotes how many parallel
     * instances of the operator will be spawned during the execution.
     *
     * @param parallelism The parallelism to set. If this value is {@link
     *     ExecutionConfig#PARALLELISM_DEFAULT} then the system will take the default number of
     *     parallel instances.
     * @throws IllegalArgumentException If the parallelism is smaller than one.
     */
    public void setParallelism(int parallelism) {
        if (parallelism < 1 && parallelism != ExecutionConfig.PARALLELISM_DEFAULT) {
            throw new IllegalArgumentException("Parallelism of " + parallelism + " is invalid.");
        }
        this.parallelism = parallelism;
    }

    /**
     * Gets the amount of memory that all subtasks of this task have jointly available.
     *
     * @return The total amount of memory across all subtasks.
     */
    public long getMinimalMemoryAcrossAllSubTasks() {
        return this.minimalMemoryPerSubTask == -1
                ? -1
                : this.minimalMemoryPerSubTask * this.parallelism;
    }

    public boolean isOnDynamicPath() {
        return this.onDynamicPath;
    }

    public void identifyDynamicPath(int costWeight) {
        boolean anyDynamic = false;
        boolean allDynamic = true;

        for (DagConnection conn : getIncomingConnections()) {
            boolean dynamicIn = conn.isOnDynamicPath();
            anyDynamic |= dynamicIn;
            allDynamic &= dynamicIn;
        }

        for (DagConnection conn : getBroadcastConnections()) {
            boolean dynamicIn = conn.isOnDynamicPath();
            anyDynamic |= dynamicIn;
            allDynamic &= dynamicIn;
        }

        if (anyDynamic) {
            this.onDynamicPath = true;
            this.costWeight = costWeight;
            if (!allDynamic) {
                // this node joins static and dynamic path.
                // mark the connections where the source is not dynamic as cached
                for (DagConnection conn : getIncomingConnections()) {
                    if (!conn.getSource().isOnDynamicPath()) {
                        conn.setMaterializationMode(conn.getMaterializationMode().makeCached());
                    }
                }

                // broadcast variables are always cached, because they stay unchanged available in
                // the runtime context of the functions
            }
        }
    }

    public int getCostWeight() {
        return this.costWeight;
    }

    public int getMaxDepth() {
        int maxDepth = 0;
        for (DagConnection conn : getIncomingConnections()) {
            maxDepth = Math.max(maxDepth, conn.getMaxDepth());
        }
        for (DagConnection conn : getBroadcastConnections()) {
            maxDepth = Math.max(maxDepth, conn.getMaxDepth());
        }

        return maxDepth;
    }

    /**
     * Gets the properties that are interesting for this node to produce.
     *
     * @return The interesting properties for this node, or null, if not yet computed.
     */
    public InterestingProperties getInterestingProperties() {
        return this.intProps;
    }

    @Override
    public long getEstimatedOutputSize() {
        return this.estimatedOutputSize;
    }

    @Override
    public long getEstimatedNumRecords() {
        return this.estimatedNumRecords;
    }

    public void setEstimatedOutputSize(long estimatedOutputSize) {
        this.estimatedOutputSize = estimatedOutputSize;
    }

    public void setEstimatedNumRecords(long estimatedNumRecords) {
        this.estimatedNumRecords = estimatedNumRecords;
    }

    @Override
    public float getEstimatedAvgWidthPerOutputRecord() {
        if (this.estimatedOutputSize > 0 && this.estimatedNumRecords > 0) {
            return ((float) this.estimatedOutputSize) / this.estimatedNumRecords;
        } else {
            return -1.0f;
        }
    }

    /**
     * Checks whether this node has branching output. A node's output is branched, if it has more
     * than one output connection.
     *
     * @return True, if the node's output branches. False otherwise.
     */
    public boolean isBranching() {
        return getOutgoingConnections() != null && getOutgoingConnections().size() > 1;
    }

    public void markAllOutgoingConnectionsAsPipelineBreaking() {
        if (this.outgoingConnections == null) {
            throw new IllegalStateException(
                    "The outgoing connections have not yet been initialized.");
        }
        for (DagConnection conn : getOutgoingConnections()) {
            conn.markBreaksPipeline();
        }
    }

    // ------------------------------------------------------------------------
    // Miscellaneous
    // ------------------------------------------------------------------------

    /**
     * Checks, if all outgoing connections have their interesting properties set from their target
     * nodes.
     *
     * @return True, if on all outgoing connections, the interesting properties are set. False
     *     otherwise.
     */
    public boolean haveAllOutputConnectionInterestingProperties() {
        for (DagConnection conn : getOutgoingConnections()) {
            if (conn.getInterestingProperties() == null) {
                return false;
            }
        }
        return true;
    }

    /**
     * Computes all the interesting properties that are relevant to this node.
     *
     * <p>The interesting properties are a union of the interesting properties on each outgoing
     * connection. However, if two interesting properties on the outgoing connections overlap, the
     * interesting properties will occur only once in this set. For that, this method deduplicates
     * and merges the interesting properties. This method returns copies of the original
     * interesting properties objects and leaves the original objects, contained by the
     * connections, unchanged.
     */
    public void computeUnionOfInterestingPropertiesFromSuccessors() {
        List<DagConnection> conns = getOutgoingConnections();
        if (conns.size() == 0) {
            // no outgoing connections, so there are no interesting properties from successors
            this.intProps = new InterestingProperties();
        } else {
            this.intProps = conns.get(0).getInterestingProperties().clone();
            for (int i = 1; i < conns.size(); i++) {
                this.intProps.addInterestingProperties(conns.get(i).getInterestingProperties());
            }
        }
        this.intProps.dropTrivials();
    }

    public void clearInterestingProperties() {
        this.intProps = null;
        for (DagConnection conn : getIncomingConnections()) {
            conn.clearInterestingProperties();
        }
        for (DagConnection conn : getBroadcastConnections()) {
            conn.clearInterestingProperties();
        }
    }

    /**
     * Causes this node to compute its output estimates (such as number of rows, size in bytes)
     * based on the inputs and the compiler hints. The compiler hints are instantiated with
     * conservative default values which are used if no other values are provided. Nodes may access
     * the statistics to determine relevant information.
     *
     * @param statistics The statistics object which may be accessed to get statistical
     *     information. The parameter may be null, if no statistics are available.
     */
    public void computeOutputEstimates(DataStatistics statistics) {
        // sanity checking
        for (DagConnection c : getIncomingConnections()) {
            if (c.getSource() == null) {
                throw new CompilerException(
                        "Bug: Estimate computation called before inputs have been set.");
            }
        }

        // let every operator do its computation
        computeOperatorSpecificDefaultEstimates(statistics);

        if (this.estimatedOutputSize < 0) {
            this.estimatedOutputSize = -1;
        }
        if (this.estimatedNumRecords < 0) {
            this.estimatedNumRecords = -1;
        }

        // overwrite default estimates with hints, if given
        if (getOperator() == null || getOperator().getCompilerHints() == null) {
            return;
        }

        CompilerHints hints = getOperator().getCompilerHints();
        if (hints.getOutputSize() >= 0) {
            this.estimatedOutputSize = hints.getOutputSize();
        }

        if (hints.getOutputCardinality() >= 0) {
            this.estimatedNumRecords = hints.getOutputCardinality();
        }

        if (hints.getFilterFactor() >= 0.0f) {
            if (this.estimatedNumRecords >= 0) {
                this.estimatedNumRecords =
                        (long) (this.estimatedNumRecords * hints.getFilterFactor());

                if (this.estimatedOutputSize >= 0) {
                    this.estimatedOutputSize =
                            (long) (this.estimatedOutputSize * hints.getFilterFactor());
                }
            } else if (this instanceof SingleInputNode) {
                OptimizerNode pred = ((SingleInputNode) this).getPredecessorNode();
                if (pred != null && pred.getEstimatedNumRecords() >= 0) {
                    this.estimatedNumRecords =
                            (long) (pred.getEstimatedNumRecords() * hints.getFilterFactor());
                }
            }
        }

        // use the width to infer the cardinality (given size) and vice versa
        if (hints.getAvgOutputRecordSize() >= 1) {
            // the estimated number of rows based on size
            if (this.estimatedNumRecords == -1 && this.estimatedOutputSize >= 0) {
                this.estimatedNumRecords =
                        (long) (this.estimatedOutputSize / hints.getAvgOutputRecordSize());
            } else if (this.estimatedOutputSize == -1 && this.estimatedNumRecords >= 0) {
                this.estimatedOutputSize =
                        (long) (this.estimatedNumRecords * hints.getAvgOutputRecordSize());
            }
        }
    }

    protected abstract void computeOperatorSpecificDefaultEstimates(DataStatistics statistics);

    // ------------------------------------------------------------------------
    // Reading of stub annotations
    // ------------------------------------------------------------------------

    /**
     * Reads all stub annotations, i.e. which fields remain constant, what cardinality bounds the
     * functions have, which fields remain unique.
     */
    protected void readStubAnnotations() {
        readUniqueFieldsAnnotation();
    }

    protected void readUniqueFieldsAnnotation() {
        if (this.operator.getCompilerHints() != null) {
            Set<FieldSet> uniqueFieldSets = operator.getCompilerHints().getUniqueFields();
            if (uniqueFieldSets != null) {
                if (this.uniqueFields == null) {
                    this.uniqueFields = new HashSet<FieldSet>();
                }
                this.uniqueFields.addAll(uniqueFieldSets);
            }
        }
    }

    // ------------------------------------------------------------------------
    // Access of stub annotations
    // ------------------------------------------------------------------------

    /** Gets the FieldSets which are unique in the output of the node. */
    public Set<FieldSet> getUniqueFields() {
        return this.uniqueFields == null ? Collections.<FieldSet>emptySet() : this.uniqueFields;
    }

    // --------------------------------------------------------------------------------------------
    // Pruning
    // --------------------------------------------------------------------------------------------

    protected void prunePlanAlternatives(List<PlanNode> plans) {
        if (plans.isEmpty()) {
            throw new CompilerException(
                    "No plan meeting the requirements could be created @ "
                            + this
                            + ". Most likely reason: Too restrictive plan hints.");
        }

        // shortcut for the simple case
        if (plans.size() == 1) {
            return;
        }

        // we can only compare plan candidates that made equal choices
        // at the branching points. for each choice at a branching point,
        // we need to keep the cheapest (wrt. interesting properties).
        // if we do not keep candidates for each branch choice, we might not
        // find branch compatible candidates when joining the branches back.

        // for pruning, we are quasi AFTER the node, so in the presence of
        // branches, we need to form the per-branch-choice groups by the choice
        // they made at the latest un-joined branching node. Note that this is
        // different from the check for branch compatibility of candidates, as
        // this happens on the input sub-plans and hence BEFORE the node (therefore
        // it is relevant to find the latest (partially) joined branch point).

        if (this.openBranches == null || this.openBranches.isEmpty()) {
            prunePlanAlternativesWithCommonBranching(plans);
        } else {
            // partition the candidates into groups that made the same sub-plan candidate
            // choice at the latest unclosed branch point
            final OptimizerNode[] branchDeterminers = new OptimizerNode[this.openBranches.size()];

            for (int i = 0; i < branchDeterminers.length; i++) {
                branchDeterminers[i] =
                        this.openBranches.get(this.openBranches.size() - 1 - i).getBranchingNode();
            }

            // this sorter sorts by the candidate choice at the branch point
            Comparator<PlanNode> sorter =
                    new Comparator<PlanNode>() {

                        @Override
                        public int compare(PlanNode o1, PlanNode o2) {
                            for (OptimizerNode branchDeterminer : branchDeterminers) {
                                PlanNode n1 = o1.getCandidateAtBranchPoint(branchDeterminer);
                                PlanNode n2 = o2.getCandidateAtBranchPoint(branchDeterminer);
                                int hash1 = System.identityHashCode(n1);
                                int hash2 = System.identityHashCode(n2);

                                if (hash1 != hash2) {
                                    return hash1 - hash2;
                                }
                            }
                            return 0;
                        }
                    };
            Collections.sort(plans, sorter);

            List<PlanNode> result = new ArrayList<PlanNode>();
            List<PlanNode> turn = new ArrayList<PlanNode>();

            final PlanNode[] determinerChoice = new PlanNode[branchDeterminers.length];

            while (!plans.isEmpty()) {
                // take one as the determiner
                turn.clear();
                PlanNode determiner = plans.remove(plans.size() - 1);
                turn.add(determiner);

                for (int i = 0; i < determinerChoice.length; i++) {
                    determinerChoice[i] =
                            determiner.getCandidateAtBranchPoint(branchDeterminers[i]);
                }

                // go backwards through the plans and find all that are equal
                boolean stillEqual = true;
                for (int k = plans.size() - 1; k >= 0 && stillEqual; k--) {
                    PlanNode toCheck = plans.get(k);

                    for (int i = 0; i < branchDeterminers.length; i++) {
                        PlanNode checkerChoice =
                                toCheck.getCandidateAtBranchPoint(branchDeterminers[i]);

                        if (checkerChoice != determinerChoice[i]) {
                            // not the same anymore
                            stillEqual = false;
                            break;
                        }
                    }

                    if (stillEqual) {
                        // the same
                        plans.remove(k);
                        turn.add(toCheck);
                    }
                }

                // now that we have only plans with the same branch alternatives, prune!
                if (turn.size() > 1) {
                    prunePlanAlternativesWithCommonBranching(turn);
                }

                result.addAll(turn);
            }

            // after all turns are complete
            plans.clear();
            plans.addAll(result);
        }
    }

    protected void prunePlanAlternativesWithCommonBranching(List<PlanNode> plans) {
        // for each interesting property, which plans are cheapest
        final RequestedGlobalProperties[] gps =
                this.intProps
                        .getGlobalProperties()
                        .toArray(
                                new RequestedGlobalProperties
                                        [this.intProps.getGlobalProperties().size()]);
        final RequestedLocalProperties[] lps =
                this.intProps
                        .getLocalProperties()
                        .toArray(
                                new RequestedLocalProperties
                                        [this.intProps.getLocalProperties().size()]);

        final PlanNode[][] toKeep = new PlanNode[gps.length][];
        final PlanNode[] cheapestForGlobal = new PlanNode[gps.length];

        PlanNode cheapest = null; // the overall cheapest plan

        // go over all plans from the list
        for (PlanNode candidate : plans) {
            // check if that plan is the overall cheapest
            if (cheapest == null
                    || (cheapest.getCumulativeCosts().compareTo(candidate.getCumulativeCosts())
                            > 0)) {
                cheapest = candidate;
            }

            // find the interesting global properties that this plan matches
            for (int i = 0; i < gps.length; i++) {
                if (gps[i].isMetBy(candidate.getGlobalProperties())) {
                    // the candidate meets the global property requirements. That means
                    // it has a chance that its local properties are re-used (they would be
                    // destroyed if global properties need to be established)
                    if (cheapestForGlobal[i] == null
                            || (cheapestForGlobal[i]
                                            .getCumulativeCosts()
                                            .compareTo(candidate.getCumulativeCosts())
                                    > 0)) {
                        cheapestForGlobal[i] = candidate;
                    }

                    final PlanNode[] localMatches;
                    if (toKeep[i] == null) {
                        localMatches = new PlanNode[lps.length];
                        toKeep[i] = localMatches;
                    } else {
                        localMatches = toKeep[i];
                    }

                    for (int k = 0; k < lps.length; k++) {
                        if (lps[k].isMetBy(candidate.getLocalProperties())) {
                            final PlanNode previous = localMatches[k];
                            if (previous == null
                                    || previous.getCumulativeCosts()
                                                    .compareTo(candidate.getCumulativeCosts())
                                            > 0) {
                                // this one is cheaper!
                                localMatches[k] = candidate;
                            }
                        }
                    }
                }
            }
        }

        // all plans are set now
        plans.clear();

        // add the cheapest plan
        if (cheapest != null) {
            plans.add(cheapest);
            cheapest.setPruningMarker(); // remember that the plan is in the set
        }

        // add all others, which are optimal for some interesting properties
        for (int i = 0; i < gps.length; i++) {
            if (toKeep[i] != null) {
                final PlanNode[] localMatches = toKeep[i];
                for (final PlanNode n : localMatches) {
                    if (n != null && !n.isPruneMarkerSet()) {
                        n.setPruningMarker();
                        plans.add(n);
                    }
                }
            }
            if (cheapestForGlobal[i] != null) {
                final PlanNode n = cheapestForGlobal[i];
                if (!n.isPruneMarkerSet()) {
                    n.setPruningMarker();
                    plans.add(n);
                }
            }
        }
    }

    // --------------------------------------------------------------------------------------------
    // Handling of branches
    // --------------------------------------------------------------------------------------------

    public boolean hasUnclosedBranches() {
        return this.openBranches != null && !this.openBranches.isEmpty();
    }

    public Set<OptimizerNode> getClosedBranchingNodes() {
        return this.closedBranchingNodes;
    }

    public List<UnclosedBranchDescriptor> getOpenBranches() {
        return this.openBranches;
    }

    protected List<UnclosedBranchDescriptor> getBranchesForParent(DagConnection toParent) {
        if (this.outgoingConnections.size() == 1) {
            // return our own stack of open branches, because nothing is added
            if (this.openBranches == null || this.openBranches.isEmpty()) {
                return Collections.emptyList();
            } else {
                return new ArrayList<UnclosedBranchDescriptor>(this.openBranches);
            }
        } else if (this.outgoingConnections.size() > 1) {
            // we branch, so add branch info to the stack
            List<UnclosedBranchDescriptor> branches = new ArrayList<UnclosedBranchDescriptor>(4);
            if (this.openBranches != null) {
                branches.addAll(this.openBranches);
            }

            // find out which output number the connection to the parent has
            int num;
            for (num = 0; num < this.outgoingConnections.size(); num++) {
                if (this.outgoingConnections.get(num) == toParent) {
                    break;
                }
            }
            if (num >= this.outgoingConnections.size()) {
                throw new CompilerException(
                        "Error in compiler: "
                                + "Parent to get branch info for is not contained in the outgoing connections.");
            }

            // create the description and add it
            long bitvector = 0x1L << num;
            branches.add(new UnclosedBranchDescriptor(this, bitvector));
            return branches;
        } else {
            throw new CompilerException(
                    "Error in compiler: Cannot get branch info for successor in a node with no successors.");
        }
    }

    protected void removeClosedBranches(List<UnclosedBranchDescriptor> openList) {
        if (openList == null
                || openList.isEmpty()
                || this.closedBranchingNodes == null
                || this.closedBranchingNodes.isEmpty()) {
            return;
        }

        Iterator<UnclosedBranchDescriptor> it = openList.iterator();
        while (it.hasNext()) {
            if (this.closedBranchingNodes.contains(it.next().getBranchingNode())) {
                // this branch was already closed --> remove it from the list
                it.remove();
            }
        }
    }

    protected void addClosedBranches(Set<OptimizerNode> alreadyClosed) {
        if (alreadyClosed == null || alreadyClosed.isEmpty()) {
            return;
        }

        if (this.closedBranchingNodes == null) {
            this.closedBranchingNodes = new HashSet<OptimizerNode>(alreadyClosed);
        } else {
            this.closedBranchingNodes.addAll(alreadyClosed);
        }
    }

    protected void addClosedBranch(OptimizerNode alreadyClosed) {
        if (this.closedBranchingNodes == null) {
            this.closedBranchingNodes = new HashSet<OptimizerNode>();
        }
        this.closedBranchingNodes.add(alreadyClosed);
    }

    /**
     * Checks whether two candidate plans for the sub-plan of this node are comparable. The two
     * alternative plans are comparable, if
     *
     * <p>a) There is no branch in the sub-plan of this node
     *
     * <p>b) Both candidates have the same candidate as the child at the last open branch.
     *
     * @param plan1 The root node of the first candidate plan.
     * @param plan2 The root node of the second candidate plan.
     * @return True if the nodes are branch compatible in the inputs.
     */
    protected boolean areBranchCompatible(PlanNode plan1, PlanNode plan2) {
        if (plan1 == null || plan2 == null) {
            throw new NullPointerException();
        }

        // if there is no open branch, the children are always compatible.
        // in most plans, that will be the dominant case
        if (this.hereJoinedBranches == null || this.hereJoinedBranches.isEmpty()) {
            return true;
        }

        for (OptimizerNode joinedBrancher : hereJoinedBranches) {
            final PlanNode branch1Cand = plan1.getCandidateAtBranchPoint(joinedBrancher);
            final PlanNode branch2Cand = plan2.getCandidateAtBranchPoint(joinedBrancher);

            if (branch1Cand != null && branch2Cand != null && branch1Cand != branch2Cand) {
                return false;
            }
        }
        return true;
    }

    /**
     * The node IDs are assigned in graph-traversal order (pre-order), hence, each list is sorted
     * by ID in ascending order and all consecutive lists start with IDs in ascending order.
     *
     * @param markJoinedBranchesAsPipelineBreaking True, if the branches that are re-joined at this
     *     node should mark their outgoing connections as pipeline breaking.
     */
    protected final boolean mergeLists(
            List<UnclosedBranchDescriptor> child1open,
            List<UnclosedBranchDescriptor> child2open,
            List<UnclosedBranchDescriptor> result,
            boolean markJoinedBranchesAsPipelineBreaking) {

        // remove branches which have already been closed
        removeClosedBranches(child1open);
        removeClosedBranches(child2open);

        result.clear();

        // check how many open branches we have. the cases:
        // 1) if both are null or empty, the result is null
        // 2) if one side is null (or empty), the result is the other side.
        // 3) both are set, then we need to merge.
        if (child1open == null || child1open.isEmpty()) {
            if (child2open != null && !child2open.isEmpty()) {
                result.addAll(child2open);
            }
            return false;
        }

        if (child2open == null || child2open.isEmpty()) {
            result.addAll(child1open);
            return false;
        }

        int index1 = child1open.size() - 1;
        int index2 = child2open.size() - 1;

        boolean didCloseABranch = false;

        // as both lists (child1open and child2open) are sorted in ascending ID order
        // we can do a merge-join-like loop which preserves the order in the result list
        // and eliminates duplicates
        while (index1 >= 0 || index2 >= 0) {
            int id1 = -1;
            int id2 =
                    index2 >= 0 ? child2open.get(index2).getBranchingNode().getId() : -1;

            while (index1 >= 0 && (id1 = child1open.get(index1).getBranchingNode().getId()) > id2) {
                result.add(child1open.get(index1));
                index1--;
            }
            while (index2 >= 0 && (id2 = child2open.get(index2).getBranchingNode().getId()) > id1) {
                result.add(child2open.get(index2));
                index2--;
            }

            // match: they share a common branching child
            if (id1 == id2) {
                didCloseABranch = true;

                // if this is the latest common child, remember it
                OptimizerNode currBranchingNode = child1open.get(index1).getBranchingNode();

                long vector1 = child1open.get(index1).getJoinedPathsVector();
                long vector2 = child2open.get(index2).getJoinedPathsVector();

                // check if this is the same descriptor (meaning that it contains the same paths);
                // if it is the same, add it only once, otherwise process the join of the paths
                if (vector1 == vector2) {
                    result.add(child1open.get(index1));
                } else {
                    // we merge (re-join) a branch

                    // mark the branch as a point where we break the pipeline
                    if (markJoinedBranchesAsPipelineBreaking) {
                        currBranchingNode.markAllOutgoingConnectionsAsPipelineBreaking();
                    }

                    if (this.hereJoinedBranches == null) {
                        this.hereJoinedBranches = new ArrayList<OptimizerNode>(2);
                    }
                    this.hereJoinedBranches.add(currBranchingNode);

                    // see, if this node closes the branch
                    long joinedInputs = vector1 | vector2;

                    // this is 2^size - 1, which is all bits set at positions 0..size-1
                    long allInputs = (0x1L << currBranchingNode.getOutgoingConnections().size()) - 1;

                    if (joinedInputs == allInputs) {
                        // closed - we can remove it from the stack
                        addClosedBranch(currBranchingNode);
                    } else {
                        // not quite closed
                        result.add(new UnclosedBranchDescriptor(currBranchingNode, joinedInputs));
                    }
                }

                index1--;
                index2--;
            }
        }

        // merged. now we need to reverse the list, because we added the elements in reverse order
        Collections.reverse(result);
        return didCloseABranch;
    }

    @Override
    public OptimizerNode getOptimizerNode() {
        return this;
    }

    @Override
    public PlanNode getPlanNode() {
        return null;
    }

    @Override
    public Iterable<DumpableConnection<OptimizerNode>> getDumpableInputs() {
        List<DumpableConnection<OptimizerNode>> allInputs =
                new ArrayList<DumpableConnection<OptimizerNode>>();
        allInputs.addAll(getIncomingConnections());
        allInputs.addAll(getBroadcastConnections());
        return allInputs;
    }

    @Override
    public String toString() {
        StringBuilder bld = new StringBuilder();

        bld.append(getOperatorName());
        bld.append(" (").append(getOperator().getName()).append(") ");

        int i = 1;
        for (DagConnection conn : getIncomingConnections()) {
            String shipStrategyName =
                    conn.getShipStrategy() == null ? "null" : conn.getShipStrategy().name();
            bld.append('(').append(i++).append(":").append(shipStrategyName).append(')');
        }

        return bld.toString();
    }

    // --------------------------------------------------------------------------------------------

    /**
     * Description of an unclosed branch. An unclosed branch is when the data flow branched (one
     * operator's result is consumed by multiple targets), but these different branches (targets)
     * have not been joined together.
     */
    public static final class UnclosedBranchDescriptor {

        protected OptimizerNode branchingNode;

        protected long joinedPathsVector;

        /**
         * Creates a new branching descriptor.
         *
         * @param branchingNode The node where the branch occurred (the node with multiple
         *     outputs).
         * @param joinedPathsVector A bit vector describing which branches are tracked by this
         *     descriptor. The bit vector is one, where the branch is tracked, zero otherwise.
         */
        protected UnclosedBranchDescriptor(OptimizerNode branchingNode, long joinedPathsVector) {
            this.branchingNode = branchingNode;
            this.joinedPathsVector = joinedPathsVector;
        }

        public OptimizerNode getBranchingNode() {
            return this.branchingNode;
        }

        public long getJoinedPathsVector() {
            return this.joinedPathsVector;
        }

        @Override
        public String toString() {
            return "(" + this.branchingNode.getOperator() + ") [" + this.joinedPathsVector + "]";
        }
    }
}
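
Note: the branch bookkeeping above (getBranchesForParent, mergeLists, UnclosedBranchDescriptor) boils down to a per-node bit vector: each outgoing connection of a branching node owns one bit, and the branch counts as closed once the OR of the joined path vectors covers all outputs. The following is a minimal standalone sketch of that arithmetic; the class and method names are hypothetical and not part of Flink.

// Standalone sketch (hypothetical class) of the bit-vector bookkeeping used by
// getBranchesForParent()/mergeLists(): one bit per outgoing connection, branch closed when
// the joined vector equals (1 << numOutputs) - 1.
public class BranchVectorSketch {

    /** Path vector for the output with the given index, as in getBranchesForParent(). */
    static long pathVectorForOutput(int outputIndex) {
        return 0x1L << outputIndex;
    }

    /** Joins two path vectors of the same branching node, as done when two inputs re-join. */
    static long join(long vector1, long vector2) {
        return vector1 | vector2;
    }

    /** True if the joined vector covers every output of a node with the given number of outputs. */
    static boolean isClosed(long joinedVector, int numOutputs) {
        long allOutputs = (0x1L << numOutputs) - 1; // bits 0..numOutputs-1 set
        return joinedVector == allOutputs;
    }

    public static void main(String[] args) {
        int numOutputs = 3; // a branching node with three consumers
        long joined = join(pathVectorForOutput(0), pathVectorForOutput(2)); // 0b101
        System.out.println(isClosed(joined, numOutputs));                   // false: output 1 missing
        joined = join(joined, pathVectorForOutput(1));                      // 0b111
        System.out.println(isClosed(joined, numOutputs));                   // true: branch is closed
    }
}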



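The estimate handling in computeOutputEstimates() is essentially simple arithmetic over the compiler hints: a filter factor scales the record count, and the average record width relates output size and cardinality, with -1 meaning "unknown". A small standalone sketch of that arithmetic follows; the class and method names are hypothetical and only illustrate the relationships used above.

// Standalone sketch (hypothetical names) of the estimate arithmetic in computeOutputEstimates().
public class EstimateSketch {

    /** Scales a known record count by a selectivity hint; -1 stays unknown. */
    static long applyFilterFactor(long numRecords, float filterFactor) {
        return numRecords >= 0 ? (long) (numRecords * filterFactor) : -1;
    }

    /** Infers the record count from the output size and the average record width. */
    static long recordsFromSize(long outputSize, float avgRecordWidth) {
        return (outputSize >= 0 && avgRecordWidth >= 1) ? (long) (outputSize / avgRecordWidth) : -1;
    }

    /** Infers the output size from the record count and the average record width. */
    static long sizeFromRecords(long numRecords, float avgRecordWidth) {
        return (numRecords >= 0 && avgRecordWidth >= 1) ? (long) (numRecords * avgRecordWidth) : -1;
    }

    public static void main(String[] args) {
        long records = applyFilterFactor(1_000_000L, 0.1f); // ~100,000 records after a 10% filter
        long size = sizeFromRecords(records, 64.0f);        // ~6,400,000 bytes at 64 bytes/record
        System.out.println(records + " records, " + size + " bytes");
        System.out.println(recordsFromSize(size, 64.0f));   // back to ~100,000 records
    }
}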


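The pruning in prunePlanAlternativesWithCommonBranching() keeps the overall cheapest candidate plus, for each interesting property, the cheapest candidate that already fulfills it. The sketch below shows only that keep-set idea; it uses hypothetical types, collapses the global/local property split into a single predicate per property, and omits the pruning markers, so it is a simplification rather than the Flink implementation.

import java.util.ArrayList;
import java.util.List;
import java.util.function.Predicate;

// Simplified standalone sketch of the pruning rule: keep the cheapest plan overall, plus the
// cheapest plan per interesting property, and drop everything else.
public class PruningSketch {

    static final class Candidate {
        final String name;
        final double cost;
        Candidate(String name, double cost) { this.name = name; this.cost = cost; }
    }

    static List<Candidate> prune(List<Candidate> candidates, List<Predicate<Candidate>> interestingProps) {
        List<Candidate> kept = new ArrayList<>();

        // overall cheapest candidate
        Candidate cheapest = null;
        for (Candidate c : candidates) {
            if (cheapest == null || c.cost < cheapest.cost) {
                cheapest = c;
            }
        }
        if (cheapest != null) {
            kept.add(cheapest);
        }

        // cheapest candidate that already meets each interesting property
        for (Predicate<Candidate> prop : interestingProps) {
            Candidate best = null;
            for (Candidate c : candidates) {
                if (prop.test(c) && (best == null || c.cost < best.cost)) {
                    best = c;
                }
            }
            if (best != null && !kept.contains(best)) {
                kept.add(best);
            }
        }
        return kept;
    }

    public static void main(String[] args) {
        List<Candidate> plans = new ArrayList<>();
        plans.add(new Candidate("hash-partitioned", 12.0));
        plans.add(new Candidate("sorted", 15.0));
        plans.add(new Candidate("random", 10.0));

        // one interesting property: "output is sorted" (matched by name here, for illustration)
        Predicate<Candidate> wantsSorted = c -> c.name.equals("sorted");
        List<Predicate<Candidate>> props = List.of(wantsSorted);

        for (Candidate c : prune(plans, props)) {
            System.out.println(c.name + " @ " + c.cost); // keeps "random" (cheapest) and "sorted"
        }
    }
}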