/* This file is part of VoltDB.
 * Copyright (C) 2008-2018 VoltDB Inc.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with VoltDB.  If not, see <http://www.gnu.org/licenses/>.
 */

package org.voltdb.plannodes;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.json_voltpatches.JSONArray;
import org.json_voltpatches.JSONException;
import org.json_voltpatches.JSONObject;
import org.json_voltpatches.JSONString;
import org.json_voltpatches.JSONStringer;
import org.voltdb.catalog.Database;
import org.voltdb.compiler.DatabaseEstimates;
import org.voltdb.compiler.ScalarValueHints;
import org.voltdb.expressions.AbstractExpression;
import org.voltdb.expressions.AbstractSubqueryExpression;
import org.voltdb.expressions.TupleValueExpression;
import org.voltdb.planner.PlanStatistics;
import org.voltdb.planner.PlanningErrorException;
import org.voltdb.planner.StatsField;
import org.voltdb.planner.parseinfo.StmtTableScan;
import org.voltdb.planner.parseinfo.StmtTargetTableScan;
import org.voltdb.types.PlanNodeType;
import org.voltdb.types.SortDirectionType;

public abstract class AbstractPlanNode implements JSONString, Comparable<AbstractPlanNode> {

    /**
     * Internal PlanNodeId counter. Note that this member is static, which means
     * all PlanNodes will have a unique id
     */
    private static int NEXT_PLAN_NODE_ID = 1;

    // Keep this flag turned off in production or when testing user-accessible EXPLAIN output or when
    // using EXPLAIN output to validate plans.
    protected static boolean m_verboseExplainForDebugging = false; // CODE REVIEWER! KEEP false on master!
    public static boolean enableVerboseExplainForDebugging()
    {
        boolean was = m_verboseExplainForDebugging;
        m_verboseExplainForDebugging = true;
        return was;
    }
    public static boolean disableVerboseExplainForDebugging()
    {
        boolean was = m_verboseExplainForDebugging;
        m_verboseExplainForDebugging = false;
        return was;
    }
    public static void restoreVerboseExplainForDebugging(boolean was) { m_verboseExplainForDebugging = was; }
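
    // A minimal usage sketch (illustrative, not part of the planner API contract):
    // callers that need verbose EXPLAIN output temporarily should save and restore
    // the previous state so other callers still see the terse format.  Here
    // "somePlanNode" stands for any plan node instance.
    //
    //     boolean was = AbstractPlanNode.enableVerboseExplainForDebugging();
    //     try {
    //         String verbose = somePlanNode.toExplainPlanString();
    //     } finally {
    //         AbstractPlanNode.restoreVerboseExplainForDebugging(was);
    //     }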

    /*
     * IDs only need to be unique for a single plan.
     * Reset between plans
     */
    public static final void resetPlanNodeIds() {
        NEXT_PLAN_NODE_ID = 1;
    }

    public enum Members {
        ID,
        PLAN_NODE_TYPE,
        INLINE_NODES,
        CHILDREN_IDS,
        PARENT_IDS,
        OUTPUT_SCHEMA;
    }

    protected int m_id = -1;
    protected List<AbstractPlanNode> m_children = new ArrayList<>();
    protected List<AbstractPlanNode> m_parents = new ArrayList<>();
    protected Set<AbstractPlanNode> m_dominators = new HashSet<>();

    // TODO: planner accesses this data directly. Should be protected.
    protected List<ScalarValueHints> m_outputColumnHints = new ArrayList<>();
    protected long m_estimatedOutputTupleCount = 0;
    protected long m_estimatedProcessedTupleCount = 0;
    protected boolean m_hasComputedEstimates = false;

    // The output schema for this node
    protected boolean m_hasSignificantOutputSchema;
    protected NodeSchema m_outputSchema;

    /**
     * Some PlanNodes can take advantage of inline PlanNodes to perform
     * certain additional tasks while performing their main operation, rather than
     * having to re-read tuples from intermediate results
     */
    protected Map<PlanNodeType, AbstractPlanNode> m_inlineNodes =
        new LinkedHashMap<>();
    protected boolean m_isInline = false;

    /**
     * The textual explanation of why the plan may fail to have a deterministic result or effect when replayed.
     */
    protected String  m_nondeterminismDetail = "the query result does not guarantee a consistent ordering";

    /**
     * Instantiates a new plan node.
     */
    protected AbstractPlanNode() {
        m_id = NEXT_PLAN_NODE_ID++;
    }

    public int resetPlanNodeIds(int nextId) {
        nextId = overrideId(nextId);
        for (AbstractPlanNode inNode : getInlinePlanNodes().values()) {
            // Inline nodes also need their ids to be overridden to make sure
            // the subquery node ids are also globally unique
            nextId = inNode.resetPlanNodeIds(nextId);
        }

        for (int i = 0; i < getChildCount(); i++) {
            AbstractPlanNode child = getChild(i);
            assert(child != null);
            nextId = child.resetPlanNodeIds(nextId);
        }

        return nextId;
    }
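
    // For example (illustrative): root.resetPlanNodeIds(1) assigns id 1 to the root,
    // then renumbers its inline nodes (and any subquery plans they reference) and
    // finally its children depth-first, returning the next unused id so a caller can
    // keep numbering a sibling fragment without collisions.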

    public int overrideId(int newId) {
        m_id = newId++;
        // Override subqueries ids
        Collection<AbstractExpression> subqueries = findAllSubquerySubexpressions();
        for (AbstractExpression expr : subqueries) {
            assert(expr instanceof AbstractSubqueryExpression);
            AbstractSubqueryExpression subquery = (AbstractSubqueryExpression) expr;
            // overrideSubqueryNodeIds(newId) will get an NPE if the subquery
            // has not been planned, presumably the effect of hitting a bug
            // earlier in the planner. If that happens again, it MAY be useful
            // to preempt those cases here and single-step through a replay of
            // findAllSubquerySubexpressions. Determining where in the parent
            // plan this subquery expression was found MAY provide a clue
            // as to why the subquery was not planned. It has helped before.
            //REDO to debug*/ if (subquery instanceof SelectSubqueryExpression) {
            //REDO to debug*/     CompiledPlan subqueryPlan = ((SelectSubqueryExpression)subquery)
            //REDO to debug*/             .getSubqueryScan().getBestCostPlan();
            //REDO to debug*/     if (subqueryPlan == null) {
            //REDO to debug*/         findAllSubquerySubexpressions();
            //REDO to debug*/     }
            //REDO to debug*/ }
            newId = subquery.overrideSubqueryNodeIds(newId);
        }
        return newId;
    }

    /**
     * Create a PlanNode that clones the configuration information but
     * is not inserted in the plan graph and has a unique plan node id.
     */
    protected void produceCopyForTransformation(AbstractPlanNode copy) {
        copy.m_outputSchema = m_outputSchema;
        copy.m_hasSignificantOutputSchema = m_hasSignificantOutputSchema;
        copy.m_outputColumnHints = m_outputColumnHints;
        copy.m_estimatedOutputTupleCount = m_estimatedOutputTupleCount;
        copy.m_estimatedProcessedTupleCount = m_estimatedProcessedTupleCount;

        // clone is not yet implemented for every node.
        assert(m_inlineNodes.size() == 0);
        assert(m_isInline == false);

        // the api requires the copy is not (yet) connected
        assert (copy.m_parents.size() == 0);
        assert (copy.m_children.size() == 0);
    }

    public abstract PlanNodeType getPlanNodeType();

    /**
     * Generate the output schema for this node based on the
     * output schemas of its children.  The generated schema consists of
     * the complete set of columns but is not yet ordered.
     *
     * Right now it's best to call this on every node after it gets added
     * and linked to the top of the current plan graph.
     * FIXME: "it's best to call this" means "to be on the paranoid safe side".
     * It used to be that there was a hacky dependency in some non-critical aggregate code,
     * so it would crash if generateOutputSchema had not been run earlier on its subtree.
     * Historically, there may have been other dependencies like this, too,
     * but they are mostly gone or otherwise completely avoidable.
     * This means that one definitive depth-first recursive call that combines the effects
     * of resolveColumnIndexes and then generateOutputSchema should suffice,
     * if applied to a complete plan tree just before it gets fragmentized.
     * The newest twist is that most of this repeated effort goes into generating
     * redundant pass-through structures that get ignored by the serializer.
     *
     * Many nodes will need to override this method in order to take whatever
     * action is appropriate (so, joins will combine two schemas, projections
     * will already have schemas defined and do nothing, etc).
     * They should set m_hasSignificantOutputSchema to true so that the serialization knows
     * not to ignore their work.
     *
     * @param db  A reference to the Database object from the catalog.
     */
    public void generateOutputSchema(Database db)
    {
        // default behavior: just copy the input schema
        // to the output schema
        assert(m_children.size() == 1);
        AbstractPlanNode childNode = m_children.get(0);
        childNode.generateOutputSchema(db);
        // Replace the expressions in our children's columns with TVEs.  When
        // we resolve the indexes in these TVEs they will point back at the
        // correct input column, which we are assuming that the child node
        // has filled in with whatever expression was here before the replacement.
        // Output schemas defined using this standard algorithm
        // are just cached "fillers" that satisfy the legacy
        // resolveColumnIndexes/generateOutputSchema/getOutputSchema protocol
        // until it can be fixed up  -- see the FIXME comment on generateOutputSchema.
        m_hasSignificantOutputSchema = false;
        m_outputSchema = childNode.getOutputSchema().copyAndReplaceWithTVE();
    }

    /**
     * Recursively iterate through the plan and resolve the column_idx value for
     * every TupleValueExpression in every AbstractExpression in every PlanNode.
     * There are few enough common cases that we force every AbstractPlanNode
     * subclass to implement this.  After index resolution, this method also sorts
     * the columns in the output schema appropriately, depending upon what
     * sort of node it is, so that its parent will be able to resolve
     * its indexes successfully.
     *
     * Should get called on the plan graph after any optimizations but before
     * the plan gets fragmented.
     * FIXME: This needs to be reworked with generateOutputSchema to eliminate redundancies.
     */
    public abstract void resolveColumnIndexes();

    protected void resolveSubqueryColumnIndexes() {
        // Possible subquery expressions
        Collection<AbstractExpression> exprs = findAllSubquerySubexpressions();
        for (AbstractExpression expr: exprs) {
            ((AbstractSubqueryExpression) expr).resolveColumnIndexes();
        }
    }

    public void validate() throws Exception {
        //
        // Make sure our children have us listed as their parents
        //
        for (AbstractPlanNode child : m_children) {
            if (!child.m_parents.contains(this)) {
                throw new Exception("ERROR: The child PlanNode '" + child.toString() + "' does not " +
                                    "have its parent PlanNode '" + toString() + "' in its parents list");
            }
            child.validate();
        }
        //
        // Inline PlanNodes
        //
        if (!m_inlineNodes.isEmpty()) {
            for (AbstractPlanNode node : m_inlineNodes.values()) {
                //
                // Make sure that we're not attached to some kind of tree somewhere...
                //
                if (!node.m_children.isEmpty()) {
                    throw new Exception("ERROR: The inline PlanNode '" + node + "' has children inside of PlanNode '" + this + "'");
                } else if (!node.m_parents.isEmpty()) {
                    throw new Exception("ERROR: The inline PlanNode '" + node + "' has parents inside of PlanNode '" + this + "'");
                } else if (!node.isInline()) {
                    throw new Exception("ERROR: The inline PlanNode '" + node + "' was not marked as inline for PlanNode '" + this + "'");
                } else if (!node.getInlinePlanNodes().isEmpty()) {
                    throw new Exception("ERROR: The inline PlanNode '" + node + "' has its own inline PlanNodes inside of PlanNode '" + this + "'");
                }
                node.validate();
            }
        }
    }

    public boolean hasReplicatedResult()
    {
        Map<String, StmtTableScan> tablesRead = new TreeMap<>();
        getTablesAndIndexes(tablesRead, null);
        for (StmtTableScan tableScan : tablesRead.values()) {
            if ( ! tableScan.getIsReplicated()) {
                return false;
            }
        }
        return true;
    }

    /**
     * Recursively build the set of tables read and the set of index names used.
     *
     * @param tablesRead Map of table aliases to their table scans, potentially added to at each recursive level.
     * @param indexes Set of index names used in the plan tree
     * Only the current fragment is of interest.
     */
    public void getTablesAndIndexes(Map<String, StmtTableScan> tablesRead,
            Collection<String> indexes)
    {
        for (AbstractPlanNode node : m_inlineNodes.values()) {
            node.getTablesAndIndexes(tablesRead, indexes);
        }
        for (AbstractPlanNode node : m_children) {
            node.getTablesAndIndexes(tablesRead, indexes);
        }
        getTablesAndIndexesFromSubqueries(tablesRead, indexes);
    }

    /**
     * Collect the tables read and index names used in the current node's subquery expressions.
     *
     * @param tablesRead Map of table aliases to their table scans, potentially added to at each recursive level.
     * @param indexes Set of index names used in the plan tree
     * Only the current node is of interest.
     */
    protected void getTablesAndIndexesFromSubqueries(Map<String, StmtTableScan> tablesRead,
            Collection<String> indexes) {
        for(AbstractExpression expr : findAllSubquerySubexpressions()) {
            assert(expr instanceof AbstractSubqueryExpression);
            AbstractSubqueryExpression subquery = (AbstractSubqueryExpression) expr;
            AbstractPlanNode subqueryNode = subquery.getSubqueryNode();
            assert(subqueryNode != null);
            subqueryNode.getTablesAndIndexes(tablesRead, indexes);
        }
    }

    /**
     * Recursively find the target table name for a DML statement.
     * The name will be attached to the AbstractOperationNode child
     * of a Send Node, in all cases, so the "recursion" can be very limited.
     * Most plan nodes can quickly stub out this recursion and return null.
     * @return
     */
    @SuppressWarnings("static-method")
    public String getUpdatedTable() {
        return null;
    }

    /**
     * Does the (sub)plan guarantee an identical result/effect when "replayed"
     * against the same database state, such as during replication or CL recovery.
     * @return
     */
    public boolean isOrderDeterministic() {
        // Leaf nodes need to re-implement this test.
        assert(m_children != null);
        for (AbstractPlanNode child : m_children) {
            if (! child.isOrderDeterministic()) {
                m_nondeterminismDetail = child.m_nondeterminismDetail;
                return false;
            }
        }
        return true;
    }

    /**
     * Does the plan guarantee a result sorted according to the required sort order.
     * The default implementation delegates the question to its child if there is only one child.
     *
     * @param sortExpressions list of ordering column expressions
     * @param sortDirections list of corresponding sort orders
     *
     * @return TRUE if the node's output table is ordered. FALSE otherwise
     */
    public boolean isOutputOrdered(List<AbstractExpression> sortExpressions, List<SortDirectionType> sortDirections) {
        assert(sortExpressions.size() == sortDirections.size());
        if (m_children.size() == 1) {
            return m_children.get(0).isOutputOrdered(sortExpressions, sortDirections);
        }
        return false;
    }

    /**
     * Accessor for description of plan non-determinism.
     * @return the field
     */
    public String nondeterminismDetail() {
        return m_nondeterminismDetail;
    }

    @Override
    public final String toString() {
        return getPlanNodeType() + "[" + m_id + "]";
    }

    /**
     * Called to compute cost estimates and statistics on a plan graph. Computing of the costs
     * should be idempotent, but updating the PlanStatistics instance isn't, so this should
     * be called once per finished graph, and once per PlanStatistics instance.
     * TODO(XIN): It takes at least 14% planner CPU. Optimize it.
     */
    public final void computeEstimatesRecursively(PlanStatistics stats,
                                                  DatabaseEstimates estimates,
                                                  ScalarValueHints[] paramHints)
    {
        assert(stats != null);

        m_outputColumnHints.clear();
        m_estimatedOutputTupleCount = 0;

        // recursively compute and collect stats from children
        long childOutputTupleCountEstimate = 0;
        for (AbstractPlanNode child : m_children) {
            child.computeEstimatesRecursively(stats, estimates, paramHints);
            m_outputColumnHints.addAll(child.m_outputColumnHints);
            childOutputTupleCountEstimate += child.m_estimatedOutputTupleCount;
        }

        // make sure any inlined scans (for NLIJ mostly) are costed as well
        for (Entry<PlanNodeType, AbstractPlanNode> entry : m_inlineNodes.entrySet()) {
            AbstractPlanNode inlineNode = entry.getValue();
            if (inlineNode instanceof AbstractScanPlanNode) {
                inlineNode.computeCostEstimates(0, estimates, paramHints);
            }
        }

        computeCostEstimates(childOutputTupleCountEstimate, estimates, paramHints);
        stats.incrementStatistic(0, StatsField.TUPLES_READ, m_estimatedProcessedTupleCount);
    }

    /**
     * Given the number of tuples expected as input to this node, compute an estimate
     * of the number of tuples read/processed and the number of tuples output.
     * This will be called by
     * {@link AbstractPlanNode#computeEstimatesRecursively(PlanStatistics, DatabaseEstimates, ScalarValueHints[])}.
     */
    protected void computeCostEstimates(long childOutputTupleCountEstimate,
                                        DatabaseEstimates estimates,
                                        ScalarValueHints[] paramHints)
    {
        m_estimatedOutputTupleCount = childOutputTupleCountEstimate;
        m_estimatedProcessedTupleCount = childOutputTupleCountEstimate;
    }

    public long getEstimatedOutputTupleCount() {
        return m_estimatedOutputTupleCount;
    }

    public long getEstimatedProcessedTupleCount() {
        return m_estimatedProcessedTupleCount;
    }

    /**
     * Gets the id.
     *
     * @return the id
     */
    public Integer getPlanNodeId() {
        return m_id;
    }

    /**
     * Get this PlanNode's output schema
     * FIXME: This needs to be reworked with generateOutputSchema to eliminate redundancies.
     * In short, if generateOutputSchema was called definitively ONCE and returned the child's
     * effective outputSchema to its parent -- possibly without even caching it as m_outputSchema,
     * m_outputSchema could be used to cache only significant non-redundant output schemas.
     * For now, the m_hasSignificantOutputSchema flag is checked separately to determine whether
     * m_outputSchema is worth looking at.
     * @return the NodeSchema which represents this node's output schema
     */
    public NodeSchema getOutputSchema()
    {
        return m_outputSchema;
    }

    /**
     * If the output schema of this node is a cheap copy of
     * some other node's schema, and we decide to change the
     * true output schema, we can change this to the
     * updated schema.
     *
     * @param childSchema
     */
    private void setOutputSchema(NodeSchema childSchema) {
        assert( ! m_hasSignificantOutputSchema);
        m_outputSchema = childSchema;
    }

    /**
     * Find the true output schema.  This may be in some child
     * node.  This seems to be the search order when constructing
     * a plan node in the EE.
     *
     * There are several cases.
     * 1.) If the child has an output schema, and if it is
     *     not a copy of one of its children's schemas,
     *     that's the one we want.  We know it's a copy
     *     if m_hasSignificantOutputSchema is false.
     * 2.) If the child has no significant output schema but it
     *     has an inline projection node, then
     *     a.) If it does not have an inline insert
     *         node then the output schema of the child is
     *         the output schema of the inline projection node.
     *     b.) If the output schema has an inline insert node
     *         then the output schema is the usual DML output
     *         schema, which will be the schema of the inline
     *         insert node.  I don't think we will ever see
     *         this case in this function.  This function is
     *         only called from the microoptimizer to remove
     *         projection nodes.  So we don't see a projection
     *         node on top of a node with an inlined insert node.
     *  3.) Otherwise, the output schema is the output schema
     *      of the child's first child.  We should be able to
     *      follow the first children until we get something
     *      usable.
     *
     * Just for the record, if the child node has an inline
     * insert and a projection node, the projection node's
     * output schema is the schema of the tuples we will be
     * inserting into the target table.  The output schema of
     * the child node will be the output schema of the insert
     * node, which will be the usual DML schema.  This has one
     * long integer column counting the number of rows inserted.
     *
     * @param resetBack If this is true, we walk back the
     *                  chain of parent plan nodes, updating
     *                  the output schema in ancestor nodes
     *                  with the true output schema.  If we
     *                  somehow change the true output schema
     *                  we want to be able to change all the
     *                  ones which are copies of the true
     *                  output schema.
     * @return The true output schema.  This will never return null.
     */
    public final NodeSchema getTrueOutputSchema(boolean resetBack) throws PlanningErrorException {
        AbstractPlanNode child;
        NodeSchema answer = null;
        //
        // Note: This code is translated from the C++ code in
        //       AbstractPlanNode::getOutputSchema.  It's considerably
        //       different there, but I think this has the corner
        //       cases covered correctly.
        for (child = this;
                child != null;
                child = (child.getChildCount() == 0) ? null : child.getChild(0)) {
            NodeSchema childSchema;
            if (child.m_hasSignificantOutputSchema) {
                childSchema = child.getOutputSchema();
                assert(childSchema != null);
                answer = childSchema;
                break;
            }
            AbstractPlanNode childProj = child.getInlinePlanNode(PlanNodeType.PROJECTION);
            if (childProj != null) {
                AbstractPlanNode schemaSrc = null;
                AbstractPlanNode inlineInsertNode = childProj.getInlinePlanNode(PlanNodeType.INSERT);
                if (inlineInsertNode != null) {
                    schemaSrc = inlineInsertNode;
                } else {
                    schemaSrc = childProj;
                }
                childSchema = schemaSrc.getOutputSchema();
                if (childSchema != null) {
                    answer = childSchema;
                    break;
                }
            }
        }
        if (child == null) {
            // We've gone to the end of the plan.  This is a
            // failure in the EE.
            assert(false);
            throw new PlanningErrorException("AbstractPlanNode with no true output schema.  Please notify VoltDB Support.");
        }
        // Trace back the chain of parents and reset the
        // output schemas of the parent.  These will all be
        // exactly the same.  Note that the source of the
        // schema may be an inline plan node.  So we need
        // to set the child's output schema to be the answer.
        // If the schema source is the child node itself, this will
        // set the output schema to itself, so no harm
        // will be done.
        if (resetBack) {
            do {
                if (child instanceof AbstractJoinPlanNode) {
                    // In joins with inlined aggregation, the inlined
                    // aggregate node is the one that determines the schema.
                    // (However, the enclosing join node still has its
                    // "m_hasSignificantOutputSchema" bit set.)
                    //
                    // The method resolveColumnIndexes will overwrite
                    // a join node's schema if there is aggregation.  In order
                    // to avoid undoing the work we've done here, we must
                    // also update the inlined aggregate node.
                    AggregatePlanNode aggNode = AggregatePlanNode.getInlineAggregationNode(child);
                    if (aggNode != null) {
                        aggNode.setOutputSchema(answer);
                    }
                }

                if (! child.m_hasSignificantOutputSchema) {
                    child.setOutputSchema(answer);
                }

                child = (child.getParentCount() == 0) ? null : child.getParent(0);
            } while (child != null);
        }
        return answer;
    }

    /**
     * Add a child and link this node as the child's parent.
     * @param child The node to add.
     */
    public void addAndLinkChild(AbstractPlanNode child) {
        assert(child != null);
        m_children.add(child);
        child.m_parents.add(this);
    }
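
    // Illustrative sketch (assumes the concrete node classes used here have the
    // usual no-arg constructors; adapt to whatever nodes are actually being built):
    //
    //     AbstractPlanNode send = new SendPlanNode();
    //     AbstractPlanNode scan = new SeqScanPlanNode();
    //     send.addAndLinkChild(scan);
    //     // now send.getChild(0) == scan and scan.getParent(0) == send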

    /**
     * Used to re-link the child without changing the order.
     *
     * This is called by PushDownLimit and RemoveUnnecessaryProjectNodes.
     * @param index
     * @param child
     */
    public void setAndLinkChild(int index, AbstractPlanNode child) {
        assert(child != null);
        m_children.set(index, child);
        child.m_parents.add(this);
    }

    /** Remove child from this node.
     * @param child to remove.
     */
    public void unlinkChild(AbstractPlanNode child) {
        assert(child != null);
        m_children.remove(child);
        child.m_parents.remove(this);
    }

    /**
     * Replace an existing child with a new one preserving the child's position.
     * @param oldChild The node to replace.
     * @param newChild The new node.
     * @return true if the child was replaced
     */
    public boolean replaceChild(AbstractPlanNode oldChild, AbstractPlanNode newChild) {
        assert(oldChild != null);
        assert(newChild != null);
        int idx = 0;
        for (AbstractPlanNode child : m_children) {
            if (child.equals(oldChild)) {
                oldChild.m_parents.clear();
                setAndLinkChild(idx, newChild);
                return true;
            }
            ++idx;
        }
        return false;
    }

    public void replaceChild(int oldChildIdx, AbstractPlanNode newChild) {
        assert(oldChildIdx >= 0 && oldChildIdx < getChildCount());
        assert(newChild != null);
        AbstractPlanNode oldChild = m_children.get(oldChildIdx);
        assert(oldChild != null);
        oldChild.m_parents.clear();
        setAndLinkChild(oldChildIdx, newChild);
    }


    /**
     * Gets the number of children.
     * @return the number of children
     */
    public int getChildCount() {
        return m_children.size();
    }

    /**
     * @param index
     * @return The child node of this node at a given index or null if none exists.
     */
    public AbstractPlanNode getChild(int index) {
        return m_children.get(index);
    }

    public void clearChildren() {
        m_children.clear();
    }

    public boolean hasChild(AbstractPlanNode receive) {
        return m_children.contains(receive);
    }

    /**
     * Gets the number of parents.
     * @return the number of parents
     */
    public int getParentCount() {
        return m_parents.size();
    }

    public AbstractPlanNode getParent(int index) {
        return m_parents.get(index);
    }

    public void clearParents() {
        m_parents.clear();
    }

    public void removeFromGraph() {
        disconnectParents();
        disconnectChildren();
    }

    public void disconnectParents() {
        for (AbstractPlanNode parent : m_parents)
            parent.m_children.remove(this);
        m_parents.clear();
     }

    public void disconnectChildren() {
        for (AbstractPlanNode child : m_children)
            child.m_parents.remove(this);
        m_children.clear();
     }

    /** Interject the provided node between this node and this node's current children */
    public void addIntermediary(AbstractPlanNode node) {

        // transfer this node's children to node
        Iterator<AbstractPlanNode> it = m_children.iterator();
        while (it.hasNext()) {
            AbstractPlanNode child = it.next();
            it.remove();                          // remove this.child from m_children
            assert child.getParentCount() == 1;
            child.clearParents();                 // and reset child's parents list
            node.addAndLinkChild(child);          // set node.child and child.parent
        }

        // and add node to this node's children
        assert(m_children.size() == 0);
        addAndLinkChild(node);
    }
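
    // For example (illustrative): if this node P currently has children C1 and C2,
    // then P.addIntermediary(N) rewires the graph from
    //     P -> {C1, C2}
    // to
    //     P -> N -> {C1, C2}
    // Note the assertion above: each transferred child must have had exactly one
    // parent (this node) before the call.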

    /**
     * @return The map of inlined nodes.
     */
    public Map<PlanNodeType, AbstractPlanNode> getInlinePlanNodes() {
        return m_inlineNodes;
    }

    /**
     * @param node
     */
    public void addInlinePlanNode(AbstractPlanNode node) {
        node.m_isInline = true;
        m_inlineNodes.put(node.getPlanNodeType(), node);
        node.m_children.clear();
        node.m_parents.clear();
    }

    /**
     *
     * @param type
     */
    public void removeInlinePlanNode(PlanNodeType type) {
        if (m_inlineNodes.containsKey(type)) {
            m_inlineNodes.remove(type);
        }
    }

    /**
     *
     * @param type
     * @return An inlined node of the given type or null if none.
     */
    public AbstractPlanNode getInlinePlanNode(PlanNodeType type) {
        return m_inlineNodes.get(type);
    }

    /**
     *
     * @return Is this node inlined in another node.
     */
    public Boolean isInline() {
        return m_isInline;
    }

    public boolean isSubQuery() {
        return false;
    }

    public boolean hasSubquery() {
        if (isSubQuery()) {
            return true;
        }
        for (AbstractPlanNode n : m_children) {
            if (n.hasSubquery()) {
                return true;
            }
        }
        for (AbstractPlanNode inlined : m_inlineNodes.values()) {
            if (inlined.hasSubquery()) {
                return true;
            }
        }

        return false;
    }

    /**
     * Refer to the override implementation on NestLoopIndexJoin node.
     * @param tableName
     * @return whether this node has an inlined index scan node or not.
     */
    public boolean hasInlinedIndexScanOfTable(String tableName) {
        for (int i = 0; i < getChildCount(); i++) {
            AbstractPlanNode child = getChild(i);
            if (child.hasInlinedIndexScanOfTable(tableName) == true) {
                return true;
            }
        }

        return false;
    }


    /**
     * @return the dominator list for a node
     */
    public Set<AbstractPlanNode> getDominators() {
        return m_dominators;
    }

    /**
    *   Initialize a hashset for each node containing that node's dominators
    *   (the set of predecessors that *always* precede this node in a traversal
    *   of the plan-graph in reverse-execution order (from root to leaves)).
    */
    public void calculateDominators() {
        HashSet<AbstractPlanNode> visited = new HashSet<>();
        calculateDominators_recurse(visited);
    }

    private void calculateDominators_recurse(HashSet<AbstractPlanNode> visited) {
        if (visited.contains(this)) {
            assert(false): "do not expect loops in plangraph.";
            return;
        }

        visited.add(this);
        m_dominators.clear();
        m_dominators.add(this);

        // find nodes that are in every parent's dominator set.

        HashMap<AbstractPlanNode, Integer> union = new HashMap<>();
        for (AbstractPlanNode n : m_parents) {
            for (AbstractPlanNode d : n.getDominators()) {
                if (union.containsKey(d))
                    union.put(d, union.get(d) + 1);
                else
                    union.put(d, 1);
            }
        }

        for (AbstractPlanNode pd : union.keySet() ) {
            if (union.get(pd) == m_parents.size())
                m_dominators.add(pd);
        }

        for (AbstractPlanNode n : m_children)
            n.calculateDominators_recurse(visited);
    }
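
    // Worked example (illustrative): for a simple chain Root -> B -> C computed
    // top-down, Root's dominator set is {Root}, B's is {B, Root} and C's is
    // {C, B, Root}.  For a node with several parents, only the nodes counted in
    // every parent's dominator set (union count == m_parents.size()) survive
    // the intersection step above.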

    /**
     * @param type plan node type to search for
     * @return a list of nodes that are eventual successors of this node of the desired type
     */
    public ArrayList<AbstractPlanNode> findAllNodesOfType(PlanNodeType type) {
        HashSet<AbstractPlanNode> visited = new HashSet<>();
        ArrayList<AbstractPlanNode> collected = new ArrayList<>();
        findAllNodesOfType_recurse(type, null, collected, visited);
        return collected;
    }

    /**
     * @param pnClass plan node class to search for
     * @return a list of nodes that are eventual successors of this node of the desired class
     */
    public ArrayList<AbstractPlanNode> findAllNodesOfClass(Class< ? extends AbstractPlanNode> pnClass) {
        HashSet<AbstractPlanNode> visited = new HashSet<>();
        ArrayList<AbstractPlanNode> collected = new ArrayList<>();
        findAllNodesOfType_recurse(null, pnClass, collected, visited);
        return collected;
    }

    private void findAllNodesOfType_recurse(PlanNodeType type, Class< ? extends AbstractPlanNode> pnClass, ArrayList<AbstractPlanNode> collected,
        HashSet<AbstractPlanNode> visited)
    {
        if (visited.contains(this)) {
            assert(false): "do not expect loops in plangraph.";
            return;
        }
        visited.add(this);
        if (getPlanNodeType() == type) {
            collected.add(this);
        } else if (pnClass != null && pnClass.isAssignableFrom(getClass())) {
            collected.add(this);
        }

        for (AbstractPlanNode child : m_children)
            child.findAllNodesOfType_recurse(type, pnClass, collected, visited);

        for (AbstractPlanNode inlined : m_inlineNodes.values())
            inlined.findAllNodesOfType_recurse(type, pnClass, collected, visited);
    }

    final public Collection<AbstractExpression> findAllSubquerySubexpressions() {
        Set<AbstractExpression> collected = new HashSet<>();
        findAllExpressionsOfClass(AbstractSubqueryExpression.class, collected);
        return collected;
    }

    /**
     * Collect a unique list of expressions of a given type that this node has including its inlined nodes
     * @param aeClass AbstractExpression class to search for
     * @param collected set to populate with expressions that this node has
     */
    protected void findAllExpressionsOfClass(Class< ? extends AbstractExpression> aeClass,
            Set<AbstractExpression> collected) {
        // Check the inlined plan nodes
        for (AbstractPlanNode inlineNode: getInlinePlanNodes().values()) {
            // For inline node we MUST go recursive to its children!!!!!
            inlineNode.findAllExpressionsOfClass(aeClass, collected);
        }

        // and the output column expressions if there were no projection
        NodeSchema schema = getOutputSchema();
        if (schema != null) {
            schema.addAllSubexpressionsOfClassFromNodeSchema(collected, aeClass);
        }
    }

    /**
     * @param type plan node type to search for
     * @return whether a node of that type is contained in the plan tree
     */
    public boolean hasAnyNodeOfType(PlanNodeType type) {
        if (getPlanNodeType() == type) {
            return true;
        }

        for (AbstractPlanNode n : m_children) {
            if (n.hasAnyNodeOfType(type)) {
                return true;
            }
        }

        for (AbstractPlanNode inlined : m_inlineNodes.values()) {
            if (inlined.hasAnyNodeOfType(type)) {
                return true;
            }
        }

        return false;
    }

    /**
     * @param pnClass plan node class to search for
     * @return whether a node of that type is contained in the plan tree
     */
    public boolean hasAnyNodeOfClass(Class< ? extends AbstractPlanNode> pnClass) {
        if (pnClass.isAssignableFrom(getClass())) {
            return true;
        }

        for (AbstractPlanNode n : m_children) {
            if (n.hasAnyNodeOfClass(pnClass)) {
                return true;
            }
        }

        for (AbstractPlanNode inlined : m_inlineNodes.values()) {
            if (inlined.hasAnyNodeOfClass(pnClass)) {
                return true;
            }
        }

        return false;
    }

    @Override
    public int compareTo(AbstractPlanNode other) {
        int diff = 0;

        // compare child nodes
        HashMap<Integer, AbstractPlanNode> nodesById = new HashMap<>();
        for (AbstractPlanNode node : m_children)
            nodesById.put(node.getPlanNodeId(), node);
        for (AbstractPlanNode node : other.m_children) {
            AbstractPlanNode myNode = nodesById.get(node.getPlanNodeId());
            diff = myNode.compareTo(node);
            if (diff != 0) return diff;
        }

        // compare inline nodes
        HashMap<Integer, Entry<PlanNodeType, AbstractPlanNode>> inlineNodesById =
               new HashMap<>();
        for (Entry<PlanNodeType, AbstractPlanNode> e : m_inlineNodes.entrySet())
            inlineNodesById.put(e.getValue().getPlanNodeId(), e);
        for (Entry<PlanNodeType, AbstractPlanNode> e : other.m_inlineNodes.entrySet()) {
            Entry<PlanNodeType, AbstractPlanNode> myE = inlineNodesById.get(e.getValue().getPlanNodeId());
            if (myE.getKey() != e.getKey()) return -1;

            diff = myE.getValue().compareTo(e.getValue());
            if (diff != 0) return diff;
        }

        diff = m_id - other.m_id;
        return diff;
    }

    // produce a file that can be imported into graphviz for easier visualization
    public String toDOTString() {
        StringBuilder sb = new StringBuilder();

        // id [label=id: value-type ];
        // id -> child_id;
        // id -> child_id;

        sb.append(m_id).append(" [label=\"").append(m_id).append(": ").append(getPlanNodeType()).append("\" ");
        sb.append(getValueTypeDotString());
        sb.append("];\n");
        for (AbstractPlanNode node : m_inlineNodes.values()) {
            sb.append(m_id).append(" -> ").append(node.getPlanNodeId().intValue()).append(";\n");
            sb.append(node.toDOTString());
        }
        for (AbstractPlanNode node : m_children) {
           sb.append(m_id).append(" -> ").append(node.getPlanNodeId().intValue()).append(";\n");
        }

        return sb.toString();
    }
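
    // Rendering sketch (illustrative; the graph name "plan" is arbitrary): each node
    // contributes only its own declaration, its inline nodes and its outgoing edges,
    // so a caller assembles the per-node fragments and wraps them for graphviz:
    //
    //     StringBuilder dot = new StringBuilder("digraph plan {\n");
    //     for (AbstractPlanNode n : root.getPlanNodeList()) {
    //         dot.append(n.toDOTString());
    //     }
    //     dot.append("}\n");
    //     // then e.g.:  dot -Tpng plan.dot -o plan.png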

    // maybe not worth polluting
    private String getValueTypeDotString() {
        PlanNodeType pnt = getPlanNodeType();
        if (isInline()) {
            return "fontcolor=\"white\" style=\"filled\" fillcolor=\"red\"";
        }
        if (pnt == PlanNodeType.SEND || pnt == PlanNodeType.RECEIVE || pnt == PlanNodeType.MERGERECEIVE) {
            return "fontcolor=\"white\" style=\"filled\" fillcolor=\"black\"";
        }
        return "";
    }

    @Override
    public String toJSONString() {
        JSONStringer stringer = new JSONStringer();
        try
        {
            stringer.object();
            toJSONString(stringer);
            stringer.endObject();
        }
        catch (JSONException e)
        {
            e.printStackTrace();
            throw new RuntimeException(e.getMessage(), e);
        }
        return stringer.toString();
    }

    public void toJSONString(JSONStringer stringer) throws JSONException {
        stringer.keySymbolValuePair(Members.ID.name(), m_id);
        stringer.keySymbolValuePair(Members.PLAN_NODE_TYPE.name(), getPlanNodeType().toString());

        if (m_inlineNodes.size() > 0) {
            PlanNodeType types[] = new PlanNodeType[m_inlineNodes.size()];
            int i = 0;
            for (PlanNodeType type : m_inlineNodes.keySet()) {
                types[i++] = type;
            }
            Arrays.sort(types);

            stringer.key(Members.INLINE_NODES.name()).array();
            for (PlanNodeType type : types) {
                AbstractPlanNode node = m_inlineNodes.get(type);
                assert(node != null);
                stringer.value(node);
            }
            stringer.endArray();
        }

        if (m_children.size() > 0) {
            stringer.key(Members.CHILDREN_IDS.name()).array();
            for (AbstractPlanNode node : m_children) {
                stringer.value(node.getPlanNodeId().intValue());
            }
            stringer.endArray();
        }

        outputSchemaToJSON(stringer);
    }
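
    // Shape sketch (key names come from the Members enum; the values here are
    // illustrative): a serialized node looks roughly like
    //     {"ID": 3, "PLAN_NODE_TYPE": "SEQSCAN",
    //      "INLINE_NODES": [...], "CHILDREN_IDS": [4], "OUTPUT_SCHEMA": [...]}
    // INLINE_NODES and CHILDREN_IDS are emitted only when present, OUTPUT_SCHEMA only
    // when it is significant, and subclasses append their own keys after this prefix.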

    private void outputSchemaToJSON(JSONStringer stringer) throws JSONException {
        if (m_hasSignificantOutputSchema) {
            stringer.key(Members.OUTPUT_SCHEMA.name());
            stringer.array();
            for (int colNo = 0; colNo < m_outputSchema.size(); colNo += 1) {
                SchemaColumn column = m_outputSchema.getColumn(colNo);
                column.toJSONString(stringer, true, colNo);
            }
            stringer.endArray();
        }
    }

    public String toExplainPlanString() {
        StringBuilder sb = new StringBuilder();
        explainPlan_recurse(sb, "");
        String fullExplainString = sb.toString();
        // Extract subqueries into a map to explain them separately. Each subquery is
        // surrounded by the 'Subquery_[SubqueryId]' tags. Example:
        // Subquery_1SEQUENTIAL SCAN of "R1"Subquery_1
        Pattern subqueryPattern = Pattern.compile(
                String.format("(%s)([0-9]+)(.*)(\\s*)%s(\\2)", AbstractSubqueryExpression.SUBQUERY_TAG, AbstractSubqueryExpression.SUBQUERY_TAG),
                Pattern.DOTALL);
        Map<String, String> subqueries = new TreeMap<>();
        String topStmt = extractExplainedSubquries(fullExplainString, subqueryPattern, subqueries);
        StringBuilder fullSb = new StringBuilder(topStmt);
        for (Map.Entry<String, String> subquery : subqueries.entrySet()) {
            fullSb.append("\n").append(subquery.getKey()).append('\n').append(subquery.getValue());
        }
        return fullSb.toString();
    }

    private String extractExplainedSubquries(String explainedSubquery, Pattern pattern, Map<String, String> subqueries) {
        Matcher matcher = pattern.matcher(explainedSubquery);
        int pos = 0;
        StringBuilder sb = new StringBuilder();
        // Find all the subqueries from the input string
        while(matcher.find()) {
            sb.append(explainedSubquery.substring(pos, matcher.end(2)));
            pos = matcher.end();
            // Recurse into the subquery string to extract its own subqueries if any
            String nextExplainedStmt = extractExplainedSubquries(matcher.group(3), pattern, subqueries);
            subqueries.put(AbstractSubqueryExpression.SUBQUERY_TAG + matcher.group(2), nextExplainedStmt);
        }
        // Append the rest of the input string
        if (pos < explainedSubquery.length()) {
            sb.append(explainedSubquery.substring(pos));
        }
        return sb.toString();
    }
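
    // Example (illustrative): given the explain text
    //     ... Subquery_1SEQUENTIAL SCAN of "R1"Subquery_1 ...
    // the matcher keeps everything up to and including the opening "Subquery_1"
    // marker in the top-level statement and files the enclosed text under the key
    // "Subquery_1" in the subqueries map, recursing first in case that text
    // contains nested Subquery_N sections of its own.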

    public void explainPlan_recurse(StringBuilder sb, String indent) {
        String extraIndent = " ";
        // Except when verbosely debugging,
        // skip projection nodes basically (they're boring as all get out)
        boolean skipCurrentNode = ! m_verboseExplainForDebugging
                                 && getPlanNodeType() == PlanNodeType.PROJECTION;
        if (skipCurrentNode) {
            extraIndent = "";
        }
        else {
            if ( ! m_skipInitalIndentationForExplain) {
                sb.append(indent);
            }
            String nodePlan = explainPlanForNode(indent);
            sb.append(nodePlan);

            if (m_verboseExplainForDebugging && m_outputSchema != null) {
                sb.append(indent + " " + m_outputSchema.toExplainPlanString());
            }

            sb.append("\n");
        }

        // Agg < Proj < Limit < Scan
        // Order the inline nodes with integer in ascending order
        TreeMap<Integer, AbstractPlanNode> sort_inlineNodes =
                new TreeMap<>();

        // every inline plan node is unique
        int ii = 4;
        for (AbstractPlanNode inlineNode : m_inlineNodes.values()) {
            if (inlineNode instanceof AggregatePlanNode) {
                sort_inlineNodes.put(0, inlineNode);
            } else if (inlineNode instanceof ProjectionPlanNode) {
                sort_inlineNodes.put(1, inlineNode);
            } else if (inlineNode instanceof LimitPlanNode) {
                sort_inlineNodes.put(2, inlineNode);
            } else if (inlineNode instanceof AbstractScanPlanNode) {
                sort_inlineNodes.put(3, inlineNode);
            } else {
                // any other inline nodes currently ?  --xin
                sort_inlineNodes.put(ii++, inlineNode);
            }
        }
        // inline nodes with ascending order as their integer keys
        for (AbstractPlanNode inlineNode : sort_inlineNodes.values()) {
            // don't bother with inlined projections
            if (( ! m_verboseExplainForDebugging) &&
                (inlineNode.getPlanNodeType() == PlanNodeType.PROJECTION)) {
                continue;
            }
            inlineNode.setSkipInitalIndentationForExplain(true);

            sb.append(indent + extraIndent + "inline ");
            inlineNode.explainPlan_recurse(sb, indent + extraIndent);
        }

        for (AbstractPlanNode node : m_children) {
            // inline nodes shouldn't have children I hope
            assert(m_isInline == false);
            if (skipCurrentNode) {
                // If the current node is skipped, I would like to pass the skip indentation
                // flag on to the next level.
                node.setSkipInitalIndentationForExplain(m_skipInitalIndentationForExplain);
            }
            node.explainPlan_recurse(sb, indent + extraIndent);
        }
    }

    private boolean m_skipInitalIndentationForExplain = false;
    public void setSkipInitalIndentationForExplain(boolean skip) {
        m_skipInitalIndentationForExplain = skip;
    }

    protected abstract String explainPlanForNode(String indent);

    public ArrayList<AbstractScanPlanNode> getScanNodeList() {
        HashSet<AbstractPlanNode> visited = new HashSet<>();
        ArrayList<AbstractScanPlanNode> collected = new ArrayList<>();
        getScanNodeList_recurse( collected, visited);
        return collected;
    }

    //postorder adding scan nodes
    public void getScanNodeList_recurse(ArrayList<AbstractScanPlanNode> collected,
            HashSet<AbstractPlanNode> visited) {
        if (visited.contains(this)) {
            assert(false): "do not expect loops in plangraph.";
            return;
        }
        visited.add(this);
        for (AbstractPlanNode n : m_children) {
            n.getScanNodeList_recurse(collected, visited);
        }

        for (AbstractPlanNode node : m_inlineNodes.values()) {
            node.getScanNodeList_recurse(collected, visited);
        }
    }

    public ArrayList<AbstractPlanNode> getPlanNodeList() {
        HashSet<AbstractPlanNode> visited = new HashSet<>();
        ArrayList<AbstractPlanNode> collected = new ArrayList<>();
        getPlanNodeList_recurse( collected, visited);
        return collected;
    }

    //postorder add nodes
    public void getPlanNodeList_recurse(ArrayList<AbstractPlanNode> collected,
            HashSet<AbstractPlanNode> visited) {
        if (visited.contains(this)) {
            assert(false): "do not expect loops in plangraph.";
            return;
        }
        visited.add(this);

        for (AbstractPlanNode n : m_children) {
            n.getPlanNodeList_recurse(collected, visited);
        }
        collected.add(this);
    }

    abstract protected void loadFromJSONObject(JSONObject obj, Database db)
            throws JSONException;

    protected static void loadBooleanArrayFromJSONObject(JSONObject jobj, String key, List<Boolean> target) throws JSONException {
        if ( ! jobj.isNull(key)) {
            JSONArray jarray = jobj.getJSONArray(key);
            int numCols = jarray.length();
            for (int ii = 0; ii < numCols; ++ii) {
                target.add(jarray.getBoolean(ii));
            }
        }
    }

    protected static void booleanArrayToJSONString(JSONStringer stringer, String key, List<Boolean> array) throws JSONException {
        stringer.key(key).array();
        for (Boolean arrayElement : array) {
            stringer.value(arrayElement);
        }
        stringer.endArray();
    }

    protected static NodeSchema loadSchemaFromJSONObject(JSONObject jobj,
            String jsonKey) throws JSONException {
        NodeSchema nodeSchema = new NodeSchema();
        JSONArray jarray = jobj.getJSONArray(jsonKey);
        int size = jarray.length();
        for (int i = 0; i < size; ++i) {
            nodeSchema.addColumn(SchemaColumn.fromJSONObject(
                    jarray.getJSONObject(i)) );
        }
        return nodeSchema;
    }

    protected final void helpLoadFromJSONObject(JSONObject jobj, Database db)
            throws JSONException {
        assert(jobj != null);
        m_id = jobj.getInt(Members.ID.name());

        JSONArray jarray = null;
        //load inline nodes
        if ( ! jobj.isNull(Members.INLINE_NODES.name())) {
            jarray = jobj.getJSONArray( Members.INLINE_NODES.name() );
            PlanNodeTree pnt = new PlanNodeTree();
            pnt.loadPlanNodesFromJSONArrays(jarray, db);
            List<AbstractPlanNode> list = pnt.getNodeList();
            for( AbstractPlanNode pn : list ) {
                m_inlineNodes.put( pn.getPlanNodeType(), pn);
            }
        }
        //children and parents list loading implemented in planNodeTree.loadFromJsonArray

        // load the output schema if it was marked significant.
        if ( ! jobj.isNull(Members.OUTPUT_SCHEMA.name())) {
            m_hasSignificantOutputSchema = true;
            m_outputSchema = loadSchemaFromJSONObject(jobj,
                    Members.OUTPUT_SCHEMA.name());
        }
    }

    /**
     * @param jobj
     * @param key
     * @return
     * @throws JSONException
     */
    List<String> loadStringListMemberFromJSON(JSONObject jobj, String key)
            throws JSONException {
        if (jobj.isNull(key)) {
            return null;
        }
        JSONArray jarray = jobj.getJSONArray(key);
        int numElems = jarray.length();
        List<String> result = new ArrayList<>(numElems);
        for (int ii = 0; ii < numElems; ++ii) {
            result.add(jarray.getString(ii));
        }
        return result;
    }

    /**
     * @param stringer
     * @param key
     * @param stringList
     * @throws JSONException
     */
    void toJSONStringArrayString(JSONStringer stringer, String key,
            List<String> stringList) throws JSONException {
        stringer.key(key).array();
        for (String elem : stringList) {
            stringer.value(elem);
        }
        stringer.endArray();
    }

    /**
     * @param jobj
     * @param key
     * @return
     * @throws JSONException
     */
    int[] loadIntArrayMemberFromJSON(JSONObject jobj, String key)
            throws JSONException {
        if (jobj.isNull(key)) {
            return null;
        }
        JSONArray jarray = jobj.getJSONArray(key);
        int numElems = jarray.length();
        int[] result = new int[numElems];
        for (int ii = 0; ii < numElems; ++ii) {
            result[ii] = jarray.getInt(ii);
        }
        return result;
    }

    /**
     * @param stringer
     * @param key
     * @param intArray
     * @throws JSONException
     */
    void toJSONIntArrayString(JSONStringer stringer, String key, int[] intArray) throws JSONException {
        stringer.key(key).array();
        for (int i : intArray) {
            stringer.value(i);
        }
        stringer.endArray();
    }

    public boolean reattachFragment(AbstractPlanNode child) {
        for (AbstractPlanNode pn : m_children) {
            if (pn.reattachFragment(child)) {
                return true;
            }
        }
        return false;
    }

    public boolean planNodeClassNeedsProjectionNode() {
        return true;
    }

    /**
     * When a project node is added to the top of the plan, we need to adjust
     * the differentiator field of TVEs to reflect differences in the scan
     * schema vs the storage schema of a table, so that fields with duplicate names
     * produced by expanding "SELECT *" can resolve correctly.
     *
     * We recurse until we find either a join node or a scan node.
     *
     * @param tve the TupleValueExpression whose differentiator field is adjusted
     */
    public void adjustDifferentiatorField(TupleValueExpression tve) {
        assert (m_children.size() == 1);
        m_children.get(0).adjustDifferentiatorField(tve);
    }
}
