com.bigdata.rdf.sparql.ast.StaticAnalysis Maven / Gradle / Ivy

Go to download
/**

Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016.  All rights reserved.

Contact:
     SYSTAP, LLC DBA Blazegraph
     2501 Calvert ST NW #106
     Washington, DC 20008
     [email protected]

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/
/*
 * Created on Sep 14, 2011
 */

package com.bigdata.rdf.sparql.ast;

import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;

import org.apache.log4j.Logger;
import org.openrdf.model.URI;

import com.bigdata.bop.BOp;
import com.bigdata.bop.BOpUtility;
import com.bigdata.bop.IBindingSet;
import com.bigdata.bop.IConstant;
import com.bigdata.bop.IConstraint;
import com.bigdata.bop.IValueExpression;
import com.bigdata.bop.IVariable;
import com.bigdata.bop.aggregate.IAggregate;
import com.bigdata.rdf.internal.IV;
import com.bigdata.rdf.internal.constraints.INeedsMaterialization;
import com.bigdata.rdf.internal.constraints.INeedsMaterialization.Requirement;
import com.bigdata.rdf.internal.constraints.IPassesMaterialization;
import com.bigdata.rdf.internal.impl.literal.FullyInlineTypedLiteralIV;
import com.bigdata.rdf.sparql.ast.cache.CacheConnectionImpl;
import com.bigdata.rdf.sparql.ast.eval.AST2BOpContext;
import com.bigdata.rdf.sparql.ast.eval.IEvaluationContext;
import com.bigdata.rdf.sparql.ast.optimizers.ASTBottomUpOptimizer;
import com.bigdata.rdf.sparql.ast.optimizers.ASTLiftPreFiltersOptimizer;
import com.bigdata.rdf.sparql.ast.optimizers.ASTOptimizerList;
import com.bigdata.rdf.sparql.ast.service.ServiceNode;
import com.bigdata.rdf.sparql.ast.ssets.ISolutionSetManager;

/**
 * Methods for static analysis of a query. There is one method which looks "up".
 * This corresponds to how we actually evaluate things (left to right in the
 * query plan). There are two methods which look "down". This corresponds to the
 * bottom-up evaluation semantics of SPARQL.
 * 
 * When determining the "known" bound variables on entry to a node we have to
 * look "up" the tree until we reach the outer most group. Note that named
 * subqueries DO NOT receive bindings from the places where they are INCLUDEd
 * into the query.
 * 
 * 
Analysis of Incoming "Known" Bound Variables (Looking Up)
 * 
 * Static analysis of the incoming "known" bound variables does NOT reflect
 * bottom up evaluation semantics. If a variable binding would not be observed
 * for bottom up evaluation semantics due to a badly designed left join pattern
 * then the AST MUST be rewritten to lift the badly designed left join into a
 * named subquery where it will enjoy effective bottom up evaluation semantics.
 * 
 * Analysis of "must" and "maybe" Bound Variables (Looking Down).
 * 
 * The following classes are producers of bindings and need to be handled by
 * static analysis when looking down the AST tree:
 * 
 * {@link QueryBase}
 * The static analysis of the definitely and maybe bound variables depends
 * on the projection and where clauses.
 * 
 * {@link SubqueryRoot}
 * SPARQL 1.1 subquery. This is just the static analysis of the QueryBase
 * for that subquery.
 * 
 * {@link NamedSubqueryRoot}
 * This is just the static analysis of the QueryBase for that named
 * subquery. Named subqueries are run without any visible bindings EXCEPT those
 * which are exogenous.
 * 
 * {@link NamedSubqueryInclude}
 * The static analysis of the INCLUDE is really the static analysis of the
 * NamedSubqueryRoot which produces that solution set. The incoming known
 * variables are ignored when doing the static analysis of the named subquery
 * root.
 * 
 * {@link ServiceNode}
 * The static analysis of the definitely and maybe bound variables depends
 * on the graph pattern for that service call. This is analyzed like a normal
 * graph pattern. Everything visible in the graph pattern is considered to be
 * projected. As far as I can tell, ServiceNodes are not run "as-bound" and
 * their static analysis is as if they were named subqueries (they have no known
 * bound incoming variables other than those communicated by their
 * BindingsClause).
 * 
 * {@link StatementPatternNode}
 * All variables are definitely bound UNLESS
 * {@link StatementPatternNode#isOptional()} is true.
 * 
 * Note: we sometimes attach a simple optional join to the parent group for
 * efficiency, at which point it becomes an "optional" statement pattern. An
 * optional statement pattern may also have zero or more {@link FilterNode}s
 * associated with it.
 * 
 * {@link JoinGroupNode}
 * 
 * 
 * {@link UnionNode}
 * The definitely bound variables is the intersection of the definitely
 * bound variables in the child join groups. The maybe bound variables is the
 * union of the maybe bound variables in the child join groups.
 * 
 * {@link AssignmentNode}
 * BIND(expr AS var) in a group will not bind the variable if there is an
 * error when evaluating the value expression and does not fail the solution.
 * Thus BIND() in a group contributes to "maybe" bound variables.
 * 
 * Note: BIND() in a PROJECTION is handled differently as it is non-optional (if
 * the value expression results in an error the solution is dropped).
 * Projections are handled when we do the analysis of a QueryBase node since we
 * can see both the WHERE clause and the PROJECTION clauses at the same time.
 * 

 * See http://www.w3.org/TR/sparql11-query/#assignment : "If the
 * evaluation of the expression produces an error, the variable remains unbound
 * for that solution."
 * 
 * IF()
 * 
 * IF semantics: If evaluating the first argument raises an
 * error, then an error is raised for the evaluation of the IF expression. (This
 * greatly simplifies the analysis of the EBV of the IF value expressions, but
 * there is still uncertainty concerning whether the THEN or the ELSE is
 * executed for a given solution.) However, IF is not allowed to
 * conditionally bind a variable in the THEN/ELSE expressions so we do not have
 * to consider it here.
 * 
 * BOUND(var)
 * Filters which use BOUND() can not be pruned unless we can prove that the
 * variable is (or is not) bound and also collapse the filter to a constant
 * after substituting either true or false in for the BOUND() expression.
 * 
 * 
 * 
 * FILTERs
 * 
 * FILTERs are grouped based on whether they can run before any required joins
 * (pre-), with the required join (join-), or after all joins (post-).
 * 
 * pre-
 * The pre-filters have all their required variables bound on entry to the
 * join group. They should be lifted into the parent join group.
 * join-
 * The join-filters will have all their required variables bound by the time
 * the required joins are done. These filters will wind up attached to the
 * appropriate required join. The specific filter/join attachments depend on the
 * join evaluation order.
 * post-
 * The post-filters might not have all of their required variables bound. We
 * have to wait until the last of the optional joins has been evaluated before
 * we can evaluate any post-filters, so they run "last".
 * prune-
 * The prune-filters are those whose required variables CAN NOT be bound.
 * They should be pruned from the AST.
 * 
 * 
 * TODO We can probably cache the heck out of things on this class. There is no
 * reason to recompute the SA of the known or maybe/must bound variables until
 * there is an AST change, and the caller can build a new SA when that happens.
 * However, note that we must make the cache sets unmodifiable since there are a
 * lot of patterns which rely on computing the difference between two sets and
 * those can not have a side-effect on the cache.
 * 
 * We could also attach the {@link StaticAnalysis} as an annotation on the
 * {@link QueryRoot} and provide a factory method for accessing it. That way we
 * would have reuse of the cached static analysis data. Each AST optimizer (or
 * the {@link ASTOptimizerList}) would have to clear the cached
 * {@link StaticAnalysis} when producing a new {@link QueryRoot}. Do this when
 * we add an ASTContainer to provide a better home for the queryStr, the parse
 * tree, the original AST, and the optimized AST.
 * 
 * @author Bryan Thompson
 * @version $Id$
 */
public class StaticAnalysis extends StaticAnalysis_CanJoin {

    private static final Logger log = Logger.getLogger(StaticAnalysis.class);

    /**
     * Package-private convenience constructor which builds the static analysis
     * without an evaluation context: <code>null</code> is passed through to
     * {@link #StaticAnalysis(QueryRoot, IEvaluationContext)}.
     * 
     * @param queryRoot
     *            The root of the query. We need to have this on hand in order
     *            to resolve {@link NamedSubqueryInclude}s during static
     *            analysis.
     * 
     * @deprecated By the other form of this constructor. The constructor should
     *             have access to the {@link ISolutionSetStats}, which are on the
     *             {@link AST2BOpContext}. It also needs access to the
     *             {@link CacheConnectionImpl} for named solution sets.
     */
    // Note: Only exposed to the same package for unit tests.
    @Deprecated
    StaticAnalysis(final QueryRoot queryRoot) {

        // No evaluation context: the exogenous / globally scoped variables
        // are not available to the analysis in this mode.
        this(queryRoot, null/* evaluationContext */);

    }

    /**
     * Primary constructor. Both arguments are handed unchanged to the super
     * class constructor.
     * 
     * @param queryRoot
     *            The root of the query. We need to have this on hand in order
     *            to resolve {@link NamedSubqueryInclude}s during static
     *            analysis.
     * @param evaluationContext
     *            The evaluation context provides access to the
     *            {@link ISolutionSetStats} and the {@link ISolutionSetManager}
     *            for named solution sets. The deprecated single argument
     *            constructor passes <code>null</code> here.
     * 
     * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/412">
     *      StaticAnalysis#getDefinitelyBound() ignores exogenous variables.</a>
     */
    public StaticAnalysis(final QueryRoot queryRoot,
            final IEvaluationContext evaluationContext) {

        super(queryRoot, evaluationContext);

    }

    /**
     * Find and return the parent {@link JoinGroupNode} which is the lowest such
     * {@link JoinGroupNode} dominating the given {@link GraphPatternGroup}.
     * This will search the tree to locate the parent when the
     * {@link GraphPatternGroup} appears as the annotation of a
     * {@link QueryBase}, {@link ServiceNode}, or a {@link FilterNode} having a
     * {@link ExistsNode} or {@link NotExistsNode}.
     * <p>
     * The immediate parent is resolved using
     * {@link #findParent(GraphPatternGroup)} and then mapped onto a
     * {@link JoinGroupNode} based on the type of that parent.
     * <p>
     * NOTE(review): {@link #findParent2(GraphPatternGroup, GraphPatternGroup)}
     * may also report an {@link ArbitraryLengthPathNode} as the parent. That
     * type is not handled here and therefore ends in the
     * {@link UnsupportedOperationException} - confirm whether this is
     * intended.
     * 
     * @param group
     *            The given group.
     * 
     * @return The lowest dominating {@link JoinGroupNode} above that group, or
     *         <code>null</code> when the group is the where clause of the
     *         {@link QueryRoot} or of a {@link NamedSubqueryRoot} (top level).
     * 
     * @throws UnsupportedOperationException
     *             if the parent of the group was not found or is of a type
     *             which is not handled by this method.
     */
    public JoinGroupNode findParentJoinGroup(final GraphPatternGroup<?> group) {

        final IQueryNode p = findParent(group);

        if (p instanceof JoinGroupNode) {

            return (JoinGroupNode) p;

        } else if (p instanceof UnionNode) {

            return ((UnionNode) p).getParentJoinGroup();

        } else if (p instanceof SubqueryRoot) {

            return ((SubqueryRoot) p).getParentJoinGroup();

        } else if (p instanceof NamedSubqueryRoot || p instanceof QueryRoot) {

            // top level.
            return null;

        } else if (p instanceof ServiceNode) {

            return ((ServiceNode) p).getParentJoinGroup();

        } else if (p instanceof FilterNode) {

            return ((FilterNode) p).getParentJoinGroup();

        }

        // Report what we actually found so the failure can be diagnosed.
        throw new UnsupportedOperationException(
                "Can not resolve parent join group: parent="
                        + (p == null ? "null" : p.getClass().getName()));

    }
    
    /**
     * Return the parent of the {@link GraphPatternGroup}. When the group has an
     * explicit parent reference, that reference is returned immediately.
     * Otherwise the {@link QueryRoot} is searched for a node having the given
     * group as an annotation. This makes it possible to locate a
     * {@link QueryBase}, {@link ServiceNode}, {@link ExistsNode}, or
     * {@link NotExistsNode} given its {@link GraphPatternGroup}.
     * <p>
     * This is a convenience for
     * {@link #findParent(QueryRoot, GraphPatternGroup)} using the
     * {@link QueryRoot} held by this {@link StaticAnalysis} instance.
     * <p>
     * Note: The parent of a {@link SubqueryRoot} is obtained by
     * {@link SubqueryRoot#getParent()} and is simply the {@link JoinGroupNode}
     * in which the {@link SubqueryRoot} appears.
     * 
     * @param group
     *            The group.
     * 
     * @return The parent of that group. This can be any of
     *         {@link GraphPatternGroup}, {@link QueryBase}, {@link ServiceNode}
     *         , or a {@link FilterNode}. This will be <code>null</code> iff the
     *         group does not appear anywhere in the {@link QueryRoot}.
     * 
     *         TODO The parent of a {@link NamedSubqueryRoot} is less well
     *         defined. A {@link NamedSubqueryRoot} may be included in multiple
     *         positions within the AST. Each of those could be considered a
     *         parent of the {@link NamedSubqueryRoot} in the sense that it
     *         provides a context within which the result of the query may be
     *         included. However, for the purposes of bottom up analysis, there
     *         is no parent of a {@link NamedSubqueryRoot}. It runs as if it
     *         were a top-level query (except that it might not have visibility
     *         into exogenous variables?).
     */
    public IQueryNode findParent(final GraphPatternGroup group) {

        return findParent(queryRoot, group);

    }

    /**
     * Return the parent of the given {@link GraphPatternGroup} within the
     * given {@link QueryRoot}.
     * <p>
     * When the group reports a parent {@link GraphPatternGroup}, that parent is
     * returned immediately and the <i>queryRoot</i> is not consulted.
     * Otherwise the where clause of each {@link NamedSubqueryRoot} (if any)
     * and then the where clause of the {@link QueryRoot} are examined: if the
     * where clause is the group itself, the owning query is returned; else
     * {@link #findParent2(GraphPatternGroup, GraphPatternGroup)} is used to
     * look at the members of that where clause. A query without a where
     * clause is skipped.
     * 
     * @param queryRoot
     *            The root of the query to be searched.
     * @param group
     *            The group whose parent is sought.
     * 
     * @return The parent of the group -or- <code>null</code> if no parent was
     *         found.
     * 
     * @throws IllegalArgumentException
     *             if the <i>group</i> is <code>null</code>.
     */
    public static IQueryNode findParent(final QueryRoot queryRoot,
            final GraphPatternGroup<?> group) {

        if (group == null)
            throw new IllegalArgumentException();

        IQueryNode p = group.getParentGraphPatternGroup();

        if (p != null) {

            // The group has an explicit parent reference.
            return p;

        }

        if (queryRoot.getNamedSubqueries() != null) {

            for (NamedSubqueryRoot namedSubquery : queryRoot
                    .getNamedSubqueries()) {

                @SuppressWarnings("unchecked")
                final GraphPatternGroup<IGroupMemberNode> whereClause = (GraphPatternGroup<IGroupMemberNode>) namedSubquery
                        .getWhereClause();

                if (whereClause == null) {

                    // Nothing to search in this named subquery.
                    continue;

                }

                if (whereClause == group) {

                    return namedSubquery;

                }

                // Check the where clause.
                if ((p = findParent2(whereClause, group)) != null) {

                    return p;

                }

            }

        }

        {

            @SuppressWarnings("unchecked")
            final GraphPatternGroup<IGroupMemberNode> whereClause = (GraphPatternGroup<IGroupMemberNode>) queryRoot
                    .getWhereClause();

            if (whereClause != null) {

                if (whereClause == group) {

                    return queryRoot;

                }

                // Check the where clause.
                if ((p = findParent2(whereClause, group)) != null) {

                    return p;

                }

            }

        }

        // Not found.
        return null;
    }

    /**
     * Search in <i>aGroup</i> for <i>theGroup</i>, peeking into
     * {@link QueryBase#getWhereClause()}, {@link ServiceNode#getGraphPattern()},
     * all {@link SubqueryFunctionNodeBase} instances for any
     * {@link FilterNode}s, and the sub-group of any
     * {@link ArbitraryLengthPathNode}.
     * <p>
     * Note: Only the direct members of <i>aGroup</i> are inspected. This
     * method does not descend into child groups of <i>aGroup</i>.
     * 
     * @param aGroup
     *            A group which might be the "parent" of the group you are
     *            looking for.
     * @param theGroup
     *            The group which you are looking for.
     * 
     * @return The {@link QueryBase}, {@link ServiceNode}, {@link FilterNode},
     *         or {@link ArbitraryLengthPathNode} which is the "parent" of
     *         <i>theGroup</i> -or- <code>null</code> if no member of
     *         <i>aGroup</i> has <i>theGroup</i> as its graph pattern.
     * 
     * @throws AssertionError
     *             if <i>aGroup</i> and <i>theGroup</i> are the same reference
     *             (the caller is responsible for handling that case).
     */
    public static IQueryNode findParent2(
            final GraphPatternGroup<?> aGroup,
            final GraphPatternGroup<?> theGroup) {

        if (aGroup == theGroup) {
            /*
             * The caller should have reported this. Now we no longer have the
             * context on hand.
             */
            throw new AssertionError();
        }

        final int arity = aGroup.arity();

        for (int i = 0; i < arity; i++) {

            final IGroupMemberNode child = (IGroupMemberNode) aGroup.get(i);

            if (child instanceof QueryBase) {

                final QueryBase queryBase = (QueryBase) child;

                if (queryBase.getWhereClause() == theGroup) {

                    return queryBase;

                }

            } else if (child instanceof ServiceNode) {

                final ServiceNode serviceNode = (ServiceNode) child;

                if (serviceNode.getGraphPattern() == theGroup) {

                    return serviceNode;

                }

            } else if (child instanceof FilterNode) {

                final FilterNode filter = (FilterNode) child;

                // Visit every (NOT) EXISTS style function within the filter.
                final Iterator<SubqueryFunctionNodeBase> itr = BOpUtility
                        .visitAll(filter, SubqueryFunctionNodeBase.class);

                while (itr.hasNext()) {

                    final SubqueryFunctionNodeBase tmp = itr.next();

                    if (tmp.getGraphPattern() == theGroup) {

                        // The FILTER (not the function) is the parent.
                        return filter;

                    }

                }

            } else if (child instanceof ArbitraryLengthPathNode) {

                final ArbitraryLengthPathNode alpNode = (ArbitraryLengthPathNode) child;

                if (alpNode.subgroup() == theGroup) {

                    return alpNode;

                }

            }

        }

        // Not found.
        return null;

    }

//    /**
//     * Return the set of variables which are "in-scope" for a given node. This
//     * is based on bottom up evaluation semantics rather than the top-down,
//     * left-to-right evaluation order. The "in-scope" variables are the
//     * variables which are locally produced, which are produced in a child
//     * group, or which are produced in the parent when the parent's variables
//     * are in scope for the child (e.g., a FILTER in an OPTIONAL group can see
//     * the variables in the parent group).
//     * 

//     * Note: This method does NOT need to consider exogenous bindings. The scope
//     * of a variable is a completely different thing from whether or not the
//     * variable is must be bound in a given scope. If a variable has an
//     * exogenous binding but is not projected into a query, then it is still not
//     * visible in that query. If it is projected into the query, then it is in
//     * scope regardless of whether or not it has an exogenous binding and
//     * regardless of whether it MUST or MIGHT be bound.
//     * 

//     * This method should be used for bottom up analysis. It SHOULD NOT be used
//     * when you have a specific evaluation order and want to know whether or not
//     * a given variable is incoming bound or produced by a node in the query.
//     * 
//     * @param node
//     *            The node.
//     * @param vars
//     *            The caller's collection.
//     * 
//     * @return The caller's collection.
//     * 
//     * @see http://www.w3.org/TR/sparql11-query/#variableScope
//     * 
//     *      FIXME Test suite and implementation for "in-scope".
//     */
//    public Set> getInScopeVariables(final IGroupMemberNode node,
//            final Set> vars) {
//
//        final GraphPatternGroup tmp;
//        
//        if (node instanceof GraphPatternGroup) {
//
//            /*
//             * When given a group, report on the in-scope variable for this
//             * group.
//             */
//            tmp = (GraphPatternGroup) node;
//
//        } else {
//
//            /*
//             * Report on the in-scope variables
//             */
//            tmp = (GraphPatternGroup) node
//                .getParent();
//
//        }
//
//        getInScopeVars(tmp, vars);
//
//        return vars;
//        
//    }
//
//    /**
//     * Reports on all in-scope variables for a {@link JoinGroupNode} or
//     * {@link UnionNode}.
//     */
//    private Set> getInScopeVars(
//            final GraphPatternGroup group,
//            final Set> vars) {
//
//        for(IGroupMemberNode child : group ) {
//
//            // TODO In scope means produced locally or in scope in the parent
//            // and visible locally.
//            getDefinitelyProducedBindings(sp, vars, false/* recursive */);
//
//        }
//
//        // Plus anything which is in scope in the parent.
//        {
//            
//            final JoinGroupNode p = findParentJoinGroup(group);
//
//            if (p != null) {
//
//                getInScopeVars(p, vars);
//
//            }
//
//        }
//        
//        return vars;
//
//    }
    
    /**
     * Return the set of variables which MUST be bound coming into this group
     * during top-down, left-to-right evaluation. The returned set is based on a
     * non-recursive analysis of the definitely (MUST) bound variables in each
     * of the parent groups. The analysis is non-recursive for each parent
     * group, but all parents of this group are considered. This approach
     * excludes information about variables which MUST or MIGHT be bound from
     * both this group and child groups.
     * 

     * This method DOES NOT pay attention to bottom up variable scoping rules.
     * Queries which are badly designed MUST be rewritten (by lifting out named
     * subqueries) such that they become well designed and adhere to bottom-up
     * evaluation semantics.
     * 
     * @param vars
     *            Where to store the "MUST" bound variables.
     * 
     * @return The argument.
     * 
     *         FIXME Both this and
     *         {@link #getMaybeIncomingBindings(IGroupMemberNode, Set)} need to
     *         consider the exogenous variables. Perhaps modify the
     *         StaticAnalysis constructor to pass in the exogenous
     *         IBindingSet[]?
     * 
     *         FIXME For some purposes we need to consider the top-down,
     *         left-to-right evaluation order. However, for others, such as when
     *         considering whether a variable appearing in a filter will be in
     *         scope, we need to consider whether there exists some evaluation
     *         order for which the variable would be in scope.
     * 
     * @see https://sourceforge.net/apps/trac/bigdata/ticket/412
     *      (StaticAnalysis#getDefinitelyBound() ignores exogenous variables.)
     */
    public Set> getDefinitelyIncomingBindings(
            final IGroupMemberNode node, final Set> vars) {
    
    	/*
    	 * Start by adding globally scoped and exogenous variables.
    	 */
    	if (evaluationContext != null) {
    	   
    	   vars.addAll(evaluationContext.getGloballyScopedVariables());
    		
    	   if (locatedInToplevelQuery(node)) {
       		final ISolutionSetStats stats = evaluationContext.getSolutionSetStats();
       		
       		// only add the vars that are always bound
       		vars.addAll(stats.getAlwaysBound());
    	   }    		
    	}
    	
        final GraphPatternGroup parent = node.getParentGraphPatternGroup();
        
        /*
         * We've reached the root.
         */
        if (parent == null) {
            
            /*
             * FIXME This is unable to look upwards when the group is the graph
             * pattern of a subquery, a service, or a (NOT) EXISTS filter. Unit
             * tests. This could be fixed using a method which searched the
             * QueryRoot for the node having a given join group as its
             * annotation. However, that would not resolve the question of
             * evaluation order versus "in scope" visibility.
             * 
             * Use findParent(...) to fix this, but build up the test coverage
             * before making the code changes.
             */
            return vars;
            
        }

        /*
         * Do the siblings of the node first.  Unless it is a Union.  Siblings
         * don't see each other's bindings in a Union. 
         */
        if (!(parent instanceof UnionNode)) {
            
            for (IGroupMemberNode child : parent) {
                
                /*
                 * We've found ourself. Stop collecting vars.
                 */
                if (child == node) {
                    
                    break;
                    
                }
                
                if (child instanceof IBindingProducerNode) {
                    
                    final boolean optional = child instanceof IJoinNode
                            && ((IJoinNode) child).isOptional();

                    final boolean minus = child instanceof IJoinNode
                            && ((IJoinNode) child).isMinus();
                    
                    if (!optional && !minus) {
                        getDefinitelyProducedBindings(
                                (IBindingProducerNode) child, vars, true/* recursive */);
                    }
                    
                }
                
            }
            
        }
        
        /*
         * Next we recurse upwards to figure out what is definitely bound 
         * coming into the parent.  
         */
        return getDefinitelyIncomingBindings(parent, vars);
        
    }

    /**
     * Returns true if the current node is located (recursively) inside the
     * top-level query, false if it is nested inside a subquery or a
     * named subquery. The method does not look into {@link FilterNode}s,
     * but only recurses into {@link GroupNodeBase} nodes.
     * <p>
     * This delegates to
     * {@link #locatedInGroupNode(GroupNodeBase, IGroupMemberNode)} using the
     * where clause of the {@link QueryRoot} as the group to be searched.
     * 
     * @param node
     *            The node we are looking for.
     * 
     * @return Whatever {@link #locatedInGroupNode(GroupNodeBase, IGroupMemberNode)}
     *         reports for the top-level where clause and that node.
     */
    public boolean locatedInToplevelQuery(IGroupMemberNode node) {
       
       return locatedInGroupNode(queryRoot.getWhereClause(), node);

   }

    /**
     * Returns true if the given node is the given group itself or is
     * (recursively) located inside the given group scope, but not inside a
     * subquery referenced in the group. The method does not look into
     * {@link FilterNode}s, but only recurses into {@link GroupNodeBase} nodes.
     * <p>
     * A node counts as "located inside" the group when it is a direct member
     * of the group or of any (transitively) nested {@link GroupNodeBase}. This
     * includes members which are not groups themselves (for example, a
     * statement pattern).
     * 
     * @param theGroup
     *            the group we're looking in
     * @param theNode
     *            the node we're looking for
     * 
     * @return <code>true</code> iff the node was found; <code>false</code> if
     *         it was not found or if either argument is <code>null</code>.
     */
    public boolean locatedInGroupNode(
            final GroupNodeBase<? extends IGroupMemberNode> theGroup,
            final IGroupMemberNode theNode) {

        if (theGroup == null || theNode == null) {
            return false; // not found
        }

        if (theGroup == theNode) {
            return true;
        }

        for (IGroupMemberNode child : theGroup) {

            if (child == theNode) {
                /*
                 * Direct member of this group. Without this test a member
                 * which is not itself a group could never be found.
                 */
                return true;
            }

            if (child instanceof GroupNodeBase) {

                // Only groups are descended into (not filters, subqueries).
                if (locatedInGroupNode(
                        (GroupNodeBase<? extends IGroupMemberNode>) child,
                        theNode)) {
                    return true;
                }

            }

        }

        return false; // not found
    }

   
   /**
     * Return the set of variables which MIGHT be bound coming into this group
     * during top-down, left-to-right evaluation. The returned set is based on a
     * non-recursive analysis of the "maybe" bound variables in each of the
     * parent groups. The analysis is non-recursive for each parent group, but
     * all parents of this group are considered. This approach excludes
     * information about variables which MUST or MIGHT be bound from both
     * this group and child groups.
     * 

     * This method DOES NOT pay attention to bottom up variable scoping rules.
     * Queries which are badly designed MUST be rewritten (by lifting out named
     * subqueries) such that they become well designed and adhere to bottom-up
     * evaluation semantics.
     * 
     * @param vars
     *            Where to store the "maybe" bound variables. This includes ANY
     *            variable which MIGHT or MUST be bound.
     * 
     * @return The argument.
     * 
     *         FIXME Both this and
     *         {@link #getDefinitelyIncomingBindings(IGroupMemberNode, Set)}
     *         need to consider the exogenous variables. Perhaps modify the
     *         StaticAnalysis constructor to pass in the exogenous
     *         IBindingSet[]?
     * 
     *         FIXME This is unable to look upwards when the group is the graph
     *         pattern of a subquery, a service, or a (NOT) EXISTS filter.
     *         
     * @see https://sourceforge.net/apps/trac/bigdata/ticket/412
     */
    public Set> getMaybeIncomingBindings(
            final IGroupMemberNode node, final Set> vars) {

    	/*
    	 * Start by adding the exogenous variables.
    	 */
    	if (evaluationContext != null) {
    	   
    	   vars.addAll(evaluationContext.getGloballyScopedVariables());

    	   if (locatedInToplevelQuery(node)) {
    	      
       		final ISolutionSetStats stats = evaluationContext.getSolutionSetStats();
       		
       		// add the vars that are always bound and those that might be bound
       		vars.addAll(stats.getAlwaysBound());
       		vars.addAll(stats.getNotAlwaysBound());
    	   }
    		
    	}
    	
        final GraphPatternGroup parent = node.getParentGraphPatternGroup();
        
        /*
         * We've reached the root.
         */
        if (parent == null) {
            
            return vars;
            
        }

        /*
         * Do the siblings of the node first.  Unless it is a Union.  Siblings
         * don't see each other's bindings in a Union.
         */
        if (!(parent instanceof UnionNode)) {
            
            for (IGroupMemberNode child : parent) {
                
                /*
                 * We've found ourself. Stop collecting vars.
                 */
                if (child == node) {
                    
                    break;
                    
                }
                
                if (child instanceof IBindingProducerNode) {
                    
//                    final boolean optional = child instanceof IJoinNode
//                            && ((IJoinNode) child).isOptional();

                    final boolean minus = child instanceof IJoinNode
                            && ((IJoinNode) child).isMinus();

                    if (/* !optional && */!minus) {
                        /*
                         * MINUS does not produce any bindings, it just removes
                         * solutions. On the other hand, OPTIONAL joins DO
                         * produce bindings, they are just "maybe" bindings.
                         */
                        getMaybeProducedBindings(
                                (IBindingProducerNode) child, vars, true/* recursive */);
                    }
                    
                }
                
            }
            
        }
        
        /*
         * Next we recurse upwards to figure out what is definitely bound 
         * coming into the parent.  
         */
        return getMaybeIncomingBindings(parent, vars);
        
    }

    /**
     * Return the set of variables which MUST be bound for solutions after the
     * evaluation of this group. A group will produce "MUST" bindings for
     * variables from its statement patterns and a LET based on an expression
     * whose variables are known bound.
     * 

     * The returned collection reflects "bottom-up" evaluation semantics. This
     * method does NOT consider variables which are already bound on entry to
     * the group.
     * 

     * Note: When invoked for an OPTIONAL or MINUS join group, the variables
     * which would become bound during the evaluation of the join group are
     * reported. Caller's who wish to NOT have variables reported for OPTIONAL
     * or MINUS groups MUST NOT invoke this method for those groups.
     * 

     * Note: The recursive analysis does not throw out variables when part of
     * the tree will provably fail to bind anything. It is the role of query
     * optimizers to identify those situations and prune the AST appropriately.
     * 
     * @param node
     *            The node to be analyzed.
     * @param vars
     *            Where to store the "MUST" bound variables.
     * @param recursive
     *            When true, the child groups will be recursively
     *            analyzed. When false, only this group will
     *            be analyzed.
     * 
     * @return The argument.
     */
    public Set> getDefinitelyProducedBindings(
            final IBindingProducerNode node, final Set> vars,
            final boolean recursive) {

        if (node instanceof GraphPatternGroup) {
        
            if (node instanceof JoinGroupNode) {
            
                getDefinitelyProducedBindings((JoinGroupNode) node, vars,
                        recursive);
                
            } else if (node instanceof UnionNode) {
                
                getDefinitelyProducedBindings((UnionNode) node, vars, recursive);
                
            } else {
                
                throw new AssertionError(node.toString());
                
            }

        } else if(node instanceof StatementPatternNode) {

            final StatementPatternNode sp = (StatementPatternNode) node;
            
//            if(!sp.isOptional()) {
//
//                // Only if the statement pattern node is a required join.
                vars.addAll(sp.getProducedBindings());
//                
//            }
        } else if (node instanceof PropertyPathNode) {
            
            final PropertyPathNode ppn = (PropertyPathNode) node;
            vars.addAll(ppn.getProducedBindings());
            
        } else if (node instanceof ArbitraryLengthPathNode) {
        	
        	vars.addAll(((ArbitraryLengthPathNode) node).getDefinitelyProducedBindings());
        	
        } else if (node instanceof ZeroLengthPathNode) {
        	
        	vars.addAll(((ZeroLengthPathNode) node).getProducedBindings());
        	
        } else if(node instanceof SubqueryRoot) {

            final SubqueryRoot subquery = (SubqueryRoot) node;

            vars.addAll(getDefinitelyProducedBindings(subquery));

        } else if (node instanceof NamedSubqueryInclude) {

            final NamedSubqueryInclude nsi = (NamedSubqueryInclude) node;

            final String name = nsi.getName();
            
			final NamedSubqueryRoot nsr = getNamedSubqueryRoot(name);

			if (nsr != null) {

				vars.addAll(getDefinitelyProducedBindings(nsr));

			} else {

                final ISolutionSetStats stats = getSolutionSetStats(name);

                /*
                 * Note: This is all variables which are bound in ALL solutions.
                 */

                vars.addAll(stats.getAlwaysBound());

			}

        } else if(node instanceof ServiceNode) {

            final ServiceNode service = (ServiceNode) node;

            vars.addAll(getDefinitelyProducedBindings(service));

        } else if(node instanceof AssignmentNode) {
            
            /*
             * Note: BIND() in a group is only a "maybe" because the spec says
             * that an error when evaluating a BIND() in a group will not fail
             * the solution.
             * 
             * @see http://www.w3.org/TR/sparql11-query/#assignment (
             * "If the evaluation of the expression produces an error, the
             * variable remains unbound for that solution.")
             */

        } else if(node instanceof FilterNode) {

            // NOP.

        } else if(node instanceof BindingsClause) {

            final BindingsClause bc = (BindingsClause) node;
            
            vars.addAll(bc.getDeclaredVariables());
            
        } else {

            throw new AssertionError(node.toString());
            
        }

        return vars;
      
    }

    /**
     * Collect all variables appearing in the group. This DOES NOT descend
     * recursively into groups. It DOES report variables projected out of named
     * subqueries, SPARQL 1.1 subqueries, and SERVICE calls.
     * 

     * This has the same behavior as a non-recursive call obtain the definitely
     * bound variables PLUS the variables used by the filters in the group.
     * 
     * @param vars
     *            The variables are added to this set.
     * @param group
     *            The group whose variables will be reported.
     * @param includeFilters
     *            When true, variables appearing in FILTERs are
     *            also reported.
     * 
     * @return The caller's set.
     */
    public Set> getDefinitelyProducedBindingsAndFilterVariables( 
            final IGroupNode group,
            final Set> vars) {

        getDefinitelyProducedBindings((IBindingProducerNode) group, vars, false/* recursive */);

        for (IGroupMemberNode op : group) {

            if (op instanceof FilterNode) {

                addAll(vars, op);

            }
            
        }

        return vars;
        
    }

    /**
     * Return the set of variables which MUST or MIGHT be bound after the
     * evaluation of this join group.
     * 

     * The returned collection reflects "bottom-up" evaluation semantics. This
     * method does NOT consider variables which are already bound on entry to
     * the group.
     * 
     * @param vars
     *            Where to store the "MUST" and "MIGHT" be bound variables.
     * @param recursive
     *            When true, the child groups will be recursively
     *            analyzed. When false, only this group will
     *            be analyzed.
     *            
     * @return The caller's set.
     */
    public Set> getMaybeProducedBindings(
            final IBindingProducerNode node,//
            final Set> vars,//
            final boolean recursive) {

        if (node instanceof GraphPatternGroup) {
        
            if (node instanceof JoinGroupNode) {
            
                getMaybeProducedBindings((JoinGroupNode) node, vars,
                        recursive);

            } else if (node instanceof UnionNode) {

                getMaybeProducedBindings((UnionNode) node, vars, recursive);

            } else {

                throw new AssertionError(node.toString());
                
            }

        } else if( node instanceof StatementPatternNode) {

            final StatementPatternNode sp = (StatementPatternNode) node;

//            if(sp.isOptional()) {
//
//                // Only if the statement pattern node is an optional join.
                vars.addAll(sp.getProducedBindings());
//                
//            }

        } else if (node instanceof PropertyPathNode) {
            
            final PropertyPathNode ppn = (PropertyPathNode) node;
            vars.addAll(ppn.getProducedBindings());
            
        } else if (node instanceof ArbitraryLengthPathNode) {
        	
        	vars.addAll(((ArbitraryLengthPathNode) node).getMaybeProducedBindings());
        	
        } else if (node instanceof ZeroLengthPathNode) {
        	
        	vars.addAll(((ZeroLengthPathNode) node).getProducedBindings());
        	
        } else if(node instanceof SubqueryRoot) {

            final SubqueryRoot subquery = (SubqueryRoot) node;

            vars.addAll(getMaybeProducedBindings(subquery));

        } else if (node instanceof NamedSubqueryInclude) {

            final NamedSubqueryInclude nsi = (NamedSubqueryInclude) node;

            final String name = nsi.getName();
            
			final NamedSubqueryRoot nsr = getNamedSubqueryRoot(name);

			if (nsr != null) {

				vars.addAll(getMaybeProducedBindings(nsr));
				
			} else {
				
                final ISolutionSetStats stats = getSolutionSetStats(name);

                /*
                 * Note: This is all variables bound in ANY solution. It MAY
                 * include variables which are NOT bound in some solutions.
                 */

                vars.addAll(stats.getUsedVars());

			}

        } else if(node instanceof ServiceNode) {

            final ServiceNode service = (ServiceNode) node;

            vars.addAll(getMaybeProducedBindings(service));

        } else if(node instanceof AssignmentNode) {

            /*
             * Note: BIND() in a group is only a "maybe" because the spec says
             * that an error when evaluating a BIND() in a group will not fail
             * the solution.
             * 
             * @see http://www.w3.org/TR/sparql11-query/#assignment (
             * "If the evaluation of the expression produces an error, the
             * variable remains unbound for that solution.")
             */

            vars.add(((AssignmentNode) node).getVar());
            
        } else if(node instanceof FilterNode) {

            // NOP

        } else if(node instanceof BindingsClause) {

            final BindingsClause bc = (BindingsClause) node;
            
            vars.addAll(bc.getDeclaredVariables());

        } else {
            
            throw new AssertionError(node.toString());
            
        }

        return vars;
      
    }

    /*
     * Private type specific helper methods.
     */

    // MUST : JOIN GROUP
    Set> getDefinitelyProducedBindings(
            final JoinGroupNode node, final Set> vars,
            final boolean recursive) {
        // Note: always report what is bound when we enter a group. The caller
        // needs to avoid entering a group which is optional if they do not want
        // it's bindings.
//        if(node.isOptional())
//            return vars;
        
        for (IGroupMemberNode child : node) {

            if(!(child instanceof IBindingProducerNode))
                continue;
            
            if (child instanceof StatementPatternNode) {

                final StatementPatternNode sp = (StatementPatternNode) child;

                if (!sp.isOptional()) {
                    
                    /*
                     * Required JOIN (statement pattern).
                     */

                    getDefinitelyProducedBindings(sp, vars, recursive);

                }
                
            } else if (child instanceof ArbitraryLengthPathNode) {
            	
            	vars.addAll(((ArbitraryLengthPathNode) child).getDefinitelyProducedBindings());
            	
            } else if (child instanceof ZeroLengthPathNode) {
            	
            	vars.addAll(((ZeroLengthPathNode) child).getProducedBindings());
            	
            } else if (child instanceof NamedSubqueryInclude
                    || child instanceof SubqueryRoot
                    || child instanceof ServiceNode) {

                /*
                 * Required JOIN (Named solution set, SPARQL 1.1 subquery,
                 * EXISTS, or SERVICE).
                 * 
                 * Note: We have to descend recursively into these structures in
                 * order to determine anything.
                 */

                vars.addAll(getDefinitelyProducedBindings(
                        (IBindingProducerNode) child,
                        new LinkedHashSet>(), true/* recursive */));

            } else if (child instanceof GraphPatternGroup) {

                if (recursive) {

                    // Add anything bound by a child group.

                    final GraphPatternGroup group = (GraphPatternGroup) child;

                    if (!group.isOptional() && !group.isMinus()) {

                        getDefinitelyProducedBindings(group, vars, recursive);

                    }

                }
                
            } else if (child instanceof AssignmentNode) {

                /*
                 * Note: BIND() in a group is only a "maybe" because the spec says
                 * that an error when evaluating a BIND() in a group will not fail
                 * the solution.
                 * 
                 * @see http://www.w3.org/TR/sparql11-query/#assignment (
                 * "If the evaluation of the expression produces an error, the
                 * variable remains unbound for that solution.")
                 */

            } else if(child instanceof FilterNode) {

                // NOP
                
            } else if(child instanceof BindingsClause) {

                final BindingsClause bc = (BindingsClause) child;
                
                vars.addAll(bc.getDeclaredVariables());

            } else if (child instanceof PropertyPathNode) {
                
                getDefinitelyProducedBindings((PropertyPathNode)child, vars, recursive);

            } else {

                throw new AssertionError(child.toString());

            }

        }

        /*
         * Note: Assignments which have an error cause the variable to be left
         * unbound rather than failing the solution. Therefore assignment nodes
         * are handled as "maybe" bound, not "must" bound.
         */

        return vars;

    }

    // MAYBE : JOIN GROUP
    private Set> getMaybeProducedBindings(
            final JoinGroupNode node, final Set> vars,
            final boolean recursive) {

        // Add in anything definitely produced by this group (w/o recursion).
        getDefinitelyProducedBindings(node, vars, false/* recursive */);

        /*
         * Note: Assignments which have an error cause the variable to be left
         * unbound rather than failing the solution. Therefore assignment nodes
         * are handled as "maybe" bound, not "must" bound.
         */

        for (AssignmentNode bind : node.getAssignments()) {

            vars.add(bind.getVar());

        }

        if (recursive) {

            /*
             * Add in anything "maybe" produced by a child group.
             */

            for (IGroupMemberNode child : node) {

                if (child instanceof IBindingProducerNode) {

                    final IBindingProducerNode tmp = (IBindingProducerNode) child;
                    
                    if(tmp instanceof IJoinNode && ((IJoinNode)tmp).isMinus()) {
                        
                        // MINUS never contributes bindings, it only removes
                        // solutions.
                        continue;
                        
                    }

//                    vars.addAll(
                    getMaybeProducedBindings(tmp, vars, recursive)
//                            )
                    ;

                }
                
            }

        }

        return vars;

    }

    // MUST : UNION
    private Set> getDefinitelyProducedBindings(
            final UnionNode node,
            final Set> vars, final boolean recursive) {

        if (!recursive || node.isOptional() || node.isMinus()) {

            // Nothing to contribute
            return vars;
            
        }

        /*
         * Collect all definitely produced bindings from each of the children.
         */
        final Set> all = new LinkedHashSet>();

        final List>> perChildSets = new LinkedList>>();

        for (JoinGroupNode child : node) {

            final Set> childSet = new LinkedHashSet>();
            
            perChildSets.add(childSet);

            getDefinitelyProducedBindings(child, childSet, recursive);

            all.addAll(childSet);
            
        }

        /*
         * Now retain only those bindings which are definitely produced by each
         * child of the union.
         */
        for(Set> childSet : perChildSets) {
            
            all.retainAll(childSet);
            
        }
        
        // These are the variables which are definitely bound by the union.
        vars.addAll(all);
        
        return vars;

    }

    // MAYBE : UNION
    private Set> getMaybeProducedBindings(final UnionNode node,
            final Set> vars, final boolean recursive) {

        if (!recursive) {

            // Nothing to contribute.
            return vars;

        }

        /*
         * Collect all "maybe" bindings from each of the children.
         */
        for (JoinGroupNode child : node) {

            getMaybeProducedBindings(child, vars, recursive);

        }

        return vars;

    }

    /**
     * Report "MUST" bound bindings projected by the query. This involves
     * checking the WHERE clause and the {@link ProjectionNode} for the query.
     * Note that the projection can rename variables. It can also bind a
     * constant on a variable. Variables which are not projected by the query
     * will NOT be reported.
     * 
     * FIXME For a top-level query, any exogenously bound variables are also
     * definitely bound (in a subquery they are definitely bound if they are
     * projected into the subquery).
     * 
     * TODO  In the case when the variable is bound to an expression
     *       and the expression may execute with an error, this
     *       method incorrectly reports that variable as definitely bound
     *       see trac 750
     * 
     * @see https://sourceforge.net/apps/trac/bigdata/ticket/412
     *      (StaticAnalysis#getDefinitelyBound() ignores exogenous variables.)
     *      
     * @see http://sourceforge.net/apps/trac/bigdata/ticket/430 (StaticAnalysis
     *      does not follow renames of projected variables)
     *      
     * @see http://sourceforge.net/apps/trac/bigdata/ticket/750
     *      artificial test case fails, currently wontfix
     */
    // MUST : QueryBase
    public Set> getDefinitelyProducedBindings(final QueryBase queryBase) {

        final ProjectionNode projection = queryBase.getProjection();
        
        if(projection == null) {

            // If there is no projection then there is nothing to report.
            return new LinkedHashSet>();

        }

        // The set of definitely bound variables in the query.
        final Set> definitelyBound = new LinkedHashSet>();
        
        @SuppressWarnings("unchecked")
        final GraphPatternGroup whereClause = queryBase.getWhereClause();

        if (whereClause != null) {

            getDefinitelyProducedBindings(whereClause, definitelyBound, true/* recursive */);
            
            if (log.isInfoEnabled()) {
            	log.info(whereClause);
            	log.info(definitelyBound);
            }

        }

        /*
         * Now, we need to consider each select expression in turn. There are
         * several cases:
         * 
         * 1. Projection of a constant.
         * 
         * 2. Projection of a variable under the same name.
         * 
         * 3. Projection of a variable under a different name.
         * 
         * 4. Projection of a select expression which is not an aggregate.
         * 
         * This case is the one explored in trac750, and the code
         * below while usually correct is incorrect if the expression
         * can evaluate with an error - in which case the variable
         * will remain unbound.
         * 
         * 5. Projection of a select expression which is an aggregate. This case
         * is tricky. A select expression that is an aggregate which evaluates
         * to an error will cause an unbound value for to be reported for the
         * projected variable for the solution in which the error is computed.
         * Therefore, we must not assume that aggregation expressions MUST be
         * bound. (Given the schema flexible nature of RDF data, it is very
         * difficult to prove that an aggregate expression will never result in
         * an error without actually running the aggregation query.)
         * 
         * 6. Projection of an exogenously bound variable which is in scope.
         * 
         * TODO (6) is not yet handled! We need to know what variables are in
         * scope at each level as we descend into subqueries. Even if we know
         * the set of exogenous variables, the in scope exogenous varaibles are
         * not available in the typical invocation context.
         */
        {

            final boolean isAggregate = isAggregate(queryBase);
            
            /*
             * The set of projected variables which are definitely bound.
             */
            final Set> tmp = new LinkedHashSet>();

            for (AssignmentNode bind : projection) {

                if (bind.getValueExpression() instanceof IConstant) {

                    /*
                     * 1. The projection of a constant.
                     * 
                     * Note: This depends on pre-evaluation of constant
                     * expressions. If the expression has not been reduced to a
                     * constant then it will not be detected by this test!
                     */

                    tmp.add(bind.getVar());

                    continue;

                }

                if (bind.getVar().equals(bind.getValueExpression())) {

                    if (definitelyBound.contains(bind.getVar())) {

                        /*
                         * 2. The projection of a definitely bound variable
                         * under the same name.
                         */

                        tmp.add(bind.getVar());

                    }
                    
                    continue;

                }

                if (bind.getValueExpression() instanceof IVariable) {

                    if (definitelyBound.contains(bind.getValueExpression())) {

                        /*
                         * 3. The projection of a definitely bound variable
                         * under a different name.
                         */

                        tmp.add(bind.getVar());

                    }

                    continue;

                }

                if (!isAggregate) {

                    /*
                     * 4. The projection of a select expression which is not an
                     * aggregate. Normally, the projected variable will be 
                     * bound if all components of the select expression are
                     * definitely bound: this comment ignores the possibility
                     * that the expression may raise an error, in which case
                     * this block of code is incorrect.
                     * As of Oct 11, 2013 - we are no-fixing this
                     * because of caution about the performance impact, 
                     * and it seeming to be a corner case. See trac 750.
                     * 
                     * TODO Does coalesce() change the semantics for this
                     * analysis? If any of the values for coalesce() is
                     * definitely bound, then the coalesce() will produce a
                     * value. Can coalesce() be used to propagate an unbound
                     * value? If so, then we must either not assume that any
                     * value expression involving coalesce() is definitely bound
                     * or we must do a more detailed analysis of the value
                     * expression.
                     */
                    final Set> usedVars = getSpannedVariables(
                            (BOp) bind.getValueExpression(),
                            new LinkedHashSet>());

                    usedVars.removeAll(definitelyBound);

                    if (!usedVars.isEmpty()) {

                        /*
                         * There is at least one variable which is used by the
                         * select expression which is not definitely bound.
                         */
                        continue;

                    }

                    /*
                     * All variables used by the select expression are
                     * definitely bound so the projected variable for that
                     * select expression will be definitely bound.
                     */
                    tmp.add(bind.getVar());

                } else {
                	/* 5. Projection of a select expression which is an aggregate.
                	 * We do nothing
                	 */
                }
            	/* 6. Projection of an exogenously bound variable which is in scope.
            	 * We incorrectly do nothing
            	 */
                
            }

            return tmp;

        }

    }

    /**
     * Report the "MUST" and "MAYBE" bound bindings projected by the query. This
     * reduces to reporting the projected variables. We do not need to analyze
     * the whereClause or projection any further in order to know what "might"
     * be projected.
     */
    // MAYBE : QueryBase
    public Set> getMaybeProducedBindings(final QueryBase node) {

        final Set> vars = new LinkedHashSet>();

        final ProjectionNode projection = node.getProjection();
        
        if(projection == null) {

            // If there is no projection then there is nothing to report.
            return vars;

        }

        return projection.getProjectionVars(vars);
        
    }

    /**
     * Report "MUST" bound bindings projected by the SERVICE. This involves
     * checking the graph pattern reported by
     * {@link ServiceNode#getGraphPattern()}.
     * 

     * Note: If the SERVICE URI is a variable, then it can only become bound
     * through some other operation. If the SERVICE variable never becomes
     * bound, then the SERVICE call can not run.
     */
    // MUST : ServiceNode
    public Set> getDefinitelyProducedBindings(
            final ServiceNode node) {

        final Set> vars = new LinkedHashSet>();

        final GraphPatternGroup graphPattern = (GraphPatternGroup) node
                .getGraphPattern();

        if (graphPattern != null) {

            getDefinitelyProducedBindings(graphPattern, vars, true/* recursive */);

        }

        return vars;

    }

    /**
     * Report the "MUST" and "MAYBE" bound variables projected by the service.
     * This involves checking the graph pattern reported by
     * {@link ServiceNode#getGraphPattern()}. A SERVICE does NOT have an
     * explicit PROJECTION so it can not rename the projected bindings.
     */
    // MAY : ServiceNode
    public Set> getMaybeProducedBindings(final ServiceNode node) {

        final Set> vars = new LinkedHashSet>();
        
        final GraphPatternGroup graphPattern = (GraphPatternGroup) node.getGraphPattern();

        if (graphPattern != null) {

            getMaybeProducedBindings(graphPattern, vars, true/* recursive */);

        }

        return vars;

    }

    /*
     * FILTERS analysis for JoinGroupNodes
     */
    
    /**
     * Return only the filter child nodes in this group that will be fully bound
     * before running any of the joins in this group.
     * 

     * Note: Anything returned by this method should be lifted into the parent
     * group since it can be run before this group is evaluated. By lifting the
     * pre-filters into the parent group we can avoid issuing as many as-bound
     * subqueries for this group since those which fail the filter will not be
     * issued.
     * 
     * @param group
     *            The {@link JoinGroupNode}.
     * 
     * @return The filters which should either be run before the non-optional
     *         join graph or (preferably) lifted into the parent group.
     * 
     * @see ASTLiftPreFiltersOptimizer
     */
    public List getPreFilters(final JoinGroupNode group) {

        /*
         * Get the variables known to be bound starting out.
         */
        final Set> knownBound = getDefinitelyIncomingBindings(group,
                new LinkedHashSet>());

        /*
         * Get the filters that are bound by this set of known bound variables.
         */
        final List filters = getBoundFilters(group,
                knownBound);

        return filters;

    }

    /**
     * Return only the filter child nodes in this group whose variables were not
     * fully bound on entry into the join group but which will be fully bound no
     * later than once we have run the required joins in this group.
     * 
     * @param group
     *            The {@link JoinGroupNode}.
     * 
     * @return The filters to be attached to the non-optional join graph for
     *         this group.
     */
    public List getJoinFilters(final JoinGroupNode group) {

        /*
         * Get the variables known to be bound starting out.
         */
        final Set> knownBound = getDefinitelyIncomingBindings(group,
                new LinkedHashSet>());

        /*
         * Add all the "must" bound variables for this group.
         * 
         * Note: We do not recursively compute the "must" bound variables for
         * this step because we are only interested in a FILTER which can be
         * attached to a non-optional JOIN run within this group.
         */
        getDefinitelyProducedBindings(group, knownBound, false/* recursive */);
        
        /*
         * Get the filters that are bound by this set of known bound variables.
         */
        final List filters = getBoundFilters(group,
                knownBound);

        /*
         * Remove the preConditional filters (those fully bound by just incoming
         * bindings).
         */
        filters.removeAll(getPreFilters(group));
        
        return filters;
        
    }

    /**
     * Return only the filter child nodes in this group that will not be fully
     * bound even after running the required joins in this group.
     * 

     * Note: It is possible that some of these filters will be fully bound due
     * to nested optionals and unions.
     * 

     * Note: This will report any filters which are not pre-filters and are
     * not-join filters, including filters which are prune-filters. An AST
     * optimizer is responsible for identifying and removing filters which
     * should be pruned. Until they have been pruned, they will continue to be
     * reported by this method.
     * 
     * @param group
     *            The {@link JoinGroupNode}.
     * 
     * @return The filters to be run last in the group (after the nested
     *         optionals and unions).
     */
    public List getPostFilters(final JoinGroupNode group) {

        /*
         * Start with all the filters in this group.
         */
        final List filters = group.getAllFiltersInGroup();

        /*
         * Get the variables known to be bound starting out.
         */
        final Set> knownBound = getDefinitelyIncomingBindings(group,
                new LinkedHashSet>());

        /*
         * Add all the "must" bound variables for this group.
         * 
         * Note: We do not recursively compute the "must" bound variables for
         * this step because we are only interested in FILTERs which can be
         * attached to a required JOIN run within this group. However, this
         * SHOULD consider statement pattern joins, named subquery include
         * joins, SPARQL 1.1 subquery joins, and service call joins -- all of
         * which are required joins.
         */
        getDefinitelyProducedBindings(group, knownBound, false/* recursive */);

        /*
         * Get the filters that are bound by this set of known bound variables.
         */
        final Collection preAndJoinFilters = getBoundFilters(group,
                knownBound);

        /*
         * Remove the preFilters and joinFilters, leaving only the postFilters.
         * 
         * Note: This approach deliberately will report any filter which would
         * not have already been run for the group.
         */
        filters.removeAll(preAndJoinFilters);

        return filters;
        
    }

    /**
     * Return any filters can not succeed based on the "incoming", "must" and
     * "may" bound variables for this group. These filters are candidates for
     * pruning.
     * 

     * Note: Filters containing a {@link FunctionNode} for
     * {@link FunctionRegistry#BOUND} MUST NOT be pruned and are NOT reported by
     * this method.
     * 
     * @param group
     *            The {@link JoinGroupNode}.
     * 
     * @return The filters which are known to fail.
     * 
     *         TODO It is possible to prune a BOUND(?x) or NOT BOUND(?x) filter
     *         through a more detailed analysis of the value expression. If the
     *         variable ?x simply does not appear in the group or
     *         any child of that group, then BOUND(?x) can be replaced by
     *         false and NOT BOUND(?x) by true.
     *         

     *         However, in order to do this we must also look at any exogenous
     *         solution(s) (those supplied with the query when it is being
     *         evaluated). If the variable is bound in some exogenous solutions
     *         then it could be bound when the FILTER is run and the filter can
     *         not be pruned.
     * 
     * @deprecated This is now handled by {@link ASTBottomUpOptimizer}. I think
     *             that we will not need this method (it is only invoked from
     *             the test suite at this point).
     */
    public List getPruneFilters(final JoinGroupNode group) {

        /*
         * Start with all the filters in this group.
         */
        final List filters = group.getAllFiltersInGroup();

        /*
         * Get the variables known to be bound starting out.
         */
        final Set> maybeBound = getDefinitelyIncomingBindings(group, new LinkedHashSet>());

        /*
         * Add all "must" / "may" bound variables for this group (recursively).
         */
        getMaybeProducedBindings(group, maybeBound, true/* recursive */);

        /*
         * Get the filters that are bound by this set of "maybe" bound variables.
         */
        final Collection maybeFilters = getBoundFilters(group,
                maybeBound);

        /*
         * Remove the maybe bound filters, leaving only those which can not
         * succeed.
         */
        filters.removeAll(maybeFilters);
        
        /*
         * Collect all maybeFilters which use BOUND(). These can not be failed
         * as easily.
         */
        
        final Set isBoundFilters = new LinkedHashSet();
        
        for (FilterNode filter : maybeFilters) {

            final IValueExpressionNode node = filter.getValueExpressionNode();
            
            if (node instanceof FunctionNode) {
            
                if (((FunctionNode) node).isBound()) {
                
                    isBoundFilters.add(filter);
                    
                }
                
            }
            
        }

        // Remove filters which use BOUND().
        filters.removeAll(isBoundFilters);
        
        return filters;
        
    }
    
    /**
     * Helper method to determine the set of filters that will be fully bound
     * assuming the specified set of variables is bound.
     */
    private final List getBoundFilters(
            final JoinGroupNode group, final Set> knownBound) {

        final List filters = new LinkedList();

        for (IQueryNode node : group) {

            if (!(node instanceof FilterNode))
                continue;

            final FilterNode filter = (FilterNode) node;

            final Set> filterVars = filter.getConsumedVars();

            boolean allBound = true;

            for (IVariable v : filterVars) {

                allBound &= knownBound.contains(v);

            }

            if (allBound) {

                filters.add(filter);

            }

        }

        return filters;

    }

    /*
     * Materialization pipeline support.
     */
    
    /**
     * Use the {@link INeedsMaterialization} interface to find and collect
     * variables that need to be materialized for this constraint.
     */
    @SuppressWarnings("rawtypes")
    public static boolean requiresMaterialization(final IConstraint c) {
    
        return StaticAnalysis.gatherVarsToMaterialize(c,
                new LinkedHashSet>()) != Requirement.NEVER;
    
    }
    
    /**
     * Static helper used to determine materialization requirements.
     * 
     * TODO This should also reason about datatype constraints on variables. If
     * we know that a variable is constrained in a given scope to only take on a
     * data type which is associated with an {@link FullyInlineTypedLiteralIV}
     * or a specific numeric data type, then some operators may be able to
     * operate directly on that {@link IV}. This is especially interesting for
     * aggregates.
     */
    @SuppressWarnings("rawtypes")
    public static INeedsMaterialization.Requirement gatherVarsToMaterialize(
            final BOp c, final Set> terms) {
    
        boolean materialize = false;
        boolean always = false;
        
        final Iterator it = BOpUtility.preOrderIterator(c);
        
        while (it.hasNext()) {
            
            final BOp bop = it.next();
            
            if (bop instanceof INeedsMaterialization) {
                
                final INeedsMaterialization bop2 = (INeedsMaterialization) bop;
                
                final Set> t = getVarsFromArguments(bop);
                
                if (t.size() > 0) {
                    
                    terms.addAll(t);
                    
                    materialize = true;
                    
                    // if any bops have terms that always needs materialization
                    // then mark the whole constraint as such
                    if (bop2.getRequirement() == Requirement.ALWAYS) {
                        
                        always = true;
                        
                    }
                    
                }
                
            }
    
        }
    
        return materialize ? (always ? Requirement.ALWAYS
                : Requirement.SOMETIMES) : Requirement.NEVER;
    
    }

    @SuppressWarnings({ "rawtypes", "unchecked" })
    private static Set> getVarsFromArguments(final BOp c) {
    
        final int arity = c.arity();
        
        final Set> terms = new LinkedHashSet>(arity);
    
        for (int i = 0; i < arity; i++) {
    
            final BOp arg = c.get(i);
    
            if (arg != null) {
    
                if (arg instanceof IValueExpression
                        && arg instanceof IPassesMaterialization) {
    
                    terms.addAll(getVarsFromArguments(arg));
    
                } else if (arg instanceof IVariable) {
    
                    terms.add((IVariable) arg);
    
                }
    
            }
    
        }
    
        return terms;
    
    }

    /**
     * Identify the join variables for the specified INCLUDE for the position
     * within the query in which it appears.
     * 
     * @param aNamedSubquery
     *            The named subquery.
     * @param anInclude
     *            An include for that subquery.
     */
    public Set> getJoinVars(
            final NamedSubqueryRoot aNamedSubquery,
            final NamedSubqueryInclude anInclude, final Set> vars) {

        return _getJoinVars(aNamedSubquery, anInclude, vars);

    }
    
    /**
     * Identify the join variables for the specified subquery for the position
     * within the query in which it appears.
     * 
     * @param aSubquery
     *            The subquery.
     * @param vars
     * 
     * @return The join variables.
     */
    public Set> getJoinVars(final SubqueryRoot subquery,
            final Set> vars) {

        return _getJoinVars(subquery, subquery, vars);

    }
    
    /**
     * Identify the join variables for the specified subquery for the position
     * within the query in which it appears. For a named subquery, it considers
     * the position in which the INCLUDE appears.
     * 
     * @param aSubquery
     *            Either a {@link NamedSubqueryRoot} or a {@link SubqueryRoot}.
     * @param theNode
     *            The node which represents the subquery in the join group. For
     *            a named subquery, this will be a {@link NamedSubqueryInclude}.
     *            For a {@link SubqueryRoot}, it is just the
     *            {@link SubqueryRoot} itself.
     * 
     * @return The join variables.
     */
    private Set> _getJoinVars(final SubqueryBase aSubquery,
            final IGroupMemberNode theNode, final Set> vars) {

        /*
         * The variables which are projected by the subquery which will be
         * definitely bound based on an analysis of the subquery.
         */
        final Set> boundBySubquery = getDefinitelyProducedBindings(aSubquery);

        if (log.isInfoEnabled()) {
        	log.info(boundBySubquery);
        }
        
        /*
         * The variables which are possibly bound on entry to the join group
         * in which the subquery appears.
         */
        final Set> incomingBindings = getDefinitelyIncomingBindings(
                theNode, new LinkedHashSet>());
        
        if (log.isInfoEnabled()) {
        	log.info(incomingBindings);
        }
        
        /*
         * This is only those variables which are bound on entry into the group
         * in which the subquery join appears *and* which are "must" bound
         * variables projected by the subquery.
         */
        boundBySubquery.retainAll(incomingBindings);
            
        if (log.isInfoEnabled()) {
        	log.info(boundBySubquery);
        }
        
        vars.addAll(boundBySubquery);

        if (log.isInfoEnabled()) {
        	log.info(vars);
        }
        
        return vars;

    }
    
    /**
     * Return the join variables for a SERVICE.
     * 
     * @param serviceNode
     * @param vars
     * @return 
     */
    public Set> getJoinVars(final ServiceNode serviceNode,
            final Set> vars) {

        /*
         * The variables which will be definitely bound based on an analysis of
         * the SERVICE.
         */
        final Set> boundByService = getDefinitelyProducedBindings(serviceNode);

        /*
         * The variables which are definitely bound on entry to the join group
         * in which the SERVICE appears.
         */
        final Set> incomingBindings = getDefinitelyIncomingBindings(
                serviceNode, new LinkedHashSet>());
        
        /*
         * This is only those variables which are bound on entry into the group
         * in which the SERVICE join appears *and* which are "must" bound
         * variables projected by the SERVICE.
         */
        boundByService.retainAll(incomingBindings);
            
        vars.addAll(boundByService);

        return vars;

    }
    
    /**
     * Return the join variables for a VALUES clause (embedded only - not
     * top-level).
     * 
     * @param bc The VALUES clause (a bunch of solutions)
     * @param stats A static analysis of those solutions.
     * @param vars
     * @return 
     */
    public Set> getJoinVars(final BindingsClause bc,
            final ISolutionSetStats stats,
            final Set> vars) {

        /*
         * The variables which will be definitely bound based on the solutions
         * in the VALUES clause.
         * 
         * Note: Collection is not modifyable, so we copy it.
         */
        final Set> boundByBindingsClause = new LinkedHashSet>(
                stats.getAlwaysBound());

        /*
         * The variables which are definitely bound on entry to the join group
         * in which the VALUES clause appears.
         */
        final Set> incomingBindings = getDefinitelyIncomingBindings(
                bc, new LinkedHashSet>());
        
        /*
         * This is only those variables which are bound on entry into the group
         * in which the VALUES join appears *and* which are "must" bound
         * variables projected by the VALUES.
         * 
         * FIXME Is this the correct semantics? I followed the pattern for SERVICE.
         */
        boundByBindingsClause.retainAll(incomingBindings);
            
        vars.addAll(boundByBindingsClause);

        return vars;

    }
    

    /**
	 * Return the join variables for an INCLUDE of a pre-existing named solution
	 * set.
	 * 
	 * @param nsi
	 *            The {@link NamedSubqueryInclude}
	 * @param solutionSet
	 *            The name of a pre-existing solution set.
	 * @param vars
	 *            The caller's collection.
	 *            
	 * @return The caller's collection.
	 */
	public Set> getJoinVars(final NamedSubqueryInclude nsi,
			final String solutionSet, final Set> vars) {

		final String name = solutionSet;
		
        /*
         * The variables which will be definitely bound based on the statistics
         * collected for that solution set.
         */
		final ISolutionSetStats stats = getSolutionSetStats(name);
		
		/*
		 * All variables which are bound in each solution of this solution set.
		 * 
		 * Note: The summary data for a named solution set is typically
		 * immutable, so we insert the variables into a mutable collection in
		 * order to make changes to that collection below.
		 */
		final Set> boundInSolutionSet = new LinkedHashSet>(
				stats.getAlwaysBound());

		/*
		 * The variables which are definitely bound on entry to the INCLUDE
		 * operator based on the static analysis of the query, including where
		 * it appears in the join order of the query.
		 */
		final Set> incomingBindings = getDefinitelyIncomingBindings(
				nsi, new LinkedHashSet>());
        
        /*
		 * This is only those variables which are bound on entry into the
		 * INCLUDE *and* which are "must" bound variables projected by the
		 * pre-existing named solution set.
		 */
        boundInSolutionSet.retainAll(incomingBindings);
            
        vars.addAll(boundInSolutionSet);

        return vars;

    }
    
    /**
     * Return any variables which are used after the given node in the current
     * ordering of its parent {@link JoinGroupNode} but DOES NOT consider the
     * parent or the PROJECTION for the query in which this group appears.
     * 
     * @param node
     *            A node which is a direct child of some {@link JoinGroupNode}.
     * @param vars
     *            Where to store the variables.
     * 
     * @return The caller's set.
     * 
     * @throws IllegalArgumentException
     *             if the node is not the direct child of some
     *             {@link JoinGroupNode}.
     */
    public Set> getAfterVars(final IGroupMemberNode node,
            final Set> vars) {

        if (node.getParent() == null) {
            // Immediate parent MUST be defined.
            throw new IllegalArgumentException();
        }

        if (!(node.getParent() instanceof JoinGroupNode)) {
            // Immediate parent MUST be a join group node.
            throw new IllegalArgumentException();
        }
        
        final JoinGroupNode p = node.getParentJoinGroup();
        
        boolean found = false;
        
        for (IGroupMemberNode c : p) {
        
            if (found) {
            
                // Add in any variables referenced after this proxy node.
                getSpannedVariables((BOp) c, true/* filters */, vars);
                
            }

            if (c == node) {
            
                // Found the position of the proxy node in the group.
                found = true;
                
            }

        }

        assert found;
        
        return vars;
        
    }

    /**
     * Return the set of variables which must be projected if the group is to be
     * converted into a sub-query. This method identifies variables which are
     * either MUST or MIGHT bound outside of the group which are also used
     * within the group and includes them in the projection. It also identified
     * variables used after the group (in the current evaluation order) which
     * are also used within the group and include them in the projection.
     * 

     * When considering the projection of the (sub-)query in which the group
     * appears, the SELECT EXPRESSIONS are consulted to identify variables which
     * we need to project out of the group.
     * 
     * @param proxy
     *            The join group which will be replaced by a sub-query. This is
     *            used to decide which variables are known bound (and hence
     *            should be projected into the WHERE clause if they are used
     *            within that WHERE clause). It is also used to decide which
     *            variables which become bound in the WHERE clause will be used
     *            outside of its scope and hence must be projected out of the
     *            WHERE clause. (The parent of this proxy MUST be a
     *            {@link JoinGroupNode}, not a {@link UnionNode} and not
     *            null. This condition is readily satisified if the
     *            rewrite is considering the children of some join group node as
     *            the parent of the proxy will be that join group node.)
     * @param groupToLift
     *            The group which is being lifted out and whose projection will
     *            be computed.
     * @param query
     *            The query (or sub-query) in which that proxy node exists. This
     *            is used to identify anything which is PROJECTed out of the
     *            query.
     * @param exogenousVars
     *            Any variables which are bound outside of the query AND known
     *            to be in scope (exogenous variables in a sub-select are only
     *            in scope if they are projected into the sub-select).
     * @param projectedVars
     *            The variables which must be projected will be added to this
     *            collection.
     * @return The projection.
     * 
     *         TODO We should recognize conditions under which this can be made
     *         into a DISTINCT projection. This involves a somewhat tricky
     *         analysis of the context in which each projected variable is used.
     *         There is *substantial* benefit to be gained from this analysis as
     *         a DISTINCT projection can radically reduce the size of the
     *         intermediate solution sets and the work performed by the overall
     *         query. However, if the analysis is incorrect and we mark the
     *         PROJECTION as DISTINCT when that is not allowed by the semantics
     *         of the query, then the query will not have the same behavior. So,
     *         getting this analysis correct is very important.
     */
    public Set> getProjectedVars(
            final IGroupMemberNode proxy,
            final GraphPatternGroup groupToLift,//
            final QueryBase query,// 
            final Set> exogenousVars,//
            final Set> projectedVars) {

        // All variables which are used within the WHERE clause.
        final Set> groupVars = getSpannedVariables(groupToLift,
                new LinkedHashSet>());

        /*
         * Figure out what we need to project INTO the group.
         */
        
        // All variables which might be incoming bound into the proxy node.
        final Set> beforeVars = getMaybeIncomingBindings(
                proxy, new LinkedHashSet>());

        // Add in anything which is known to be bound outside of the query.
        beforeVars.addAll(exogenousVars);

        // Drop anything not used within the group.
        beforeVars.retainAll(groupVars);

        /*
         * Figure out what we need to project FROM the group.
         */

        // All variables used after the proxy node in its's parent join group.
        final Set> afterVars = getAfterVars(proxy,
                new LinkedHashSet>());
        
        // Gather the variables used by the SELECT EXPRESSIONS which are
        // projected out of the query in which this group appears.
        query.getSelectExprVars(afterVars);

        // Drop anything not used within the group.
        afterVars.retainAll(groupVars);
        
        /*
         * The projection for the group is anything MAYBE bound on entry to the
         * group which is also used within the group PLUS anything used after
         * the group which is used within the group.
         */
        projectedVars.addAll(beforeVars);
        projectedVars.addAll(afterVars);
        
        return projectedVars;

    }

    /**
     * Decide whether a query is an aggregation query by handing its
     * projection, GROUP BY and HAVING clauses to
     * {@link #isAggregate(ProjectionNode, GroupByNode, HavingNode)}.
     * 
     * @param query
     *            The query.
     * 
     * @return <code>true</code> if it is an aggregation query.
     */
    public static boolean isAggregate(final QueryBase query) {

        // The three clauses are read in this order: SELECT, GROUP BY, HAVING.
        return isAggregate(
                query.getProjection(),
                query.getGroupBy(),
                query.getHaving());

    }

    /**
     * Decide whether the given clauses describe an aggregation query. That is
     * the case when the GROUP BY is non-empty, when the HAVING is non-empty, or
     * when some expression in the projection is recognized as an aggregate.
     * Every argument may be <code>null</code>.
     * 
     * @param projection
     *            The projection (optional).
     * @param groupBy
     *            The GROUP BY clause (optional).
     * @param having
     *            The HAVING clause (optional).
     * 
     * @return <code>true</code> if it is an aggregation query.
     */
    public static boolean isAggregate(final ProjectionNode projection,
            final GroupByNode groupBy, final HavingNode having) {

        // Either a GROUP BY or a HAVING with content settles the question.
        if ((groupBy != null && !groupBy.isEmpty())
                || (having != null && !having.isEmpty())) {
            return true;
        }

        // Nothing left to examine without a projection.
        if (projection == null) {
            return false;
        }

        // Otherwise look for an aggregate among the select expressions.
        for (IValueExpressionNode selectExpr : projection) {
            if (isAggregateExpressionNode(selectExpr)) {
                return true;
            }
        }

        return false;

    }

    /**
     * Checks if the given expression node is an aggregate.
     * <p>
     * When the value expression has already been cached on the node, the node
     * is an aggregate iff that value expression is an "obvious" aggregate
     * (see {@link #isObviousAggregate(IValueExpression)}). Otherwise an
     * {@link AssignmentNode} is checked recursively through its value
     * expression node and a {@link FunctionNode} is checked against
     * {@link FunctionRegistry#isAggregate(URI)}; the arguments of such an
     * uncached {@link FunctionNode} are NOT examined.
     * <p>
     * After refactoring of SPARQL parser
     * (https://jira.blazegraph.com/browse/BLZG-1176), an
     * {@link AssignmentNode} needs to be checked recursively, as its value
     * expression is not completely parsed, but could be an aggregate, that
     * should result in failing checks while preparing queries. For example,
     * following test is failing without this check:
     * com.bigdata.rdf.sail.sparql.BigdataSPARQL2ASTParserTest.test_agg10()
     * 
     * @param exprNode
     *            The expression node to be checked.
     * 
     * @return <code>true</code> iff the node was recognized as an aggregate.
     */
    private static boolean isAggregateExpressionNode(IValueExpressionNode exprNode) {

        final IValueExpression<?> expr = exprNode.getValueExpression();

        if (expr != null) {

            // The value expression is cached: inspect it directly.
            return isObviousAggregate(expr);

        }

        /*
         * The value expression is not cached....
         */

        if (exprNode instanceof AssignmentNode) {
            return isAggregateExpressionNode(((AssignmentNode) exprNode).getValueExpressionNode());
        }

        if (exprNode instanceof FunctionNode) {

            /*
             * Hack used when the BigdataExprBuilder needs to decide on the
             * validity of aggregate expressions before we get around to
             * caching the value expressions during evaluation (i.e., to pass
             * the compliance tests for the parser).
             */
            final FunctionNode functionNode = (FunctionNode) exprNode;

            return FunctionRegistry.isAggregate(functionNode.getFunctionURI());

        }

        return false;

    }

    /**
     * Return <code>true</code> iff the {@link IValueExpression} is an obvious
     * aggregate (it uses an {@link IAggregate} somewhere within it). This is
     * used to identify projections which are aggregates when they are used
     * without an explicit GROUP BY or HAVING clause.
     * <p>
     * Note: Value expressions can be "non-obvious" aggregates when considered
     * in the context of a GROUP BY, HAVING, or even a SELECT expression where
     * at least one argument is a known aggregate. For example, a constant is an
     * aggregate when it appears in a SELECT expression for a query which has a
     * GROUP BY clause. Another example: any value expression used in a GROUP BY
     * clause is an aggregate when the same value expression appears in the
     * SELECT clause.
     * <p>
     * This method is only to find the "obvious" aggregates which signal that a
     * bare SELECT clause is in fact an aggregation.
     * 
     * @param expr
     *            The expression.
     * 
     * @return <code>true</code> iff it is an obvious aggregate.
     */
    private static boolean isObviousAggregate(final IValueExpression<?> expr) {

        if (expr instanceof IAggregate<?>)
            return true;

        final Iterator<BOp> itr = expr.argIterator();

        while (itr.hasNext()) {

            final BOp arg = itr.next();

            /*
             * Note: instanceof rejects both null arguments and arguments which
             * are not value expressions (the latter would otherwise fail the
             * cast with a ClassCastException).
             */
            if (arg instanceof IValueExpression<?>) {

                if (isObviousAggregate((IValueExpression<?>) arg)) // recursion.
                    return true;

            }

        }

        return false;

    }
    
    /**
     * Extract the set of variables contained in a binding set.
     * @param bss
     * @return
     */
    @SuppressWarnings("rawtypes")
    public Set> getVarsInBindingSet(final List bss) {
       Set> bssVars = new HashSet>();
       for (int i=0; i bsVars = bs.vars();
          
          while (bsVars.hasNext()) {
             bssVars.add(bsVars.next());
          }
          
       }
       return bssVars;
    }
    
    /**
     * Tests whether a filter is in conjunctive normal form (CNF) by testing
     * the value expression node of that filter.
     * 
     * @param filter
     *            The filter.
     * 
     * @return The outcome of {@link #isCNF(IValueExpressionNode)} for the
     *         filter's value expression node.
     */
    public static boolean isCNF(final FilterNode filter) {

       // Only the filter's value expression decides.
       return isCNF(
             filter.getValueExpressionNode());

    }

    /**
     * Tests whether a value expression node is in conjunctive normal form
     * (CNF).
     * <p>
     * Anything which is not a {@link FunctionNode} is in CNF, as is any
     * function other than NOT, OR and AND. A NOT must satisfy
     * {@link #isCNFNegationOrTerminal(FunctionNode)}, an OR must satisfy
     * {@link #isCNFDisjunct(FunctionNode)} and both arguments of an AND must
     * themselves be in CNF.
     * 
     * @param vexpr
     *            The value expression node.
     * 
     * @return <code>true</code> iff the value expression node is in CNF.
     */
    static public boolean isCNF(final IValueExpressionNode vexpr) {

       if (vexpr instanceof FunctionNode) {

          final FunctionNode fn = (FunctionNode) vexpr;
          final URI uri = fn.getFunctionURI();

          if (uri.equals(FunctionRegistry.NOT)) {
             return isCNFNegationOrTerminal(fn);
          }

          if (uri.equals(FunctionRegistry.OR)) {
             return isCNFDisjunct(fn);
          }

          if (uri.equals(FunctionRegistry.AND)) {

             // The right operand is looked at only if the left one is in CNF.
             if (!isCNF((ValueExpressionNode) fn.get(0))) {
                return false;
             }

             return isCNF((ValueExpressionNode) fn.get(1));

          }

       }

       // Non-functions and all remaining functions are terminals.
       return true;
    }


    /**
     * Tests whether a function node may appear as a disjunct inside of a CNF
     * expression, which rules out any conjunction below it.
     * <p>
     * A NOT is delegated to {@link #isCNFNegationOrTerminal(FunctionNode)}. An
     * OR is accepted iff each of its two arguments either is not a
     * {@link FunctionNode} or is itself an acceptable disjunct. An AND is
     * rejected. Every other function is a terminal and is accepted.
     * 
     * @param functionNode
     *            The function node.
     * 
     * @return <code>true</code> iff the function node is a valid CNF disjunct.
     */
    static public boolean isCNFDisjunct(final FunctionNode functionNode) {

       final URI uri = functionNode.getFunctionURI();

       if (uri.equals(FunctionRegistry.NOT)) {
          return isCNFNegationOrTerminal(functionNode);
       }

       if (uri.equals(FunctionRegistry.OR)) {

          // Note: both sides are always evaluated (no short-circuit).
          final boolean leftOk =
             !(functionNode.get(0) instanceof FunctionNode)
                || isCNFDisjunct((FunctionNode) functionNode.get(0));

          final boolean rightOk =
             !(functionNode.get(1) instanceof FunctionNode)
                || isCNFDisjunct((FunctionNode) functionNode.get(1));

          return leftOk && rightOk;

       }

       if (uri.equals(FunctionRegistry.AND)) {
          return false; // a conjunction is not allowed within a disjunct
       }

       return true; // any other function is a terminal
    }


    /**
     * Tests whether a function node is a terminal, or a chain of negations
     * which ends in a terminal, as required for the innermost level of a CNF
     * expression. Neither an AND nor an OR may occur along that chain.
     * 
     * @param functionNode
     *            The function node.
     * 
     * @return <code>false</code> iff an AND or OR is reached by following the
     *         first argument of zero or more nested NOTs.
     */
    static public boolean isCNFNegationOrTerminal(final FunctionNode functionNode) {

       // Walk down the chain of nested negations.
       FunctionNode current = functionNode;

       while (true) {

          final URI uri = current.getFunctionURI();

          if (uri.equals(FunctionRegistry.AND) ||
              uri.equals(FunctionRegistry.OR)) {

             return false;

          }

          if (!uri.equals(FunctionRegistry.NOT)) {

             return true; // any other function is a terminal

          }

          final BOp negated = current.get(0);

          if (!(negated instanceof FunctionNode)) {

             return true; // the negation of a non-function is a terminal

          }

          current = (FunctionNode) negated;

       }
    }


    /**
     * Converts a value expression into conjunctive normal form (CNF). The
     * conversion works on a deep copy of the argument: negations are pushed
     * down first ({@link #pushNegations(IValueExpressionNode)}) and
     * disjunctions are pushed down afterwards.
     * 
     * @param vexpr
     *            The value expression node. It is copied before rewriting.
     * 
     * @return The rewritten copy, as returned by the disjunct push-down.
     *         NOTE(review): this method was formerly documented to return
     *         <code>null</code> when the value expression is already in CNF,
     *         but nothing in this method returns <code>null</code> for that
     *         case - confirm against the disjunct push-down before relying
     *         on it.
     */
    static public IValueExpressionNode toCNF(final IValueExpressionNode vexpr) {

       // Step 1: work on a private copy.
       IValueExpressionNode result =
          (IValueExpressionNode)BOpUtility.deepCopy((BOp) vexpr);

       // Step 2: move the negations to the bottom of the tree.
       result = pushNegations(result);

       // Step 3: move the disjunctions below the conjunctions.
       result = pushDisjuncts(result);

       return result;
    }


    /**
     * Recursively pushes negations down the operator tree, such that in the
     * returned node, negations are always at the bottom of the tree. In
     * particular, all AND and OR value expressions will be situated above
     * negations.
     * <p>
     * A negated AND / OR is rewritten using De Morgan's laws, a double
     * negation is dropped, and a negated comparison (=, !=, &lt;, &lt;=, &gt;,
     * &gt;=) is replaced by the inverse comparison. Any other negation and any
     * other function is returned as is.
     * <p>
     * The argument is not modified by this method. Rewritten operators are
     * new nodes, but they may share unchanged sub-expressions with the
     * argument.
     * 
     * @param vexp
     *            The value expression node.
     * 
     * @return The resulting {@link IValueExpressionNode}, which is logically
     *         equivalent to the argument. This is the argument itself when
     *         nothing needs to be rewritten at its root.
     */
    static public IValueExpressionNode pushNegations(IValueExpressionNode vexp) {

       if(!(vexp instanceof FunctionNode)) {
          return vexp;
       }
       
       final FunctionNode functionNode = (FunctionNode)vexp;
       final URI functionURI = functionNode.getFunctionURI();
       
       if (functionURI.equals(FunctionRegistry.NOT)) {
          
          final IValueExpressionNode inner = 
             (IValueExpressionNode) functionNode.get(0);
          
          if(inner instanceof FunctionNode) {
             
             final FunctionNode innerFunctionNode = (FunctionNode)inner;
             final URI innerFunctionURI = innerFunctionNode.getFunctionURI();
             
             if (innerFunctionURI.equals(FunctionRegistry.AND)) {

                // De Morgan: !(a && b) -> !a || !b
                final IValueExpressionNode negLeft = 
                   pushNegations(
                      FunctionNode.NOT(
                         (ValueExpressionNode)innerFunctionNode.get(0)));
                final IValueExpressionNode negRight = 
                   pushNegations(
                      FunctionNode.NOT(
                         (ValueExpressionNode)innerFunctionNode.get(1)));
                
                return FunctionNode.OR(
                   (ValueExpressionNode)negLeft, 
                   (ValueExpressionNode)negRight);
                
             } else if (innerFunctionURI.equals(FunctionRegistry.OR)) {

                // De Morgan: !(a || b) -> !a && !b
                final IValueExpressionNode negLeft = 
                   pushNegations(
                      FunctionNode.NOT(
                         (ValueExpressionNode)innerFunctionNode.get(0)));
                final IValueExpressionNode negRight = 
                   pushNegations(
                      FunctionNode.NOT(
                         (ValueExpressionNode)innerFunctionNode.get(1)));
                   
                return FunctionNode.AND(
                   (ValueExpressionNode)negLeft, 
                   (ValueExpressionNode)negRight);
                   
             } else if (innerFunctionURI.equals(FunctionRegistry.NOT)) {
               
                /*
                 * Drop double negation: !!a -> a
                 * 
                 * Note: The outer NOT is deliberately left untouched.
                 * Replacing its argument in place would turn the caller's
                 * !!a into !a, which is not equivalent.
                 */
                final BOp innerInner = innerFunctionNode.get(0);
                
                // recurse if necessary
                if (innerInner instanceof IValueExpressionNode) {
                   return pushNegations((IValueExpressionNode)innerInner);
                }
                
                // else: can not be unwrapped, so !!a is returned as is (below)
                
             } else if (innerFunctionURI.equals(FunctionRegistry.EQ)) {
                
                // invert: = -> !=
                return FunctionNode.NE(
                   (ValueExpressionNode)innerFunctionNode.get(0), 
                   (ValueExpressionNode)innerFunctionNode.get(1));
                
             } else if (innerFunctionURI.equals(FunctionRegistry.NE)) {
                
                // invert: != -> =
                return FunctionNode.EQ(
                   (ValueExpressionNode)innerFunctionNode.get(0), 
                   (ValueExpressionNode)innerFunctionNode.get(1));
                              
             } else if (innerFunctionURI.equals(FunctionRegistry.LE)) {
                
                // invert: <= -> >
                return FunctionNode.GT(
                   (ValueExpressionNode)innerFunctionNode.get(0), 
                   (ValueExpressionNode)innerFunctionNode.get(1));
                
             } else if (innerFunctionURI.equals(FunctionRegistry.LT)) {
                
                // invert: < -> >=
                return FunctionNode.GE(
                   (ValueExpressionNode)innerFunctionNode.get(0), 
                   (ValueExpressionNode)innerFunctionNode.get(1));
                
             } else if (innerFunctionURI.equals(FunctionRegistry.GE)) {
                
                // invert: >= -> <
                return FunctionNode.LT(
                   (ValueExpressionNode)innerFunctionNode.get(0), 
                   (ValueExpressionNode)innerFunctionNode.get(1));
                
             } else if (innerFunctionURI.equals(FunctionRegistry.GT)) {
                
                // invert: > -> <=
                return FunctionNode.LE(
                   (ValueExpressionNode)innerFunctionNode.get(0), 
                   (ValueExpressionNode)innerFunctionNode.get(1));

             } // else: negation of some other function, nothing to be done
          }
          
       } else if (functionURI.equals(FunctionRegistry.AND)) {

          // Rebuild the conjunction from the rewritten operands.
          return FunctionNode.AND(
                (ValueExpressionNode)pushNegations(
                   (IValueExpressionNode) functionNode.get(0)),
                (ValueExpressionNode)pushNegations(
                   (IValueExpressionNode) functionNode.get(1)));  
          
       } else if (functionURI.equals(FunctionRegistry.OR)) {

          // Rebuild the disjunction from the rewritten operands.
          return FunctionNode.OR(
             (ValueExpressionNode)pushNegations(
                (IValueExpressionNode) functionNode.get(0)),
             (ValueExpressionNode)pushNegations(
                (IValueExpressionNode) functionNode.get(1)));  
          
       } // else: nothing to be done
       
       return vexp;
    }

    
    /**
     * Recursively pushes logical ORs below logical ANDs in the operator tree, 
     * such that in the returned node all OR expressions are situated below 
     * AND expressions. Expects that all NOT expressions have been pushed
     * down to the bottom already (otherwise, the behavior is undetermined).
     * <p>
     * The resulting {@link IValueExpressionNode} is logically equivalent.
     * 
     * @param vexp the value expression node to rewrite
     * 
     * @return the rewritten value expression node; the given node itself if
     *         it is not a {@link FunctionNode} or if its function is neither
     *         OR nor AND
     */
    static public IValueExpressionNode pushDisjuncts(
       final IValueExpressionNode vexp) {
       
       // anything that is not a function cannot contain a disjunction
       if (!(vexp instanceof FunctionNode)) {
          return vexp;
       }
       
       final FunctionNode functionNode = (FunctionNode)vexp;
       final URI functionURI = functionNode.getFunctionURI();
       
       if (functionURI.equals(FunctionRegistry.OR)) {

          /*
           * First, recurse into both operands, making sure that AND is
           * propagated up in the subtrees. The result of each recursive call
           * is additionally passed through pushNegations().
           */
          final IValueExpressionNode left = 
             pushNegations(
                pushDisjuncts((IValueExpressionNode) functionNode.get(0)));
          final IValueExpressionNode right =
             pushNegations(
                pushDisjuncts((IValueExpressionNode) functionNode.get(1)));
          
          /*
           * New conjuncts are basically the cross product disjuncts of the left
           * and right subtree, e.g. (a AND b) OR (c AND d) becomes
           * (a OR c) AND (a OR d) AND (b OR c) AND (b OR d). Note that the
           * special case (where neither the left nor the right subtree has an
           * AND at the top) nicely fits in: in that case, leftConjuncts and
           * rightConjuncts have one element each, say x and y, and we compute
           * x OR y as the one and only conjunct.
           */
          final List<IValueExpressionNode> leftConjuncts = 
             extractToplevelConjuncts(
                left, new ArrayList<IValueExpressionNode>());
          final List<IValueExpressionNode> rightConjuncts = 
             extractToplevelConjuncts(
                right, new ArrayList<IValueExpressionNode>());
          
          final List<IValueExpressionNode> newConjuncts = 
             new ArrayList<IValueExpressionNode>();
          for (IValueExpressionNode leftConjunct : leftConjuncts) {
             for (IValueExpressionNode rightConjunct : rightConjuncts) {
                
                final IValueExpressionNode newConjunct = 
                   FunctionNode.OR(
                      (ValueExpressionNode)leftConjunct,
                      (ValueExpressionNode)rightConjunct);
                newConjuncts.add(newConjunct);
             }
          }
          
          // re-assemble the pairwise disjunctions into a single AND tree
          return toConjunctiveValueExpression(newConjuncts);
          
       } else if (functionURI.equals(FunctionRegistry.AND)) {
          
          // just recurse into both operands and rebuild the AND node
          return FunctionNode.AND(
             (ValueExpressionNode)pushDisjuncts(
                (IValueExpressionNode) functionNode.get(0)),
             (ValueExpressionNode)pushDisjuncts(
                (IValueExpressionNode) functionNode.get(1)));
          
       }  // we're done recursing, no disjuncts will be found below this point

       // neither OR nor AND: hand the node back as it was passed in
       return vexp;
    }
    

    /** 
     * Extracts all AND-connected conjuncts located at the top of a given
     * value expression node (recursively, until an operator different from
     * AND is encountered). Any node that is not an AND function node is
     * appended to the list as is, without looking at its children.
     * 
     * @param vexp the value expression node
     * @param nodes list to which the top level conjuncts are appended
     * 
     * @return the list that was passed in as <code>nodes</code>
     */
    static public List<IValueExpressionNode> extractToplevelConjuncts(
          final IValueExpressionNode vexp, 
          List<IValueExpressionNode> nodes) {
       
       if (vexp instanceof FunctionNode) {

          final FunctionNode functionNode = (FunctionNode)vexp;
          final URI functionURI = functionNode.getFunctionURI();
          
          if (functionURI.equals(FunctionRegistry.AND)) {
             
             // descend into both operands, left first, to keep their order
             extractToplevelConjuncts(
                (ValueExpressionNode)functionNode.get(0), nodes);
             extractToplevelConjuncts(
                (ValueExpressionNode)functionNode.get(1), nodes);
             
             return nodes; // don't record this (complex AND) node
          }
       }

       nodes.add(vexp); // record conjunct (don't recurse)
       return nodes;
       
    }
    
    
    /**
     * Constructs an (unbalanced) tree out of the list of conjuncts.
     * If the conjuncts that are passed in are null or empty, null is returned.
     * 
     * @param conjuncts
     * @return
     */
    static public IValueExpressionNode toConjunctiveValueExpression(
          final List conjuncts) {
       
       if (conjuncts==null || conjuncts.isEmpty()) {
          return null; 
       }

       
       // if the list is unary, we return the one and only conjunct
       if (conjuncts.size()==1) {
          
          return conjuncts.get(0);
          
       } else {
          
          IValueExpressionNode tmp = 
             FunctionNode.AND(
                (ValueExpressionNode)conjuncts.get(0), 
                (ValueExpressionNode)conjuncts.get(1));
          
          for (int i=2; i