// com.bigdata.rdf.sparql.ast.StaticAnalysis Maven / Gradle / Ivy
/**
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved.
Contact:
SYSTAP, LLC DBA Blazegraph
2501 Calvert ST NW #106
Washington, DC 20008
licenses@blazegraph.com
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
* Created on Sep 14, 2011
*/
package com.bigdata.rdf.sparql.ast;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;
import org.apache.log4j.Logger;
import org.openrdf.model.URI;
import com.bigdata.bop.BOp;
import com.bigdata.bop.BOpUtility;
import com.bigdata.bop.IBindingSet;
import com.bigdata.bop.IConstant;
import com.bigdata.bop.IConstraint;
import com.bigdata.bop.IValueExpression;
import com.bigdata.bop.IVariable;
import com.bigdata.bop.aggregate.IAggregate;
import com.bigdata.rdf.internal.IV;
import com.bigdata.rdf.internal.constraints.INeedsMaterialization;
import com.bigdata.rdf.internal.constraints.INeedsMaterialization.Requirement;
import com.bigdata.rdf.internal.constraints.IPassesMaterialization;
import com.bigdata.rdf.internal.impl.literal.FullyInlineTypedLiteralIV;
import com.bigdata.rdf.sparql.ast.cache.CacheConnectionImpl;
import com.bigdata.rdf.sparql.ast.eval.AST2BOpContext;
import com.bigdata.rdf.sparql.ast.eval.IEvaluationContext;
import com.bigdata.rdf.sparql.ast.optimizers.ASTBottomUpOptimizer;
import com.bigdata.rdf.sparql.ast.optimizers.ASTLiftPreFiltersOptimizer;
import com.bigdata.rdf.sparql.ast.optimizers.ASTOptimizerList;
import com.bigdata.rdf.sparql.ast.service.ServiceNode;
import com.bigdata.rdf.sparql.ast.ssets.ISolutionSetManager;
/**
* Methods for static analysis of a query. There is one method which looks "up".
* This corresponds to how we actually evaluate things (left to right in the
* query plan). There are two methods which look "down". This corresponds to the
* bottom-up evaluation semantics of SPARQL.
*
* When determining the "known" bound variables on entry to a node we have to
* look "up" the tree until we reach the outer most group. Note that named
* subqueries DO NOT receive bindings from the places where they are INCLUDEd
* into the query.
*
*
* <h3>Analysis of Incoming "Known" Bound Variables (Looking Up)</h3>
*
* Static analysis of the incoming "known" bound variables does NOT reflect
* bottom up evaluation semantics. If a variable binding would not be observed
* for bottom up evaluation semantics due to a badly designed left join pattern
* then the AST MUST be rewritten to lift the badly designed left join into a
* named subquery where it will enjoy effective bottom up evaluation semantics.
*
* Analysis of "must" and "maybe" Bound Variables (Looking Down).
*
* The following classes are producers of bindings and need to be handled by
* static analysis when looking down the AST tree:
*
* - {@link QueryBase}
* - The static analysis of the definitely and maybe bound variables depends
* on the projection and where clauses.
*
* - {@link SubqueryRoot}
* - SPARQL 1.1 subquery. This is just the static analysis of the QueryBase
* for that subquery.
*
* - {@link NamedSubqueryRoot}
* - This is just the static analysis of the QueryBase for that named
* subquery. Named subqueries are run without any visible bindings EXCEPT those
* which are exogenous.
*
* - {@link NamedSubqueryInclude}
* - The static analysis of the INCLUDE is really the static analysis of the
* NamedSubqueryRoot which produces that solution set. The incoming known
* variables are ignored when doing the static analysis of the named subquery
* root.
*
* - {@link ServiceNode}
* - The static analysis of the definitely and maybe bound variables depends
* on the graph pattern for that service call. This is analyzed like a normal
* graph pattern. Everything visible in the graph pattern is considered to be
* projected. As far as I can tell, ServiceNodes are not run "as-bound" and
* their static analysis is as if they were named subqueries (they have no known
* bound incoming variables other than those communicated by their
* BindingsClause).
*
* - {@link StatementPatternNode}
* - All variables are definitely bound UNLESS
* {@link StatementPatternNode#isOptional()} is <code>true</code>.
*
* Note: we sometimes attach a simple optional join to the parent group for
* efficiency, at which point it becomes an "optional" statement pattern. An
* optional statement pattern may also have zero or more {@link FilterNode}s
* associated with it.
*
* - {@link JoinGroupNode}
*
*
* - {@link UnionNode}
* - The definitely bound variables is the intersection of the definitely
* bound variables in the child join groups. The maybe bound variables is the
* union of the maybe bound variables in the child join groups.
*
* - {@link AssignmentNode}
* - BIND(expr AS var) in a group will not bind the variable if there is an
* error when evaluating the value expression and does not fail the solution.
* Thus BIND() in a group contributes to "maybe" bound variables.
*
* Note: BIND() in a PROJECTION is handled differently as it is non-optional (if
* the value expression results in an error the solution is dropped).
* Projections are handled when we do the analysis of a QueryBase node since we
* can see both the WHERE clause and the PROJECTION clauses at the same time.
*
*
* IF()
* - <code>IF</code> semantics : If evaluating the first argument raises an
* error, then an error is raised for the evaluation of the IF expression. (This
* greatly simplifies the analysis of the EBV of the IF value expressions, but
* there is still uncertainty concerning whether the THEN or the ELSE is
* executed for a given solution.) However, <code>IF</code> is not allowed to
* conditionally bind a variable in the THEN/ELSE expressions so we do not have
* to consider it here.
*
* BOUND(var)
* - Filters which use BOUND() can not be pruned unless we can prove that the
* variable is (or is not) bound and also collapse the filter to a constant
* after substituting either <code>true</code> or <code>false</code> in for the
* BOUND() expression.
*
*
*
* FILTERs
*
* FILTERs are grouped based on whether they can run before any required joins
* (pre-), with the required join (join-), or after all joins (post-).
*
* - pre-
* - The pre-filters have all their required variables bound on entry to the
* join group. They should be lifted into the parent join group.
* - join-
* - The join-filters will have all their required variables bound by the time
* the required joins are done. These filters will wind up attached to the
* appropriate required join. The specific filter/join attachments depend on the
* join evaluation order.
* - post-
* - The post-filters might not have all of their required variables bound. We
* have to wait until the last of the optionals joins has been evaluated before
* we can evaluate any post-filters, so they run "last".
* - prune-
* - The prune-filters are those whose required variables CAN NOT be bound.
* They should be pruned from the AST.
*
*
* TODO We can probably cache the heck out of things on this class. There is no
* reason to recompute the SA of the known or maybe/must bound variables until
* there is an AST change, and the caller can build a new SA when that happens.
* However, note that we must make the cache sets unmodifiable since there are a
* lot of patterns which rely on computing the difference between two sets and
* those can not have a side-effect on the cache.
*
* We could also attach the {@link StaticAnalysis} as an annotation on the
* {@link QueryRoot} and provide a factory method for accessing it. That way we
* would have reuse of the cached static analysis data. Each AST optimizer (or
* the {@link ASTOptimizerList}) would have to clear the cached
* {@link StaticAnalysis} when producing a new {@link QueryRoot}. Do this when
* we add an ASTContainer to provide a better home for the queryStr, the parse
* tree, the original AST, and the optimized AST.
*
* @author Bryan Thompson
* @version $Id$
*/
public class StaticAnalysis extends StaticAnalysis_CanJoin {
private static final Logger log = Logger.getLogger(StaticAnalysis.class);
/**
 * Package-private convenience constructor which builds the analysis without
 * an evaluation context. It simply delegates to the two-argument constructor
 * with a <code>null</code> evaluation context.
 *
 * @param queryRoot
 *            The root of the query. We need to have this on hand in order
 *            to resolve {@link NamedSubqueryInclude}s during static
 *            analysis.
 *
 * @deprecated By the other form of this constructor. The constructor should
 *             have access to the {@link ISolutionSetStats}, which are on the
 *             {@link AST2BOpContext}. It also needs access to the
 *             {@link CacheConnectionImpl} for named solution sets.
 */
// Note: Only exposed to the same package for unit tests.
@Deprecated
StaticAnalysis(final QueryRoot queryRoot) {
    this(queryRoot, null/* evaluationContext */);
}
/**
 * Create a static analysis helper for the given query. Both arguments are
 * handed unchanged to the super class constructor.
 *
 * @param queryRoot
 *            The root of the query. We need to have this on hand in order
 *            to resolve {@link NamedSubqueryInclude}s during static
 *            analysis.
 * @param evaluationContext
 *            The evaluation context provides access to the
 *            {@link ISolutionSetStats} and the {@link ISolutionSetManager} for
 *            named solution sets. The single-argument constructor of this
 *            class passes <code>null</code> here, and the incoming-bindings
 *            methods of this class test for a <code>null</code> context
 *            before using it.
 *
 * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/412">
 *      StaticAnalysis#getDefinitelyBound() ignores exogenous variables.</a>
 */
public StaticAnalysis(final QueryRoot queryRoot,
final IEvaluationContext evaluationContext) {
super(queryRoot, evaluationContext);
}
/**
* Find and return the parent {@link JoinGroupNode} which is the lowest such
* {@link JoinGroupNode} dominating the given {@link GraphPatternGroup}.
* This will search the tree to locate the parent when the
* {@link GraphPatternGroup} appears as the annotation of a
* {@link QueryBase}, {@link ServiceNode}, or a {@link FilterNode} having a
* {@link ExistsNode} or {@link NotExistsNode}.
*
* @param group
* The given group.
*
* @return The lowest dominating {@link JoinGroupNode} above that group.
*/
public JoinGroupNode findParentJoinGroup(final GraphPatternGroup> group) {
final IQueryNode p = findParent(group);
if (p instanceof JoinGroupNode) {
return (JoinGroupNode) p;
} else if (p instanceof UnionNode) {
return ((UnionNode) p).getParentJoinGroup();
} else if (p instanceof SubqueryRoot) {
return ((SubqueryRoot) p).getParentJoinGroup();
} else if (p instanceof NamedSubqueryRoot || p instanceof QueryRoot) {
// top level.
return null;
} else if (p instanceof ServiceNode) {
return ((ServiceNode) p).getParentJoinGroup();
} else if (p instanceof FilterNode) {
return ((FilterNode) p).getParentJoinGroup();
}
throw new UnsupportedOperationException();
}
/**
* Return the parent of the {@link GraphPatternGroup}. When the group has an
* explicit parent reference, that reference is returned immediately.
* Otherwise the {@link QueryRoot} is searched for a node having the given
* group as an annotation. This makes it possible to locate a
* {@link QueryBase}, {@link ServiceNode}, {@link ExistsNode}, or
* {@link NotExistsNode} given its {@link GraphPatternGroup}.
*
* Note: The parent of a {@link SubqueryRoot} is obtained by
* {@link SubqueryRoot#getParent()} and is simply the {@link JoinGroupNode}
* in which the {@link SubqueryRoot} appears.
*
* @param group
* The group.
*
* @return The parent of that group. This can be any of
* {@link GraphPatternGroup}, {@link QueryBase}, {@link ServiceNode}
* , or a {@link FilterNode}. This will be null
iff the
* group does not appear anywhere in the {@link QueryRoot}.
*
* TODO The parent of a {@link NamedSubqueryRoot} is less well
* defined. A {@link NamedSubqueryRoot} may be included in multiple
* positions within the AST. Each of those could be considered a
* parent of the {@link NamedSubqueryRoot} in the sense that it
* provides a context within which the result of the query may be
* included. However, for the purposes of bottom up analysis, there
* is no parent of a {@link NamedSubqueryRoot}. It runs as if it
* were a top-level query (except that it might not have visibility
* into exogenous variables?).
*/
public IQueryNode findParent(final GraphPatternGroup> group) {
return findParent(queryRoot, group);
}
public static IQueryNode findParent(final QueryRoot queryRoot,
final GraphPatternGroup> group) {
if (group == null)
throw new IllegalArgumentException();
IQueryNode p = group.getParentGraphPatternGroup();
if (p != null) {
return p;
}
if (queryRoot.getNamedSubqueries() != null) {
for (NamedSubqueryRoot namedSubquery : queryRoot
.getNamedSubqueries()) {
@SuppressWarnings("unchecked")
final GraphPatternGroup whereClause = (GraphPatternGroup) namedSubquery
.getWhereClause();
if (whereClause == group) {
return namedSubquery;
}
// Check the where clause.
if ((p = findParent2(whereClause, group)) != null) {
return p;
}
}
}
{
@SuppressWarnings("unchecked")
final GraphPatternGroup whereClause = (GraphPatternGroup) queryRoot
.getWhereClause();
if (whereClause == group) {
return queryRoot;
}
// Check the where clause.
if ((p = findParent2(whereClause, group)) != null) {
return p;
}
}
// Not found.
return p;
}
/**
* Search in aGroup for theGroup, peeking into
* {@link QueryBase#getWhereClause()}, {@link ServiceNode#getGraphPattern()},
* and all {@link SubqueryFunctionNodeBase} instances for any
* {@link FilterNode}s.
*
* @param aGroup
* A group which might be the "parent" of the group you are
* looking for.
* @param theGroup
* The group which you are looking for.
*
* @return The {@link QueryBase}, {@link ServiceNode}, or {@link FilterNode}
* which is the "parent" of theGroup.
*/
static public IQueryNode findParent2(
final GraphPatternGroup aGroup,
final GraphPatternGroup> theGroup) {
if (aGroup == theGroup) {
/*
* The caller should have reported this. Now we no longer have the
* context on hand.
*/
throw new AssertionError();
}
final int arity = aGroup.arity();
for (int i = 0; i < arity; i++) {
final IGroupMemberNode child = (IGroupMemberNode) aGroup.get(i);
if (child instanceof QueryBase) {
final QueryBase queryBase = (QueryBase) child;
if (queryBase.getWhereClause() == theGroup) {
return queryBase;
}
} else if (child instanceof ServiceNode) {
final ServiceNode serviceNode = (ServiceNode) child;
if (serviceNode.getGraphPattern() == theGroup) {
return serviceNode;
}
} else if (child instanceof FilterNode) {
final FilterNode filter = (FilterNode) child;
final Iterator itr = BOpUtility
.visitAll(filter, SubqueryFunctionNodeBase.class);
while (itr.hasNext()) {
final SubqueryFunctionNodeBase tmp = itr.next();
if (tmp.getGraphPattern() == theGroup) {
return filter;
}
}
} else if (child instanceof ArbitraryLengthPathNode) {
final ArbitraryLengthPathNode alpNode = (ArbitraryLengthPathNode) child;
if (alpNode.subgroup() == theGroup) {
return alpNode;
}
}
}
// Not found.
return null;
}
// /**
// * Return the set of variables which are "in-scope" for a given node. This
// * is based on bottom up evaluation semantics rather than the top-down,
// * left-to-right evaluation order. The "in-scope" variables are the
// * variables which are locally produced, which are produced in a child
// * group, or which are produced in the parent when the parent's variables
// * are in scope for the child (e.g., a FILTER in an OPTIONAL group can see
// * the variables in the parent group).
// *
// * Note: This method does NOT need to consider exogenous bindings. The scope
// * of a variable is a completely different thing from whether or not the
// * variable is must be bound in a given scope. If a variable has an
// * exogenous binding but is not projected into a query, then it is still not
// * visible in that query. If it is projected into the query, then it is in
// * scope regardless of whether or not it has an exogenous binding and
// * regardless of whether it MUST or MIGHT be bound.
// *
// * This method should be used for bottom up analysis. It SHOULD NOT be used
// * when you have a specific evaluation order and want to know whether or not
// * a given variable is incoming bound or produced by a node in the query.
// *
// * @param node
// * The node.
// * @param vars
// * The caller's collection.
// *
// * @return The caller's collection.
// *
// * @see http://www.w3.org/TR/sparql11-query/#variableScope
// *
// * FIXME Test suite and implementation for "in-scope".
// */
// public Set> getInScopeVariables(final IGroupMemberNode node,
// final Set> vars) {
//
// final GraphPatternGroup tmp;
//
// if (node instanceof GraphPatternGroup>) {
//
// /*
// * When given a group, report on the in-scope variable for this
// * group.
// */
// tmp = (GraphPatternGroup) node;
//
// } else {
//
// /*
// * Report on the in-scope variables
// */
// tmp = (GraphPatternGroup) node
// .getParent();
//
// }
//
// getInScopeVars(tmp, vars);
//
// return vars;
//
// }
//
// /**
// * Reports on all in-scope variables for a {@link JoinGroupNode} or
// * {@link UnionNode}.
// */
// private Set> getInScopeVars(
// final GraphPatternGroup group,
// final Set> vars) {
//
// for(IGroupMemberNode child : group ) {
//
// // TODO In scope means produced locally or in scope in the parent
// // and visible locally.
// getDefinitelyProducedBindings(sp, vars, false/* recursive */);
//
// }
//
// // Plus anything which is in scope in the parent.
// {
//
// final JoinGroupNode p = findParentJoinGroup(group);
//
// if (p != null) {
//
// getInScopeVars(p, vars);
//
// }
//
// }
//
// return vars;
//
// }
/**
* Return the set of variables which MUST be bound coming into this group
* during top-down, left-to-right evaluation. The returned set is based on a
* non-recursive analysis of the definitely (MUST) bound variables in each
* of the parent groups. The analysis is non-recursive for each parent
* group, but all parents of this group are considered. This approach
* excludes information about variables which MUST or MIGHT be bound from
* both this group and child groups.
*
* This method DOES NOT pay attention to bottom up variable scoping rules.
* Queries which are badly designed MUST be rewritten (by lifting out named
* subqueries) such that they become well designed and adhere to bottom-up
* evaluation semantics.
*
* @param vars
* Where to store the "MUST" bound variables.
*
* @return The argument.
*
* FIXME Both this and
* {@link #getMaybeIncomingBindings(IGroupMemberNode, Set)} need to
* consider the exogenous variables. Perhaps modify the
* StaticAnalysis constructor to pass in the exogenous
* IBindingSet[]?
*
* FIXME For some purposes we need to consider the top-down,
* left-to-right evaluation order. However, for others, such as when
* considering whether a variable appearing in a filter will be in
* scope, we need to consider whether there exists some evaluation
* order for which the variable would be in scope.
*
* @see https://sourceforge.net/apps/trac/bigdata/ticket/412
* (StaticAnalysis#getDefinitelyBound() ignores exogenous variables.)
*/
public Set> getDefinitelyIncomingBindings(
final IGroupMemberNode node, final Set> vars) {
/*
* Start by adding globally scoped and exogenous variables.
*/
if (evaluationContext != null) {
vars.addAll(evaluationContext.getGloballyScopedVariables());
if (locatedInToplevelQuery(node)) {
final ISolutionSetStats stats = evaluationContext.getSolutionSetStats();
// only add the vars that are always bound
vars.addAll(stats.getAlwaysBound());
}
}
final GraphPatternGroup> parent = node.getParentGraphPatternGroup();
/*
* We've reached the root.
*/
if (parent == null) {
/*
* FIXME This is unable to look upwards when the group is the graph
* pattern of a subquery, a service, or a (NOT) EXISTS filter. Unit
* tests. This could be fixed using a method which searched the
* QueryRoot for the node having a given join group as its
* annotation. However, that would not resolve the question of
* evaluation order versus "in scope" visibility.
*
* Use findParent(...) to fix this, but build up the test coverage
* before making the code changes.
*/
return vars;
}
/*
* Do the siblings of the node first. Unless it is a Union. Siblings
* don't see each other's bindings in a Union.
*/
if (!(parent instanceof UnionNode)) {
for (IGroupMemberNode child : parent) {
/*
* We've found ourself. Stop collecting vars.
*/
if (child == node) {
break;
}
if (child instanceof IBindingProducerNode) {
final boolean optional = child instanceof IJoinNode
&& ((IJoinNode) child).isOptional();
final boolean minus = child instanceof IJoinNode
&& ((IJoinNode) child).isMinus();
if (!optional && !minus) {
getDefinitelyProducedBindings(
(IBindingProducerNode) child, vars, true/* recursive */);
}
}
}
}
/*
* Next we recurse upwards to figure out what is definitely bound
* coming into the parent.
*/
return getDefinitelyIncomingBindings(parent, vars);
}
/**
 * Report whether the node lives (at any depth) inside the WHERE clause of
 * the top-level query, as opposed to being nested inside a subquery or a
 * named subquery. The test is delegated to
 * {@link #locatedInGroupNode(GroupNodeBase, IGroupMemberNode)}, which only
 * descends into {@link GroupNodeBase} children and therefore does not look
 * into {@link FilterNode}s.
 *
 * @param node
 *            The node to be located.
 *
 * @return <code>true</code> iff the node was found within the WHERE clause
 *         of the query root.
 */
public boolean locatedInToplevelQuery(IGroupMemberNode node) {
    final boolean foundInTopLevelWhereClause = locatedInGroupNode(
            queryRoot.getWhereClause(), node);
    return foundInTopLevelWhereClause;
}
/**
* Returns true if the current node is identical or (recursively) located
* inside the given group scope or is the group node itself, but not a
* subquery referenced in the node. The method does not look into
* {@link FilterNode}s, but only recurses into {@link GroupNodeBase} nodes.
*
* @param theNode the group we're looking in
* @param theNode the node we're looking for
* @return
*/
public boolean locatedInGroupNode(
final GroupNodeBase> theGroup, IGroupMemberNode theNode) {
if (theGroup==null || theNode==null) {
return false; // not found
}
if (theGroup==theNode)
return true;
for (IGroupMemberNode child : theGroup) {
if (child instanceof GroupNodeBase>) {
if (locatedInGroupNode((GroupNodeBase>)child, theNode))
return true;
}
}
return false; // not found
}
/**
* Return the set of variables which MIGHT be bound coming into this group
* during top-down, left-to-right evaluation. The returned set is based on a
* non-recursive analysis of the "maybe" bound variables in each of the
* parent groups. The analysis is non-recursive for each parent group, but
* all parents of this group are considered. This approach excludes
* information about variables which MUST or MIGHT be bound from both
* this group and child groups.
*
* This method DOES NOT pay attention to bottom up variable scoping rules.
* Queries which are badly designed MUST be rewritten (by lifting out named
* subqueries) such that they become well designed and adhere to bottom-up
* evaluation semantics.
*
* @param vars
* Where to store the "maybe" bound variables. This includes ANY
* variable which MIGHT or MUST be bound.
*
* @return The argument.
*
* FIXME Both this and
* {@link #getDefinitelyIncomingBindings(IGroupMemberNode, Set)}
* need to consider the exogenous variables. Perhaps modify the
* StaticAnalysis constructor to pass in the exogenous
* IBindingSet[]?
*
* FIXME This is unable to look upwards when the group is the graph
* pattern of a subquery, a service, or a (NOT) EXISTS filter.
*
* @see https://sourceforge.net/apps/trac/bigdata/ticket/412
*/
public Set> getMaybeIncomingBindings(
final IGroupMemberNode node, final Set> vars) {
/*
* Start by adding the exogenous variables.
*/
if (evaluationContext != null) {
vars.addAll(evaluationContext.getGloballyScopedVariables());
if (locatedInToplevelQuery(node)) {
final ISolutionSetStats stats = evaluationContext.getSolutionSetStats();
// add the vars that are always bound and those that might be bound
vars.addAll(stats.getAlwaysBound());
vars.addAll(stats.getNotAlwaysBound());
}
}
final GraphPatternGroup> parent = node.getParentGraphPatternGroup();
/*
* We've reached the root.
*/
if (parent == null) {
return vars;
}
/*
* Do the siblings of the node first. Unless it is a Union. Siblings
* don't see each other's bindings in a Union.
*/
if (!(parent instanceof UnionNode)) {
for (IGroupMemberNode child : parent) {
/*
* We've found ourself. Stop collecting vars.
*/
if (child == node) {
break;
}
if (child instanceof IBindingProducerNode) {
// final boolean optional = child instanceof IJoinNode
// && ((IJoinNode) child).isOptional();
final boolean minus = child instanceof IJoinNode
&& ((IJoinNode) child).isMinus();
if (/* !optional && */!minus) {
/*
* MINUS does not produce any bindings, it just removes
* solutions. On the other hand, OPTIONAL joins DO
* produce bindings, they are just "maybe" bindings.
*/
getMaybeProducedBindings(
(IBindingProducerNode) child, vars, true/* recursive */);
}
}
}
}
/*
* Next we recurse upwards to figure out what is definitely bound
* coming into the parent.
*/
return getMaybeIncomingBindings(parent, vars);
}
/**
* Return the set of variables which MUST be bound for solutions after the
* evaluation of this group. A group will produce "MUST" bindings for
* variables from its statement patterns and a LET based on an expression
* whose variables are known bound.
*
* The returned collection reflects "bottom-up" evaluation semantics. This
* method does NOT consider variables which are already bound on entry to
* the group.
*
* Note: When invoked for an OPTIONAL or MINUS join group, the variables
* which would become bound during the evaluation of the join group are
* reported. Caller's who wish to NOT have variables reported for OPTIONAL
* or MINUS groups MUST NOT invoke this method for those groups.
*
* Note: The recursive analysis does not throw out variables when part of
* the tree will provably fail to bind anything. It is the role of query
* optimizers to identify those situations and prune the AST appropriately.
*
* @param node
* The node to be analyzed.
* @param vars
* Where to store the "MUST" bound variables.
* @param recursive
* When true
, the child groups will be recursively
* analyzed. When false
, only this group will
* be analyzed.
*
* @return The argument.
*/
public Set> getDefinitelyProducedBindings(
final IBindingProducerNode node, final Set> vars,
final boolean recursive) {
if (node instanceof GraphPatternGroup>) {
if (node instanceof JoinGroupNode) {
getDefinitelyProducedBindings((JoinGroupNode) node, vars,
recursive);
} else if (node instanceof UnionNode) {
getDefinitelyProducedBindings((UnionNode) node, vars, recursive);
} else {
throw new AssertionError(node.toString());
}
} else if(node instanceof StatementPatternNode) {
final StatementPatternNode sp = (StatementPatternNode) node;
// if(!sp.isOptional()) {
//
// // Only if the statement pattern node is a required join.
vars.addAll(sp.getProducedBindings());
//
// }
} else if (node instanceof PropertyPathNode) {
final PropertyPathNode ppn = (PropertyPathNode) node;
vars.addAll(ppn.getProducedBindings());
} else if (node instanceof ArbitraryLengthPathNode) {
vars.addAll(((ArbitraryLengthPathNode) node).getDefinitelyProducedBindings());
} else if (node instanceof ZeroLengthPathNode) {
vars.addAll(((ZeroLengthPathNode) node).getProducedBindings());
} else if(node instanceof SubqueryRoot) {
final SubqueryRoot subquery = (SubqueryRoot) node;
vars.addAll(getDefinitelyProducedBindings(subquery));
} else if (node instanceof NamedSubqueryInclude) {
final NamedSubqueryInclude nsi = (NamedSubqueryInclude) node;
final String name = nsi.getName();
final NamedSubqueryRoot nsr = getNamedSubqueryRoot(name);
if (nsr != null) {
vars.addAll(getDefinitelyProducedBindings(nsr));
} else {
final ISolutionSetStats stats = getSolutionSetStats(name);
/*
* Note: This is all variables which are bound in ALL solutions.
*/
vars.addAll(stats.getAlwaysBound());
}
} else if(node instanceof ServiceNode) {
final ServiceNode service = (ServiceNode) node;
vars.addAll(getDefinitelyProducedBindings(service));
} else if(node instanceof AssignmentNode) {
/*
* Note: BIND() in a group is only a "maybe" because the spec says
* that an error when evaluating a BIND() in a group will not fail
* the solution.
*
* @see http://www.w3.org/TR/sparql11-query/#assignment (
* "If the evaluation of the expression produces an error, the
* variable remains unbound for that solution.")
*/
} else if(node instanceof FilterNode) {
// NOP.
} else if(node instanceof BindingsClause) {
final BindingsClause bc = (BindingsClause) node;
vars.addAll(bc.getDeclaredVariables());
} else {
throw new AssertionError(node.toString());
}
return vars;
}
/**
* Collect all variables appearing in the group. This DOES NOT descend
* recursively into groups. It DOES report variables projected out of named
* subqueries, SPARQL 1.1 subqueries, and SERVICE calls.
*
* This has the same behavior as a non-recursive call obtain the definitely
* bound variables PLUS the variables used by the filters in the group.
*
* @param vars
* The variables are added to this set.
* @param group
* The group whose variables will be reported.
* @param includeFilters
* When true
, variables appearing in FILTERs are
* also reported.
*
* @return The caller's set.
*/
public Set> getDefinitelyProducedBindingsAndFilterVariables(
final IGroupNode extends IGroupMemberNode> group,
final Set> vars) {
getDefinitelyProducedBindings((IBindingProducerNode) group, vars, false/* recursive */);
for (IGroupMemberNode op : group) {
if (op instanceof FilterNode) {
addAll(vars, op);
}
}
return vars;
}
/**
* Return the set of variables which MUST or MIGHT be bound after the
* evaluation of this join group.
*
* The returned collection reflects "bottom-up" evaluation semantics. This
* method does NOT consider variables which are already bound on entry to
* the group.
*
* @param vars
* Where to store the "MUST" and "MIGHT" be bound variables.
* @param recursive
* When true
, the child groups will be recursively
* analyzed. When false
, only this group will
* be analyzed.
*
* @return The caller's set.
*/
public Set> getMaybeProducedBindings(
final IBindingProducerNode node,//
final Set> vars,//
final boolean recursive) {
if (node instanceof GraphPatternGroup>) {
if (node instanceof JoinGroupNode) {
getMaybeProducedBindings((JoinGroupNode) node, vars,
recursive);
} else if (node instanceof UnionNode) {
getMaybeProducedBindings((UnionNode) node, vars, recursive);
} else {
throw new AssertionError(node.toString());
}
} else if( node instanceof StatementPatternNode) {
final StatementPatternNode sp = (StatementPatternNode) node;
// if(sp.isOptional()) {
//
// // Only if the statement pattern node is an optional join.
vars.addAll(sp.getProducedBindings());
//
// }
} else if (node instanceof PropertyPathNode) {
final PropertyPathNode ppn = (PropertyPathNode) node;
vars.addAll(ppn.getProducedBindings());
} else if (node instanceof ArbitraryLengthPathNode) {
vars.addAll(((ArbitraryLengthPathNode) node).getMaybeProducedBindings());
} else if (node instanceof ZeroLengthPathNode) {
vars.addAll(((ZeroLengthPathNode) node).getProducedBindings());
} else if(node instanceof SubqueryRoot) {
final SubqueryRoot subquery = (SubqueryRoot) node;
vars.addAll(getMaybeProducedBindings(subquery));
} else if (node instanceof NamedSubqueryInclude) {
final NamedSubqueryInclude nsi = (NamedSubqueryInclude) node;
final String name = nsi.getName();
final NamedSubqueryRoot nsr = getNamedSubqueryRoot(name);
if (nsr != null) {
vars.addAll(getMaybeProducedBindings(nsr));
} else {
final ISolutionSetStats stats = getSolutionSetStats(name);
/*
* Note: This is all variables bound in ANY solution. It MAY
* include variables which are NOT bound in some solutions.
*/
vars.addAll(stats.getUsedVars());
}
} else if(node instanceof ServiceNode) {
final ServiceNode service = (ServiceNode) node;
vars.addAll(getMaybeProducedBindings(service));
} else if(node instanceof AssignmentNode) {
/*
* Note: BIND() in a group is only a "maybe" because the spec says
* that an error when evaluating a BIND() in a group will not fail
* the solution.
*
* @see http://www.w3.org/TR/sparql11-query/#assignment (
* "If the evaluation of the expression produces an error, the
* variable remains unbound for that solution.")
*/
vars.add(((AssignmentNode) node).getVar());
} else if(node instanceof FilterNode) {
// NOP
} else if(node instanceof BindingsClause) {
final BindingsClause bc = (BindingsClause) node;
vars.addAll(bc.getDeclaredVariables());
} else {
throw new AssertionError(node.toString());
}
return vars;
}
/*
* Private type specific helper methods.
*/
// MUST : JOIN GROUP
Set> getDefinitelyProducedBindings(
final JoinGroupNode node, final Set> vars,
final boolean recursive) {
// Note: always report what is bound when we enter a group. The caller
// needs to avoid entering a group which is optional if they do not want
// it's bindings.
// if(node.isOptional())
// return vars;
for (IGroupMemberNode child : node) {
if(!(child instanceof IBindingProducerNode))
continue;
if (child instanceof StatementPatternNode) {
final StatementPatternNode sp = (StatementPatternNode) child;
if (!sp.isOptional()) {
/*
* Required JOIN (statement pattern).
*/
getDefinitelyProducedBindings(sp, vars, recursive);
}
} else if (child instanceof ArbitraryLengthPathNode) {
vars.addAll(((ArbitraryLengthPathNode) child).getDefinitelyProducedBindings());
} else if (child instanceof ZeroLengthPathNode) {
vars.addAll(((ZeroLengthPathNode) child).getProducedBindings());
} else if (child instanceof NamedSubqueryInclude
|| child instanceof SubqueryRoot
|| child instanceof ServiceNode) {
/*
* Required JOIN (Named solution set, SPARQL 1.1 subquery,
* EXISTS, or SERVICE).
*
* Note: We have to descend recursively into these structures in
* order to determine anything.
*/
vars.addAll(getDefinitelyProducedBindings(
(IBindingProducerNode) child,
new LinkedHashSet>(), true/* recursive */));
} else if (child instanceof GraphPatternGroup>) {
if (recursive) {
// Add anything bound by a child group.
final GraphPatternGroup> group = (GraphPatternGroup>) child;
if (!group.isOptional() && !group.isMinus()) {
getDefinitelyProducedBindings(group, vars, recursive);
}
}
} else if (child instanceof AssignmentNode) {
/*
* Note: BIND() in a group is only a "maybe" because the spec says
* that an error when evaluating a BIND() in a group will not fail
* the solution.
*
* @see http://www.w3.org/TR/sparql11-query/#assignment (
* "If the evaluation of the expression produces an error, the
* variable remains unbound for that solution.")
*/
} else if(child instanceof FilterNode) {
// NOP
} else if(child instanceof BindingsClause) {
final BindingsClause bc = (BindingsClause) child;
vars.addAll(bc.getDeclaredVariables());
} else if (child instanceof PropertyPathNode) {
getDefinitelyProducedBindings((PropertyPathNode)child, vars, recursive);
} else {
throw new AssertionError(child.toString());
}
}
/*
* Note: Assignments which have an error cause the variable to be left
* unbound rather than failing the solution. Therefore assignment nodes
* are handled as "maybe" bound, not "must" bound.
*/
return vars;
}
// MAYBE : JOIN GROUP
private Set> getMaybeProducedBindings(
final JoinGroupNode node, final Set> vars,
final boolean recursive) {
// Add in anything definitely produced by this group (w/o recursion).
getDefinitelyProducedBindings(node, vars, false/* recursive */);
/*
* Note: Assignments which have an error cause the variable to be left
* unbound rather than failing the solution. Therefore assignment nodes
* are handled as "maybe" bound, not "must" bound.
*/
for (AssignmentNode bind : node.getAssignments()) {
vars.add(bind.getVar());
}
if (recursive) {
/*
* Add in anything "maybe" produced by a child group.
*/
for (IGroupMemberNode child : node) {
if (child instanceof IBindingProducerNode) {
final IBindingProducerNode tmp = (IBindingProducerNode) child;
if(tmp instanceof IJoinNode && ((IJoinNode)tmp).isMinus()) {
// MINUS never contributes bindings, it only removes
// solutions.
continue;
}
// vars.addAll(
getMaybeProducedBindings(tmp, vars, recursive)
// )
;
}
}
}
return vars;
}
// MUST : UNION
private Set> getDefinitelyProducedBindings(
final UnionNode node,
final Set> vars, final boolean recursive) {
if (!recursive || node.isOptional() || node.isMinus()) {
// Nothing to contribute
return vars;
}
/*
* Collect all definitely produced bindings from each of the children.
*/
final Set> all = new LinkedHashSet>();
final List>> perChildSets = new LinkedList>>();
for (JoinGroupNode child : node) {
final Set> childSet = new LinkedHashSet>();
perChildSets.add(childSet);
getDefinitelyProducedBindings(child, childSet, recursive);
all.addAll(childSet);
}
/*
* Now retain only those bindings which are definitely produced by each
* child of the union.
*/
for(Set> childSet : perChildSets) {
all.retainAll(childSet);
}
// These are the variables which are definitely bound by the union.
vars.addAll(all);
return vars;
}
// MAYBE : UNION
private Set> getMaybeProducedBindings(final UnionNode node,
final Set> vars, final boolean recursive) {
if (!recursive) {
// Nothing to contribute.
return vars;
}
/*
* Collect all "maybe" bindings from each of the children.
*/
for (JoinGroupNode child : node) {
getMaybeProducedBindings(child, vars, recursive);
}
return vars;
}
/**
* Report "MUST" bound bindings projected by the query. This involves
* checking the WHERE clause and the {@link ProjectionNode} for the query.
* Note that the projection can rename variables. It can also bind a
* constant on a variable. Variables which are not projected by the query
* will NOT be reported.
*
* FIXME For a top-level query, any exogenously bound variables are also
* definitely bound (in a subquery they are definitely bound if they are
* projected into the subquery).
*
* TODO In the case when the variable is bound to an expression
* and the expression may execute with an error, this
* method incorrectly reports that variable as definitely bound
* see trac 750
*
* @see https://sourceforge.net/apps/trac/bigdata/ticket/412
* (StaticAnalysis#getDefinitelyBound() ignores exogenous variables.)
*
* @see http://sourceforge.net/apps/trac/bigdata/ticket/430 (StaticAnalysis
* does not follow renames of projected variables)
*
* @see http://sourceforge.net/apps/trac/bigdata/ticket/750
* artificial test case fails, currently wontfix
*/
// MUST : QueryBase
public Set> getDefinitelyProducedBindings(final QueryBase queryBase) {
final ProjectionNode projection = queryBase.getProjection();
if(projection == null) {
// If there is no projection then there is nothing to report.
return new LinkedHashSet>();
}
// The set of definitely bound variables in the query.
final Set> definitelyBound = new LinkedHashSet>();
@SuppressWarnings("unchecked")
final GraphPatternGroup whereClause = queryBase.getWhereClause();
if (whereClause != null) {
getDefinitelyProducedBindings(whereClause, definitelyBound, true/* recursive */);
if (log.isInfoEnabled()) {
log.info(whereClause);
log.info(definitelyBound);
}
}
/*
* Now, we need to consider each select expression in turn. There are
* several cases:
*
* 1. Projection of a constant.
*
* 2. Projection of a variable under the same name.
*
* 3. Projection of a variable under a different name.
*
* 4. Projection of a select expression which is not an aggregate.
*
* This case is the one explored in trac750, and the code
* below while usually correct is incorrect if the expression
* can evaluate with an error - in which case the variable
* will remain unbound.
*
* 5. Projection of a select expression which is an aggregate. This case
* is tricky. A select expression that is an aggregate which evaluates
* to an error will cause an unbound value for to be reported for the
* projected variable for the solution in which the error is computed.
* Therefore, we must not assume that aggregation expressions MUST be
* bound. (Given the schema flexible nature of RDF data, it is very
* difficult to prove that an aggregate expression will never result in
* an error without actually running the aggregation query.)
*
* 6. Projection of an exogenously bound variable which is in scope.
*
* TODO (6) is not yet handled! We need to know what variables are in
* scope at each level as we descend into subqueries. Even if we know
* the set of exogenous variables, the in scope exogenous varaibles are
* not available in the typical invocation context.
*/
{
final boolean isAggregate = isAggregate(queryBase);
/*
* The set of projected variables which are definitely bound.
*/
final Set> tmp = new LinkedHashSet>();
for (AssignmentNode bind : projection) {
if (bind.getValueExpression() instanceof IConstant>) {
/*
* 1. The projection of a constant.
*
* Note: This depends on pre-evaluation of constant
* expressions. If the expression has not been reduced to a
* constant then it will not be detected by this test!
*/
tmp.add(bind.getVar());
continue;
}
if (bind.getVar().equals(bind.getValueExpression())) {
if (definitelyBound.contains(bind.getVar())) {
/*
* 2. The projection of a definitely bound variable
* under the same name.
*/
tmp.add(bind.getVar());
}
continue;
}
if (bind.getValueExpression() instanceof IVariable>) {
if (definitelyBound.contains(bind.getValueExpression())) {
/*
* 3. The projection of a definitely bound variable
* under a different name.
*/
tmp.add(bind.getVar());
}
continue;
}
if (!isAggregate) {
/*
* 4. The projection of a select expression which is not an
* aggregate. Normally, the projected variable will be
* bound if all components of the select expression are
* definitely bound: this comment ignores the possibility
* that the expression may raise an error, in which case
* this block of code is incorrect.
* As of Oct 11, 2013 - we are no-fixing this
* because of caution about the performance impact,
* and it seeming to be a corner case. See trac 750.
*
* TODO Does coalesce() change the semantics for this
* analysis? If any of the values for coalesce() is
* definitely bound, then the coalesce() will produce a
* value. Can coalesce() be used to propagate an unbound
* value? If so, then we must either not assume that any
* value expression involving coalesce() is definitely bound
* or we must do a more detailed analysis of the value
* expression.
*/
final Set> usedVars = getSpannedVariables(
(BOp) bind.getValueExpression(),
new LinkedHashSet>());
usedVars.removeAll(definitelyBound);
if (!usedVars.isEmpty()) {
/*
* There is at least one variable which is used by the
* select expression which is not definitely bound.
*/
continue;
}
/*
* All variables used by the select expression are
* definitely bound so the projected variable for that
* select expression will be definitely bound.
*/
tmp.add(bind.getVar());
} else {
/* 5. Projection of a select expression which is an aggregate.
* We do nothing
*/
}
/* 6. Projection of an exogenously bound variable which is in scope.
* We incorrectly do nothing
*/
}
return tmp;
}
}
/**
* Report the "MUST" and "MAYBE" bound bindings projected by the query. This
* reduces to reporting the projected variables. We do not need to analyze
* the whereClause or projection any further in order to know what "might"
* be projected.
*/
// MAYBE : QueryBase
public Set> getMaybeProducedBindings(final QueryBase node) {
final Set> vars = new LinkedHashSet>();
final ProjectionNode projection = node.getProjection();
if(projection == null) {
// If there is no projection then there is nothing to report.
return vars;
}
return projection.getProjectionVars(vars);
}
/**
* Report "MUST" bound bindings projected by the SERVICE. This involves
* checking the graph pattern reported by
* {@link ServiceNode#getGraphPattern()}.
*
* Note: If the SERVICE URI is a variable, then it can only become bound
* through some other operation. If the SERVICE variable never becomes
* bound, then the SERVICE call can not run.
*/
// MUST : ServiceNode
public Set> getDefinitelyProducedBindings(
final ServiceNode node) {
final Set> vars = new LinkedHashSet>();
final GraphPatternGroup graphPattern = (GraphPatternGroup) node
.getGraphPattern();
if (graphPattern != null) {
getDefinitelyProducedBindings(graphPattern, vars, true/* recursive */);
}
return vars;
}
/**
* Report the "MUST" and "MAYBE" bound variables projected by the service.
* This involves checking the graph pattern reported by
* {@link ServiceNode#getGraphPattern()}. A SERVICE does NOT have an
* explicit PROJECTION so it can not rename the projected bindings.
*/
// MAY : ServiceNode
public Set> getMaybeProducedBindings(final ServiceNode node) {
final Set> vars = new LinkedHashSet>();
final GraphPatternGroup graphPattern = (GraphPatternGroup) node.getGraphPattern();
if (graphPattern != null) {
getMaybeProducedBindings(graphPattern, vars, true/* recursive */);
}
return vars;
}
/*
* FILTERS analysis for JoinGroupNodes
*/
/**
* Return only the filter child nodes in this group that will be fully bound
* before running any of the joins in this group.
*
* Note: Anything returned by this method should be lifted into the parent
* group since it can be run before this group is evaluated. By lifting the
* pre-filters into the parent group we can avoid issuing as many as-bound
* subqueries for this group since those which fail the filter will not be
* issued.
*
* @param group
* The {@link JoinGroupNode}.
*
* @return The filters which should either be run before the non-optional
* join graph or (preferably) lifted into the parent group.
*
* @see ASTLiftPreFiltersOptimizer
*/
public List getPreFilters(final JoinGroupNode group) {
/*
* Get the variables known to be bound starting out.
*/
final Set> knownBound = getDefinitelyIncomingBindings(group,
new LinkedHashSet>());
/*
* Get the filters that are bound by this set of known bound variables.
*/
final List filters = getBoundFilters(group,
knownBound);
return filters;
}
/**
* Return only the filter child nodes in this group whose variables were not
* fully bound on entry into the join group but which will be fully bound no
* later than once we have run the required joins in this group.
*
* @param group
* The {@link JoinGroupNode}.
*
* @return The filters to be attached to the non-optional join graph for
* this group.
*/
public List getJoinFilters(final JoinGroupNode group) {
/*
* Get the variables known to be bound starting out.
*/
final Set> knownBound = getDefinitelyIncomingBindings(group,
new LinkedHashSet>());
/*
* Add all the "must" bound variables for this group.
*
* Note: We do not recursively compute the "must" bound variables for
* this step because we are only interested in a FILTER which can be
* attached to a non-optional JOIN run within this group.
*/
getDefinitelyProducedBindings(group, knownBound, false/* recursive */);
/*
* Get the filters that are bound by this set of known bound variables.
*/
final List filters = getBoundFilters(group,
knownBound);
/*
* Remove the preConditional filters (those fully bound by just incoming
* bindings).
*/
filters.removeAll(getPreFilters(group));
return filters;
}
/**
* Return only the filter child nodes in this group that will not be fully
* bound even after running the required joins in this group.
*
* Note: It is possible that some of these filters will be fully bound due
* to nested optionals and unions.
*
* Note: This will report any filters which are not pre-filters and are
* not-join filters, including filters which are prune-filters. An AST
* optimizer is responsible for identifying and removing filters which
* should be pruned. Until they have been pruned, they will continue to be
* reported by this method.
*
* @param group
* The {@link JoinGroupNode}.
*
* @return The filters to be run last in the group (after the nested
* optionals and unions).
*/
public List getPostFilters(final JoinGroupNode group) {
/*
* Start with all the filters in this group.
*/
final List filters = group.getAllFiltersInGroup();
/*
* Get the variables known to be bound starting out.
*/
final Set> knownBound = getDefinitelyIncomingBindings(group,
new LinkedHashSet>());
/*
* Add all the "must" bound variables for this group.
*
* Note: We do not recursively compute the "must" bound variables for
* this step because we are only interested in FILTERs which can be
* attached to a required JOIN run within this group. However, this
* SHOULD consider statement pattern joins, named subquery include
* joins, SPARQL 1.1 subquery joins, and service call joins -- all of
* which are required joins.
*/
getDefinitelyProducedBindings(group, knownBound, false/* recursive */);
/*
* Get the filters that are bound by this set of known bound variables.
*/
final Collection preAndJoinFilters = getBoundFilters(group,
knownBound);
/*
* Remove the preFilters and joinFilters, leaving only the postFilters.
*
* Note: This approach deliberately will report any filter which would
* not have already been run for the group.
*/
filters.removeAll(preAndJoinFilters);
return filters;
}
/**
* Return any filters can not succeed based on the "incoming", "must" and
* "may" bound variables for this group. These filters are candidates for
* pruning.
*
* Note: Filters containing a {@link FunctionNode} for
* {@link FunctionRegistry#BOUND} MUST NOT be pruned and are NOT reported by
* this method.
*
* @param group
* The {@link JoinGroupNode}.
*
* @return The filters which are known to fail.
*
* TODO It is possible to prune a BOUND(?x) or NOT BOUND(?x) filter
* through a more detailed analysis of the value expression. If the
* variable ?x
simply does not appear in the group or
* any child of that group, then BOUND(?x) can be replaced by
* false
and NOT BOUND(?x) by true
.
*
* However, in order to do this we must also look at any exogenous
* solution(s) (those supplied with the query when it is being
* evaluated). If the variable is bound in some exogenous solutions
* then it could be bound when the FILTER is run and the filter can
* not be pruned.
*
* @deprecated This is now handled by {@link ASTBottomUpOptimizer}. I think
* that we will not need this method (it is only invoked from
* the test suite at this point).
*/
public List getPruneFilters(final JoinGroupNode group) {
/*
* Start with all the filters in this group.
*/
final List filters = group.getAllFiltersInGroup();
/*
* Get the variables known to be bound starting out.
*/
final Set> maybeBound = getDefinitelyIncomingBindings(group, new LinkedHashSet>());
/*
* Add all "must" / "may" bound variables for this group (recursively).
*/
getMaybeProducedBindings(group, maybeBound, true/* recursive */);
/*
* Get the filters that are bound by this set of "maybe" bound variables.
*/
final Collection maybeFilters = getBoundFilters(group,
maybeBound);
/*
* Remove the maybe bound filters, leaving only those which can not
* succeed.
*/
filters.removeAll(maybeFilters);
/*
* Collect all maybeFilters which use BOUND(). These can not be failed
* as easily.
*/
final Set isBoundFilters = new LinkedHashSet();
for (FilterNode filter : maybeFilters) {
final IValueExpressionNode node = filter.getValueExpressionNode();
if (node instanceof FunctionNode) {
if (((FunctionNode) node).isBound()) {
isBoundFilters.add(filter);
}
}
}
// Remove filters which use BOUND().
filters.removeAll(isBoundFilters);
return filters;
}
/**
* Helper method to determine the set of filters that will be fully bound
* assuming the specified set of variables is bound.
*/
private final List getBoundFilters(
final JoinGroupNode group, final Set> knownBound) {
final List filters = new LinkedList();
for (IQueryNode node : group) {
if (!(node instanceof FilterNode))
continue;
final FilterNode filter = (FilterNode) node;
final Set> filterVars = filter.getConsumedVars();
boolean allBound = true;
for (IVariable> v : filterVars) {
allBound &= knownBound.contains(v);
}
if (allBound) {
filters.add(filter);
}
}
return filters;
}
/*
* Materialization pipeline support.
*/
/**
* Use the {@link INeedsMaterialization} interface to find and collect
* variables that need to be materialized for this constraint.
*/
@SuppressWarnings("rawtypes")
public static boolean requiresMaterialization(final IConstraint c) {
return StaticAnalysis.gatherVarsToMaterialize(c,
new LinkedHashSet>()) != Requirement.NEVER;
}
/**
* Static helper used to determine materialization requirements.
*
* TODO This should also reason about datatype constraints on variables. If
* we know that a variable is constrained in a given scope to only take on a
* data type which is associated with an {@link FullyInlineTypedLiteralIV}
* or a specific numeric data type, then some operators may be able to
* operate directly on that {@link IV}. This is especially interesting for
* aggregates.
*/
@SuppressWarnings("rawtypes")
public static INeedsMaterialization.Requirement gatherVarsToMaterialize(
final BOp c, final Set> terms) {
boolean materialize = false;
boolean always = false;
final Iterator it = BOpUtility.preOrderIterator(c);
while (it.hasNext()) {
final BOp bop = it.next();
if (bop instanceof INeedsMaterialization) {
final INeedsMaterialization bop2 = (INeedsMaterialization) bop;
final Set> t = getVarsFromArguments(bop);
if (t.size() > 0) {
terms.addAll(t);
materialize = true;
// if any bops have terms that always needs materialization
// then mark the whole constraint as such
if (bop2.getRequirement() == Requirement.ALWAYS) {
always = true;
}
}
}
}
return materialize ? (always ? Requirement.ALWAYS
: Requirement.SOMETIMES) : Requirement.NEVER;
}
@SuppressWarnings({ "rawtypes", "unchecked" })
private static Set> getVarsFromArguments(final BOp c) {
final int arity = c.arity();
final Set> terms = new LinkedHashSet>(arity);
for (int i = 0; i < arity; i++) {
final BOp arg = c.get(i);
if (arg != null) {
if (arg instanceof IValueExpression
&& arg instanceof IPassesMaterialization) {
terms.addAll(getVarsFromArguments(arg));
} else if (arg instanceof IVariable) {
terms.add((IVariable) arg);
}
}
}
return terms;
}
/**
* Identify the join variables for the specified INCLUDE for the position
* within the query in which it appears. This simply delegates to
* {@link #_getJoinVars(SubqueryBase, IGroupMemberNode, Set)}, using the
* INCLUDE as the node which marks the position in the join group.
*
* @param aNamedSubquery
* The named subquery.
* @param anInclude
* An include for that subquery.
* @param vars
* The caller's collection. The join variables are added to this
* collection.
*
* @return The caller's collection.
*/
public Set> getJoinVars(
final NamedSubqueryRoot aNamedSubquery,
final NamedSubqueryInclude anInclude, final Set> vars) {
return _getJoinVars(aNamedSubquery, anInclude, vars);
}
/**
* Identify the join variables for the specified subquery for the position
* within the query in which it appears. This simply delegates to
* {@link #_getJoinVars(SubqueryBase, IGroupMemberNode, Set)}; the subquery
* is both the thing analyzed and the node which marks the position in the
* join group.
*
* @param subquery
* The subquery.
* @param vars
* The caller's collection. The join variables are added to this
* collection.
*
* @return The join variables (the caller's collection).
*/
public Set> getJoinVars(final SubqueryRoot subquery,
final Set> vars) {
return _getJoinVars(subquery, subquery, vars);
}
/**
* Identify the join variables for the specified subquery for the position
* within the query in which it appears. For a named subquery, it considers
* the position in which the INCLUDE appears.
*
* @param aSubquery
* Either a {@link NamedSubqueryRoot} or a {@link SubqueryRoot}.
* @param theNode
* The node which represents the subquery in the join group. For
* a named subquery, this will be a {@link NamedSubqueryInclude}.
* For a {@link SubqueryRoot}, it is just the
* {@link SubqueryRoot} itself.
*
* @return The join variables.
*/
private Set> _getJoinVars(final SubqueryBase aSubquery,
final IGroupMemberNode theNode, final Set> vars) {
/*
* The variables which are projected by the subquery which will be
* definitely bound based on an analysis of the subquery.
*/
final Set> boundBySubquery = getDefinitelyProducedBindings(aSubquery);
if (log.isInfoEnabled()) {
log.info(boundBySubquery);
}
/*
* The variables which are possibly bound on entry to the join group
* in which the subquery appears.
*/
final Set> incomingBindings = getDefinitelyIncomingBindings(
theNode, new LinkedHashSet>());
if (log.isInfoEnabled()) {
log.info(incomingBindings);
}
/*
* This is only those variables which are bound on entry into the group
* in which the subquery join appears *and* which are "must" bound
* variables projected by the subquery.
*/
boundBySubquery.retainAll(incomingBindings);
if (log.isInfoEnabled()) {
log.info(boundBySubquery);
}
vars.addAll(boundBySubquery);
if (log.isInfoEnabled()) {
log.info(vars);
}
return vars;
}
/**
* Return the join variables for a SERVICE.
*
* @param serviceNode
* @param vars
* @return
*/
public Set> getJoinVars(final ServiceNode serviceNode,
final Set> vars) {
/*
* The variables which will be definitely bound based on an analysis of
* the SERVICE.
*/
final Set> boundByService = getDefinitelyProducedBindings(serviceNode);
/*
* The variables which are definitely bound on entry to the join group
* in which the SERVICE appears.
*/
final Set> incomingBindings = getDefinitelyIncomingBindings(
serviceNode, new LinkedHashSet>());
/*
* This is only those variables which are bound on entry into the group
* in which the SERVICE join appears *and* which are "must" bound
* variables projected by the SERVICE.
*/
boundByService.retainAll(incomingBindings);
vars.addAll(boundByService);
return vars;
}
/**
* Return the join variables for a VALUES clause (embedded only - not
* top-level).
*
* @param bc The VALUES clause (a bunch of solutions)
* @param stats A static analysis of those solutions.
* @param vars
* @return
*/
public Set> getJoinVars(final BindingsClause bc,
final ISolutionSetStats stats,
final Set> vars) {
/*
* The variables which will be definitely bound based on the solutions
* in the VALUES clause.
*
* Note: Collection is not modifyable, so we copy it.
*/
final Set> boundByBindingsClause = new LinkedHashSet>(
stats.getAlwaysBound());
/*
* The variables which are definitely bound on entry to the join group
* in which the VALUES clause appears.
*/
final Set> incomingBindings = getDefinitelyIncomingBindings(
bc, new LinkedHashSet>());
/*
* This is only those variables which are bound on entry into the group
* in which the VALUES join appears *and* which are "must" bound
* variables projected by the VALUES.
*
* FIXME Is this the correct semantics? I followed the pattern for SERVICE.
*/
boundByBindingsClause.retainAll(incomingBindings);
vars.addAll(boundByBindingsClause);
return vars;
}
/**
* Return the join variables for an INCLUDE of a pre-existing named solution
* set.
*
* @param nsi
* The {@link NamedSubqueryInclude}
* @param solutionSet
* The name of a pre-existing solution set.
* @param vars
* The caller's collection.
*
* @return The caller's collection.
*/
public Set> getJoinVars(final NamedSubqueryInclude nsi,
final String solutionSet, final Set> vars) {
final String name = solutionSet;
/*
* The variables which will be definitely bound based on the statistics
* collected for that solution set.
*/
final ISolutionSetStats stats = getSolutionSetStats(name);
/*
* All variables which are bound in each solution of this solution set.
*
* Note: The summary data for a named solution set is typically
* immutable, so we insert the variables into a mutable collection in
* order to make changes to that collection below.
*/
final Set> boundInSolutionSet = new LinkedHashSet>(
stats.getAlwaysBound());
/*
* The variables which are definitely bound on entry to the INCLUDE
* operator based on the static analysis of the query, including where
* it appears in the join order of the query.
*/
final Set> incomingBindings = getDefinitelyIncomingBindings(
nsi, new LinkedHashSet>());
/*
* This is only those variables which are bound on entry into the
* INCLUDE *and* which are "must" bound variables projected by the
* pre-existing named solution set.
*/
boundInSolutionSet.retainAll(incomingBindings);
vars.addAll(boundInSolutionSet);
return vars;
}
/**
* Return any variables which are used after the given node in the current
* ordering of its parent {@link JoinGroupNode} but DOES NOT consider the
* parent or the PROJECTION for the query in which this group appears.
*
* @param node
* A node which is a direct child of some {@link JoinGroupNode}.
* @param vars
* Where to store the variables.
*
* @return The caller's set.
*
* @throws IllegalArgumentException
* if the node is not the direct child of some
* {@link JoinGroupNode}.
*/
public Set> getAfterVars(final IGroupMemberNode node,
final Set> vars) {
if (node.getParent() == null) {
// Immediate parent MUST be defined.
throw new IllegalArgumentException();
}
if (!(node.getParent() instanceof JoinGroupNode)) {
// Immediate parent MUST be a join group node.
throw new IllegalArgumentException();
}
final JoinGroupNode p = node.getParentJoinGroup();
boolean found = false;
for (IGroupMemberNode c : p) {
if (found) {
// Add in any variables referenced after this proxy node.
getSpannedVariables((BOp) c, true/* filters */, vars);
}
if (c == node) {
// Found the position of the proxy node in the group.
found = true;
}
}
assert found;
return vars;
}
/**
* Return the set of variables which must be projected if the group is to be
* converted into a sub-query. This method identifies variables which are
* either MUST or MIGHT bound outside of the group which are also used
* within the group and includes them in the projection. It also identified
* variables used after the group (in the current evaluation order) which
* are also used within the group and include them in the projection.
*
* When considering the projection of the (sub-)query in which the group
* appears, the SELECT EXPRESSIONS are consulted to identify variables which
* we need to project out of the group.
*
* @param proxy
* The join group which will be replaced by a sub-query. This is
* used to decide which variables are known bound (and hence
* should be projected into the WHERE clause if they are used
* within that WHERE clause). It is also used to decide which
* variables which become bound in the WHERE clause will be used
* outside of its scope and hence must be projected out of the
* WHERE clause. (The parent of this proxy MUST be a
* {@link JoinGroupNode}, not a {@link UnionNode} and not
* null
. This condition is readily satisified if the
* rewrite is considering the children of some join group node as
* the parent of the proxy will be that join group node.)
* @param groupToLift
* The group which is being lifted out and whose projection will
* be computed.
* @param query
* The query (or sub-query) in which that proxy node exists. This
* is used to identify anything which is PROJECTed out of the
* query.
* @param exogenousVars
* Any variables which are bound outside of the query AND known
* to be in scope (exogenous variables in a sub-select are only
* in scope if they are projected into the sub-select).
* @param projectedVars
* The variables which must be projected will be added to this
* collection.
* @return The projection.
*
* TODO We should recognize conditions under which this can be made
* into a DISTINCT projection. This involves a somewhat tricky
* analysis of the context in which each projected variable is used.
* There is *substantial* benefit to be gained from this analysis as
* a DISTINCT projection can radically reduce the size of the
* intermediate solution sets and the work performed by the overall
* query. However, if the analysis is incorrect and we mark the
* PROJECTION as DISTINCT when that is not allowed by the semantics
* of the query, then the query will not have the same behavior. So,
* getting this analysis correct is very important.
*/
public Set> getProjectedVars(
final IGroupMemberNode proxy,
final GraphPatternGroup> groupToLift,//
final QueryBase query,//
final Set> exogenousVars,//
final Set> projectedVars) {
// All variables which are used within the WHERE clause.
final Set> groupVars = getSpannedVariables(groupToLift,
new LinkedHashSet>());
/*
* Figure out what we need to project INTO the group.
*/
// All variables which might be incoming bound into the proxy node.
final Set> beforeVars = getMaybeIncomingBindings(
proxy, new LinkedHashSet>());
// Add in anything which is known to be bound outside of the query.
beforeVars.addAll(exogenousVars);
// Drop anything not used within the group.
beforeVars.retainAll(groupVars);
/*
* Figure out what we need to project FROM the group.
*/
// All variables used after the proxy node in its's parent join group.
final Set> afterVars = getAfterVars(proxy,
new LinkedHashSet>());
// Gather the variables used by the SELECT EXPRESSIONS which are
// projected out of the query in which this group appears.
query.getSelectExprVars(afterVars);
// Drop anything not used within the group.
afterVars.retainAll(groupVars);
/*
* The projection for the group is anything MAYBE bound on entry to the
* group which is also used within the group PLUS anything used after
* the group which is used within the group.
*/
projectedVars.addAll(beforeVars);
projectedVars.addAll(afterVars);
return projectedVars;
}
/**
 * Return {@code true} if any of the {@link ProjectionNode},
 * {@link GroupByNode}, or {@link HavingNode} of the given query indicate
 * that this is an aggregation query. The three clauses are read from the
 * query and handed to
 * {@link #isAggregate(ProjectionNode, GroupByNode, HavingNode)}.
 *
 * @param query
 *            The query.
 *
 * @return {@code true} if it is an aggregation query.
 */
public static boolean isAggregate(final QueryBase query) {

    final ProjectionNode projection = query.getProjection();

    final GroupByNode groupBy = query.getGroupBy();

    final HavingNode having = query.getHaving();

    return isAggregate(projection, groupBy, having);

}
/**
 * Return {@code true} if any of the {@link ProjectionNode},
 * {@link GroupByNode}, or {@link HavingNode} indicate that this is an
 * aggregation query. All arguments are optional.
 *
 * @param projection
 *            The projection (may be {@code null}).
 * @param groupBy
 *            The GROUP BY clause (may be {@code null}).
 * @param having
 *            The HAVING clause (may be {@code null}).
 *
 * @return {@code true} if the GROUP BY clause is non-empty, the HAVING
 *         clause is non-empty, or at least one projected expression is
 *         recognized as an aggregate.
 */
public static boolean isAggregate(final ProjectionNode projection,
        final GroupByNode groupBy, final HavingNode having) {

    // A non-empty GROUP BY or HAVING clause settles the question.
    final boolean hasGroupBy = groupBy != null && !groupBy.isEmpty();

    if (hasGroupBy || (having != null && !having.isEmpty())) {
        return true;
    }

    // Without a projection there is nothing left to inspect.
    if (projection == null) {
        return false;
    }

    // Otherwise look for an aggregate among the projected expressions.
    for (IValueExpressionNode selectExpr : projection) {
        if (isAggregateExpressionNode(selectExpr)) {
            return true;
        }
    }

    return false;

}
/**
* Checks if given expression node is or contains any aggregates
*
* After refactoring of SPARQL parser (https://jira.blazegraph.com/browse/BLZG-1176),
* AggregationNode needs to be checked recuresively, as its value expression is not completely parsed, but could be an aggregate, that should result in failing checks while preparing queries.
* For example, following test is failing without this check: com.bigdata.rdf.sail.sparql.BigdataSPARQL2ASTParserTest.test_agg10()
*
* @param exprNode - expression node to be checked
*/
private static boolean isAggregateExpressionNode(IValueExpressionNode exprNode) {
final IValueExpression> expr = exprNode.getValueExpression();
if (expr == null) {
/*
* The value expression is not cached....
*/
if (exprNode instanceof AssignmentNode) {
return isAggregateExpressionNode(((AssignmentNode) exprNode).getValueExpressionNode());
}
if (exprNode instanceof FunctionNode) {
/*
* Hack used when the BigdataExprBuilder needs to decide
* on the validity of aggregate expressions before we
* get around to caching the value expressions during
* evaluation (i.e., to pass the compliance tests for
* the parser).
*/
final FunctionNode functionNode = (FunctionNode) exprNode;
if (FunctionRegistry.isAggregate(functionNode
.getFunctionURI()))
return true;
}
return false;
}
if (isObviousAggregate(expr)) {
return true;
}
return false;
}
/**
* Return true
iff the {@link IValueExpression} is an obvious
* aggregate (it uses an {@link IAggregate} somewhere within it). This is
* used to identify projections which are aggregates when they are used
* without an explicit GROUP BY or HAVING clause.
*
* Note: Value expressions can be "non-obvious" aggregates when considered
* in the context of a GROUP BY, HAVING, or even a SELECT expression where
* at least one argument is a known aggregate. For example, a constant is an
* aggregate when it appears in a SELECT expression for a query which has a
* GROUP BY clause. Another example: any value expression used in a GROUP BY
* clause is an aggregate when the same value expression appears in the
* SELECT clause.
*
* This method is only to find the "obvious" aggregates which signal that a
* bare SELECT clause is in fact an aggregation.
*
* @param expr
* The expression.
*
* @return true
iff it is an obvious aggregate.
*/
private static boolean isObviousAggregate(final IValueExpression> expr) {
if (expr instanceof IAggregate>)
return true;
final Iterator itr = expr.argIterator();
while (itr.hasNext()) {
final IValueExpression> arg = (IValueExpression>) itr.next();
if (arg != null) {
if (isObviousAggregate(arg)) // recursion.
return true;
}
}
return false;
}
/**
* Extract the set of variables contained in a binding set.
* @param bss
* @return
*/
@SuppressWarnings("rawtypes")
public Set> getVarsInBindingSet(final List bss) {
Set> bssVars = new HashSet>();
for (int i=0; i bsVars = bs.vars();
while (bsVars.hasNext()) {
bssVars.add(bsVars.next());
}
}
return bssVars;
}
/**
 * Checks whether the value expression node attached to the given filter
 * is in conjunctive normal form (CNF).
 *
 * @param filter
 *            The filter whose value expression node is tested.
 *
 * @return {@code true} iff the filter's value expression node is in CNF.
 */
static public boolean isCNF(final FilterNode filter) {

    final IValueExpressionNode condition = filter.getValueExpressionNode();

    return isCNF(condition);

}
/**
 * Checks whether the given value expression node is in conjunctive normal
 * form (CNF).
 *
 * @param vexpr
 *            The value expression node to test.
 *
 * @return {@code true} iff the node is in CNF. Anything which is not a
 *         {@link FunctionNode}, and any function other than NOT, OR and
 *         AND, is treated as a terminal and hence as being in CNF.
 */
static public boolean isCNF(final IValueExpressionNode vexpr) {

    if (vexpr instanceof FunctionNode) {

        final FunctionNode fn = (FunctionNode) vexpr;
        final URI uri = fn.getFunctionURI();

        if (uri.equals(FunctionRegistry.NOT)) {
            return isCNFNegationOrTerminal(fn);
        }

        if (uri.equals(FunctionRegistry.OR)) {
            return isCNFDisjunct(fn);
        }

        if (uri.equals(FunctionRegistry.AND)) {
            // Both operands must be in CNF; the right one is only
            // inspected when the left one is.
            if (!isCNF((ValueExpressionNode) fn.get(0))) {
                return false;
            }
            return isCNF((ValueExpressionNode) fn.get(1));
        }

    }

    // everything else is a terminal
    return true;

}
/**
 * Check if the function node is an inner disjunct within a CNF. In
 * particular, it must not contain any conjunctive (AND) nodes.
 *
 * @param functionNode
 *            The function node to test.
 *
 * @return {@code true} iff the node is a valid inner disjunct: a negation
 *         or terminal, or an OR whose function-node operands are
 *         themselves valid inner disjuncts.
 */
static public boolean isCNFDisjunct(final FunctionNode functionNode) {

    final URI uri = functionNode.getFunctionURI();

    if (uri.equals(FunctionRegistry.NOT)) {
        return isCNFNegationOrTerminal(functionNode);
    }

    if (uri.equals(FunctionRegistry.OR)) {

        // Operands which are not function nodes are terminals.
        final BOp left = functionNode.get(0);
        final boolean leftOk = !(left instanceof FunctionNode)
                || isCNFDisjunct((FunctionNode) left);

        // The right operand is always inspected as well.
        final BOp right = functionNode.get(1);
        final boolean rightOk = !(right instanceof FunctionNode)
                || isCNFDisjunct((FunctionNode) right);

        return leftOk && rightOk;

    }

    // AND is not allowed here; everything else is a terminal.
    return !uri.equals(FunctionRegistry.AND);

}
/**
 * Check if the function node is a negation (possibly nested) or a
 * terminal within a CNF. In particular, it must not contain any
 * disjuncts or conjuncts underneath its chain of negations.
 *
 * @param functionNode
 *            The function node to test.
 *
 * @return {@code false} iff an AND or OR is reached by following the
 *         chain of NOT operands starting at the given node;
 *         {@code true} otherwise.
 */
static public boolean isCNFNegationOrTerminal(final FunctionNode functionNode) {

    // Walk down the chain of nested negations.
    FunctionNode current = functionNode;

    while (true) {

        final URI uri = current.getFunctionURI();

        if (uri.equals(FunctionRegistry.AND)
                || uri.equals(FunctionRegistry.OR)) {
            return false;
        }

        if (!uri.equals(FunctionRegistry.NOT)) {
            return true; // everything else is a terminal
        }

        final BOp operand = current.get(0);

        if (!(operand instanceof FunctionNode)) {
            return true; // terminal
        }

        current = (FunctionNode) operand;

    }

}
/**
 * Returns the corresponding (equivalent) value expression in CNF. The
 * transformation is applied to a deep copy of the given value
 * expression: negations are pushed down first (see
 * {@link #pushNegations(IValueExpressionNode)}) and disjuncts are pushed
 * below conjuncts afterwards (see
 * {@link #pushDisjuncts(IValueExpressionNode)}).
 *
 * @param vexpr
 *            The value expression to convert.
 *
 * @return The result of applying both rewriting steps to the copy. Note
 *         that the conversion is always carried out; use
 *         {@link #isCNF(IValueExpressionNode)} beforehand to find out
 *         whether a value expression is already in CNF.
 */
static public IValueExpressionNode toCNF(final IValueExpressionNode vexpr) {

    // Work on a copy of the value expression.
    final IValueExpressionNode workingCopy =
        (IValueExpressionNode) BOpUtility.deepCopy((BOp) vexpr);

    // Step 1: move all negations to the bottom of the tree.
    final IValueExpressionNode negationsPushed = pushNegations(workingCopy);

    // Step 2: move all ORs below the ANDs.
    return pushDisjuncts(negationsPushed);

}
/**
 * Recursively pushes negations down the operator tree, such that in the
 * returned node, negations are always at the bottom of the tree. In
 * particular, all AND and OR value expressions will be situated above
 * negations.
 * <p>
 * The following rewritings are applied:
 * <ul>
 * <li>NOT(a AND b) becomes NOT(a) OR NOT(b), NOT(a OR b) becomes NOT(a)
 * AND NOT(b) (both operands are processed recursively);</li>
 * <li>NOT(NOT(a)) becomes a (processed recursively);</li>
 * <li>a negated comparison is replaced by the inverse comparison (= and
 * !=, &lt;= and &gt;, &lt; and &gt;=);</li>
 * <li>the operands of AND and OR are processed recursively.</li>
 * </ul>
 * Anything else is returned as is.
 * <p>
 * The resulting {@link IValueExpressionNode} is logically equivalent.
 *
 * @param vexp
 *            The value expression node to rewrite.
 *
 * @return The rewritten value expression node, or the given node itself
 *         if no rewriting applies.
 */
static public IValueExpressionNode pushNegations(final IValueExpressionNode vexp) {

    if (!(vexp instanceof FunctionNode)) {
        return vexp;
    }

    final FunctionNode functionNode = (FunctionNode) vexp;
    final URI functionURI = functionNode.getFunctionURI();

    if (functionURI.equals(FunctionRegistry.AND)) {

        return FunctionNode.AND(
            (ValueExpressionNode) pushNegations(
                (IValueExpressionNode) functionNode.get(0)),
            (ValueExpressionNode) pushNegations(
                (IValueExpressionNode) functionNode.get(1)));

    }

    if (functionURI.equals(FunctionRegistry.OR)) {

        return FunctionNode.OR(
            (ValueExpressionNode) pushNegations(
                (IValueExpressionNode) functionNode.get(0)),
            (ValueExpressionNode) pushNegations(
                (IValueExpressionNode) functionNode.get(1)));

    }

    if (!functionURI.equals(FunctionRegistry.NOT)) {
        return vexp; // nothing to be done
    }

    /*
     * We are looking at a negation: inspect what is being negated.
     */
    final IValueExpressionNode inner =
        (IValueExpressionNode) functionNode.get(0);

    if (!(inner instanceof FunctionNode)) {
        return vexp; // negated terminal: already at the bottom
    }

    final FunctionNode innerFunctionNode = (FunctionNode) inner;
    final URI innerFunctionURI = innerFunctionNode.getFunctionURI();

    final boolean innerIsAnd = innerFunctionURI.equals(FunctionRegistry.AND);

    if (innerIsAnd || innerFunctionURI.equals(FunctionRegistry.OR)) {

        // De Morgan: negate both operands and flip the connective.
        final IValueExpressionNode negLeft =
            pushNegations(
                FunctionNode.NOT(
                    (ValueExpressionNode) innerFunctionNode.get(0)));

        final IValueExpressionNode negRight =
            pushNegations(
                FunctionNode.NOT(
                    (ValueExpressionNode) innerFunctionNode.get(1)));

        return innerIsAnd
            ? FunctionNode.OR(
                (ValueExpressionNode) negLeft,
                (ValueExpressionNode) negRight)
            : FunctionNode.AND(
                (ValueExpressionNode) negLeft,
                (ValueExpressionNode) negRight);

    }

    if (innerFunctionURI.equals(FunctionRegistry.NOT)) {

        /*
         * Drop the double negation and recurse into the operand.
         *
         * Note: The outer NOT node is deliberately left untouched.
         * Replacing its argument in place would turn NOT(NOT(x)) into
         * NOT(x) in the tree which was handed to us, which is not
         * logically equivalent.
         */
        final BOp innerInner = innerFunctionNode.get(0);

        if (innerInner instanceof IValueExpressionNode) {
            return pushNegations((IValueExpressionNode) innerInner);
        }

        // The operand can not be returned as a value expression node.
        return vexp;

    }

    /*
     * Negated comparison: replace it by the inverse comparison. The
     * operands are only accessed once the operator is known to be a
     * (binary) comparison.
     */
    if (innerFunctionURI.equals(FunctionRegistry.EQ)) {
        // invert: = -> !=
        return FunctionNode.NE(
            (ValueExpressionNode) innerFunctionNode.get(0),
            (ValueExpressionNode) innerFunctionNode.get(1));
    }

    if (innerFunctionURI.equals(FunctionRegistry.NE)) {
        // invert: != -> =
        return FunctionNode.EQ(
            (ValueExpressionNode) innerFunctionNode.get(0),
            (ValueExpressionNode) innerFunctionNode.get(1));
    }

    if (innerFunctionURI.equals(FunctionRegistry.LE)) {
        // invert: <= -> >
        return FunctionNode.GT(
            (ValueExpressionNode) innerFunctionNode.get(0),
            (ValueExpressionNode) innerFunctionNode.get(1));
    }

    if (innerFunctionURI.equals(FunctionRegistry.LT)) {
        // invert: < -> >=
        return FunctionNode.GE(
            (ValueExpressionNode) innerFunctionNode.get(0),
            (ValueExpressionNode) innerFunctionNode.get(1));
    }

    if (innerFunctionURI.equals(FunctionRegistry.GE)) {
        // invert: >= -> <
        return FunctionNode.LT(
            (ValueExpressionNode) innerFunctionNode.get(0),
            (ValueExpressionNode) innerFunctionNode.get(1));
    }

    if (innerFunctionURI.equals(FunctionRegistry.GT)) {
        // invert: > -> <=
        return FunctionNode.LE(
            (ValueExpressionNode) innerFunctionNode.get(0),
            (ValueExpressionNode) innerFunctionNode.get(1));
    }

    // Negation of some other function: nothing to be done.
    return vexp;

}
/**
 * Recursively pushes logical ORs below logical ANDs in the operator tree,
 * such that in the returned node all OR expressions are situated below
 * AND expressions. Expects that all NOT expressions have been pushed
 * down to the bottom already (otherwise, the behavior is undetermined).
 * <p>
 * The resulting {@link IValueExpressionNode} is logically equivalent.
 *
 * @param vexp
 *            The value expression node to rewrite.
 *
 * @return The rewritten value expression node, or the given node itself
 *         if it is neither an OR nor an AND.
 */
static public IValueExpressionNode pushDisjuncts(
        final IValueExpressionNode vexp) {

    if (!(vexp instanceof FunctionNode)) {
        return vexp;
    }

    final FunctionNode functionNode = (FunctionNode) vexp;
    final URI functionURI = functionNode.getFunctionURI();

    if (functionURI.equals(FunctionRegistry.OR)) {

        // first, recurse, making sure that AND is propagated up in the subtrees
        final IValueExpressionNode left =
            pushNegations(
                pushDisjuncts((IValueExpressionNode) functionNode.get(0)));
        final IValueExpressionNode right =
            pushNegations(
                pushDisjuncts((IValueExpressionNode) functionNode.get(1)));

        /*
         * New conjuncts are basically the cross product disjuncts of the
         * left and right subtree. Note that the special case (where
         * neither the left nor the right subtree has an AND at the top)
         * nicely fits in: in that case, leftConjuncts and rightConjuncts
         * have one element, say x and y, and we compute x OR y as the one
         * and only conjunct (thus not changing the tree).
         */
        final List<IValueExpressionNode> leftConjuncts =
            extractToplevelConjuncts(
                left, new ArrayList<IValueExpressionNode>());
        final List<IValueExpressionNode> rightConjuncts =
            extractToplevelConjuncts(
                right, new ArrayList<IValueExpressionNode>());

        final List<IValueExpressionNode> newConjuncts =
            new ArrayList<IValueExpressionNode>();

        for (IValueExpressionNode leftConjunct : leftConjuncts) {
            for (IValueExpressionNode rightConjunct : rightConjuncts) {

                final IValueExpressionNode newConjunct =
                    FunctionNode.OR(
                        (ValueExpressionNode) leftConjunct,
                        (ValueExpressionNode) rightConjunct);

                newConjuncts.add(newConjunct);

            }
        }

        // Connect the new conjuncts through AND again.
        return toConjunctiveValueExpression(newConjuncts);

    } else if (functionURI.equals(FunctionRegistry.AND)) {

        // just recurse
        return FunctionNode.AND(
            (ValueExpressionNode) pushDisjuncts(
                (IValueExpressionNode) functionNode.get(0)),
            (ValueExpressionNode) pushDisjuncts(
                (IValueExpressionNode) functionNode.get(1)));

    }

    // we're done recursing, no disjuncts will be found below this point
    return vexp;

}
/**
 * Extracts all AND-connected conjuncts located at the top of a given
 * value expression node (recursively, until an operator different from
 * AND is encountered). The conjuncts are appended in left-to-right
 * order; the AND nodes themselves are not recorded.
 *
 * @param vexp
 *            The value expression node.
 * @param nodes
 *            The list to which the top level conjuncts are appended.
 *
 * @return The list which was passed in as <i>nodes</i>.
 */
static public List<IValueExpressionNode> extractToplevelConjuncts(
        final IValueExpressionNode vexp,
        final List<IValueExpressionNode> nodes) {

    if (vexp instanceof FunctionNode) {

        final FunctionNode functionNode = (FunctionNode) vexp;
        final URI functionURI = functionNode.getFunctionURI();

        if (functionURI.equals(FunctionRegistry.AND)) {

            extractToplevelConjuncts(
                (ValueExpressionNode) functionNode.get(0), nodes);
            extractToplevelConjuncts(
                (ValueExpressionNode) functionNode.get(1), nodes);

            return nodes; // don't record this (complex AND) node

        }

    }

    nodes.add(vexp); // record conjunct (don't recurse)

    return nodes;

}
/**
* Constructs an (unbalanced) tree out of the list of conjuncts.
* If the conjuncts that are passed in are null or empty, null is returned.
*
* @param conjuncts
* @return
*/
static public IValueExpressionNode toConjunctiveValueExpression(
final List conjuncts) {
if (conjuncts==null || conjuncts.isEmpty()) {
return null;
}
// if the list is unary, we return the one and only conjunct
if (conjuncts.size()==1) {
return conjuncts.get(0);
} else {
IValueExpressionNode tmp =
FunctionNode.AND(
(ValueExpressionNode)conjuncts.get(0),
(ValueExpressionNode)conjuncts.get(1));
for (int i=2; i