// com.bigdata.rdf.sparql.ast.StatementPatternNode Maven / Gradle / Ivy
/**
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved.
Contact:
SYSTAP, LLC DBA Blazegraph
2501 Calvert ST NW #106
Washington, DC 20008
[email protected]
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
package com.bigdata.rdf.sparql.ast;
import java.util.Collections;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.openrdf.query.algebra.StatementPattern.Scope;
import com.bigdata.bop.BOp;
import com.bigdata.bop.Constant;
import com.bigdata.bop.IVariable;
import com.bigdata.bop.NV;
import com.bigdata.htree.HTree;
import com.bigdata.rdf.internal.constraints.RangeBOp;
import com.bigdata.rdf.sparql.ast.eval.AST2BOpBase;
import com.bigdata.rdf.sparql.ast.eval.AST2BOpJoins;
import com.bigdata.rdf.sparql.ast.eval.AST2BOpUtility;
import com.bigdata.rdf.sparql.ast.optimizers.ASTGraphGroupOptimizer;
import com.bigdata.rdf.sparql.ast.optimizers.ASTRangeConstraintOptimizer;
import com.bigdata.rdf.sparql.ast.optimizers.ASTSimpleOptionalOptimizer;
import com.bigdata.rdf.sparql.ast.optimizers.StaticOptimizer;
import com.bigdata.rdf.spo.DistinctTermAdvancer;
import com.bigdata.rdf.spo.ISPO;
import com.bigdata.rdf.spo.SPOAccessPath;
import com.bigdata.relation.rule.eval.ISolution;
import com.bigdata.striterator.IKeyOrder;
/**
* A node in the AST representing a statement pattern.
*
* Note: The annotations on the class are mostly interpreted by the
* toPredicate() method in {@link AST2BOpUtility} and by the logic in
* {@link AST2BOpJoins} which handles the default and named graph access
* patterns.
*
* Note: If a variable is bound, then we bind that slot of the predicate. If a
* variable can take some enumerated set of values, then we use an
* {@link Annotations#INLINE} access path to model that "IN" constraint. If the
* value for a variable must lie within some key range, then we handle that
* case using {@link RangeBOp}. If we have no information about a variable,
* then we just leave the variable unbound.
*/
public class StatementPatternNode extends
GroupMemberNodeBase implements
IJoinNode, IStatementContainer, IReorderableNode {
private static final long serialVersionUID = 1L;
/**
 * Annotation names (and the one default value) declared for
 * {@link StatementPatternNode}, in addition to whatever is inherited from
 * {@link GroupMemberNodeBase.Annotations} and {@link IJoinNode.Annotations}.
 */
public interface Annotations extends GroupMemberNodeBase.Annotations,
IJoinNode.Annotations {
/**
 * The {@link Scope} (required).
 *
 * @see ASTGraphGroupOptimizer
 */
String SCOPE = "scope";
/**
 * Boolean flag indicates that the distinct solutions for the statement
 * pattern are required ({@value #DEFAULT_DISTINCT}).
 * <p>
 * Note: This is a hint that the {@link DistinctTermAdvancer} should be
 * used to visit the distinct {@link ISPO}s having a common prefix. This
 * is used for <code>GRAPH ?g {}</code>, which evaluates to all of the
 * named graphs in the database (if the named graphs were not explicitly
 * specified).
 * <p>
 * Note: For only partly historical reasons, this is not used to mark
 * default graph access. A default graph access path strips the context
 * and then applies a DISTINCT filter to the resulting triples.
 */
String DISTINCT = "distinct";
/**
 * The default value for the {@link #DISTINCT} annotation.
 */
boolean DEFAULT_DISTINCT = false;
/**
 * The existence of at least one solution will be verified otherwise the
 * solution will be failed. This turns into an iterator with a limit of
 * ONE (1) on the {@link SPOAccessPath}.
 * <p>
 * Note: This is used in combination with a join against an inline
 * access path for the named graphs. The "exists" statement pattern MUST
 * run <em>after</em> the access path which produces the variety since
 * it will be used to constrain that as-bound variety. Thus the join
 * order in the query plan must look like:
 *
 * <pre>
 * (_,_,_,?g)[@INLINE,@IN(g,namedGraphs)] x (_,_,_,?g)[@EXISTS]
 * </pre>
 *
 * rather than
 *
 * <pre>
 * (_,_,_,?g)[@EXISTS] x (_,_,_,?g)[@INLINE,@IN(g,namedGraphs)]
 * </pre>
 *
 * as the latter will find only one solution for <code>?g</code>.
 *
 * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/429">
 *      Optimization for GRAPH uri {} and GRAPH ?foo {}</a>
 */
String EXISTS = "exists";
/**
 * The data for this access path is inline. The value of the attribute
 * is the column projection / solution set reference.
 * <p>
 * TODO A column projection is more efficient when we are handling
 * things like the named graphs or constraining the subquery for an
 * optional with multiple predicates. That column projection can be
 * modeled as <code>IN(var,values)</code>. The IN filter could be
 * attached to {@link #FILTERS} or it could be the value of this
 * attribute.
 * <p>
 * We also have use cases for inline solution set access paths for use
 * with the samples materialized by the RTO. Those should be an
 * {@link HTree} and the data should be modeled as {@link ISolution}s.
 * (Note that some vertices may correspond to "bop fragment" joins, in
 * which case they can not be modeled as {@link ISPO}s.)
 * <p>
 * Both the column projection (IN) and the inline solution set (HTree)
 * are simpler access paths. They only support element visitation, a
 * full scan of the access path (this is the same as saying that there
 * are no join variables), or probing to find all solutions which join
 * on some join variable(s). This is in contrast to the
 * {@link SPOAccessPath}, which supports key-range constraints (prefix)
 * and range constraints (prefix with key range on a data type value).
 *
 * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/429">
 *      Optimization for GRAPH uri {} and GRAPH ?foo {}</a>
 */
String INLINE = "inline";
/**
 * An optional attribute whose value is a {@link RangeBOp} which models
 * the key-range constraint on the access path. The {@link RangeBOp} is
 * used when there are filters which impose a GT/GTE and/or LT/LTE
 * restriction on the values which a variable may take on for that
 * access path.
 * <p>
 * NOTE(review): the enclosing class reads and writes this annotation as a
 * <code>RangeNode</code> in <code>getRange()</code> /
 * <code>setRange()</code> - confirm which type this description should
 * name.
 * <p>
 * TODO We should also handle datatype constraints on a variable here.
 * For example, if a variable is known to be numeric, or known to be
 * xsd:int, then we can immediately reject any bindings which would
 * violate that type constraint. To do this right, we need to notice
 * those type constraints and propagate them backwards in the plan so we
 * can reject bindings as early as possible. (In fact, we can also do a
 * range constraint which spans each of the datatypes which the variable
 * could take on. Datatype constraints and value range constraints are
 * very much related. The datatype constraint is effectively a value
 * range constraint allowing the entire value space for that datatype.
 * Likewise, a value range constraint must be applied across the UNION
 * of the allowable ground datatypes for the variable.)
 *
 * @see ASTRangeConstraintOptimizer
 *
 * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/238">
 *      lift range constraints onto AP</a>
 */
String RANGE = "range";
/**
 * An optional annotation whose value is the variable which will be
 * bound to the statement identifier for the matched statement patterns.
 * The statement identifier is always formed from the subject, predicate
 * and object (the triple). The context is NOT represented in the
 * statement identifier. This keeps the semantics consistent with RDF
 * reification.
 *
 * @see "Reification Done Right"
 */
String SID = "sid";
/**
 * An optional annotation whose value is a variable which will become
 * bound to the fast range count of the associated triple pattern.
 *
 * @see "SELECT COUNT(...) (DISTINCT|REDUCED) {single-triple-pattern} is slow."
 */
String FAST_RANGE_COUNT_VAR = "fastRangeCountVar";
/**
 * An optional annotation whose value is the variable that will be bound
 * by a {@link DistinctTermAdvancer} layered over the access path.
 *
 * @see "DISTINCT PREDICATEs query is slow"
 */
String DISTINCT_TERM_SCAN_VAR = "distinctTermScanVar";
}
/**
 * Copy constructor. Delegates to the superclass constructor which accepts
 * the node to be copied.
 * <p>
 * Constructor required for
 * {@link com.bigdata.bop.BOpUtility#deepCopy(FilterNode)}.
 *
 * @param op
 *            The node to copy.
 */
public StatementPatternNode(final StatementPatternNode op) {
super(op);
}
/**
 * Required shallow copy constructor.
 *
 * @param args
 *            The arguments, handed to the superclass unchanged.
 * @param anns
 *            The annotations (name/value pairs), handed to the superclass
 *            unchanged.
 */
public StatementPatternNode(final BOp[] args, final Map<String, Object> anns) {
    // Parameterized (rather than raw) Map: same erasure, so the constructor
    // descriptor seen by existing callers is unchanged.
    super(args, anns);
}
/**
 * A triple pattern. The {@link Scope} will be
 * {@link Scope#DEFAULT_CONTEXTS}, the context will be <code>null</code>.
 * This simply delegates to the five-argument constructor.
 *
 * @param s
 *            The subject (variable or constant).
 * @param p
 *            The predicate (variable or constant).
 * @param o
 *            The object (variable or constant).
 *
 * @see StatementPatternNode#StatementPatternNode(TermNode, TermNode,
 *      TermNode, TermNode, Scope)
 */
public StatementPatternNode(final TermNode s, final TermNode p,
final TermNode o) {
this(s, p, o, null/* context */, Scope.DEFAULT_CONTEXTS);
}
/**
 * A quad pattern.
 * <p>
 * Note: When a {@link StatementPatternNode} appears in a WHERE clause, the
 * {@link Scope} should be marked as {@link Scope#DEFAULT_CONTEXTS} if it is
 * NOT embedded within a GRAPH clause and otherwise as
 * {@link Scope#NAMED_CONTEXTS}.
 * <p>
 * The context position of the statement should be <code>null</code> unless
 * it is embedded within a GRAPH clause, in which case the context is the
 * context specified for the parent GRAPH clause.
 * <p>
 * The SPARQL UPDATE <code>WITH uri</code> is a syntactic sugar for
 * <code>GRAPH uri {...}</code>. Therefore, when present, any
 * {@link StatementPatternNode} outside of an explicit GRAPH group is also
 * marked as {@link Scope#NAMED_CONTEXTS} and the context position will be
 * bound to the uri specified in the <code>WITH</code> clause.
 * <p>
 * A <code>null</code> context in {@link Scope#DEFAULT_CONTEXTS} is
 * interpreted as the RDF merge of the graphs in the defaultGraph (as
 * specified by the {@link DatasetNode}). When non-<code>null</code> (it can
 * be bound by the SPARQL UPDATE <em>WITH</em> clause), the defaultGraph
 * declared by the {@link DatasetNode} is ignored and the context is bound
 * to the constant specified in that <em>WITH</em> clause.
 * <p>
 * Absent any other constraints on the query, an unbound variable context in
 * {@link Scope#NAMED_CONTEXTS} may be bound to any named graph specified by
 * the {@link DatasetNode}.
 *
 * @param s
 *            The subject (variable or constant; required).
 * @param p
 *            The predicate (variable or constant; required).
 * @param o
 *            The object (variable or constant; required).
 * @param c
 *            The context (variable or constant; optional).
 * @param scope
 *            Either {@link Scope#DEFAULT_CONTEXTS} or
 *            {@link Scope#NAMED_CONTEXTS} (required).
 *
 * @throws IllegalArgumentException
 *             if <i>s</i>, <i>p</i>, or <i>o</i> is <code>null</code>.
 * @throws IllegalArgumentException
 *             if <i>scope</i> is <code>null</code>.
 * @throws IllegalArgumentException
 *             if <i>scope</i> is {@link Scope#NAMED_CONTEXTS} and <i>c</i>
 *             is <code>null</code>.
 */
public StatementPatternNode(final TermNode s, final TermNode p,
        final TermNode o, final TermNode c, final Scope scope) {

    // The superclass constructor call has to come first, so the arguments
    // can only be validated afterwards. When scope is null no annotation
    // map is built at all; the null scope is then rejected just below.
    super(new BOp[] { s, p, o, c }, scope == null ? null/* anns */: NV
            .asMap(new NV(Annotations.SCOPE, scope)));

    if (scope == null) {
        throw new IllegalArgumentException("scope must not be null");
    }

    if (s == null || p == null || o == null) {
        throw new IllegalArgumentException(
                "s, p, and o must not be null");
    }

    // A named-graph pattern is meaningless without a context position.
    if (scope == Scope.NAMED_CONTEXTS && c == null) {
        throw new IllegalArgumentException(
                "context must not be null when scope is NAMED_CONTEXTS");
    }

}
/**
 * The subject position of the pattern (argument index 0): a variable or a
 * constant (required).
 *
 * @return The subject term.
 */
public final TermNode s() {
    // get(int) is overridden by this class to return TermNode already.
    return get(0);
}
/**
 * The predicate position of the pattern (argument index 1): a variable or a
 * constant (required).
 *
 * @return The predicate term.
 */
public final TermNode p() {
    // get(int) is overridden by this class to return TermNode already.
    return get(1);
}
/**
 * The object position of the pattern (argument index 2): a variable or a
 * constant (required).
 *
 * @return The object term.
 */
public final TermNode o() {
    // get(int) is overridden by this class to return TermNode already.
    return get(2);
}
/**
 * The context position of the pattern (argument index 3): a variable or a
 * constant (required iff in quads mode).
 *
 * @return The context term.
 */
public final TermNode c() {
    // get(int) is overridden by this class to return TermNode already.
    return get(3);
}
/**
 * Strengthen return type: the argument at the given index is returned as a
 * {@link TermNode}.
 *
 * @param i
 *            The index of the argument.
 *
 * @return The result of <code>super.get(i)</code>, cast to {@link TermNode}.
 */
@Override
public TermNode get(final int i) {
return (TermNode) super.get(i);
}
/**
 * Replace the term in the context position (argument index 3).
 *
 * @param c
 *            The new context term.
 */
public final void setC(final TermNode c) {
    setArg(3, c);
}
/**
 * The statement identifier variable for triples which match this statement
 * pattern (optional). The statement identifier is the composition of the
 * (subject, predicate, and object) positions of the matched statements.
 *
 * @return The value of the {@link Annotations#SID} annotation.
 *
 * @see Annotations#SID
 */
public final VarNode sid() {
    final Object sidVar = getProperty(Annotations.SID);
    return (VarNode) sidVar;
}
/**
 * Set the SID variable, i.e. the value stored under the
 * {@link Annotations#SID} annotation.
 *
 * @param sid
 *            The statement identifier variable.
 */
public final void setSid(final VarNode sid) {
    setProperty(Annotations.SID, sid);
}
/**
 * The scope for this statement pattern (either named graphs or default
 * graphs). The value is read as a required property.
 *
 * @return The value of the {@link Annotations#SCOPE} annotation.
 *
 * @see Annotations#SCOPE
 * @see Scope
 */
public final Scope getScope() {
    final Object scope = getRequiredProperty(Annotations.SCOPE);
    return (Scope) scope;
}
/**
 * Set the {@link Annotations#SCOPE} annotation.
 *
 * @param scope
 *            The new scope (required).
 *
 * @throws IllegalArgumentException
 *             if <i>scope</i> is <code>null</code>.
 */
public final void setScope(final Scope scope) {
    if (scope == null) {
        throw new IllegalArgumentException();
    }
    setProperty(Annotations.SCOPE, scope);
}
/**
 * Return the {@link VarNode} associated with the optional
 * {@link Annotations#FAST_RANGE_COUNT_VAR} property.
 *
 * @return The {@link VarNode} -or- <code>null</code> if this triple pattern
 *         is not associated with that annotation.
 */
public final VarNode getFastRangeCountVar() {
    final Object countVar = getProperty(Annotations.FAST_RANGE_COUNT_VAR);
    return (VarNode) countVar;
}
/**
 * Set the {@link Annotations#FAST_RANGE_COUNT_VAR} annotation.
 *
 * @param var
 *            The variable (required).
 *
 * @throws IllegalArgumentException
 *             if <i>var</i> is <code>null</code>.
 */
public final void setFastRangeCount(final VarNode var) {
    if (var == null) {
        throw new IllegalArgumentException();
    }
    setProperty(Annotations.FAST_RANGE_COUNT_VAR, var);
}
/**
 * Return the variable that will be bound by the
 * {@link DistinctTermAdvancer} pattern.
 *
 * @return The distinct term scan variable -or- <code>null</code> if the
 *         access path will not use a distinct term scan.
 *
 * @see Annotations#DISTINCT_TERM_SCAN_VAR
 */
public final VarNode getDistinctTermScanVar() {
    final Object scanVar = getProperty(Annotations.DISTINCT_TERM_SCAN_VAR);
    return (VarNode) scanVar;
}
/**
 * Set the {@link Annotations#DISTINCT_TERM_SCAN_VAR} annotation. Unlike
 * {@link #setFastRangeCount(VarNode)}, the argument is not checked for
 * <code>null</code> here.
 *
 * @param var
 *            The distinct term scan variable.
 */
public final void setDistinctTermScanVar(final VarNode var) {
    setProperty(Annotations.DISTINCT_TERM_SCAN_VAR, var);
}
/**
 * {@inheritDoc}
 * <p>
 * This returns <code>true</code> iff the {@link StatementPatternNode} was
 * lifted out of an optional {@link JoinGroupNode} such that it has OPTIONAL
 * semantics. The value is read from the <code>OPTIONAL</code> annotation,
 * falling back to <code>DEFAULT_OPTIONAL</code>.
 *
 * @see ASTSimpleOptionalOptimizer
 */
@Override
public final boolean isOptional() {
    final boolean optional = getProperty(Annotations.OPTIONAL,
            Annotations.DEFAULT_OPTIONAL);
    return optional;
}
/**
 * Returns <code>false</code>.
 *
 * @return Always <code>false</code>.
 */
@Override
final public boolean isMinus() {
return false;
}
/**
 * Mark this {@link StatementPatternNode} as one which was lifted out of a
 * "simple optional" group and which therefore has "optional" semantics (we
 * will do an optional join for it).
 * <p>
 * Note: The need to maintain the correct semantics for the simple optional
 * group (statement pattern plus filter(s)) is also the reason why the
 * lifted FILTER(s) MUST NOT require the materialization of any variables
 * which would not have been bound before that JOIN. Since variables bound
 * by the JOIN for the optional statement pattern will not be materialized,
 * filters attached to that JOIN can not require materialization of
 * variables bound by the JOIN (though they can depend on variables already
 * bound by the required joins in the parent group).
 *
 * @param optional
 *            The new value of the <code>OPTIONAL</code> annotation.
 *
 * @see ASTSimpleOptionalOptimizer
 */
public final void setOptional(final boolean optional) {
    // Explicit boxing; this is what autoboxing would do at the call.
    setProperty(Annotations.OPTIONAL, Boolean.valueOf(optional));
}
/**
 * Attach a {@link RangeNode} that describes a range for the statement
 * pattern's O value. The node is stored under the
 * {@link Annotations#RANGE} annotation.
 *
 * @param range
 *            The range node to attach.
 */
public final void setRange(final RangeNode range) {
    setProperty(Annotations.RANGE, range);
}
/**
 * Return the {@link RangeNode} stored under the {@link Annotations#RANGE}
 * annotation.
 *
 * @return The value of that annotation, cast to {@link RangeNode}.
 */
public final RangeNode getRange() {
    final Object range = getProperty(Annotations.RANGE);
    return (RangeNode) range;
}
/**
 * Return the filters stored under the <code>FILTERS</code> annotation.
 *
 * @return An unmodifiable view of the attached filters, or an empty list
 *         when the annotation is not set. Never <code>null</code>.
 */
@Override
public final List<FilterNode> getAttachedJoinFilters() {

    // The annotation value is untyped; it is only ever written through
    // setAttachedJoinFilters(), hence the narrowly scoped suppression.
    @SuppressWarnings("unchecked")
    final List<FilterNode> filters = (List<FilterNode>) getProperty(Annotations.FILTERS);

    if (filters == null) {
        return Collections.emptyList();
    }

    // Callers get a read-only view rather than the stored list itself.
    return Collections.unmodifiableList(filters);

}
/**
 * Store the given filters under the <code>FILTERS</code> annotation. The
 * list reference is stored as given (no copy is made here).
 *
 * @param filters
 *            The filters to attach to this statement pattern.
 */
@Override
public final void setAttachedJoinFilters(final List<FilterNode> filters) {
    setProperty(Annotations.FILTERS, filters);
}
/**
 * Return <code>true</code> if none of s, p, o, or c is a variable.
 *
 * @return <code>false</code> as soon as one of the four positions (tested
 *         in the order s, p, o, c) is a {@link VarNode}; <code>true</code>
 *         otherwise.
 */
public boolean isGround() {
    final boolean hasVariable = s() instanceof VarNode
            || p() instanceof VarNode
            || o() instanceof VarNode
            || c() instanceof VarNode;
    return !hasVariable;
}
/**
* Return the variables used by the predicate - i.e. what this node will
* attempt to bind when run.
*/
public Set> getProducedBindings() {
final Set> producedBindings = new LinkedHashSet>();
final TermNode s = s();
final TermNode p = p();
final TermNode o = o();
final TermNode c = c();
addProducedBindings(s, producedBindings);
addProducedBindings(p, producedBindings);
addProducedBindings(o, producedBindings);
addProducedBindings(c, producedBindings);
return producedBindings;
}
/**
* This handles the special case where we've wrapped a Var with a Constant
* because we know it's bound, perhaps by the exogenous bindings. If we
* don't handle this case then we get the join vars wrong.
*
* @see StaticAnalysis._getJoinVars
*/
private void addProducedBindings(final TermNode t,
final Set> producedBindings) {
if (t instanceof VarNode) {
producedBindings.add(((VarNode) t).getValueExpression());
} else if (t instanceof ConstantNode) {
final ConstantNode cNode = (ConstantNode) t;
final Constant> c = (Constant>) cNode.getValueExpression();
final IVariable> var = c.getVar();
if (var != null) {
producedBindings.add(var);
}
}
}
@Override
public String toString(final int indent) {
final StringBuilder sb = new StringBuilder();
sb.append("\n").append(indent(indent)).append(toShortString());
final List filters = getAttachedJoinFilters();
if(!filters.isEmpty()) {
for (FilterNode filter : filters) {
sb.append(filter.toString(indent + 1));
}
}
if (getQueryHints() != null && !getQueryHints().isEmpty()) {
sb.append("\n");
sb.append(indent(indent + 1));
shortenName(sb, Annotations.QUERY_HINTS);
sb.append("=");
sb.append(getQueryHints().toString());
}
final Long rangeCount = (Long) getProperty(AST2BOpBase.Annotations.ESTIMATED_CARDINALITY);
final IKeyOrder> keyOrder = (IKeyOrder>) getProperty(AST2BOpBase.Annotations.ORIGINAL_INDEX);
if (rangeCount != null) {
sb.append("\n");
sb.append(indent(indent + 1));
shortenName(sb, AST2BOpBase.Annotations.ESTIMATED_CARDINALITY);
sb.append("=");
sb.append(rangeCount.toString());
}
if (keyOrder != null) {
sb.append("\n");
sb.append(indent(indent + 1));
shortenName(sb, AST2BOpBase.Annotations.ORIGINAL_INDEX);
sb.append("=");
sb.append(keyOrder.toString());
}
return sb.toString();
}
/**
 * Render a one-line summary of this node:
 * <code>StatementPatternNode[id](s, p, o, c)</code> followed by bracketed
 * markers for the sid variable, the scope, the fast range count variable,
 * the distinct term scan variable, the optional flag and the number of
 * attached filters. The <code>[id]</code> part, the context and each marker
 * appear only when the corresponding value is present.
 *
 * @return The summary text.
 */
@Override
public String toShortString() {

    final StringBuilder out = new StringBuilder();

    out.append("StatementPatternNode");

    final Integer bopId = (Integer) getProperty(BOp.Annotations.BOP_ID);
    if (bopId != null) {
        out.append("[").append(bopId).append("]");
    }

    // The three required positions, then the context when there is one.
    out.append("(").append(s());
    out.append(", ").append(p());
    out.append(", ").append(o());
    final TermNode context = c();
    if (context != null) {
        out.append(", ").append(context);
    }
    out.append(")");

    final VarNode sidVar = sid();
    if (sidVar != null) {
        out.append(" [sid=").append(sidVar).append("]");
    }

    final Scope scope = getScope();
    if (scope != null) {
        out.append(" [scope=").append(scope).append("]");
    }

    final VarNode countVar = getFastRangeCountVar();
    if (countVar != null) {
        out.append(" [fastRangeCount=").append(countVar).append("]");
    }

    final VarNode scanVar = getDistinctTermScanVar();
    if (scanVar != null) {
        out.append(" [distinctTermScan=").append(scanVar).append("]");
    }

    if (isOptional()) {
        out.append(" [optional]");
    }

    final int filterCount = getAttachedJoinFilters().size();
    if (filterCount != 0) {
        out.append(" [#filters=").append(filterCount).append("]");
    }

    return out.toString();

}
/**
 * {@inheritDoc}
 * <p>
 * A statement pattern reports itself as reorderable exactly when it is not
 * marked optional (see {@link #isOptional()}).
 *
 * @see com.bigdata.rdf.sparql.ast.IReorderableNode#isReorderable()
 */
@Override
public boolean isReorderable() {
return !isOptional();
}
/**
 * {@inheritDoc}
 * <p>
 * The value is read from the
 * <code>AST2BOpBase.Annotations.ESTIMATED_CARDINALITY</code> annotation,
 * with <code>-1</code> supplied as the default.
 *
 * @param opt
 *            Not used by this implementation.
 *
 * @see com.bigdata.rdf.sparql.ast.IReorderableNode#getEstimatedCardinality(StaticOptimizer)
 */
@Override
public long getEstimatedCardinality(StaticOptimizer opt) {
    final long defaultValue = -1L;
    return getProperty(AST2BOpBase.Annotations.ESTIMATED_CARDINALITY,
            defaultValue);
}
@Override
public Set> getRequiredBound(StaticAnalysis sa) {
return new HashSet>();
}
@Override
public Set> getDesiredBound(StaticAnalysis sa) {
return sa.getSpannedVariables(this, true, new HashSet>());
}
}