// com.bigdata.rdf.sparql.ast.ArbitraryLengthPathNode Maven / Gradle / Ivy
/**
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved.
Contact:
SYSTAP, LLC DBA Blazegraph
2501 Calvert ST NW #106
Washington, DC 20008
licenses@blazegraph.com
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
package com.bigdata.rdf.sparql.ast;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.log4j.Logger;
import com.bigdata.bop.BOp;
import com.bigdata.bop.BOpUtility;
import com.bigdata.bop.Constant;
import com.bigdata.bop.IVariable;
import com.bigdata.bop.NV;
import com.bigdata.rdf.sparql.ast.PathNode.PathMod;
import com.bigdata.rdf.sparql.ast.eval.AST2BOpBase;
import com.bigdata.rdf.sparql.ast.optimizers.ASTALPServiceOptimizer;
import com.bigdata.rdf.sparql.ast.optimizers.StaticOptimizer;
/**
* A special kind of AST node that represents the SPARQL 1.1 arbitrary length
* path operator. This node has a single child arg - a JoinGroupNode consisting
* of other operators (the path) that must be run to fixed point. This node also
* has several annotations that define the schematics (the left and right sides
* and the lower and upper bounds) of the arbitrary length path.
*/
public class ArbitraryLengthPathNode
extends GroupMemberNodeBase
implements IBindingProducerNode, IReorderableNode {
/** Class logger; used for the debug output emitted while estimating cardinality. */
private static final transient Logger log = Logger.getLogger(ArbitraryLengthPathNode.class);
/**
 * Serialization version identifier for this class.
 */
private static final long serialVersionUID = 1L;
/**
 * Annotation keys understood by this node, in addition to the ones inherited
 * from {@link GroupNodeBase.Annotations}. Each key is the name of this
 * interface followed by a short suffix.
 */
public interface Annotations extends GroupNodeBase.Annotations {
    /**
     * The left term - can be a variable or a constant.
     */
    String LEFT_TERM = Annotations.class.getName() + ".leftTerm";
    /**
     * The right term - can be a variable or a constant.
     */
    String RIGHT_TERM = Annotations.class.getName() + ".rightTerm";
    /**
     * The left transitivity variable.
     */
    String TRANSITIVITY_VAR_LEFT = Annotations.class.getName() + ".transitivityVarLeft";
    /**
     * The right transitivity variable.
     */
    String TRANSITIVITY_VAR_RIGHT = Annotations.class.getName() + ".transitivityVarRight";
    /**
     * The lower bound on the number of rounds to run. Can be zero (0) or
     * one (1). A lower bound of zero is a special kind of path - the
     * Zero Length Path. A zero length path connects a vertex to itself
     * (in graph parlance). In the context of arbitrary length paths it
     * means we bind the input onto the output regardless of whether they
     * are actually connected via the path or not.
     */
    String LOWER_BOUND = Annotations.class.getName() + ".lowerBound";
    /**
     * The upper bound on the number of rounds to run. The value is stored
     * as a long; the path-modifier constructor of the enclosing class uses
     * {@link Long#MAX_VALUE} for paths without a finite upper bound.
     */
    String UPPER_BOUND = Annotations.class.getName() + ".upperBound";
    /**
     * The middle term - can be a variable or a constant.
     */
    String MIDDLE_TERM = Annotations.class.getName() + ".middleTerm";
    /**
     * The variable representing the visited edge. Bound using the binding
     * from the middle term. Only used by ALP service when projecting edges.
     */
    String EDGE_VAR = Annotations.class.getName() + ".edgeVar";
    /**
     * A set of intermediate variables (VarNodes) used by the ALP node
     * that should be dropped from the solutions after each round.
     */
    String DROP_VARS = Annotations.class.getName() + ".dropVars";
}
/**
 * Copy constructor: builds a new node from an existing
 * {@link ArbitraryLengthPathNode} by delegating to the superclass copy
 * constructor. Required for deep copying through
 * {@link com.bigdata.bop.BOpUtility} (see its {@code deepCopy} method).
 *
 * @param op the node to copy
 */
public ArbitraryLengthPathNode(ArbitraryLengthPathNode op) {
    super(op);
}
/**
 * Required shallow copy constructor.
 * <p>
 * After delegating to the superclass, the first child (see
 * {@link #subgroup()}) is flagged as being the sub-group of an ALP node.
 *
 * @param args the child operands; the first one must be the
 *             {@link JoinGroupNode} that holds the path
 * @param anns the annotations for this node
 */
public ArbitraryLengthPathNode(final BOp[] args, final Map<String, Object> anns) {
    super(args, anns);
    // Mark the path group so that it knows it lives inside an ALP node.
    subgroup().setSubgroupOfALPNode(true);
}
/**
 * Fully construct an arbitrary length path node with all required
 * annotations, deriving the bounds from a path modifier.
 * <p>
 * The lower bound is one for {@link PathMod#ONE_OR_MORE} and zero
 * otherwise; the upper bound is one for {@link PathMod#ZERO_OR_ONE} and
 * {@link Long#MAX_VALUE} otherwise. The node starts out with an empty
 * {@link JoinGroupNode} as its sub-group, and both transitivity variables
 * are registered as drop vars.
 *
 * @param left      the left term (variable or constant)
 * @param right     the right term (variable or constant)
 * @param tVarLeft  the left transitivity variable
 * @param tVarRight the right transitivity variable
 * @param mod       the path modifier that determines the bounds
 */
public ArbitraryLengthPathNode(final TermNode left, final TermNode right,
        final VarNode tVarLeft, final VarNode tVarRight,
        final PathMod mod) {
    this(new BOp[] { new JoinGroupNode() }, NV.asMap(
            new NV(Annotations.LEFT_TERM, left),
            new NV(Annotations.RIGHT_TERM, right),
            new NV(Annotations.TRANSITIVITY_VAR_LEFT, tVarLeft),
            new NV(Annotations.TRANSITIVITY_VAR_RIGHT, tVarRight),
            new NV(Annotations.LOWER_BOUND, mod == PathMod.ONE_OR_MORE ? 1L : 0L),
            new NV(Annotations.UPPER_BOUND, mod == PathMod.ZERO_OR_ONE ? 1L : Long.MAX_VALUE)
            ));
    // The transitivity variables are intermediate and are dropped after each round.
    final Set<VarNode> dropVars = new LinkedHashSet<>();
    dropVars.add(tVarLeft);
    dropVars.add(tVarRight);
    setProperty(Annotations.DROP_VARS, dropVars);
}
/**
 * Fully construct an arbitrary length path node with all required
 * annotations, using explicit bounds.
 * <p>
 * The node starts out with an empty {@link JoinGroupNode} as its sub-group,
 * and both transitivity variables are registered as drop vars.
 *
 * @param left       the left term (variable or constant)
 * @param right      the right term (variable or constant)
 * @param tVarLeft   the left transitivity variable
 * @param tVarRight  the right transitivity variable
 * @param lowerBound the lower bound on the number of rounds
 * @param upperBound the upper bound on the number of rounds
 */
public ArbitraryLengthPathNode(final TermNode left, final TermNode right,
        final VarNode tVarLeft, final VarNode tVarRight,
        final long lowerBound, final long upperBound) {
    /*
     * DROP_VARS is deliberately not part of the initial annotation map: it
     * is always assigned below, and the accessor expects a Set rather than
     * a List.
     */
    this(new BOp[] { new JoinGroupNode() }, NV.asMap(
            new NV(Annotations.LEFT_TERM, left),
            new NV(Annotations.RIGHT_TERM, right),
            new NV(Annotations.TRANSITIVITY_VAR_LEFT, tVarLeft),
            new NV(Annotations.TRANSITIVITY_VAR_RIGHT, tVarRight),
            new NV(Annotations.LOWER_BOUND, lowerBound),
            new NV(Annotations.UPPER_BOUND, upperBound)
            ));
    // The transitivity variables are intermediate and are dropped after each round.
    final Set<VarNode> dropVars = new LinkedHashSet<>();
    dropVars.add(tVarLeft);
    dropVars.add(tVarRight);
    setProperty(Annotations.DROP_VARS, dropVars);
}
/**
 * Returns the term on the left side of the path.
 *
 * @return the value of the required {@link Annotations#LEFT_TERM} annotation
 */
public TermNode left() {
    final Object leftTerm = super.getRequiredProperty(Annotations.LEFT_TERM);
    return (TermNode) leftTerm;
}
/**
 * Returns the term on the right side of the path.
 *
 * @return the value of the required {@link Annotations#RIGHT_TERM} annotation
 */
public TermNode right() {
    final Object rightTerm = super.getRequiredProperty(Annotations.RIGHT_TERM);
    return (TermNode) rightTerm;
}
/**
 * Returns the left transitivity variable.
 *
 * @return the value of the required
 *         {@link Annotations#TRANSITIVITY_VAR_LEFT} annotation
 */
public VarNode tVarLeft() {
    final Object leftVar = super.getRequiredProperty(Annotations.TRANSITIVITY_VAR_LEFT);
    return (VarNode) leftVar;
}
/**
 * Returns the right transitivity variable.
 *
 * @return the value of the required
 *         {@link Annotations#TRANSITIVITY_VAR_RIGHT} annotation
 */
public VarNode tVarRight() {
    final Object rightVar = super.getRequiredProperty(Annotations.TRANSITIVITY_VAR_RIGHT);
    return (VarNode) rightVar;
}
/**
 * Returns the lower bound on the number of rounds.
 *
 * @return the value of the required {@link Annotations#LOWER_BOUND} annotation
 */
public long lowerBound() {
    final Long lower = (Long) super.getRequiredProperty(Annotations.LOWER_BOUND);
    return lower;
}
/**
 * Returns the upper bound on the number of rounds.
 *
 * @return the value of the required {@link Annotations#UPPER_BOUND} annotation
 */
public long upperBound() {
    final Long upper = (Long) super.getRequiredProperty(Annotations.UPPER_BOUND);
    return upper;
}
/**
 * Returns the (optional) middle term.
 * <p>
 * The middle term may be a variable or a constant (see
 * {@link Annotations#MIDDLE_TERM} and {@link #setEdgeVar(VarNode, TermNode)}),
 * so the value is cast to {@link TermNode} rather than to {@link VarNode};
 * casting to {@code VarNode} would fail for a constant middle term.
 *
 * @return the value of the {@link Annotations#MIDDLE_TERM} annotation
 */
public TermNode middle() {
    return (TermNode) super.getProperty(Annotations.MIDDLE_TERM);
}
/**
 * Returns the (optional) edge variable.
 *
 * @return the value of the {@link Annotations#EDGE_VAR} annotation
 */
public VarNode edgeVar() {
    final Object edge = super.getProperty(Annotations.EDGE_VAR);
    return (VarNode) edge;
}
/**
 * Set the edge var and middle term. Only used by the ALP service when
 * projecting edges.
 *
 * @param edgeVar the variable stored under {@link Annotations#EDGE_VAR}
 * @param middle  the term stored under {@link Annotations#MIDDLE_TERM}
 */
public void setEdgeVar(final VarNode edgeVar, final TermNode middle) {
    setProperty(Annotations.MIDDLE_TERM, middle);
    setProperty(Annotations.EDGE_VAR, edgeVar);
}
/**
 * Set the vars that should be dropped after each round, replacing any
 * previously stored set.
 *
 * @param dropVars the variables to store under {@link Annotations#DROP_VARS}
 *
 * @see Annotations#DROP_VARS
 */
public void setDropVars(final Set<VarNode> dropVars) {
    super.setProperty(Annotations.DROP_VARS, dropVars);
}
/**
 * Add a var that should be dropped after each round.
 * <p>
 * If no drop-var set has been attached to this node yet, an empty one is
 * created first, so the call does not fail with a
 * {@link NullPointerException}.
 *
 * @param dropVar the variable to add
 *
 * @see Annotations#DROP_VARS
 */
public void addDropVar(final VarNode dropVar) {
    if (dropVars() == null) {
        // Lazily attach the set; insertion order is kept, as in the constructors.
        super.setProperty(Annotations.DROP_VARS, new LinkedHashSet<VarNode>());
    }
    dropVars().add(dropVar);
}
/**
 * Get the vars that should be dropped after each round.
 * <p>
 * The returned set is the instance stored on the node, not a copy.
 *
 * @return the value of the {@link Annotations#DROP_VARS} annotation
 *         (presumably {@code null} when the annotation is absent - confirm
 *         against {@code getProperty})
 *
 * @see Annotations#DROP_VARS
 */
@SuppressWarnings("unchecked")
public Set<VarNode> dropVars() {
    return (Set<VarNode>) super.getProperty(Annotations.DROP_VARS);
}
/**
 * Returns the sub-group holding the path, i.e. the first child of this node.
 */
public JoinGroupNode subgroup() {
    final Object firstChild = get(0);
    return (JoinGroupNode) firstChild;
}
/**
* Return the variables bound by the path - i.e. what this node will
* attempt to bind when run.
*/
public Set> getMaybeProducedBindings() {
final Set> producedBindings = getDefinitelyProducedBindings();
for (StatementPatternNode sp : subgroup().getStatementPatterns()) {
addProducedBinding(sp.s(), producedBindings);
addProducedBinding(sp.p(), producedBindings);
addProducedBinding(sp.o(), producedBindings);
addProducedBinding(sp.c(), producedBindings);
}
return producedBindings;
}
/**
* Return the variables bound by the path - i.e. what this node will
* attempt to bind when run.
*/
public Set> getDefinitelyProducedBindings() {
final Set> producedBindings = new LinkedHashSet>();
addVar(left(), producedBindings, true);
addVar(right(), producedBindings, true);
final VarNode edgeVar = edgeVar();
if (edgeVar != null) {
addProducedBinding(edgeVar, producedBindings);
}
return producedBindings;
}
/**
* Return the set of variables used by this ALP node (statement pattern
* terms and inside filters). Used to determine what needs to be projected
* into the op.
*/
public Set> getUsedVars() {
final Set> used = getDefinitelyProducedBindings();
for (StatementPatternNode sp : subgroup().getStatementPatterns()) {
addUsedVar(sp.s(), used);
addUsedVar(sp.p(), used);
addUsedVar(sp.o(), used);
addUsedVar(sp.c(), used);
for (FilterNode filter : sp.getAttachedJoinFilters()) {
final Iterator it = BOpUtility.preOrderIteratorWithAnnotations(filter);
while (it.hasNext()) {
final BOp bop = it.next();
if (bop instanceof TermNode) {
addUsedVar((TermNode) bop, used);
}
}
}
}
for (FilterNode filter : subgroup().getChildren(FilterNode.class)) {
final Iterator it = BOpUtility.preOrderIteratorWithAnnotations(filter);
while (it.hasNext()) {
final BOp bop = it.next();
if (bop instanceof TermNode) {
addUsedVar((TermNode) bop, used);
}
}
}
return used;
}
private void addUsedVar(final TermNode t,
final Set> vars) {
addVar(t, vars, true);
}
private void addProducedBinding(final TermNode t,
final Set> producedBindings) {
addVar(t, producedBindings, false);
}
/**
* This handles the special case where we've wrapped a Var with a Constant
* because we know it's bound, perhaps by the exogenous bindings. If we
* don't handle this case then we get the join vars wrong.
*
* @see StaticAnalysis._getJoinVars
*/
private void addVar(final TermNode t,
final Set> producedBindings, final boolean addAnonymous) {
if (t instanceof VarNode) {
if (addAnonymous || !((VarNode) t).isAnonymous()) {
producedBindings.add(((VarNode) t).getValueExpression());
}
} else if (t instanceof ConstantNode) {
final ConstantNode cNode = (ConstantNode) t;
final Constant> c = (Constant>) cNode.getValueExpression();
final IVariable> var = c.getVar();
if (var != null) {
producedBindings.add(var);
}
}
}
/**
 * Renders this node at the given indentation level: the simple class name
 * with the left and right terms, the sub-group one level deeper inside
 * braces, and the estimated cardinality annotation when it is present.
 */
@Override
public String toString(int indent) {
    final String pad = indent(indent);
    final StringBuilder out = new StringBuilder("\n");
    out.append(pad);
    out.append(getClass().getSimpleName());
    out.append("(left=");
    out.append(left());
    out.append(", right=");
    out.append(right());
    out.append(") {");
    out.append(subgroup().toString(indent + 1));
    out.append("\n");
    out.append(pad);
    out.append("}");
    final Long rangeCount = (Long) getProperty(AST2BOpBase.Annotations.ESTIMATED_CARDINALITY);
    if (rangeCount != null) {
        out.append(" AST2BOpBase.estimatedCardinality=");
        out.append(rangeCount.toString());
    }
    return out.toString();
}
/**
 * Reports whether this node may be re-ordered within its group.
 *
 * @return always {@code true}
 */
@Override
public boolean isReorderable() {
    // Earlier, cardinality-based variant (kept for reference):
    // final long estCard = getEstimatedCardinality(null);
    //
    // return estCard >= 0 && estCard < Long.MAX_VALUE;
    /*
     * I think it's always better to allow this thing to be re-ordered.
     * If it shares join variables with anything, that is usually going
     * to produce a better order, regardless of whether we can calculate
     * the true cardinality of the underlying subgroup. Reordering by
     * variable-sharing is better than not re-ordering it at all.
     */
    return true;
}
/**
 * Estimates the cardinality of this arbitrary length path node.
 * <p>
 * Steps visible in this method:
 * <ol>
 * <li>When the lower bound is zero, compute a "zero match adjustment" from
 * the number of end points that are variables: none - 1 if both constants
 * have equal values, otherwise 0; one - 1; both - {@code Long.MAX_VALUE / 2}.</li>
 * <li>If the sub-group carries an estimated cardinality below
 * {@code Long.MAX_VALUE}, return it plus the adjustment.</li>
 * <li>Otherwise sum the estimated cardinalities of the statement patterns
 * in the sub-group that carry the {@code PATH_EXPR} query hint; if the sum
 * is positive, return it plus the adjustment.</li>
 * <li>Otherwise return {@code Long.MAX_VALUE / 2}.</li>
 * </ol>
 * NOTE(review): the statement that starts with
 * {@code if (group.arity() == 1 ...} is truncated in this copy of the file
 * (the rest of its condition and the declarations of {@code result} and
 * {@code it} are missing); restore it from the upstream source before
 * relying on this description.
 *
 * @param opt the static optimizer; not referenced in the visible code
 * @return the estimated cardinality
 */
@Override
public long getEstimatedCardinality(final StaticOptimizer opt) {
    final JoinGroupNode group = subgroup();
    long zeroMatchAdjustment = 0;
    /*
     * if lowerBound() is zero, and both ?s and ?o are
     * variables then we (notionally) match
     * any subject or object in the triple store,
     * see:
     *
     * http://www.w3.org/TR/2013/REC-sparql11-query-20130321/#defn_evalPP_ZeroOrOnePath
     *
     * Despite this not being implemented, the optimizer does better
     * knowing this correctly.
     */
    if (lowerBound() == 0 ) {
        // NOTE(review): despite its name, this counts the end points that are VARIABLES.
        int fixedCount = (left() instanceof VarNode ? 1 : 0) + (right() instanceof VarNode ? 1 : 0);
        switch (fixedCount) {
        case 0:
            // Both ends are constants: a zero length path matches only if they are equal.
            zeroMatchAdjustment = left().getValue().equals(right().getValue())?1:0;
            break;
        case 1:
            // One end is a constant: the zero length path adds a single solution.
            zeroMatchAdjustment = 1;
            break;
        case 2:
            // Both ends are variables: use a very large stand-in value.
            zeroMatchAdjustment = Long.MAX_VALUE / 2;
            // The following is more accurate, but more expensive and unnecessary.
            // db.getURICount() + db.getBNodeCount();
            // System.err.println("adj: "+zeroMatchAdjustment);
            break;
        }
    }
    if (log.isDebugEnabled()) {
        log.debug("zma: " + zeroMatchAdjustment);
    }
    /*
     * Normal simple ALP node will have the cardinality on the group.
     */
    final long groupCard = group.getProperty(
    AST2BOpBase.Annotations.ESTIMATED_CARDINALITY,
    Long.MAX_VALUE);
    if (groupCard < Long.MAX_VALUE) {
        // NOTE(review): this sum can overflow when groupCard is large and the
        // adjustment is Long.MAX_VALUE / 2 - confirm whether that can happen.
        final long result = groupCard + zeroMatchAdjustment;
        if (log.isDebugEnabled()) {
            log.debug("reported cardinality: " + result);
        }
        return result;
    }
    /*
     * Question mark ALP nodes with single statement pattern inherit the
     * cardinality of the inner statement pattern, in case there is a
     * single statement pattern
     */
    // NOTE(review): the next statement is truncated in this copy of the file;
    // see the method comment.
    if (group.arity() == 1 && upperBound()>=1 && upperBound() it =
    BOpUtility.preOrderIteratorWithAnnotations(group);
    while (it.hasNext()) {
        final BOp bop = it.next();
        if (log.isDebugEnabled()) {
            log.debug("considering:\n"+bop);
        }
        // Only statement patterns are of interest.
        if (!(bop instanceof StatementPatternNode)) {
            if (log.isDebugEnabled()) {
                log.debug("continuing");
            }
            continue;
        }
        final StatementPatternNode sp = (StatementPatternNode) bop;
        // ... and only those carrying the PATH_EXPR query hint.
        if (!sp.getQueryHintAsBoolean(ASTALPServiceOptimizer.PATH_EXPR, false)) {
            if (log.isDebugEnabled()) {
                log.debug("continuing");
            }
            continue;
        }
        // Skip path expressions without a cardinality estimate.
        if (sp.getProperty(AST2BOpBase.Annotations.ESTIMATED_CARDINALITY) == null) {
            if (log.isDebugEnabled()) {
                log.debug("continuing");
            }
            continue;
        }
        final long estCard = sp.getProperty(
        AST2BOpBase.Annotations.ESTIMATED_CARDINALITY,
        Long.MAX_VALUE);
        // Once the total is Long.MAX_VALUE it stays there for the rest of the loop.
        if (estCard == Long.MAX_VALUE) {
            result = Long.MAX_VALUE;
        }
        if (result == Long.MAX_VALUE) {
            if (log.isDebugEnabled()) {
                log.debug("continuing");
            }
            continue;
        }
        result += estCard;
    }
    if (result > 0) {
        if (log.isDebugEnabled()) {
            log.debug("found a path expression");
        }
        result += zeroMatchAdjustment;
        if (log.isDebugEnabled()) {
            log.debug("reported cardinality: " + result);
        }
        return result;
    }
    if (log.isDebugEnabled()) {
        log.debug("could not find a path expr");
    }
    /*
     * Must be a complex alp like: ?x (/)* ?y
     */
    /*
     * We can't be certain of the exact cardinality, but we know if it
     * shares variables with an ancestor it will probably still do better
     * than a statement pattern with known cardinality that does not share
     * any variables.
     */
    return Long.MAX_VALUE / 2;
}
@Override
public Set> getRequiredBound(StaticAnalysis sa) {
return new HashSet>();
}
@Override
public Set> getDesiredBound(StaticAnalysis sa) {
return sa.getSpannedVariables(this, true, new HashSet>());
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy