All downloads are free. The search and download functionality uses the official Maven repository.

com.bigdata.rdf.sparql.ast.ArbitraryLengthPathNode Maven / Gradle / Ivy

/**

Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016.  All rights reserved.

Contact:
     SYSTAP, LLC DBA Blazegraph
     2501 Calvert ST NW #106
     Washington, DC 20008
     licenses@blazegraph.com

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/
package com.bigdata.rdf.sparql.ast;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.log4j.Logger;

import com.bigdata.bop.BOp;
import com.bigdata.bop.BOpUtility;
import com.bigdata.bop.Constant;
import com.bigdata.bop.IVariable;
import com.bigdata.bop.NV;
import com.bigdata.rdf.sparql.ast.PathNode.PathMod;
import com.bigdata.rdf.sparql.ast.eval.AST2BOpBase;
import com.bigdata.rdf.sparql.ast.optimizers.ASTALPServiceOptimizer;
import com.bigdata.rdf.sparql.ast.optimizers.StaticOptimizer;

/**
 * A special kind of AST node that represents the SPARQL 1.1 arbitrary length
 * path operator. This node has a single child arg - a JoinGroupNode consisting
 * of other operators (the path) that must be run to fixed point. This node also
 * has several annotations that define the schematics (the left and right sides
 * and the lower and upper bounds) of the arbitrary length path.
 */
public class ArbitraryLengthPathNode 
	extends GroupMemberNodeBase 
		implements IBindingProducerNode, IReorderableNode {

    /**
     * Class logger. A {@code static} field is never part of an instance's
     * serialized form, so no {@code transient} modifier is needed on it.
     */
    private static final Logger log = Logger.getLogger(ArbitraryLengthPathNode.class);

    /**
     * Serialization version identifier.
     */
    private static final long serialVersionUID = 1L;

    /**
     * Annotation keys used by {@link ArbitraryLengthPathNode}. Every key is
     * built from {@code Annotations.class.getName()} plus a short suffix.
     */
    public interface Annotations extends GroupNodeBase.Annotations {

        /**
         * The left term - can be a variable or a constant. Read back by
         * {@link ArbitraryLengthPathNode#left()} as a required property.
         */
        String LEFT_TERM = Annotations.class.getName() + ".leftTerm";

        /**
         * The right term - can be a variable or a constant. Read back by
         * {@link ArbitraryLengthPathNode#right()} as a required property.
         */
        String RIGHT_TERM = Annotations.class.getName() + ".rightTerm";

        /**
         * The left transitivity variable (a {@link VarNode}).
         */
        String TRANSITIVITY_VAR_LEFT = Annotations.class.getName() + ".transitivityVarLeft";

        /**
         * The right transitivity variable (a {@link VarNode}).
         */
        String TRANSITIVITY_VAR_RIGHT = Annotations.class.getName() + ".transitivityVarRight";
        
        /**
         * The lower bound on the number of rounds to run, stored as a
         * {@link Long}. Can be zero (0) or one (1). A lower bound of zero is
         * a special kind of path - the Zero Length Path. A zero length path
         * connects a vertex to itself (in graph parlance). In the context of
         * arbitrary length paths it means we bind the input onto the output
         * regardless of whether they are actually connected via the path or
         * not.
         */
        String LOWER_BOUND =  Annotations.class.getName() + ".lowerBound";

        /**
         * The upper bound on the number of rounds to run, stored as a
         * {@link Long}. The {@link PathMod} based constructor stores
         * {@link Long#MAX_VALUE} here for every modifier other than
         * {@code ZERO_OR_ONE}.
         */
        String UPPER_BOUND =  Annotations.class.getName() + ".upperBound";
        
        /**
         * The (optional) middle term - can be a variable or a constant. Set
         * together with {@link #EDGE_VAR} by
         * {@link ArbitraryLengthPathNode#setEdgeVar(VarNode, TermNode)}.
         */
        String MIDDLE_TERM = Annotations.class.getName() + ".middleTerm";

        /**
         * The (optional) variable representing the visited edge. Bound using
         * the binding from the middle term. Only used by the ALP service when
         * projecting edges.
         */
        String EDGE_VAR = Annotations.class.getName() + ".edgeVar";
        
        /**
         * A set of intermediate variables (VarNodes) used by the ALP node
         * that should be dropped from the solutions after each round. The
         * convenience constructors seed it with the two transitivity
         * variables.
         */
        String DROP_VARS = Annotations.class.getName() + ".dropVars";
        
    }
	
    /**
     * Copy constructor; simply hands the source node to the superclass copy
     * constructor. Required for
     * {@link com.bigdata.bop.BOpUtility#deepCopy(FilterNode)}.
     *
     * @param op
     *            the node to copy.
     */
    public ArbitraryLengthPathNode(final ArbitraryLengthPathNode op) {
        super(op);
    }

    /**
     * Required shallow copy constructor.
     * <p>
     * After delegating to the superclass, the first child is fetched through
     * {@link #subgroup()} and flagged as the sub-group of an ALP node, so
     * {@code args[0]} must be a {@link JoinGroupNode}.
     *
     * @param args
     *            the child operators; the first one is the path group.
     * @param anns
     *            the annotations of the node, keyed by annotation name.
     */
    public ArbitraryLengthPathNode(final BOp[] args, final Map<String, Object> anns) {

        super(args, anns);

        subgroup().setSubgroupOfALPNode(true);
    }

    /**
     * Fully construct an arbitrary length path node with all required
     * annotations, deriving the bounds from a path modifier.
     * <p>
     * The lower bound is 1 for {@link PathMod#ONE_OR_MORE} and 0 otherwise;
     * the upper bound is 1 for {@link PathMod#ZERO_OR_ONE} and
     * {@link Long#MAX_VALUE} otherwise. The node starts with a new, empty
     * {@link JoinGroupNode} as its only child, and both transitivity
     * variables are registered as drop vars.
     *
     * @param left
     *            the left term of the path.
     * @param right
     *            the right term of the path.
     * @param tVarLeft
     *            the left transitivity variable.
     * @param tVarRight
     *            the right transitivity variable.
     * @param mod
     *            the path modifier that determines the bounds.
     */
    public ArbitraryLengthPathNode(final TermNode left, final TermNode right,
            final VarNode tVarLeft, final VarNode tVarRight,
            final PathMod mod) {
        this(new BOp[] { new JoinGroupNode() }, NV.asMap(
                new NV(Annotations.LEFT_TERM, left),
                new NV(Annotations.RIGHT_TERM, right),
                new NV(Annotations.TRANSITIVITY_VAR_LEFT, tVarLeft),
                new NV(Annotations.TRANSITIVITY_VAR_RIGHT, tVarRight),
                new NV(Annotations.LOWER_BOUND, mod == PathMod.ONE_OR_MORE ? 1L : 0L),
                new NV(Annotations.UPPER_BOUND, mod == PathMod.ZERO_OR_ONE ? 1L : Long.MAX_VALUE)
                ));

        // Insertion-ordered so that the drop vars are reported deterministically.
        final Set<VarNode> dropVars = new LinkedHashSet<>();
        dropVars.add(tVarLeft);
        dropVars.add(tVarRight);
        setProperty(Annotations.DROP_VARS, dropVars);
    }
    
    /**
     * Fully construct an arbitrary length path node with all required
     * annotations, using explicit bounds.
     * <p>
     * The node starts with a new, empty {@link JoinGroupNode} as its only
     * child. The drop vars annotation is stored once, as an insertion-ordered
     * set holding the two transitivity variables, which is the type
     * {@link #dropVars()} expects.
     *
     * @param left
     *            the left term of the path.
     * @param right
     *            the right term of the path.
     * @param tVarLeft
     *            the left transitivity variable.
     * @param tVarRight
     *            the right transitivity variable.
     * @param lowerBound
     *            the lower bound on the number of rounds.
     * @param upperBound
     *            the upper bound on the number of rounds.
     */
    public ArbitraryLengthPathNode(final TermNode left, final TermNode right,
            final VarNode tVarLeft, final VarNode tVarRight,
            final long lowerBound, final long upperBound) {
        this(new BOp[] { new JoinGroupNode() }, NV.asMap(
                new NV(Annotations.LEFT_TERM, left),
                new NV(Annotations.RIGHT_TERM, right),
                new NV(Annotations.TRANSITIVITY_VAR_LEFT, tVarLeft),
                new NV(Annotations.TRANSITIVITY_VAR_RIGHT, tVarRight),
                // Mutable on purpose: addDropVar() appends to this set later.
                new NV(Annotations.DROP_VARS,
                        new LinkedHashSet<VarNode>(Arrays.asList(tVarLeft, tVarRight))),
                new NV(Annotations.LOWER_BOUND, lowerBound),
                new NV(Annotations.UPPER_BOUND, upperBound)
                ));
    }
    
    /**
     * The term on the left side of the path.
     *
     * @return the value of the required {@link Annotations#LEFT_TERM}
     *         annotation.
     */
    public TermNode left() {
        final Object term = super.getRequiredProperty(Annotations.LEFT_TERM);
        return (TermNode) term;
    }
    
    /**
     * The term on the right side of the path.
     *
     * @return the value of the required {@link Annotations#RIGHT_TERM}
     *         annotation.
     */
    public TermNode right() {
        final Object term = super.getRequiredProperty(Annotations.RIGHT_TERM);
        return (TermNode) term;
    }
    
    /**
     * The left transitivity variable.
     *
     * @return the value of the required
     *         {@link Annotations#TRANSITIVITY_VAR_LEFT} annotation.
     */
    public VarNode tVarLeft() {
        final Object var = super.getRequiredProperty(Annotations.TRANSITIVITY_VAR_LEFT);
        return (VarNode) var;
    }
    
    /**
     * The right transitivity variable.
     *
     * @return the value of the required
     *         {@link Annotations#TRANSITIVITY_VAR_RIGHT} annotation.
     */
    public VarNode tVarRight() {
        final Object var = super.getRequiredProperty(Annotations.TRANSITIVITY_VAR_RIGHT);
        return (VarNode) var;
    }
    
    /**
     * The lower bound on the number of rounds.
     *
     * @return the unboxed value of the required
     *         {@link Annotations#LOWER_BOUND} annotation.
     */
    public long lowerBound() {
        final Long bound = (Long) super.getRequiredProperty(Annotations.LOWER_BOUND);
        return bound;
    }
    
    /**
     * The upper bound on the number of rounds.
     *
     * @return the unboxed value of the required
     *         {@link Annotations#UPPER_BOUND} annotation.
     */
    public long upperBound() {
        final Long bound = (Long) super.getRequiredProperty(Annotations.UPPER_BOUND);
        return bound;
    }
    
    /**
     * Returns the (optional) middle term.
     * <p>
     * The middle term may be a variable or a constant, so the annotation is
     * cast to {@link TermNode}, the declared return type. A narrower cast to
     * {@link VarNode} would throw {@link ClassCastException} for a constant
     * middle term.
     *
     * @return the middle term, or <code>null</code> if none has been set.
     */
    public TermNode middle() {
        return (TermNode) super.getProperty(Annotations.MIDDLE_TERM);
    }
    
    /**
     * The (optional) variable that represents the visited edge.
     *
     * @return the value of the {@link Annotations#EDGE_VAR} annotation, or
     *         <code>null</code> when it has not been set.
     */
    public VarNode edgeVar() {
        final Object var = super.getProperty(Annotations.EDGE_VAR);
        return (VarNode) var;
    }
    
    /**
     * Records the middle term and the edge variable on this node. Only used
     * by the ALP service when projecting edges.
     *
     * @param edgeVar
     *            stored under {@link Annotations#EDGE_VAR}.
     * @param middle
     *            stored under {@link Annotations#MIDDLE_TERM}.
     */
    public void setEdgeVar(final VarNode edgeVar, final TermNode middle) {
        // The middle term is written first, then the edge variable.
        this.setProperty(Annotations.MIDDLE_TERM, middle);
        this.setProperty(Annotations.EDGE_VAR, edgeVar);
    }
    
    /**
     * Set the vars that should be dropped after each round. The given set is
     * stored as-is (no copy is made), so later calls to
     * {@link #addDropVar(VarNode)} modify it.
     *
     * @param dropVars
     *            the variables to drop.
     *
     * @see Annotations#DROP_VARS
     */
    public void setDropVars(final Set<VarNode> dropVars) {
        super.setProperty(Annotations.DROP_VARS, dropVars);
    }
    
    /**
     * Add a var that should be dropped after each round.
     * <p>
     * The drop vars annotation is optional: a node created through the
     * <code>(BOp[], Map)</code> constructor may not carry it. In that case an
     * empty insertion-ordered set is installed first instead of failing with
     * a {@link NullPointerException}.
     *
     * @param dropVar
     *            the variable to drop.
     *
     * @see Annotations#DROP_VARS
     */
    @SuppressWarnings("unchecked")
    public void addDropVar(final VarNode dropVar) {
        Set<VarNode> vars = dropVars();
        if (vars == null) {
            vars = new LinkedHashSet<VarNode>();
            setDropVars(vars);
        }
        vars.add(dropVar);
    }
    
    /**
     * Get the vars that should be dropped after each round.
     *
     * @return the set stored under {@link Annotations#DROP_VARS} (the live
     *         set, not a copy), or <code>null</code> if the annotation is not
     *         present.
     *
     * @see Annotations#DROP_VARS
     */
    @SuppressWarnings("unchecked")
    public Set<VarNode> dropVars() {
        return (Set<VarNode>) super.getProperty(Annotations.DROP_VARS);
    }

    /**
     * The group holding the path to be evaluated, i.e. the first (index 0)
     * child of this node.
     *
     * @return the child at index zero, cast to {@link JoinGroupNode}.
     */
    public JoinGroupNode subgroup() {
        final BOp firstChild = get(0);
        return (JoinGroupNode) firstChild;
    }
    
    /**
     * Return the variables bound by the path - i.e. what this node will
     * attempt to bind when run.
     */
    public Set> getMaybeProducedBindings() {

        final Set> producedBindings = getDefinitelyProducedBindings();

        for (StatementPatternNode sp : subgroup().getStatementPatterns()) {
            addProducedBinding(sp.s(), producedBindings);
            addProducedBinding(sp.p(), producedBindings);
            addProducedBinding(sp.o(), producedBindings);
            addProducedBinding(sp.c(), producedBindings);
        }
        
        return producedBindings;

    }
    
    /**
     * Return the variables bound by the path - i.e. what this node will
     * attempt to bind when run.
     */
    public Set> getDefinitelyProducedBindings() {

        final Set> producedBindings = new LinkedHashSet>();

        addVar(left(), producedBindings, true);
        addVar(right(), producedBindings, true);
        
        final VarNode edgeVar = edgeVar();
        if (edgeVar != null) {
            addProducedBinding(edgeVar, producedBindings);
        }
        
        return producedBindings;

    }
    
    /**
     * Return the set of variables used by this ALP node (statement pattern
     * terms and inside filters). Used to determine what needs to be projected
     * into the op.
     */
    public Set> getUsedVars() {
        
        final Set> used = getDefinitelyProducedBindings();
        
        for (StatementPatternNode sp : subgroup().getStatementPatterns()) {
            addUsedVar(sp.s(), used);
            addUsedVar(sp.p(), used);
            addUsedVar(sp.o(), used);
            addUsedVar(sp.c(), used);
            for (FilterNode filter : sp.getAttachedJoinFilters()) {
                final Iterator it = BOpUtility.preOrderIteratorWithAnnotations(filter);
                while (it.hasNext()) {
                    final BOp bop = it.next();
                    if (bop instanceof TermNode) {
                        addUsedVar((TermNode) bop, used);
                    }
                }
            }
        }
        for (FilterNode filter : subgroup().getChildren(FilterNode.class)) {
            final Iterator it = BOpUtility.preOrderIteratorWithAnnotations(filter);
            while (it.hasNext()) {
                final BOp bop = it.next();
                if (bop instanceof TermNode) {
                    addUsedVar((TermNode) bop, used);
                }
            }
        }
        
        return used;
        
    }

    private void addUsedVar(final TermNode t, 
            final Set> vars) {
        
        addVar(t, vars, true);
        
    }
    
    private void addProducedBinding(final TermNode t, 
            final Set> producedBindings) {
        
        addVar(t, producedBindings, false);
        
    }
    
    /**
     * This handles the special case where we've wrapped a Var with a Constant
     * because we know it's bound, perhaps by the exogenous bindings.  If we
     * don't handle this case then we get the join vars wrong.
     * 
     * @see StaticAnalysis._getJoinVars
     */
    private void addVar(final TermNode t, 
            final Set> producedBindings, final boolean addAnonymous) {
    	
    	if (t instanceof VarNode) {
    		
    	    if (addAnonymous || !((VarNode) t).isAnonymous()) {
    	        producedBindings.add(((VarNode) t).getValueExpression());
    	    }
            
    	} else if (t instanceof ConstantNode) {
    		
    		final ConstantNode cNode = (ConstantNode) t;
    		final Constant c = (Constant) cNode.getValueExpression();
    		final IVariable var = c.getVar();
    		if (var != null) {
    			producedBindings.add(var);
    		}
    		
    	}
    	
    }

	/**
	 * Renders this node on a new line at the given indentation level: the
	 * simple class name, the left and right terms, the subgroup one level
	 * deeper between braces and, when present, the estimated cardinality
	 * annotation.
	 *
	 * @param indent
	 *            the indentation level.
	 */
	@Override
	public String toString(int indent) {

		final String pad = indent(indent);

		final StringBuilder out = new StringBuilder();
		out.append("\n");
		out.append(pad);
		out.append(getClass().getSimpleName());
		out.append("(left=");
		out.append(left());
		out.append(", right=");
		out.append(right());
		out.append(") {");
		out.append(subgroup().toString(indent + 1));
		out.append("\n");
		out.append(pad);
		out.append("}");

		final Long cardinality = (Long) getProperty(AST2BOpBase.Annotations.ESTIMATED_CARDINALITY);
		if (cardinality != null) {
			out.append(" AST2BOpBase.estimatedCardinality=");
			out.append(cardinality.toString());
		}

		return out.toString();

	}

	/**
	 * Reports that this node may always be re-ordered.
	 * <p>
	 * An earlier variant, since disabled, only allowed re-ordering when the
	 * estimated cardinality was known, i.e. in the range
	 * <code>[0, Long.MAX_VALUE)</code>. The author's reasoning for dropping
	 * that restriction: if the node shares join variables with anything, that
	 * is usually going to produce a better order, regardless of whether the
	 * true cardinality of the underlying subgroup can be calculated.
	 * Re-ordering by variable sharing is better than not re-ordering at all.
	 *
	 * @return always <code>true</code>.
	 */
	@Override
	public boolean isReorderable() {
		final boolean alwaysReorderable = true;
		return alwaysReorderable;
	}

	/**
	 * Estimates the cardinality of this path node.
	 * <p>
	 * Steps visible in this method:
	 * <ol>
	 * <li>compute a "zero match adjustment" when the lower bound is zero;</li>
	 * <li>if the subgroup carries an estimated cardinality annotation below
	 * {@link Long#MAX_VALUE}, return it plus the adjustment;</li>
	 * <li>otherwise walk the subgroup and sum the estimated cardinalities of
	 * the statement patterns flagged with the
	 * {@link ASTALPServiceOptimizer#PATH_EXPR} query hint, returning that sum
	 * plus the adjustment if it is positive;</li>
	 * <li>otherwise return <code>Long.MAX_VALUE / 2</code>.</li>
	 * </ol>
	 * NOTE(review): part of the source text between steps 2 and 3 is missing
	 * (see the note inside the method), so this description is incomplete.
	 *
	 * @param opt
	 *            the static optimizer; not referenced by the visible parts of
	 *            this method.
	 * @return the estimated cardinality.
	 */
	@Override
	public long getEstimatedCardinality(final StaticOptimizer opt) {
		
		final JoinGroupNode group = subgroup();
		
		long zeroMatchAdjustment = 0;
		/*
		 * if lowerBound() is zero, and both ?s and ?o are
		 * variables then we (notionally) match
		 * any subject or object in the triple store,
		 * see:
		 * 
		 * http://www.w3.org/TR/2013/REC-sparql11-query-20130321/#defn_evalPP_ZeroOrOnePath
		 * 
		 * Despite this not being implemented, the optimizer does better
		 * knowing this correctly.
		 */
		if (lowerBound() == 0 ) {
			// Despite its name, fixedCount is the number of end points that
			// are variables (VarNode), not the number that are fixed.
			int fixedCount = (left() instanceof VarNode ? 1 : 0) + (right() instanceof VarNode ? 1 : 0);
			switch (fixedCount) {
			case 0:
				// Neither end is a variable: the zero length path matches
				// only if both ends have the same value.
				zeroMatchAdjustment = left().getValue().equals(right().getValue())?1:0;
				break;
			case 1:
				// Exactly one end is a variable: one extra solution.
				zeroMatchAdjustment = 1;
				break;
			case 2:
				// Both ends are variables: use a very large placeholder.
				zeroMatchAdjustment =  Long.MAX_VALUE / 2;
				// The following is more accurate, but more expensive and unnecessary.
				// db.getURICount() + db.getBNodeCount(); 
//				System.err.println("adj: "+zeroMatchAdjustment);
				break;
			}
		}
		
        if (log.isDebugEnabled()) {
            log.debug("zma: " + zeroMatchAdjustment);
        }
		
        /*
         * Normal simple ALP node will have the cardinality on the group.
         * Long.MAX_VALUE is the default and means "no usable estimate".
         */
        final long groupCard = group.getProperty(
              AST2BOpBase.Annotations.ESTIMATED_CARDINALITY, 
              Long.MAX_VALUE);
        
        if (groupCard < Long.MAX_VALUE) {
            
            // NOTE(review): this addition is unchecked. With an adjustment of
            // Long.MAX_VALUE / 2 a sufficiently large groupCard would
            // overflow - confirm the expected range of the annotation.
            final long result = groupCard + zeroMatchAdjustment;
            if (log.isDebugEnabled()) {
                log.debug("reported cardinality: " + result);
            }
            return result;
            
        }
        
        /*
         * Question mark ALP nodes with single statement pattern inherit the
         * cardinality of the inner statement pattern, in case there is a
         * single statement pattern 
         */
        // NOTE(review): the next statement is incomplete and does not compile
        // as written. The text between "upperBound()" and "it =" appears to
        // have been lost, presumably everything between a '<' and a later
        // '>'. The missing part must contain at least the rest of this
        // condition and its body, plus the declarations of the "result"
        // accumulator and the "it" iterator used below. Restore it from the
        // upstream source rather than guessing.
        if (group.arity() == 1 && upperBound()>=1 && upperBound() it = 
                BOpUtility.preOrderIteratorWithAnnotations(group);

        // Accumulate the estimated cardinalities of the path expression
        // statement patterns found anywhere below the subgroup.
        while (it.hasNext()) {
            final BOp bop = it.next();
            
            if (log.isDebugEnabled()) {
                log.debug("considering:\n"+bop);
            }
            
            // Only statement patterns contribute.
            if (!(bop instanceof StatementPatternNode)) {
                if (log.isDebugEnabled()) {
                    log.debug("continuing");
                }
                continue;
            }
            final StatementPatternNode sp = (StatementPatternNode) bop;
            
            // ... and only those flagged with the PATH_EXPR query hint.
            if (!sp.getQueryHintAsBoolean(ASTALPServiceOptimizer.PATH_EXPR, false)) {
                if (log.isDebugEnabled()) {
                    log.debug("continuing");
                }
                continue;
            }
            
            // ... and only those that carry an estimated cardinality.
            if (sp.getProperty(AST2BOpBase.Annotations.ESTIMATED_CARDINALITY) == null) {
                if (log.isDebugEnabled()) {
                    log.debug("continuing");
                }
                continue;
            }
                
            final long estCard = sp.getProperty(
                    AST2BOpBase.Annotations.ESTIMATED_CARDINALITY, 
                    Long.MAX_VALUE);
            
            // A single unbounded estimate makes the whole sum unbounded.
            if (estCard == Long.MAX_VALUE) {
                result = Long.MAX_VALUE;
            }
                
            // Once the sum has been pinned to Long.MAX_VALUE nothing more is
            // added to it, so it stays there for the rest of the loop.
            if (result == Long.MAX_VALUE) {
                if (log.isDebugEnabled()) {
                    log.debug("continuing");
                }
                continue;
            }

            result += estCard;
            
        }
        
        if (result > 0) {
            
            if (log.isDebugEnabled()) {
                log.debug("found a path expression");
            }
            
            // NOTE(review): unchecked addition, same overflow concern as for
            // the group cardinality above.
            result += zeroMatchAdjustment;
            
            if (log.isDebugEnabled()) {
                log.debug("reported cardinality: " + result);
            }
            
            return result;
            
        }
        
        if (log.isDebugEnabled()) {
            log.debug("could not find a path expr");
        }
        
        /*
         * Must be a complex alp, i.e. a sequence of predicates inside the
         * repetition. (The example in the original comment has lost its
         * predicate names: "?x (/)* ?y".)
         */
	    /*
	     * We can't be certain of the exact cardinality, but we know if it 
	     * shares variables with an ancestor it will probably still do better
	     * than a statement pattern with known cardinality that does not share
	     * any variables.
	     */
        return Long.MAX_VALUE / 2;
        
	}

   @Override
   public Set> getRequiredBound(StaticAnalysis sa) {
      return new HashSet>();
   }

   @Override
   public Set> getDesiredBound(StaticAnalysis sa) {
      return sa.getSpannedVariables(this, true, new HashSet>());
   }    
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy