// com.bigdata.rdf.sparql.ast.StatementPatternNode
/**

Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016.  All rights reserved.

Contact:
     SYSTAP, LLC DBA Blazegraph
     2501 Calvert ST NW #106
     Washington, DC 20008
     [email protected]

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/
package com.bigdata.rdf.sparql.ast;

import java.util.Collections;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.openrdf.query.algebra.StatementPattern.Scope;

import com.bigdata.bop.BOp;
import com.bigdata.bop.Constant;
import com.bigdata.bop.IVariable;
import com.bigdata.bop.NV;
import com.bigdata.htree.HTree;
import com.bigdata.rdf.internal.constraints.RangeBOp;
import com.bigdata.rdf.sparql.ast.eval.AST2BOpBase;
import com.bigdata.rdf.sparql.ast.eval.AST2BOpJoins;
import com.bigdata.rdf.sparql.ast.eval.AST2BOpUtility;
import com.bigdata.rdf.sparql.ast.optimizers.ASTGraphGroupOptimizer;
import com.bigdata.rdf.sparql.ast.optimizers.ASTRangeConstraintOptimizer;
import com.bigdata.rdf.sparql.ast.optimizers.ASTSimpleOptionalOptimizer;
import com.bigdata.rdf.sparql.ast.optimizers.StaticOptimizer;
import com.bigdata.rdf.spo.DistinctTermAdvancer;
import com.bigdata.rdf.spo.ISPO;
import com.bigdata.rdf.spo.SPOAccessPath;
import com.bigdata.relation.rule.eval.ISolution;
import com.bigdata.striterator.IKeyOrder;

/**
 * A node in the AST representing a statement pattern.
 * 
 * Note: The annotations on the class are mostly interpreted by the
 * toPredicate() method in {@link AST2BOpUtility} and by the logic in
 * {@link AST2BOpJoins} which handles the default and named graph access
 * patterns.
 * <p>
 * Note: If a variable is bound, then we bind that slot of the predicate. If a
 * variable can take some enumerated set of values, then we use an
 * {@link Annotations#INLINE} access path to model that "IN" constraint. If the
 * value for a variable must lie within some key range, then we handle that case
 * using {@link RangeBOp}. If we have no information about a variable, then we
 * just leave the variable unbound.
 */
public class StatementPatternNode extends
        GroupMemberNodeBase implements
        IJoinNode, IStatementContainer, IReorderableNode {

    private static final long serialVersionUID = 1L;

    /**
     * Names of the annotations (properties) that may be attached to a
     * {@link StatementPatternNode}.
     */
    public interface Annotations extends GroupMemberNodeBase.Annotations,
            IJoinNode.Annotations {

        /**
         * The {@link Scope} (required).
         * 
         * @see ASTGraphGroupOptimizer
         */
        String SCOPE = "scope";
        
        /**
         * Boolean flag indicates that the distinct solutions for the statement
         * pattern are required ({@value #DEFAULT_DISTINCT}).
         * <p>
         * Note: This is a hint that the {@link DistinctTermAdvancer} should be
         * used to visit the distinct {@link ISPO}s having a common prefix. This
         * is used for <code>GRAPH ?g {}</code>, which evaluates to all of the
         * named graphs in the database (if the named graphs were not explicitly
         * specified).
         * <p>
         * Note: For only partly historical reasons, this is not used to mark
         * default graph access. A default graph access path strips the context
         * and then applies a DISTINCT filter to the resulting triples.
         */
        String DISTINCT = "distinct";
        
        /** The default value for the {@link #DISTINCT} annotation. */
        boolean DEFAULT_DISTINCT = false;
        
        /**
         * The existence of at least one solution will be verified otherwise the
         * solution will be failed. This turns into an iterator with a limit of
         * ONE (1) on the {@link SPOAccessPath}.
         * <p>
         * Note: This is used in combination with a join against an inline
         * access path for the named graphs. The "exists" statement pattern MUST
         * run <em>after</em> the access path which produces the variety since
         * it will be used to constrain that as-bound variety. Thus the join
         * order in the query plan must look like:
         * 
         * <pre>
         * (_,_,_,?g)[@INLINE,@IN(g,namedGraphs)] x (_,_,_,?g)[@EXISTS]
         * </pre>
         * 
         * rather than
         * 
         * <pre>
         * (_,_,_,?g)[@EXISTS] x (_,_,_,?g)[@INLINE,@IN(g,namedGraphs)]
         * </pre>
         * 
         * as the latter will find only one solution for <code>?g</code>.
         * 
         * @see <a
         *      href="https://sourceforge.net/apps/trac/bigdata/ticket/429">
         *      Optimization for GRAPH uri {} and GRAPH ?foo {}</a>
         */
        String EXISTS = "exists";
        
        /**
         * The data for this access path is inline. The value of the attribute
         * is the column projection / solution set reference.
         * <p>
         * TODO A column projection is more efficient when we are handling
         * things like the named graphs or constraining the subquery for an
         * optional with multiple predicates. That column projection can be
         * modeled as <code>IN(var,values)</code>. The IN filter could be
         * attached to {@link #FILTERS} or it could be the value of this
         * attribute.
         * <p>
         * We also have use cases for inline solution set access paths for use
         * with the samples materialized by the RTO. Those should be an
         * {@link HTree} and the data should be modeled as {@link ISolution}s.
         * (Note that some vertices may correspond to "bop fragment" joins, in
         * which case they can not be modeled as {@link ISPO}s.)
         * <p>
         * Both the column projection (IN) and the inline solution set (HTree)
         * are simpler access paths. They only support element visitation, a
         * full scan of the access path (this is the same as saying that there
         * are no join variables), or probing to find all solutions which join
         * on some join variable(s). This is in contrast to the
         * {@link SPOAccessPath}, which supports key-range constraints (prefix)
         * and range constraints (prefix with key range on a data type value).
         * 
         * @see <a
         *      href="https://sourceforge.net/apps/trac/bigdata/ticket/429">
         *      Optimization for GRAPH uri {} and GRAPH ?foo {}</a>
         */
        String INLINE = "inline";
        
        /**
         * An optional attribute whose value is an {@link RangeBOp} which models
         * the key-range constraint on the access path. The {@link RangeBOp} is
         * used when there are filters which impose a GT/GTE and/or LT/LTE
         * restriction on the values which a variable may take on for that
         * access path.
         * <p>
         * TODO We should also handle datatype constraints on a variable here.
         * For example, if a variable is known to be numeric, or known to be
         * xsd:int, then we can immediately reject any bindings which would
         * violate that type constraint. To do this right, we need to notice
         * those type constraints and propagate them backwards in the plan so we
         * can reject bindings as early as possible. (In fact, we can also do a
         * range constraint which spans each of the datatypes which the variable
         * could take on. Datatype constraints and value range constraints are
         * very much related. The datatype constraint is effectively a value
         * range constraint allowing the entire value space for that datatype.
         * Likewise, a value range constraint must be applied across the UNION
         * of the allowable ground datatypes for the variable.)
         * 
         * @see ASTRangeConstraintOptimizer
         * 
         * @see <a
         *      href="https://sourceforge.net/apps/trac/bigdata/ticket/238">
         *      lift range constraints onto AP</a>
         */
        String RANGE = "range";
        
        /**
         * An optional annotation whose value is the variable which will be
         * bound to the statement identifier for the matched statement patterns.
         * The statement identifier is always formed from the subject, predicate
         * and object (the triple). The context is NOT represented in the
         * statement identifier. This keeps the semantics consistent with RDF
         * reification.
         * 
         * @see "Reification Done Right"
         */
        String SID = "sid";

        /**
         * An optional annotation whose value is a variable which will become
         * bound to the fast range count of the associated triple pattern.
         * 
         * @see "SELECT COUNT(...) (DISTINCT|REDUCED) {single-triple-pattern} is slow."
         */
        String FAST_RANGE_COUNT_VAR = "fastRangeCountVar";
        
        /**
         * An optional annotation whose value is the variable that will be bound
         * by a {@link DistinctTermAdvancer} layered over the access path.
         * 
         * @see "DISTINCT PREDICATEs query is slow"
         */
        String DISTINCT_TERM_SCAN_VAR = "distinctTermScanVar";
        
    }
    
    /**
     * Copy constructor; forwards the node to the superclass copy constructor.
     * Required for {@link com.bigdata.bop.BOpUtility#deepCopy(FilterNode)}.
     * 
     * @param op
     *            The node to copy.
     */
    public StatementPatternNode(final StatementPatternNode op) {
        super(op);
    }

    /**
     * Required shallow copy constructor. Both parameters are passed through
     * unchanged to the superclass constructor.
     * 
     * @param args
     *            The arguments of the node.
     * @param anns
     *            The annotations of the node, keyed by annotation name.
     */
    public StatementPatternNode(final BOp[] args,
            final Map<String, Object> anns) {

        super(args, anns);

    }

    /**
     * Create a triple pattern. Delegates to the five-argument constructor with
     * a <code>null</code> context and {@link Scope#DEFAULT_CONTEXTS} as the
     * scope.
     * 
     * @param s
     *            The term in the subject position.
     * @param p
     *            The term in the predicate position.
     * @param o
     *            The term in the object position.
     * 
     * @see StatementPatternNode#StatementPatternNode(TermNode, TermNode,
     *      TermNode, TermNode, Scope)
     */
    public StatementPatternNode(final TermNode s, final TermNode p,
            final TermNode o) {
        this(s, p, o, null/* context */, Scope.DEFAULT_CONTEXTS);
    }

    /**
     * A quad pattern.
     * <p>
     * Note: When a {@link StatementPatternNode} appears in a WHERE clause, the
     * {@link Scope} should be marked as {@link Scope#DEFAULT_CONTEXTS} if it is
     * NOT embedded within a GRAPH clause and otherwise as
     * {@link Scope#NAMED_CONTEXTS}.
     * <p>
     * The context position of the statement should be <code>null</code> unless
     * it is embedded within a GRAPH clause, in which case the context is the
     * context specified for the parent GRAPH clause.
     * <p>
     * The SPARQL UPDATE <code>WITH uri</code> is a syntactic sugar for
     * <code>GRAPH uri {...}</code>. Therefore, when present, any
     * {@link StatementPatternNode} outside of an explicit GRAPH group is also
     * marked as {@link Scope#NAMED_CONTEXTS} and the context position will be
     * bound to the <code>uri</code> specified in the <code>WITH</code> clause.
     * <p>
     * A <code>null</code> context in {@link Scope#DEFAULT_CONTEXTS} is
     * interpreted as the RDF merge of the graphs in the defaultGraph (as
     * specified by the {@link DatasetNode}). When non-<code>null</code> (it can
     * be bound by the SPARQL UPDATE <code>WITH</code> clause), the defaultGraph
     * declared by the {@link DatasetNode} is ignored and the context is bound
     * to the constant specified in that <code>WITH</code> clause.
     * <p>
     * Absent any other constraints on the query, an unbound variable context in
     * {@link Scope#NAMED_CONTEXTS} may be bound to any named graph specified by
     * the {@link DatasetNode}.
     * 
     * @param s
     *            The subject (variable or constant; required).
     * @param p
     *            The predicate (variable or constant; required).
     * @param o
     *            The object (variable or constant; required).
     * @param c
     *            The context (variable or constant; optional).
     * @param scope
     *            Either {@link Scope#DEFAULT_CONTEXTS} or
     *            {@link Scope#NAMED_CONTEXTS} (required).
     * 
     * @throws IllegalArgumentException
     *             if <i>s</i>, <i>p</i>, or <i>o</i> is <code>null</code>.
     * @throws IllegalArgumentException
     *             if <i>scope</i> is <code>null</code>.
     * @throws IllegalArgumentException
     *             if <i>scope</i> is {@link Scope#NAMED_CONTEXTS} and <i>c</i>
     *             is <code>null</code>.
     */
    public StatementPatternNode(final TermNode s, final TermNode p,
            final TermNode o, final TermNode c, final Scope scope) {

        // The superclass constructor has to run before any argument check, so
        // the scope annotation is only built when a scope was supplied; the
        // checks below then reject the invalid combinations.
        super(new BOp[] { s, p, o, c }, scope == null ? null/* anns */: NV
                .asMap(new NV(Annotations.SCOPE, scope)));

        if (scope == null) {
            throw new IllegalArgumentException("scope is required");
        }

        if (s == null || p == null || o == null) {
            throw new IllegalArgumentException(
                    "s, p and o are required (none may be null)");
        }

        if (scope == Scope.NAMED_CONTEXTS && c == null) {
            throw new IllegalArgumentException(
                    "c is required when scope is NAMED_CONTEXTS");
        }

    }

	/**
	 * Return the term in the subject position, i.e. argument zero of this
	 * node (variable or constant; required).
	 */
	public final TermNode s() {
		// get(int) is already narrowed to TermNode by this class.
		return get(0);
	}

	/**
	 * Return the term in the predicate position, i.e. argument one of this
	 * node (variable or constant; required).
	 */
	public final TermNode p() {
		// get(int) is already narrowed to TermNode by this class.
		return get(1);
	}

	/**
	 * Return the term in the object position, i.e. argument two of this node
	 * (variable or constant; required).
	 */
	public final TermNode o() {
		// get(int) is already narrowed to TermNode by this class.
		return get(2);
	}

	/**
	 * Return the term in the context position, i.e. argument three of this
	 * node (variable or constant; required iff in quads mode).
	 */
	public final TermNode c() {
		// get(int) is already narrowed to TermNode by this class.
		return get(3);
	}
    
    /**
     * Narrows the return type of the inherited accessor to {@link TermNode}.
     * 
     * @param i
     *            The index of the argument.
     * 
     * @return The argument at that index, cast to {@link TermNode}.
     */
    @Override
    public TermNode get(final int i) {
        return (TermNode) super.get(i);
    }

	/**
	 * Replace the argument at index three (the context position) of this node.
	 * 
	 * @param c
	 *            The new term for the context position.
	 */
	public final void setC(final TermNode c) {
		setArg(3, c);
	}
    
	/**
	 * Return the statement identifier variable for the triples matching this
	 * statement pattern (optional). The statement identifier is composed from
	 * the (subject, predicate, object) positions of the matched statements.
	 * 
	 * @return The value of the {@link Annotations#SID} annotation.
	 * 
	 * @see Annotations#SID
	 */
	public final VarNode sid() {
		return (VarNode) getProperty(Annotations.SID);
	}

	/**
	 * Store the given variable under the {@link Annotations#SID} annotation.
	 * 
	 * @param sid
	 *            The statement identifier variable.
	 */
	public final void setSid(final VarNode sid) {
		setProperty(Annotations.SID, sid);
	}
    
    /**
     * Return the scope of this statement pattern (named graphs or default
     * graphs). The value is read as a required property.
     * 
     * @see Annotations#SCOPE
     * @see Scope
     */
    public final Scope getScope() {
        return (Scope) getRequiredProperty(Annotations.SCOPE);
    }

	/**
	 * Store the given scope under the {@link Annotations#SCOPE} annotation.
	 * 
	 * @param scope
	 *            The new scope (required).
	 * 
	 * @throws IllegalArgumentException
	 *             if the scope is <code>null</code>.
	 */
	public final void setScope(final Scope scope) {

		if (scope == null) {
			throw new IllegalArgumentException();
		}

		setProperty(Annotations.SCOPE, scope);

	}

	/**
	 * Return the value of the optional
	 * {@link Annotations#FAST_RANGE_COUNT_VAR} annotation.
	 * 
	 * @return The {@link VarNode} -or- <code>null</code> if that annotation is
	 *         not present on this triple pattern.
	 */
	public final VarNode getFastRangeCountVar() {
		return (VarNode) getProperty(Annotations.FAST_RANGE_COUNT_VAR);
	}
	
	/**
	 * Store the given variable under the
	 * {@link Annotations#FAST_RANGE_COUNT_VAR} annotation.
	 * 
	 * @param var
	 *            The variable (required).
	 * 
	 * @throws IllegalArgumentException
	 *             if the variable is <code>null</code>.
	 */
	public final void setFastRangeCount(final VarNode var) {

		if (var == null) {
			throw new IllegalArgumentException();
		}

		setProperty(Annotations.FAST_RANGE_COUNT_VAR, var);

	}

	/**
	 * Return the value of the optional
	 * {@link Annotations#DISTINCT_TERM_SCAN_VAR} annotation, i.e. the variable
	 * to be bound by the {@link DistinctTermAdvancer} pattern.
	 * 
	 * @return The distinct term scan variable -or- <code>null</code> if the
	 *         access path will not use a distinct term scan.
	 * 
	 * @see Annotations#DISTINCT_TERM_SCAN_VAR
	 */
	public final VarNode getDistinctTermScanVar() {
		return (VarNode) getProperty(Annotations.DISTINCT_TERM_SCAN_VAR);
	}

	/**
	 * Store the given variable under the
	 * {@link Annotations#DISTINCT_TERM_SCAN_VAR} annotation.
	 * 
	 * @param var
	 *            The distinct term scan variable.
	 */
	public final void setDistinctTermScanVar(final VarNode var) {
		setProperty(Annotations.DISTINCT_TERM_SCAN_VAR, var);
	}
    
    /**
     * {@inheritDoc}
     * <p>
     * Reports the value of the {@link Annotations#OPTIONAL} annotation,
     * falling back to {@link Annotations#DEFAULT_OPTIONAL} when it is not set.
     * The result is <code>true</code> iff this {@link StatementPatternNode}
     * was lifted out of an optional {@link JoinGroupNode} such that it has
     * OPTIONAL semantics.
     * 
     * @see ASTSimpleOptionalOptimizer
     */
    @Override
    public final boolean isOptional() {
        return getProperty(Annotations.OPTIONAL, Annotations.DEFAULT_OPTIONAL);
    }

    /**
     * Always <code>false</code> for a statement pattern.
     */
    @Override
    public final boolean isMinus() {
        return false;
    }
    
    /**
     * Set the {@link Annotations#OPTIONAL} annotation. A value of
     * <code>true</code> marks this {@link StatementPatternNode} as one that was
     * lifted out of a "simple optional" group and which therefore has
     * "optional" semantics (an optional join will be done for it).
     * <p>
     * Note: Preserving the semantics of the simple optional group (statement
     * pattern plus filter(s)) is also why the lifted FILTER(s) MUST NOT require
     * the materialization of any variable which would not have been bound
     * before that JOIN. Variables bound by the JOIN for the optional statement
     * pattern will not be materialized, so filters attached to that JOIN can
     * not require materialization of variables bound by the JOIN (they may
     * still depend on variables already bound by the required joins in the
     * parent group).
     * 
     * @param optional
     *            The new value of the annotation.
     * 
     * @see ASTSimpleOptionalOptimizer
     */
    public final void setOptional(final boolean optional) {
        setProperty(Annotations.OPTIONAL, optional);
    }
    
    /**
     * Store a {@link RangeNode} under the {@link Annotations#RANGE}
     * annotation. The node describes a range for the O value of this statement
     * pattern.
     * 
     * @param range
     *            The range description.
     */
    public final void setRange(final RangeNode range) {
        setProperty(Annotations.RANGE, range);
    }
    
    /**
     * Return the value of the {@link Annotations#RANGE} annotation.
     * 
     * @return The {@link RangeNode} stored under that annotation.
     */
    public final RangeNode getRange() {
        return (RangeNode) getProperty(Annotations.RANGE);
    }

    /**
     * Return the filters stored under the {@link Annotations#FILTERS}
     * annotation.
     * 
     * @return An unmodifiable view of the stored list -or- an empty list if
     *         the annotation is not set (never <code>null</code>).
     */
    @Override
    final public List<FilterNode> getAttachedJoinFilters() {

        // Annotation values are untyped, hence the unchecked cast.
        @SuppressWarnings("unchecked")
        final List<FilterNode> filters = (List<FilterNode>) getProperty(Annotations.FILTERS);

        if (filters == null) {

            return Collections.emptyList();

        }

        return Collections.unmodifiableList(filters);

    }

    /**
     * Store the given list under the {@link Annotations#FILTERS} annotation.
     * The list is stored as given (no copy is made here).
     * 
     * @param filters
     *            The filters to attach to the join for this statement pattern.
     */
    @Override
    final public void setAttachedJoinFilters(final List<FilterNode> filters) {

        setProperty(Annotations.FILTERS, filters);

    }

    /**
     * Report whether this statement pattern is free of variables.
     * 
     * @return <code>true</code> iff none of s, p, o, or c is a
     *         {@link VarNode}. A <code>null</code> context is not a
     *         {@link VarNode} and therefore does not prevent a
     *         <code>true</code> result.
     */
    public boolean isGround() {

        // Positions are tested in s, p, o, c order and the test stops at the
        // first variable found.
        final boolean hasVariable = s() instanceof VarNode
                || p() instanceof VarNode
                || o() instanceof VarNode
                || c() instanceof VarNode;

        return !hasVariable;

    }

    /**
     * Return the variables used by the predicate - i.e. what this node will
     * attempt to bind when run.
     */
    public Set> getProducedBindings() {

        final Set> producedBindings = new LinkedHashSet>();

        final TermNode s = s();
        final TermNode p = p();
        final TermNode o = o();
        final TermNode c = c();

        addProducedBindings(s, producedBindings);
        addProducedBindings(p, producedBindings);
        addProducedBindings(o, producedBindings);
        addProducedBindings(c, producedBindings);
        
        return producedBindings;

    }
    
    /**
     * This handles the special case where we've wrapped a Var with a Constant
     * because we know it's bound, perhaps by the exogenous bindings. If we
     * don't handle this case then we get the join vars wrong.
     * 
     * @see StaticAnalysis._getJoinVars
     */
    private void addProducedBindings(final TermNode t,
            final Set> producedBindings) {

        if (t instanceof VarNode) {

            producedBindings.add(((VarNode) t).getValueExpression());

        } else if (t instanceof ConstantNode) {

            final ConstantNode cNode = (ConstantNode) t;
            final Constant c = (Constant) cNode.getValueExpression();
            final IVariable var = c.getVar();
            if (var != null) {
                producedBindings.add(var);
            }

        }

    }

    /**
     * Render this statement pattern on a new line at the given indent level:
     * the short form first, followed by any attached join filters, the query
     * hints (when present and non-empty), and the estimated cardinality and
     * original index annotations (when present), each one level deeper.
     * 
     * @param indent
     *            The indent level for the statement pattern itself.
     * 
     * @return The rendered text.
     */
    @Override
    public String toString(final int indent) {

        final StringBuilder sb = new StringBuilder();

        sb.append("\n").append(indent(indent)).append(toShortString());

        // The list is never null (it is empty when no filters are attached).
        final List<FilterNode> filters = getAttachedJoinFilters();
        for (FilterNode filter : filters) {
            sb.append(filter.toString(indent + 1));
        }

        if (getQueryHints() != null && !getQueryHints().isEmpty()) {
            sb.append("\n");
            sb.append(indent(indent + 1));
            shortenName(sb, Annotations.QUERY_HINTS);
            sb.append("=");
            sb.append(getQueryHints().toString());
        }

        final Long rangeCount = (Long) getProperty(AST2BOpBase.Annotations.ESTIMATED_CARDINALITY);

        final IKeyOrder<?> keyOrder = (IKeyOrder<?>) getProperty(AST2BOpBase.Annotations.ORIGINAL_INDEX);

        if (rangeCount != null) {
            sb.append("\n");
            sb.append(indent(indent + 1));
            shortenName(sb, AST2BOpBase.Annotations.ESTIMATED_CARDINALITY);
            sb.append("=");
            sb.append(rangeCount.toString());
        }

        if (keyOrder != null) {
            sb.append("\n");
            sb.append(indent(indent + 1));
            shortenName(sb, AST2BOpBase.Annotations.ORIGINAL_INDEX);
            sb.append("=");
            sb.append(keyOrder.toString());
        }

        return sb.toString();

    }

	/**
	 * Render a single-line summary of this statement pattern: the class name,
	 * the optional operator id, the s, p, o (and, when present, c) terms, and
	 * then a bracketed suffix for each of the sid, scope, fast range count
	 * variable, distinct term scan variable, optional flag and attached filter
	 * count that applies.
	 */
	@Override
	public String toShortString() {

		final StringBuilder out = new StringBuilder();

		final Integer bopId = (Integer) getProperty(BOp.Annotations.BOP_ID);
		out.append("StatementPatternNode");
		if (bopId != null) {
			out.append("[").append(bopId.toString()).append("]");
		}

		// The term positions, with the context only when one is present.
		out.append("(").append(s()).append(", ").append(p()).append(", ")
				.append(o());
		final TermNode context = c();
		if (context != null) {
			out.append(", ").append(context);
		}
		out.append(")");

		final VarNode sidVar = sid();
		if (sidVar != null) {
			out.append(" [sid=").append(sidVar).append("]");
		}

		final Scope theScope = getScope();
		if (theScope != null) {
			out.append(" [scope=").append(theScope).append("]");
		}

		final VarNode rangeCountVar = getFastRangeCountVar();
		if (rangeCountVar != null) {
			out.append(" [fastRangeCount=").append(rangeCountVar).append("]");
		}

		final VarNode termScanVar = getDistinctTermScanVar();
		if (termScanVar != null) {
			out.append(" [distinctTermScan=").append(termScanVar).append("]");
		}

		if (isOptional()) {
			out.append(" [optional]");
		}

		if (!getAttachedJoinFilters().isEmpty()) {
			out.append(" [#filters=").append(getAttachedJoinFilters().size())
					.append("]");
		}

		return out.toString();
	}

	/**
	 * A statement pattern is reported as reorderable exactly when it is not
	 * optional.
	 * 
	 * @see com.bigdata.rdf.sparql.ast.IReorderableNode#isReorderable()
	 */
	@Override
	public boolean isReorderable() {
		final boolean optional = isOptional();
		return !optional;
	}

	/**
	 * Return the value of the
	 * {@link AST2BOpBase.Annotations#ESTIMATED_CARDINALITY} annotation, or
	 * <code>-1</code> when that annotation is not set. The optimizer argument
	 * is not consulted.
	 * 
	 * @see com.bigdata.rdf.sparql.ast.IReorderableNode#getEstimatedCardinality()
	 */
	@Override
	public long getEstimatedCardinality(StaticOptimizer opt) {
		return getProperty(AST2BOpBase.Annotations.ESTIMATED_CARDINALITY, -1L);
	}
	
   @Override
   public Set> getRequiredBound(StaticAnalysis sa) {
      return new HashSet>();
   }

   @Override
   public Set> getDesiredBound(StaticAnalysis sa) {
      return sa.getSpannedVariables(this, true, new HashSet>());
   }
   
   

}