com.bigdata.bop.IPredicate Maven / Gradle / Ivy
/*
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved.
Contact:
SYSTAP, LLC DBA Blazegraph
2501 Calvert ST NW #106
Washington, DC 20008
[email protected]
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
* Created on Jun 19, 2008
*/
package com.bigdata.bop;
import java.io.Serializable;
import com.bigdata.bop.ap.Predicate;
import com.bigdata.bop.ap.filter.BOpFilterBase;
import com.bigdata.bop.ap.filter.BOpTupleFilter;
import com.bigdata.bop.ap.filter.DistinctFilter;
import com.bigdata.bop.join.HTreeHashJoinOp;
import com.bigdata.bop.join.JVMHashJoinOp;
import com.bigdata.bop.join.PipelineJoin;
import com.bigdata.bop.joinGraph.IEvaluationPlan;
import com.bigdata.btree.IRangeQuery;
import com.bigdata.btree.ITuple;
import com.bigdata.btree.ITupleCursor;
import com.bigdata.btree.ITupleIterator;
import com.bigdata.btree.filter.Advancer;
import com.bigdata.btree.filter.TupleFilter;
import com.bigdata.mdi.PartitionLocator;
import com.bigdata.rdf.spo.SPOKeyOrder;
import com.bigdata.relation.IRelation;
import com.bigdata.relation.accesspath.AccessPath;
import com.bigdata.relation.accesspath.ElementFilter;
import com.bigdata.relation.accesspath.IAccessPath;
import com.bigdata.relation.rule.IAccessPathExpander;
import com.bigdata.relation.rule.IRule;
import com.bigdata.relation.rule.eval.pipeline.JoinMasterTask;
import com.bigdata.service.ndx.IClientIndex;
import com.bigdata.striterator.IKeyOrder;
import cutthecrap.utils.striterators.FilterBase;
import cutthecrap.utils.striterators.IFilter;
/**
* An immutable constraint on the elements visited using an {@link IAccessPath}.
* The slots in the predicate corresponding to variables are named and those
* names establish binding patterns access {@link IPredicate}s. Access is
* provided to slots by ordinal index regardless of whether or not they are
* named variables.
*
* @author Bryan Thompson
* @version $Id$
*/
public interface IPredicate extends BOp, Cloneable, Serializable {
/**
* Interface declaring well known annotations.
*/
public interface Annotations extends BOp.Annotations, BufferAnnotations,
ILocatableResourceAnnotations {
/**
* Optional property overrides the {@link IKeyOrder} to use for
* the access path. Sometimes changing which index is used for a join
* can change the locality of the join and improve join performance.
*
* This property mostly makes sense for the quads mode of the database
* where there is typically more than one {@link SPOKeyOrder} which
* begins with the same key component. E.g., {@link SPOKeyOrder#POCS}
* versus {@link SPOKeyOrder#PCSO}.
*
* An {@link IKeyOrder} override can also make sense when reading on a
* fully bound access path if there is an alternative key order which
* might provide better temporal locality. For example, by reading on
* the same {@link IKeyOrder} as the previous join.
*
* WARNING: DO NOT override the {@link IKeyOrder}
* unless you are also taking control of (or taking into account) the
* join order -or- if a hash join will be used against the predicate.
* Failure to heed this advice can result in an extremely bad access
* path as the join ordering may cause the variables which are actually
* bound when the predicate is evaluated to be quite different from what
* you might otherwise expect.
*
* @see https://sourceforge.net/apps/trac/bigdata/ticket/150 (chosing
* the index for testing fully bound access paths based on index
* locality)
*/
String KEY_ORDER = IPredicate.class.getName() + ".keyOrder";
/**
* true
iff the predicate has SPARQL optional semantics
* (default {@value #DEFAULT_OPTIONAL}).
*
* Note: When a JOIN is associated with an optional predicate and there
* are {@link IConstraint}s attached to that join, the constraints are
* evaluated only for solutions which join. Solutions which do not join
* (or which join but fail the constraints) are output without testing
* the constraints a second time.
*
* Note: JOIN and SUBQUERY operators will route optional solutions to
* the altSink if the altSink is specified and to the default sink
* otherwise.
*/
String OPTIONAL = IPredicate.class.getName() + ".optional";
boolean DEFAULT_OPTIONAL = false;
// /**
// * Constraints on the elements read from the relation.
// *
// * @deprecated This is being replaced by two classes of filters. One
// * which is always evaluated local to the index and one
// * which is evaluated in the JVM in which the access path is
// * evaluated once the {@link ITuple}s have been resolved to
// * elements of the relation.
// *
// * This was historically an {@link IElementFilter}, which is
// * an {@link IFilterTest}. {@link #INDEX_LOCAL_FILTER} is an
// * {@link IFilter}. You can wrap the {@link IFilterTest} as
// * an {@link IFilter} using {@link ElementFilter}.
// */
// String CONSTRAINT = "constraint";
// /**
// * An optional {@link IConstraint}[] which places restrictions on the
// * legal patterns in the variable bindings.
// */
// String CONSTRAINTS = PipelineJoin.class.getName() + ".constraints";
/**
* An optional {@link IFilter} that will be evaluated local to the to
* the index. When the index is remote, the filter will be sent to the
* node on which the index resides and evaluated there. This makes it
* possible to efficiently filter out tuples which are not of interest
* for a given access path.
*
* Note: The filter MUST NOT change the type of data visited by the
* iterator - it must remain an {@link ITupleIterator}. An attempt to
* change the type of the visited objects will result in a runtime
* exception. This filter must be "aware" of the reuse of tuples within
* tuple iterators. See {@link BOpTupleFilter}, {@link TupleFilter} and
* {@link ElementFilter} for starting points.
*
* You can chain {@link BOpFilterBase} filters by nesting them inside of
* one another. You can chain {@link FilterBase} filters together as
* well.
*
* @see #ACCESS_PATH_FILTER
*
* @see IRangeQuery#rangeIterator(byte[], byte[], int, int, IFilter)
*/
String INDEX_LOCAL_FILTER = IPredicate.class.getName() + ".indexLocalFilter";
/**
* An optional {@link BOpFilterBase} to be applied to the elements of
* the relation as they are materialized from the index. {@link ITuple}s
* are automatically resolved into relation "elements" before this
* filter is applied.
*
* Unlike {@link #INDEX_LOCAL_FILTER}, this an
* {@link #ACCESS_PATH_FILTER} is never sent to a remote index for
* evaluation. This makes it possible to impose {@link DistinctFilter}
* across a {@link #REMOTE_ACCESS_PATH}.
*
* You can chain {@link BOpFilterBase} filters by nesting them inside of
* one another. You can chain {@link FilterBase} filters together as
* well.
*/
String ACCESS_PATH_FILTER = IPredicate.class.getName() + ".accessPathFilter";
/**
* Access path expander pattern. This allows you to wrap or replace the
* {@link IAccessPath}.
*
* Note: Access path expanders in scale-out are logically consistent
* when used with a {@link #REMOTE_ACCESS_PATH}. However, remote access
* paths often lack the performance of a local access path. In order for
* the expander to be consistent with a local access path it MUST NOT
* rewrite the predicate in such a manner as to read on data onto found
* on the shard onto which the predicate was mapped during query
* evaluation.
*
* In general, scale-out query depends on binding sets being mapped onto
* the shards against which they will read or write. When an expander
* changes the bindings on an {@link IPredicate}, it typically changes
* the access path which will be used. This is only supported when the
* access path is {@link #REMOTE_ACCESS_PATH}.
*
* Note: If an expander generates nested {@link IAccessPath}s then it
* typically must strip off both the {@link #ACCESS_PATH_EXPANDER} and
* the {@link #ACCESS_PATH_EXPANDER} from the {@link IPredicate} before
* generating the inner {@link IAccessPath} and then layer the
* {@link #ACCESS_PATH_FILTER} back onto the expanded visitation
* pattern.
*
* @see IAccessPathExpander
*/
String ACCESS_PATH_EXPANDER = IPredicate.class.getName() + ".accessPathExpander";
/**
* The partition identifier -or- -1
if the predicate does
* not address a specific shard.
*/
String PARTITION_ID = IPredicate.class.getName() + ".partitionId";
int DEFAULT_PARTITION_ID = -1;
/**
* Boolean option determines whether the predicate will use a data
* service local access path (partitioned index view) or a remote access
* path (global index view) (default
* {@value #DEFAULT_REMOTE_ACCESS_PATH}).
*
* Note: "Remote" has the semantics that the access path has a total
* view of the index. In scale-out this is achieved using RMI and an
* {@link IClientIndex}. "Local" has the semantics that the access path
* has a partitioned view of the index. In scale-out, this corresponds
* to a shard. In standalone, there is no difference between "local" and
* "remote" index views since the indices are not partitioned.
*
* Local access paths (in scale-out) are much more efficient and should
* be used for most purposes. However, it is not possible to impose
* certain kinds of filters on a partitioned index. For example, a
* DISTINCT filter requires a global index view.
*
* When the access path is local (aka partitioned), the parent operator
* must be annotated to use a {@link BOpEvaluationContext#SHARDED shard
* wise} or {@link BOpEvaluationContext#HASHED node-wise} mapping of the
* binding sets.
*
* Remote access paths (in scale-out) use a scale-out index view. This
* view makes the scale-out index appear as if it were monolithic rather
* than partitioned. The monolithic view of a scale-out index can be
* used to impose a DISTINCT filter since all tuples will flow back to
* the caller.
*
* When the access path is remote the parent operator should use
* {@link BOpEvaluationContext#ANY} to prevent the binding sets from
* being moved around when the access path is remote. Note that the
* {@link BOpEvaluationContext} is basically ignored for standalone
* since there is only one place for the operator to run - on the query
* controller.
*
* @see BOpEvaluationContext
*/
String REMOTE_ACCESS_PATH = IPredicate.class.getName() + ".remoteAccessPath";
boolean DEFAULT_REMOTE_ACCESS_PATH = true;
/**
* If the estimated rangeCount for an {@link AccessPath#iterator()} is
* LTE this threshold then use a fully buffered (synchronous) iterator.
* Otherwise use an asynchronous iterator whose capacity is governed by
* {@link #CHUNK_OF_CHUNKS_CAPACITY}.
*
* @see #DEFAULT_FULLY_BUFFERED_READ_THRESHOLD
*/
String FULLY_BUFFERED_READ_THRESHOLD = IPredicate.class.getName()
+ ".fullyBufferedReadThreshold";
/**
* Default for {@link #FULLY_BUFFERED_READ_THRESHOLD}.
*
* @todo Experiment with this. It should probably be something close to
* the branching factor, e.g., 100.
*/
int DEFAULT_FULLY_BUFFERED_READ_THRESHOLD = 100;//trunk=20*Bytes.kilobyte32;
/**
* Specify the {@link IRangeQuery} flags for the {@link IAccessPath} (
* default is {@link IRangeQuery#KEYS}, {@link IRangeQuery#VALS}).
*
* Note: Most access paths are read-only so it is nearly always a good
* idea to set the {@link IRangeQuery#READONLY} flag.
*
* Note: Access paths used to support high-level query can nearly always
* use {@link IRangeQuery#PARALLEL} iterator semantics, which permits
* the iterator to run in parallel across index partitions in scale-out.
* This flag only effects operations which use a global index view in
* scale-out ( pipeline joins do something different).
*
* Note: Some expanders may require the {@link IRangeQuery#CURSOR} flag.
* For example, {@link Advancer} patterns use an {@link ITupleCursor}
* rather than an {@link ITupleIterator}. However, since the cursors are
* slightly slower, they should only be specified when their
* semantics are necessary.
*
* @see #DEFAULT_FLAGS
*/
String FLAGS = IPredicate.class.getName() + ".flags";
/**
* The default flags will visit the keys and values of the non-deleted
* tuples.
*/
final int DEFAULT_FLAGS = IRangeQuery.KEYS | IRangeQuery.VALS
// | IRangeQuery.PARALLEL
;
// /**
// * Boolean property whose value is true
iff this operator
// * writes on a database.
// *
// * Most operators operate solely on streams of elements or binding sets.
// * Some operators read or write on the database using an access path,
// * which is typically described by an {@link IPredicate}. This property
// * MUST be true
when access path is used to write on the
// * database.
// *
// * Operators which read or write on the database must declare the
// * {@link Annotations#TIMESTAMP} associated with that operation.
// *
// * @see #TIMESTAMP
// */
// String MUTATION = IPredicate.class.getName() + ".mutation";
//
// boolean DEFAULT_MUTATION = false;
/**
* Note: This annotation is not currently integrated. It is intended to
* provide a means to constrain the key-range of the predicate based on
* an allowable value range for some slot in that predicate.
*/
String RANGE = IPredicate.class.getName() + ".range";
/**
* Limits the number of elements read from the access path for this
* predicate when used in a join ({@link HTreeHashJoinOp},
* {@link JVMHashJoinOp}, {@link PipelineJoin}). Note that this is
* limiting INPUT to a join, it says nothing about the output from
* a join.
*/
String CUTOFF_LIMIT = IPredicate.class.getName() + ".cutoffLimit";
/**
* Deault is to not cut off the join.
*/
final long DEFAULT_CUTOFF_LIMIT = Long.MAX_VALUE;
}
/**
* Resource identifier (aka namespace) identifies the {@link IRelation}
* associated with this {@link IPredicate}.
*
* @throws IllegalStateException
* if there is more than on element in the view.
*
* @see Annotations#RELATION_NAME
*
* @todo Rename as getRelationName()
*/
public String getOnlyRelationName();
/**
* Return the ith element of the relation view. The view is an ordered array
* of resource identifiers that describes the view for the relation.
*
* @param index
* The index into the array of relation names in the view.
*
* @deprecated Unions of predicates must be handled explicitly as a union of
* pipeline operators reading against the different predicate.
*/
public String getRelationName(int index);
/**
* The #of elements in the relation view.
*
* @deprecated per {@link #getRelationName(int)}.
*/
public int getRelationCount();
/**
* The index partition identifier and -1
if no partition
* identifier was specified.
*
* @return The index partition identifier -or- -1
if the
* predicate is not locked to a specific index partition.
*
* @see Annotations#PARTITION_ID
*
* @see PartitionLocator
* @see AccessPath
* @see JoinMasterTask
*/
public int getPartitionId();
/**
* Sets the index partition identifier constraint.
*
* @param partitionId
* The index partition identifier.
*
* @return The constrained {@link IPredicate}.
*
* @throws IllegalArgumentException
* if the index partition identified is a negative integer.
* @throws IllegalStateException
* if the index partition identifier was already specified.
* @see Annotations#PARTITION_ID
*/
public IPredicate setPartitionId(int partitionId);
/**
* true
iff the predicate is optional when evaluated as the
* right-hand side of a join. An optional predicate will match once after
* all matches in the data have been exhausted. By default, the match will
* NOT bind any variables that have been determined to be bound by the
* predicate based on the computed {@link IEvaluationPlan}.
*
* For mutation, some {@link IRelation}s may require that all variables
* appearing in the head are bound. This and similar constraints can be
* enforced using {@link IConstraint}s on the {@link IRule}.
*
* @return true
iff this predicate is optional when evaluating
* a JOIN.
*/
public boolean isOptional();
/**
* Returns the object that may be used to selectively override the
* evaluation of the predicate.
*
* @return The {@link IAccessPathExpander}.
*
* @see Annotations#ACCESS_PATH_EXPANDER
*
* @todo Replace with {@link IAccessPathExpander#getAccessPath(IAccessPath)}
* , which is the only method declared by {@link IAccessPathExpander}?
*/
public IAccessPathExpander getAccessPathExpander();
// /**
// * An optional constraint on the visitable elements.
// *
// * @see Annotations#CONSTRAINT
// *
// * @deprecated This is being replaced by two classes of filters. One which
// * is always evaluated local to the index and one which is
// * evaluated in the JVM in which the access path is evaluated
// * once the {@link ITuple}s have been resolved to elements of
// * the relation.
// */
// public IElementFilter getConstraint();
// /**
// * Return the optional {@link IConstraint}[] to be applied by a join which
// * evaluates this {@link IPredicate}.
// *
// * Note: The {@link Annotations#CONSTRAINTS} are annotated on the
// * {@link IPredicate} rather than the join operators so they may be used
// * with join graphs, which are expressed solely as an unordered set of
// * {@link IPredicate}s. Using join graphs, we are able to do nifty things
// * such as runtime query optimization which would not be possible if the
// * annotations were decorating the joins since we would be unable to
// * dynamically generate the join operators with the necessary annotations.
// *
// * @see Annotations#CONSTRAINTS
// */
// public IConstraint[] constraints();
/**
* Return the optional filter to be evaluated local to the index.
*
* @see Annotations#INDEX_LOCAL_FILTER
*/
public IFilter getIndexLocalFilter();
/**
* Return the optional filter to be evaluated once tuples have been
* converted into relation elements by the access path (local to the
* caller).
*
* @see Annotations#ACCESS_PATH_FILTER
*/
public IFilter getAccessPathFilter();
/**
* Return the {@link IKeyOrder} override for this {@link IPredicate} by the
* query optimizer.
*
* @return The assigned {@link IKeyOrder} or null
if the
* {@link IKeyOrder} was not overridden.
*
* @see Annotations#KEY_ORDER
*/
public IKeyOrder getKeyOrder();
// /**
// * Set the {@link IKeyOrder} annotation on the {@link IPredicate}, returning
// * a new {@link IPredicate} in which the annotation takes on the given
// * binding.
// *
// * @param keyOrder
// * The {@link IKeyOrder}.
// *
// * @return The new {@link IPredicate}.
// *
// * @see Annotations#KEY_ORDER
// */
// public IPredicate setKeyOrder(final IKeyOrder keyOrder);
/**
* Figure out if all positions in the predicate which are required to form
* the key for this access path are bound in the predicate.
*/
public boolean isFullyBound(IKeyOrder keyOrder);
/**
* @deprecated This is only used in a few places, which should probably use
* {@link BOpUtility#getArgumentVariableCount(BOp)} instead.
*/
public int getVariableCount();
/**
* The #of arguments in the predicate required for the specified
* {@link IKeyOrder} which are unbound.
*
* @param keyOrder
* The key order.
*
* @return The #of unbound arguments for that {@link IKeyOrder}.
*/
public int getVariableCount(IKeyOrder keyOrder);
/**
* Return true
if this is a remote access path.
*
* @see Annotations#REMOTE_ACCESS_PATH
*/
public boolean isRemoteAccessPath();
/**
* Return the variable or constant at the specified index.
*
* @param index
* The index.
*
* @return The variable or constant at the specified index.
*
* @throws IllegalArgumentException
* if the index is less than zero or GTE the {@link #arity()} of
* the {@link IPredicate}.
*/
/*
* Note: the return value can not be parameterized without breaking code.
*/
@SuppressWarnings("rawtypes")
public IVariableOrConstant get(int index);
/**
* Return the asBound value at the specified index for the given element.
* This method does not consider the bindings of the predicate instance.
*
* Note: there is no general means available to implement this method of an
* awareness of the internal structure of the element type. General purpose
* record types, such as GOM or relation records, can generally implement
* this method in the context of the "schema" imposed by the predicate.
*
* @param e
* The element, which must implement {@link IElement}.
* @param index
* The index.
*
* @return The value.
*
* @throws UnsupportedOperationException
* If this operation is not supported by the {@link IPredicate}
* implementation or for the given element type.
*
* @deprecated by {@link IElement#get(int)} which does exactly what this
* method is trying to do.
*/
public IConstant> get(E e, int index);
/**
* Return a new instance in which all occurrences of the given variable have
* been replaced by the specified constant.
*
* Note: The optimal {@link IKeyOrder} often changes when binding a variable
* on a predicate.
*
* @param var
* The variable.
* @param val
* The constant.
*
* @return A new instance of the predicate in which all occurrences of the
* variable have been replaced by the constant.
*
* @throws IllegalArgumentException
* if either argument is null
.
*/
public Predicate asBound(final IVariable> var, final IConstant> val);
/**
* Return a new instance in which all occurrences of the variable appearing
* in the binding set have been replaced by their bound values.
*
* Note: The optimal {@link IKeyOrder} often changes when binding a variable
* on a predicate.
*
* @param bindingSet
* The binding set.
*
* @return The as-bound {@link IPredicate} -or- null
if the
* {@link IPredicate} can not be unified with the
* {@link IBindingSet}.
*
* @see #815 (RDR query does too much work)
*/
public IPredicate asBound(IBindingSet bindingSet);
/**
* Extract the as bound value from the predicate. When the predicate is not
* bound at that index, the value of the variable is taken from the binding
* set.
*
* @param index
* The index into that predicate.
* @param bindingSet
* The binding set.
*
* @return The bound value -or- null
if no binding is available
* (the predicate is not bound at that index and the variable at
* that index in the predicate is not bound in the binding set).
*
* @throws IndexOutOfBoundsException
* unless the index is in [0:{@link #arity()-1], inclusive.
* @throws IllegalArgumentException
* if the bindingSet is null
.
*/
public Object asBound(int index, IBindingSet bindingSet);
/**
* A copy of this {@link IPredicate} in which the relationName(s)
* replace the existing set of relation name(s).
*
* @param relationName
* The relation name(s).
*
* @throws IllegalArgumentException
* if relationName is empty.
* @throws IllegalArgumentException
* if relationName is null
* @throws IllegalArgumentException
* if any element of relationName is null
*
* @deprecated This will be modified to use a scalar relation name per
* {@link #getOnlyRelationName()}.
*/
public IPredicate setRelationName(String[] relationName);
/**
* Representation of the predicate without variable bindings.
*/
public String toString();
/**
* Representation of the predicate with variable bindings.
*
* @param bindingSet
* The variable bindings
*/
public String toString(IBindingSet bindingSet);
/**
* Compares the bindings of two predicates for equality.
*
* @param other
* Another predicate.
*
* @return true iff the predicate have the same arity and their ordered
* bindings are the same. when both predicates have a variable at a
* given index, the names of the variables must be the same.
*/
public boolean equals(Object other);
/**
* The hash code is defined as
*
*
* get(0).hashCode()*31ˆ(n-1) + get(1).hashCode()*31ˆ(n-2) + ... + get(n-1).hashCode()
*
*
* using int
arithmetic, where n
is the
* {@link #arity()} of the predicate, and ^
indicates
* exponentiation.
*
* Note: This is similar to how {@link String#hashCode()} is defined.
*/
public int hashCode();
/**
* Sets the {@link com.bigdata.bop.BOp.Annotations#BOP_ID} annotation.
*
* @param bopId
* The bop id.
*
* @return The newly annotated {@link IPredicate}.
*/
public IPredicate setBOpId(int bopId);
/**
* Return a copy of this predicate with a different {@link IVariableOrConstant}
* for the arg specified by the supplied index parameter.
*/
@SuppressWarnings("rawtypes")
public IPredicate setArg(int index, IVariableOrConstant arg);
// /**
// * Return true
iff this operator is an access path which writes
// * on the database.
// *
// * @see Annotations#MUTATION
// */
// boolean isMutation();
/**
* The timestamp or transaction identifier on which the operator will read
* or write.
*
* @see Annotations#TIMESTAMP
*
* @throws IllegalStateException
* if {@link Annotations#TIMESTAMP} was not specified.
*/
long getTimestamp();
}