![JAR search and dependency download from the Maven repository](/logo.png)
com.bigdata.bop.join.DistinctTermScanOp Maven / Gradle / Ivy
/**
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved.
Contact:
SYSTAP, LLC DBA Blazegraph
2501 Calvert ST NW #106
Washington, DC 20008
licenses@blazegraph.com
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
* Created on Aug 25, 2010
*/
package com.bigdata.bop.join;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.concurrent.Callable;
import java.util.concurrent.FutureTask;
import com.bigdata.bop.BOp;
import com.bigdata.bop.BOpContext;
import com.bigdata.bop.BOpUtility;
import com.bigdata.bop.Constant;
import com.bigdata.bop.IBindingSet;
import com.bigdata.bop.IConstant;
import com.bigdata.bop.IConstraint;
import com.bigdata.bop.IPredicate;
import com.bigdata.bop.IVariable;
import com.bigdata.bop.NV;
import com.bigdata.bop.PipelineOp;
import com.bigdata.bop.bindingSet.ListBindingSet;
import com.bigdata.bop.engine.BOpStats;
import com.bigdata.btree.IRangeQuery;
import com.bigdata.btree.ITuple;
import com.bigdata.btree.filter.Advancer;
import com.bigdata.btree.filter.TupleFilter;
import com.bigdata.rdf.internal.IV;
import com.bigdata.rdf.internal.IVUtility;
import com.bigdata.rdf.lexicon.ITermIVFilter;
import com.bigdata.rdf.spo.DistinctMultiTermAdvancer;
import com.bigdata.rdf.spo.DistinctTermAdvancer;
import com.bigdata.rdf.spo.SPO;
import com.bigdata.rdf.spo.SPOKeyOrder;
import com.bigdata.rdf.spo.SPORelation;
import com.bigdata.relation.IRelation;
import com.bigdata.relation.accesspath.AccessPath;
import com.bigdata.relation.accesspath.IAccessPath;
import com.bigdata.relation.accesspath.IBlockingBuffer;
import com.bigdata.relation.accesspath.UnsyncLocalOutputBuffer;
import com.bigdata.striterator.ChunkedWrappedIterator;
import com.bigdata.striterator.IChunkedIterator;
import com.bigdata.striterator.IKeyOrder;
import cutthecrap.utils.striterators.Resolver;
import cutthecrap.utils.striterators.Striterator;
/**
* This operator performs a distinct terms scan for an {@link IPredicate},
* binding the distinct values for the specified variable(s) from the
* {@link IAccessPath} for the {@link IPredicate}. This is done using a
* {@link DistinctTermAdvancer} to skip over any duplicate solutions in the
* index. Thus the cost of this operator is O(N) where N is the number of
* distinct solutions that exist in the index.
*
* @see "DISTINCT PREDICATEs query is slow"
* @see DistinctTermAdvancer
*
* @author Bryan Thompson
*/
public class DistinctTermScanOp extends PipelineOp {
/**
 * Serialization version identifier for this operator class.
 */
private static final long serialVersionUID = 1L;
/**
 * Annotation names recognized by this operator, in addition to everything
 * inherited from {@link AccessPathJoinAnnotations}.
 */
public interface Annotations extends AccessPathJoinAnnotations {

    /**
     * Names the variable that receives the distinct values. The operator
     * outputs the distinct projection of that variable against the
     * {@link IAccessPath} for the as-bound {@link IPredicate}.
     */
    String DISTINCT_VAR = DistinctTermScanOp.class.getName() + ".distinctVar";

}
/**
 * Deep copy constructor; all of the work is delegated to the superclass.
 *
 * @param op
 *            The operator to be copied.
 */
public DistinctTermScanOp(final DistinctTermScanOp op) {

    super(op);

}
/**
* Shallow copy constructor.
*
* @param args
* @param annotations
*/
public DistinctTermScanOp(final BOp[] args,
final Map annotations) {
super(args, annotations);
// MUST be given.
getDistinctVar();
getRequiredProperty(Annotations.PREDICATE);
if (isOptional()) {
/*
* TODO OPTIONAL is not implemented for this operator.
*/
throw new UnsupportedOperationException();
}
}
/**
 * Convenience constructor accepting the annotations as name/value pairs.
 * The pairs are converted with {@link NV#asMap(NV...)} and handed to the
 * map-based constructor.
 *
 * @param args
 *            The operands.
 * @param annotations
 *            The annotations as name/value pairs.
 */
public DistinctTermScanOp(final BOp[] args, final NV... annotations) {

    this(args, NV.asMap(annotations));

}
/**
* @see Annotations#DISTINCT_VAR
*/
protected IVariable> getDistinctVar() {
return (IVariable>) getRequiredProperty(Annotations.DISTINCT_VAR);
}
/**
* @see Annotations#SELECT
*/
protected IVariable>[] getSelect() {
return getProperty(Annotations.SELECT, null/* defaultValue */);
}
/**
 * Return the constraints declared for this operator.
 *
 * @return The value of the {@link Annotations#CONSTRAINTS} annotation -or-
 *         <code>null</code> when the annotation was not specified.
 *
 * @see Annotations#CONSTRAINTS
 */
protected IConstraint[] constraints() {

    final IConstraint[] declared = getProperty(Annotations.CONSTRAINTS,
            null/* defaultValue */);

    return declared;

}
/**
 * Return the predicate whose access path is scanned by this operator. The
 * annotation is looked up as a required property.
 *
 * @return The value of the {@link Annotations#PREDICATE} annotation.
 */
@SuppressWarnings("unchecked")
public IPredicate getPredicate() {

    final Object value = getRequiredProperty(Annotations.PREDICATE);

    return (IPredicate) value;

}
/**
 * Report whether the {@link IPredicate} attached to this operator is
 * marked as optional, i.e. the value of {@link IPredicate#isOptional()}.
 *
 * @return <code>true</code> iff the predicate is optional.
 *
 * @see IPredicate.Annotations#OPTIONAL
 */
private boolean isOptional() {

    final IPredicate pred = getPredicate();

    return pred.isOptional();

}
/**
 * Wrap a new {@link ChunkTask} for this operator and the given evaluation
 * context in a {@link FutureTask}. The task is only created here; it is
 * not started by this method.
 *
 * @param context
 *            The evaluation context.
 *
 * @return The task that will evaluate the operator.
 */
@Override
public FutureTask eval(final BOpContext context) {

    final ChunkTask task = new ChunkTask(this, context);

    return new FutureTask(task);

}
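/**
 * Task which, for each source solution, runs the distinct term scan against
 * the as-bound predicate and writes the solutions that join onto the sink.
 */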
static private class ChunkTask implements Callable {
private final DistinctTermScanOp op;
private final BOpContext context;
/**
* The variable that gets bound to the distinct values by the scan.
*/
private final IVariable> distinctVar;
/**
* The source for the elements to be joined.
*/
private final IPredicate predicate;
/**
* The relation associated with the {@link #predicate} operand.
*/
private final IRelation relation;
/**
 * Capture the operator and its evaluation context, then resolve the
 * distinct variable, the predicate and the relation for that predicate.
 *
 * @param op
 *            The operator being evaluated.
 * @param context
 *            The evaluation context.
 */
ChunkTask(final DistinctTermScanOp op,
        final BOpContext context) {

    this.op = op;
    this.context = context;

    // Resolved from the operator's annotations.
    this.distinctVar = op.getDistinctVar();
    this.predicate = op.getPredicate();

    // The relation is resolved from the context for that predicate.
    this.relation = context.getRelation(this.predicate);

}
@Override
public Void call() throws Exception {
final BOpStats stats = context.getStats();
// Convert source solutions to array (assumes low cardinality).
final IBindingSet[] leftSolutions = BOpUtility.toArray(
context.getSource(), stats);
// default sink
final IBlockingBuffer sink = context.getSink();
final UnsyncLocalOutputBuffer unsyncBuffer = new UnsyncLocalOutputBuffer(
op.getChunkCapacity(), sink);
final IVariable>[] selectVars = op.getSelect();
final IConstraint[] constraints = op.constraints();
try {
/*
* TODO If there are multiple left solutions (from the pipeline)
* then we could generate their fromKeys and order them to
* improve cache locality. See PipelineJoin for an example of
* how this is done. For the distinct-term-scan this could
* provide a reasonable improvement in cache locality for the
* index.
*/
// For each source solution.
for (IBindingSet bindingSet : leftSolutions) {
// constrain the predicate to the given bindings.
IPredicate asBound = predicate.asBound(bindingSet);
if (asBound == null) {
/*
* This can happen for a SIDS mode join if some of the
* (s,p,o,[c]) and SID are bound on entry and they can not
* be unified. For example, the s position might be
* inconsistent with the Subject that can be decoded from
* the SID binding.
*
* @see #815 (RDR query does too much work)
*/
continue;
}
// if (partitionId != -1) {
//
// /*
// * Constrain the predicate to the desired index
// * partition.
// *
// * Note: we do this for scale-out joins since the
// * access path will be evaluated by a JoinTask
// * dedicated to this index partition, which is part
// * of how we give the JoinTask to gain access to the
// * local index object for an index partition.
// */
//
// asBound = asBound.setPartitionId(partitionId);
//
// }
/**
* The {@link IAccessPath} corresponding to the asBound
* {@link IPredicate} for this join dimension. The asBound
* {@link IPredicate} is {@link IAccessPath#getPredicate()}.
*
* FIXME What do we do if there is a local filter or an
* access path filter? Do we have to NOT generate this
* operator? It is probably not safe to ignore those
* filters....
*/
final IAccessPath accessPath = context.getAccessPath(
relation, asBound);
if (accessPath.getPredicate().getIndexLocalFilter() != null) {
// index has local filter. requires scan.
throw new AssertionError();
}
if (accessPath.getPredicate().getAccessPathFilter() != null) {
// access path filter exists. requires scan.
throw new AssertionError();
}
// TODO Cast to AccessPath is not type safe.
final IChunkedIterator rightItr = distinctTermScan(
(AccessPath) accessPath, null/* termIdFilter */);
while (rightItr.hasNext()) {
// New binding set.
final IBindingSet right = new ListBindingSet();
// Bind the distinctTermVar.
right.set(distinctVar, new Constant(rightItr.next()));
// See if the solutions join.
final IBindingSet outSolution = BOpContext.bind(//
bindingSet,// left
right,//
constraints,//
selectVars//
);
if (outSolution != null) {
// Output the solution.
unsyncBuffer.add(outSolution);
}
}
}
// flush the unsync buffer.
unsyncBuffer.flush();
// flush the sink.
sink.flush();
// Done.
return null;
} finally {
sink.close();
context.getSource().close();
}
}
/**
* Efficient scan of the distinct term identifiers that appear in the
* first position of the keys for the statement index corresponding to
* the specified {@link IKeyOrder}. For example, using
* {@link SPOKeyOrder#POS} will give you the term identifiers for the
* distinct predicates actually in use within statements in the
* {@link SPORelation}.
*
* @param keyOrder
* The selected index order.
* @param fromKey
* The first key for the scan -or- null
to start
* the scan at the head of the index.
* @param toKey
* The last key (exclusive upper bound) for the scan -or-
* null
to scan until the end of the index.
* @param termIdFilter
* An optional filter on the visited {@link IV}s.
*
* @return An iterator visiting the distinct term identifiers.
*
* TODO Move this method to {@link AccessPath}. Also, refactor
* {@link SPORelation#distinctTermScan(IKeyOrder)} to use this
* code.
*/
private static IChunkedIterator distinctTermScan(
final AccessPath ap, final ITermIVFilter termIdFilter) {
final IKeyOrder keyOrder = ap.getKeyOrder();
final byte[] fromKey = ap.getFromKey();
final byte[] toKey = ap.getToKey();
// if there are predicate positions bound to constants, we use
// the distinct multi term advancer, otherwise the simple distinct
// term advancer is sufficient
List predicateArgs = ap.getPredicate().args();
int nrConsts = 0;
for (int i=0; i filter = nrConsts==0 ?
new DistinctTermAdvancer(keyOrder.getKeyArity()) :
new DistinctMultiTermAdvancer(keyOrder.getKeyArity(), nrConsts);
/*
* Layer in the logic to advance to the tuple that will have the
* next distinct term identifier in the first position of the key.
*/
if (termIdFilter != null) {
/*
* Layer in a filter for only the desired term types.
*/
filter.addFilter(new TupleFilter() {
private static final long serialVersionUID = 1L;
@Override
protected boolean isValid(final ITuple tuple) {
final byte[] key = tuple.getKey();
final IV[] ivs = IVUtility.decode(key,nrConstsFinal+1);
final IV iv = ivs[nrConstsFinal];
return termIdFilter.isValid(iv);
}
});
}
@SuppressWarnings("unchecked")
final Iterator itr = new Striterator(ap.getIndex(/*keyOrder*/)
.rangeIterator(fromKey, toKey,//
0/* capacity */, IRangeQuery.KEYS | IRangeQuery.CURSOR,
filter)).addFilter(new Resolver() {
private static final long serialVersionUID = 1L;
/**
* Resolve tuple to IV.
*/
@Override
protected IV resolve(final Object obj) {
final byte[] key = ((ITuple>) obj).getKey();
final IV[] ivs = IVUtility.decode(key,nrConstsFinal+1);
return ivs[nrConstsFinal];
}
});
return new ChunkedWrappedIterator(itr, ap.getChunkCapacity(),
IV.class);
}
} // class ChunkTask
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy