// com.bigdata.bop.solutions.HTreeDistinctBindingSetsOp Maven / Gradle / Ivy
package com.bigdata.bop.solutions;
import java.util.Map;
import java.util.concurrent.Callable;
import java.util.concurrent.FutureTask;
import com.bigdata.bop.BOp;
import com.bigdata.bop.BOpContext;
import com.bigdata.bop.HTreeAnnotations;
import com.bigdata.bop.IBindingSet;
import com.bigdata.bop.IQueryAttributes;
import com.bigdata.bop.ISingleThreadedOp;
import com.bigdata.bop.IVariable;
import com.bigdata.bop.NV;
import com.bigdata.bop.PipelineOp;
import com.bigdata.bop.controller.INamedSolutionSetRef;
import com.bigdata.bop.controller.NamedSetAnnotations;
import com.bigdata.bop.engine.BOpStats;
import com.bigdata.bop.join.HTreeHashJoinUtility;
import com.bigdata.bop.join.JoinTypeEnum;
import com.bigdata.htree.HTree;
import com.bigdata.rdf.internal.IV;
import com.bigdata.relation.accesspath.IBlockingBuffer;
import com.bigdata.relation.accesspath.UnsyncLocalOutputBuffer;
import cutthecrap.utils.striterators.ICloseableIterator;
/**
* A pipelined DISTINCT operator based on the persistence capable {@link HTree}
* suitable for very large solution sets. Only the variables which are used to
* determine the DISTINCT solutions are projected from the operator. The
* operator is specific to the RDF data model (it relies on encoded {@link IV}
* s).
*
* Note: This implementation is a single-threaded pipelined operator which
* inspects each chunk of solutions as it arrives and passes on those
* solutions from each chunk which are distinct.
*
* Note: {@link PipelineOp.Annotations#MAX_MEMORY} is currently ignored by this
* operator. This value could be used to trigger the switch to an external
* memory DISTINCT (on a backing store) or to fail a query which attempts to put
* too much data into the native heap. Right now, it will just keep adding data
* on the native heap and eventually the machine will begin to swap.
*
* @author Bryan Thompson
*/
public class HTreeDistinctBindingSetsOp extends PipelineOp implements
ISingleThreadedOp {
// private final static transient Logger log = Logger
// .getLogger(DistinctBindingSetsWithHTreeOp.class);
/**
 * Serialization version identifier for this operator class.
 */
private static final long serialVersionUID = 1L;
/**
 * Annotations recognized by this operator.
 * <p>
 * The interface declares no constants of its own; it only aggregates the
 * annotation interfaces it extends. Within this file the operator reads
 * {@code EVALUATION_CONTEXT}, {@code NAMED_SET_REF} and {@code VARIABLES}
 * through this interface.
 */
public interface Annotations extends PipelineOp.Annotations,
HTreeAnnotations, DistinctAnnotations, NamedSetAnnotations {
// Retained for reference only: a previous, now disabled, local declaration
// of NAMED_SET_REF. The constant is referenced by this operator as
// Annotations.NAMED_SET_REF, so it is presumably inherited from one of the
// super-interfaces (likely NamedSetAnnotations) -- TODO confirm.
// /**
// * The name of {@link IQueryAttributes} attribute under which the
// * {@link HTreeHashJoinState} for this operator is stored. The attribute
// * name includes the query UUID. The query UUID must be extracted and
// * used to lookup the {@link IRunningQuery} to which the solution set
// * was attached.
// *
// * @see NamedSolutionSetRef
// */
// final String NAMED_SET_REF = HTreeNamedSubqueryOp.Annotations.NAMED_SET_REF;
}
/**
 * Copy constructor; delegates entirely to the superclass copy constructor.
 * <p>
 * Constructor required for {@link com.bigdata.bop.BOpUtility#deepCopy(FilterNode)}.
 *
 * @param op
 *            The operator to be copied.
 */
public HTreeDistinctBindingSetsOp(final HTreeDistinctBindingSetsOp op) {
super(op);
}
/**
 * Required shallow copy constructor.
 * <p>
 * After delegating to the superclass, the constructor validates the
 * configuration of the operator:
 * <ul>
 * <li>the evaluation context must be <code>CONTROLLER</code> or
 * <code>HASHED</code>;</li>
 * <li>{@link #assertMaxParallelOne()} is invoked (this operator is an
 * {@link ISingleThreadedOp});</li>
 * <li>the {@link Annotations#NAMED_SET_REF} annotation is looked up as a
 * required property and must be an {@link INamedSolutionSetRef};</li>
 * <li>the {@link Annotations#VARIABLES} annotation must be a non-empty
 * array of variables.</li>
 * </ul>
 *
 * @param args
 *            The operands (passed through to the superclass).
 * @param annotations
 *            The operator annotations (passed through to the superclass).
 *
 * @throws UnsupportedOperationException
 *             if the evaluation context is neither <code>CONTROLLER</code>
 *             nor <code>HASHED</code>.
 * @throws IllegalArgumentException
 *             if {@link Annotations#VARIABLES} is missing or empty.
 */
public HTreeDistinctBindingSetsOp(final BOp[] args,
        final Map annotations) {

    super(args, annotations);

    switch (getEvaluationContext()) {
    case CONTROLLER:
    case HASHED:
        break;
    default:
        throw new UnsupportedOperationException(
                Annotations.EVALUATION_CONTEXT + "="
                        + getEvaluationContext());
    }

    assertMaxParallelOne();

    // // shared state is used to share the hash table.
    // if (!isSharedState()) {
    // throw new UnsupportedOperationException(Annotations.SHARED_STATE
    // + "=" + isSharedState());
    // }

    /*
     * Note: The value is not used further in the constructor. The lookup
     * and cast are kept so that a badly typed annotation is rejected when
     * the operator is constructed rather than when it is evaluated
     * (getRequiredProperty() presumably also rejects a missing annotation
     * -- verify against BOp).
     */
    final INamedSolutionSetRef namedSetRef = (INamedSolutionSetRef) getRequiredProperty(Annotations.NAMED_SET_REF);

    final IVariable<?>[] vars = (IVariable[]) getProperty(Annotations.VARIABLES);

    if (vars == null || vars.length == 0) {
        throw new IllegalArgumentException(Annotations.VARIABLES
                + " must specify at least one variable");
    }

}
/**
 * Convenience constructor accepting the annotations as name/value pairs.
 * The pairs are converted with {@link NV#asMap(NV...)} and passed to the
 * (args, annotations-map) constructor of this class, so the validation
 * performed by that constructor applies here as well.
 *
 * @param args
 *            The operands.
 * @param annotations
 *            The operator annotations as name/value pairs.
 */
public HTreeDistinctBindingSetsOp(final BOp[] args, NV... annotations) {
this(args, NV.asMap(annotations));
}
/**
 * Wraps a new {@link DistinctTask} for this operator in a
 * {@link FutureTask}. The task is only created here; it is not started by
 * this method.
 *
 * @param context
 *            The evaluation context handed to the task.
 *
 * @return The (not yet executed) future for the DISTINCT filter task.
 */
public FutureTask eval(final BOpContext context) {

    final DistinctTask distinctTask = new DistinctTask(this, context);

    return new FutureTask(distinctTask);

}
/**
 * Task executing on the node.
 * <p>
 * The constructor resolves (or creates and registers) the
 * {@link HTreeHashJoinUtility} which is stored on the query attributes
 * under the operator's {@link INamedSolutionSetRef}. {@link #call()} then
 * passes the source solutions through
 * <code>HTreeHashJoinUtility.filterSolutions(...)</code> and writes what that
 * method emits onto the sink.
 */
static private class DistinctTask implements Callable {

    /** The operator being evaluated. */
    private final HTreeDistinctBindingSetsOp op;

    /** The evaluation context (source, sink, statistics, attributes). */
    private final BOpContext context;

    /** The state object found on, or registered with, the query attributes. */
    private final HTreeHashJoinUtility state;

    /**
     * Looks up the {@link HTreeHashJoinUtility} under the named solution
     * set reference, creating and registering it if it is not found.
     *
     * @param op
     *            The operator being evaluated.
     * @param context
     *            The evaluation context.
     *
     * @throws AssertionError
     *             if another state object was registered under the same
     *             reference between the lookup and the registration.
     */
    DistinctTask(final HTreeDistinctBindingSetsOp op,
            final BOpContext context) {

        this.op = op;

        this.context = context;

        // Metadata to identify the named solution set.
        final INamedSolutionSetRef namedSetRef = (INamedSolutionSetRef) op
                .getRequiredProperty(Annotations.NAMED_SET_REF);

        /*
         * First, see if the map already exists.
         *
         * Note: Since the operator is not thread-safe, a racing writer is
         * not expected here. putIfAbsent() is therefore used only to
         * assert that nobody else registered a state object, not as part
         * of a retry pattern.
         */

        /*
         * Lookup the attributes for the query on which we will hang the
         * solution set. See BLZG-1493 (if queryId is null, use the query
         * attributes for this running query).
         */
        final IQueryAttributes attrs = context.getQueryAttributes(namedSetRef.getQueryId());

        HTreeHashJoinUtility state = (HTreeHashJoinUtility) attrs
                .get(namedSetRef);

        if (state == null) {

            state = new HTreeHashJoinUtility(
                    context.getMemoryManager(namedSetRef.getQueryId()), op,
                    JoinTypeEnum.Filter);

            if (attrs.putIfAbsent(namedSetRef, state) != null) {
                throw new AssertionError(
                        "State already registered for " + namedSetRef);
            }

        }

        this.state = state;

    }

    /**
     * Filters the source solutions through the shared state and flushes
     * the output to the sink.
     * <p>
     * On the last invocation of the operator the state is released. The
     * sink is always closed, even when releasing the state fails.
     *
     * @return Always <code>null</code>.
     */
    @Override
    public Void call() throws Exception {

        final BOpStats stats = context.getStats();

        final ICloseableIterator itr = context
                .getSource();

        final IBlockingBuffer sink = context.getSink();

        try {

            final UnsyncLocalOutputBuffer unsyncBuffer = new UnsyncLocalOutputBuffer(
                    op.getChunkCapacity(), sink);

            state.filterSolutions(itr, stats, unsyncBuffer);

            unsyncBuffer.flush();

            sink.flush();

            // done.
            return null;

        } finally {

            /*
             * Nested try/finally: if release() throws, the sink must
             * still be closed. Otherwise the sink would be left open.
             */
            try {
                if (context.isLastInvocation()) {
                    state.release();
                }
            } finally {
                sink.close();
            }

        }

    }

} // class DistinctTask
}