/**
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved.
Contact:
SYSTAP, LLC DBA Blazegraph
2501 Calvert ST NW #106
Washington, DC 20008
licenses@blazegraph.com
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
* Created on Aug 18, 2010
*/
package com.bigdata.bop.join;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Executor;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.FutureTask;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.log4j.Logger;
import com.bigdata.bop.BOp;
import com.bigdata.bop.BOpContext;
import com.bigdata.bop.BOpEvaluationContext;
import com.bigdata.bop.IBindingSet;
import com.bigdata.bop.IConstraint;
import com.bigdata.bop.IElement;
import com.bigdata.bop.IPredicate;
import com.bigdata.bop.IShardwisePipelineOp;
import com.bigdata.bop.IVariable;
import com.bigdata.bop.NV;
import com.bigdata.bop.PipelineOp;
import com.bigdata.bop.engine.AbstractRunningQuery;
import com.bigdata.bop.engine.QueryTimeoutException;
import com.bigdata.btree.keys.IKeyBuilder;
import com.bigdata.concurrent.FutureTaskMon;
import com.bigdata.relation.IRelation;
import com.bigdata.relation.accesspath.AbstractUnsynchronizedArrayBuffer;
import com.bigdata.relation.accesspath.AccessPath;
import com.bigdata.relation.accesspath.ArrayAccessPath;
import com.bigdata.relation.accesspath.BlockingBuffer;
import com.bigdata.relation.accesspath.BufferClosedException;
import com.bigdata.relation.accesspath.EmptyAccessPath;
import com.bigdata.relation.accesspath.IAccessPath;
import com.bigdata.relation.accesspath.IAsynchronousIterator;
import com.bigdata.relation.accesspath.IBindingSetAccessPath;
import com.bigdata.relation.accesspath.IBlockingBuffer;
import com.bigdata.relation.accesspath.ThreadLocalBufferFactory;
import com.bigdata.relation.accesspath.UnsyncLocalOutputBuffer;
import com.bigdata.relation.rule.IRule;
import com.bigdata.relation.rule.IStarJoin;
import com.bigdata.relation.rule.IStarJoin.IStarConstraint;
import com.bigdata.relation.rule.eval.ISolution;
import com.bigdata.service.DataService;
import com.bigdata.striterator.IChunkedOrderedIterator;
import com.bigdata.striterator.IKeyOrder;
import com.bigdata.util.BytesUtil;
import com.bigdata.util.concurrent.Haltable;
import com.bigdata.util.concurrent.LatchedExecutor;
import cutthecrap.utils.striterators.ICloseableIterator;
/**
* Pipelined join operator for online (selective) queries. The pipeline join
* accepts chunks of binding sets from its operand, combines each binding set in
* turn with its {@link IPredicate} annotation to produce an "asBound"
* predicate, and then executes a nested indexed subquery against that asBound
* predicate, writing out a new binding set for each element returned by the
* asBound predicate which satisfies the join constraint.
*
* Note: In order to support pipelining, query plans need to be arranged in a
* "left-deep" manner.
*
* Note: In scale-out, the {@link PipelineJoin} is generally annotated as a
* {@link BOpEvaluationContext#SHARDED} or {@link BOpEvaluationContext#HASHED}
* operator and the {@link IPredicate} is annotated for local access paths. If
* you need to use remote access paths, then the {@link PipelineJoin} should be
* annotated as a {@link BOpEvaluationContext#ANY} operator since it will be
* issuing a remote read from each node on which it has source solutions to be
* joined.
*
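* <p>
* A minimal construction sketch (editor's illustration, not taken from the
* original source; {@code leftOp} and {@code pred} are hypothetical):
* </p>
* <pre>
* final PipelineJoin join = new PipelineJoin(//
*     new BOp[] { leftOp }, //
*     new NV(PipelineJoin.Annotations.PREDICATE, pred), //
*     new NV(PipelineJoin.Annotations.MAX_PARALLEL_CHUNKS, 0));
* </pre>
*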
* @author Bryan Thompson
* @version $Id$
*
* @todo Break the star join logic out into its own join operator and test suite
* (or just discard it).
*/
public class PipelineJoin<E> extends PipelineOp implements
IShardwisePipelineOp<E> {
static private final transient Logger log = Logger
.getLogger(PipelineJoin.class);
/**
*
*/
private static final long serialVersionUID = 1L;
public interface Annotations extends AccessPathJoinAnnotations {
// /**
// * An optional {@link IVariable}[] identifying the variables to be
// * retained in the {@link IBindingSet}s written out by the operator. All
// * variables are retained unless this annotation is specified.
// */
// String SELECT = (PipelineJoin.class.getName() + ".select").intern();
//
// Note: The OPTIONAL annotation is on the *predicate*.
//
// /**
// * Marks the join as "optional" in the SPARQL sense. Binding sets which
// * fail the join will be routed to the alternative sink as specified by
// * either {@link PipelineOp.Annotations#ALT_SINK_REF} or
// * {@link PipelineOp.Annotations#ALT_SINK_GROUP}.
// *
// * @see #DEFAULT_OPTIONAL
// *
// * @deprecated We should just inspect
// * {@link IPredicate.Annotations#OPTIONAL}.
// */
// String OPTIONAL = PipelineJoin.class.getName() + ".optional";
//
// boolean DEFAULT_OPTIONAL = false;
// /**
// * An {@link IConstraint}[] which places restrictions on the legal
// * patterns in the variable bindings (optional).
// */
// String CONSTRAINTS = (PipelineJoin.class.getName() + ".constraints").intern();
/**
* The maximum parallelism with which the pipeline will consume the
* source {@link IBindingSet}[] chunk.
*
* Note: When ZERO (0), everything will run in the caller's
* {@link Thread}, but there will still be one thread per pipeline join
* task which is executing concurrently against different source chunks.
* When GT ZERO (0), tasks will run on an {@link ExecutorService} with
* the specified maximum parallelism.
*
* Note: This is NOT the same as
* {@link PipelineOp.Annotations#MAX_PARALLEL}. This option
* ({@link #MAX_PARALLEL_CHUNKS}) limits the #of chunks for a single task
* which may be processed concurrently.
* {@link PipelineOp.Annotations#MAX_PARALLEL} limits the #of task
* instances which may run concurrently.
*
* @see #DEFAULT_MAX_PARALLEL_CHUNKS
*
* @todo Chunk level parallelism does not appear to benefit us when
* using the ChunkedRunningQuery class and the new QueryEngine, so
* this option might well go away which would allow us to simplify
* the PipelineJoin implementation. (All the thread local buffer
* stuff would go away as would much of the complexity of the
* haltable since we could directly interrupt the task if it all
* runs in the caller's thread).
*/
String MAX_PARALLEL_CHUNKS = (PipelineJoin.class.getName() + ".maxParallelChunks").intern();
int DEFAULT_MAX_PARALLEL_CHUNKS = 0;
/**
* When <code>true</code>, binding sets observed in the same chunk which
* have the same binding pattern on the variables for the access path will be
* coalesced into a single access path (default
* {@value #DEFAULT_COALESCE_DUPLICATE_ACCESS_PATHS}). This option
* increases the efficiency of the join since it reads the access path
* once per set of input binding sets which are coalesced. This option
* does NOT reduce the #of solutions generated.
*
* This option can cause some error in the join hit ratio when it is
* estimated from a cutoff join.
*
* @see PipelineJoinStats#getJoinHitRatio()
*
* @todo unit tests when (en|dis)abled.
*/
String COALESCE_DUPLICATE_ACCESS_PATHS = (PipelineJoin.class.getName()
+ ".coalesceDuplicateAccessPaths").intern();
boolean DEFAULT_COALESCE_DUPLICATE_ACCESS_PATHS = true;
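// Editor's illustration (hypothetical data): with coalescing enabled, two
// source solutions {x=:a, y=:b} and {x=:a, y=:c} joined against the pattern
// (x, :p, ?z) both produce the same as-bound predicate (:a, :p, ?z), so the
// corresponding access path is read once and each element visited is paired
// with both source solutions.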
/**
* When <code>true</code>, access paths will be reordered to maximize
* locality.
*
* Note: This needs to be turned off when the RTO uses row identifiers
* to correlate the input and output solutions for complex joins (those
* which required materialization of RDF Values for FILTER evaluation).
*
* @todo unit tests when (en|dis)abled.
*/
String REORDER_ACCESS_PATHS = (PipelineJoin.class.getName() + ".reorderAccessPaths")
.intern();
boolean DEFAULT_REORDER_ACCESS_PATHS = true;
/**
* The minimum number of (estimated) data points assigned to a task. This basically
* defines the threshold at which parallelization starts to pay off. Currently only
* implemented for the geospatial feature (see BLZG-1478), but this might be implemented
* more generally for the pipelined join in the future.
*/
public String MIN_DATAPOINTS_PER_TASK =
(PipelineJoin.class.getName() + ".minDatapointsPerTask").intern();
// note: the default might be a bit high, it is used for the GeoSpatial stuff only, where
// we effectively only investigate a small part of the range; we may decrease for
// normal access paths and, in turn, add an adjustment factor for usage in the geospatial
// context
public int DEFAULT_MIN_DATAPOINTS_PER_TASK = 100000;
/**
* Desired number of access path tasks generated per thread in case the range is large enough.
* The default is one task per thread, which is a good choice if there's not much skew in
* the data. If there's a lot of skew, it might be beneficial to increase this parameter to
* avoid a situation where a certain thread gets assigned longer-running tasks such that
* other threads finish earlier (leading to, effectively, decreased parallelism). On the downside,
* increasing this parameter leads to overhead in setting up additional subtasks,
* so values >5 are generally discouraged.
*
* Note that this is an upper bound only: we always consider the MIN_DATAPOINTS_PER_TASK
* threshold in order to avoid generating threads with a low workload.
*
* Currently only implemented for the geospatial feature (see BLZG-1478), but this might be
* implemented more generally for the pipelined join in the future.
*
* Must be a value >= 1.
*/
public String NUM_TASKS_PER_THREAD =
(PipelineJoin.class.getName() + ".numTasksPerThread").intern();
public int DEFAULT_NUM_TASKS_PER_THREAD = 1;
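// Editor's illustration (hypothetical numbers, not normative): with the
// defaults above, a geospatial range estimated at 1,000,000 data points
// scanned by 4 threads would be split into min(4 threads * 1 task/thread,
// 1,000,000 / 100,000) = 4 tasks of roughly 250,000 data points each, which
// keeps every task above the MIN_DATAPOINTS_PER_TASK threshold.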
}
/**
* Deep copy constructor.
*
* @param op
*/
public PipelineJoin(final PipelineJoin op) {
super(op);
}
/**
* Shallow copy vararg constructor.
*
* @param args
* @param annotations
*/
public PipelineJoin(final BOp[] args, final NV... annotations) {
this(args, NV.asMap(annotations));
}
/**
* Shallow copy constructor.
*
* @param args
* @param annotations
*/
public PipelineJoin(final BOp[] args, final Map<String, Object> annotations) {
super(args, annotations);
/*
* TODO We should be checking this operator's required properties,
* especially when used for the RTO.
*/
// if (arity() != 1)
// throw new IllegalArgumentException();
// if (left() == null)
// throw new IllegalArgumentException();
}
/**
* {@inheritDoc}
*
* @see Annotations#PREDICATE
*/
@SuppressWarnings("unchecked")
@Override
public IPredicate<E> getPredicate() {
return (IPredicate<E>) getRequiredProperty(Annotations.PREDICATE);
}
/**
* Return the value of {@link IPredicate#isOptional()} for the
* {@link IPredicate} associated with this join.
*
* @see IPredicate.Annotations#OPTIONAL
*/
private boolean isOptional() {
// return getProperty(Annotations.OPTIONAL, Annotations.DEFAULT_OPTIONAL);
return getPredicate().isOptional();
}
/**
*
* @see Annotations#CONSTRAINTS
*/
public IConstraint[] constraints() {
return getProperty(Annotations.CONSTRAINTS, null/* defaultValue */);
}
/**
* @see Annotations#MAX_PARALLEL_CHUNKS
*/
public int getMaxParallelChunks() {
// return 5;
return getProperty(Annotations.MAX_PARALLEL_CHUNKS,
Annotations.DEFAULT_MAX_PARALLEL_CHUNKS);
}
/**
* @see Annotations#SELECT
*/
public IVariable<?>[] variablesToKeep() {
return getProperty(Annotations.SELECT, null/* defaultValue */);
}
@Override
public PipelineJoinStats newStats() {
return new PipelineJoinStats();
}
@Override
public FutureTask<Void> eval(final BOpContext<IBindingSet> context) {
return new FutureTaskMon<Void>(new JoinTask<E>(this, context));
}
/**
* Pipeline join impl.
*/
private static class JoinTask<E> extends Haltable<Void> implements
Callable<Void> {
/**
* The join that is being executed.
*/
final private PipelineJoin<?> joinOp;
/**
* The constraint (if any) specified for the join operator.
*/
final private IConstraint[] constraints;
/**
* The maximum parallelism with which the {@link JoinTask} will consume
* the source {@link IBindingSet}s.
*
* @see Annotations#MAX_PARALLEL_CHUNKS
*/
final private int maxParallelChunks;
/**
* The service used for executing subtasks (optional).
*
* @see #maxParallelChunks
*/
final private Executor service;
/**
* True iff the {@link #predicate} operand is an optional pattern (aka
* if this is a SPARQL style left join).
*/
final private boolean optional;
/**
* The variables to be retained by the join operator. Variables not
* appearing in this list will be stripped before writing out the
* binding set onto the output sink(s).
*/
final private IVariable<?>[] variablesToKeep;
/**
* The source for the elements to be joined.
*/
final private IPredicate<E> predicate;
/**
* The relation associated with the {@link #predicate} operand.
*/
final private IRelation<E> relation;
/**
* The partition identifier -or- <code>-1</code> if we are not reading
* on an index partition.
*/
final private int partitionId;
/**
* The evaluation context.
*/
final private BOpContext<IBindingSet> context;
// /**
// * When <code>true</code>, the {@link #stats} will be tracked. This is
// * <code>false</code> unless logging is requested for {@link QueryLog}
// * or stats are explicitly requested (e.g., to support cutoff joins).
// */
// final private boolean trackStats;
/**
* The statistics for this {@link JoinTask}.
*/
final private PipelineJoinStats stats;
/**
* An optional limit on the #of solutions to be produced. The limit is
* ignored if it is {@link Long#MAX_VALUE}.
*
* Note: Invoking {@link #halt(Object)} is necessary to enforce the
* limit.
*
* @see Annotations#LIMIT
*/
final private long limit;
/**
* When <code>true</code> an attempt will be made to coalesce as-bound
* predicates which result in the same access path.
*
* @see Annotations#COALESCE_DUPLICATE_ACCESS_PATHS
*/
final private boolean coalesceAccessPaths;
/**
* When <code>true</code>, access paths will be reordered to maximize
* locality.
*
* @see Annotations#REORDER_ACCESS_PATHS
*/
final private boolean reorderAccessPaths;
/**
* Used to enforce the {@link Annotations#LIMIT} iff one is specified.
*/
final private AtomicLong exactOutputCount = new AtomicLong();
/**
* The source from which we read the binding set chunks.
*
* Note: In keeping with the top-down evaluation of the operator tree
* the source should not be set until we begin to execute the
* {@link #left} operand and that should not happen until we are in
* {@link #call()} in order to ensure that the producer will be
* terminated if there is a problem setting up this join. Given that, it
* might need to be an atomic reference or volatile or the like.
*/
final private ICloseableIterator<IBindingSet[]> source;
/**
* Where the join results are written.
*
* Chunks of bindingSets are written onto per-Thread unsynchronized buffers
* by {@link ChunkTask}. Those unsynchronized buffers overflow onto the
* per-JoinTask {@link #sink}, which is a thread-safe
* {@link IBlockingBuffer}. The downstream pipeline operator drains that
* {@link IBlockingBuffer} using its iterator(). When the {@link #sink}
* is closed and everything in it has been drained, then the downstream
* operator will conclude that no more bindingSets are available and it
* will terminate.
*/
final private IBlockingBuffer<IBindingSet[]> sink;
/**
* The alternative sink to use when the join is {@link #optional} AND
* {@link BOpContext#getSink2()} returns a distinct buffer for the
* alternative sink. The binding sets from the source are copied onto
* the alternative sink for an optional join if the join fails. Normally
* the {@link BOpContext#getSink()} can be used for both the joins which
* succeed and those which fail. The alternative sink is only necessary
* when the failed join needs to jump out of a join group rather than
* routing directly to the ancestor in the operator tree.
*/
final private IBlockingBuffer<IBindingSet[]> sink2;
/**
* The thread-local buffer factory for the default sink.
*/
final private TLBFactory threadLocalBufferFactory;
/**
* The thread-local buffer factory for the optional sink (iff the
* optional sink is defined).
*/
final private TLBFactory threadLocalBufferFactory2;
/**
* Instances of this class MUST be created in the appropriate execution
* context of the target {@link DataService} so that the federation and
* the joinNexus references are both correct and so that it has access
* to the local index object for the specified index partition.
*
* @param joinOp
* @param context
*/
public JoinTask(//
final PipelineJoin<E> joinOp,//
final BOpContext<IBindingSet> context) {
if (joinOp == null)
throw new IllegalArgumentException();
if (context == null)
throw new IllegalArgumentException();
this.joinOp = joinOp;
this.predicate = joinOp.getPredicate();
this.constraints = joinOp.constraints();
this.maxParallelChunks = joinOp.getMaxParallelChunks();
if (maxParallelChunks < 0)
throw new IllegalArgumentException(Annotations.MAX_PARALLEL_CHUNKS
+ "=" + maxParallelChunks);
if (maxParallelChunks > 0) {
// shared service.
service = new LatchedExecutor(context.getIndexManager()
.getExecutorService(), maxParallelChunks);
} else {
// run in the caller's thread.
service = null;
}
this.optional = joinOp.isOptional();
this.variablesToKeep = joinOp.variablesToKeep();
this.context = context;
this.relation = context.getRelation(predicate);
this.source = context.getSource();
this.sink = context.getSink();
this.sink2 = context.getSink2();
this.partitionId = context.getPartitionId();
this.stats = (PipelineJoinStats) context.getStats();
this.limit = joinOp.getProperty(Annotations.LIMIT,
Annotations.DEFAULT_LIMIT);
this.coalesceAccessPaths = joinOp.getProperty(
Annotations.COALESCE_DUPLICATE_ACCESS_PATHS,
Annotations.DEFAULT_COALESCE_DUPLICATE_ACCESS_PATHS);
this.reorderAccessPaths = joinOp.getProperty(
Annotations.REORDER_ACCESS_PATHS,
Annotations.DEFAULT_REORDER_ACCESS_PATHS);
this.threadLocalBufferFactory = new TLBFactory(sink);
this.threadLocalBufferFactory2 = sink2 == null ? null
: new TLBFactory(sink2);
if (log.isDebugEnabled())
log.debug("joinOp=" + joinOp);
}
@Override
public String toString() {
return getClass().getName() + "{ joinOp=" + joinOp + "}";
}
/**
* Runs the {@link JoinTask}.
*
* @return <code>null</code>.
*/
@Override
public Void call() throws Exception {
// final long begin = System.currentTimeMillis();
if (log.isDebugEnabled())
log.debug("joinOp=" + joinOp);
try {
/*
* Consume bindingSet chunks from the source JoinTask(s).
*/
consumeSource();
/*
* Flush and close the thread-local output buffers.
*/
threadLocalBufferFactory.flush();
if (threadLocalBufferFactory2 != null)
threadLocalBufferFactory2.flush();
// flush the sync buffer
flushAndCloseBuffersAndAwaitSinks();
if (log.isDebugEnabled())
log.debug("JoinTask done: joinOp=" + joinOp);
halted();
return null;
} catch (Throwable t) {
/*
* This is used for processing errors and also if this task is
* interrupted (because the sink has been closed).
*/
// ensure query halts.
halt(t);
// reset the unsync buffers.
try {
// resetUnsyncBuffers();
threadLocalBufferFactory.reset();
if (threadLocalBufferFactory2 != null)
threadLocalBufferFactory2.reset();
} catch (Throwable t2) {
log.error(t2.getLocalizedMessage(), t2);
}
// reset the sync buffer and cancel the sink JoinTasks.
try {
cancelSinks();
} catch (Throwable t2) {
log.error(t2.getLocalizedMessage(), t2);
}
/*
* Close source iterators, which will cause any source JoinTasks
* that are still executing to throw a CancellationException
* when the Future associated with the source iterator is
* cancelled.
*/
try {
closeSources();
} catch (Throwable t2) {
log.error(t2.getLocalizedMessage(), t2);
}
if (getCause() != null) {
// abnormal termination.
throw new RuntimeException(t);
}
// normal termination - ignore exception.
return null;
// } finally {
//
// stats.elapsed.add(System.currentTimeMillis() - begin);
// } finally {
// System.err.println(joinOp.toString());
// System.err.println(stats.toString());
}
}
/**
* Consume {@link IBindingSet} chunks from the {@link #source}.
*
* @throws Exception
*/
protected void consumeSource() throws Exception {
IBindingSet[] chunk;
while (!isDone() && (chunk = nextChunk()) != null) {
if (chunk.length == 0) {
/*
* This can happen if you feed in an empty IBindingSet[] to
* the join.
*/
continue;
}
/*
* Consume the chunk until done using either the caller's thread
* or the executor service as appropriate to run subtasks.
*/
if (chunk.length <= 1) {
/*
* Run on the caller's thread anyway since there is just one
* binding set to be consumed.
*/
new BindingSetConsumerTask(null/* service */, chunk)
.call();
} else {
/*
* Run subtasks on either the caller's thread or the shared
* executed service depending on the configured value of
* [maxParallel].
*/
new BindingSetConsumerTask(service, chunk).call();
}
}
}
/**
* Closes the {@link #source} specified to the ctor.
*/
protected void closeSources() {
if (log.isInfoEnabled())
log.info(toString());
source.close();
}
/**
* Flush and close all output buffers and await sink {@link JoinTask}
* (s).
*
* Note: You MUST close the {@link BlockingBuffer} from which each sink
* reads before invoking this method in order for those sinks
* to terminate. Otherwise the source {@link IAsynchronousIterator}(s)
* on which the sink is reading will remain open and the sink will never
* decide that it has exhausted its source(s).
*
* @throws InterruptedException
* @throws ExecutionException
*/
protected void flushAndCloseBuffersAndAwaitSinks()
throws InterruptedException, ExecutionException {
if (log.isDebugEnabled())
log.debug("joinOp=" + joinOp);
/*
* Close the thread-safe output buffer. For any JOIN except the
* last, this buffer will be the source for one or more sink
* JoinTasks for the next join dimension. Once this buffer is
* closed, the asynchronous iterator draining the buffer will
* eventually report that there is nothing left for it to process.
*
* Note: This is a BlockingBuffer. BlockingBuffer#flush() is a NOP.
*/
sink.flush();
sink.close();
if (sink2 != null) {
sink2.flush();
sink2.close();
}
}
/**
* {@inheritDoc}
*
* Note: The {@link JoinTask} extends {@link Haltable}. We want to treat
* the {@link QueryTimeoutException} as a normal termination cause for a
* {@link JoinTask}. The {@link Haltable} for the
* {@link AbstractRunningQuery} will have its root cause set to the
* {@link QueryTimeoutException} and from there the exception will get
* eventually converted back into the appropriate openrdf exception. We
* do things differently here because the {@link JoinTask} termination
* is not the same as the {@link AbstractRunningQuery} termination.
* {@link JoinTask} is the only place right now where we extend haltable
* and the only place where we have to make this specific override.
*
* @see "Query timeout only checked at operator start/stop"
*/
@Override
protected boolean isNormalTerminationCause(final Throwable cause) {
return super.isNormalTerminationCause(cause)
|| super.isDeadlineTerminationCause(cause);
}
/**
* Cancel sink {@link JoinTask}(s).
*/
protected void cancelSinks() {
if (log.isDebugEnabled())
log.debug("joinOp=" + joinOp);
sink.reset();
if (sink.getFuture() != null) {
sink.getFuture().cancel(true/* mayInterruptIfRunning */);
}
if (sink2 != null) {
sink2.reset();
if (sink2.getFuture() != null) {
sink2.getFuture().cancel(true/* mayInterruptIfRunning */);
}
}
}
/**
* Return a chunk of {@link IBindingSet}s from source.
*
* @return The next chunk -or- <code>null</code> iff the source is
* exhausted.
*/
protected IBindingSet[] nextChunk() throws InterruptedException {
if (log.isDebugEnabled())
log.debug("joinOp=" + joinOp);
/*
* We no longer have chunks which are being read from a remote
* process. All chunks are fully materialized before a PipelineOp
* begins to execute. Therefore there is no longer any reason to use
* the non-blocking API in PipelineJoin.
*
* @see https://sourceforge.net/apps/trac/bigdata/ticket/475
*/
// while (!source.isExhausted()) {
//
// halted();
//
// // note: uses timeout to avoid blocking w/o testing [halt].
// if (source.hasNext(10, TimeUnit.MILLISECONDS)) {
//
// // read the chunk.
// final IBindingSet[] chunk = source.next();
//
// stats.chunksIn.increment();
// stats.unitsIn.add(chunk.length);
//
// if (log.isDebugEnabled())
// log.debug("Read chunk from source: chunkSize="
// + chunk.length + ", joinOp=" + joinOp);
//
// return chunk;
//
// }
//
// }
while (source.hasNext()) {
halted();
// read the chunk.
final IBindingSet[] chunk = source.next();
stats.chunksIn.increment();
stats.unitsIn.add(chunk.length);
if (log.isDebugEnabled())
log.debug("Read chunk from source: chunkSize="
+ chunk.length + ", joinOp=" + joinOp);
return chunk;
}
/*
* Termination condition: the source is exhausted.
*/
if (log.isDebugEnabled())
log.debug("Source exhausted: joinOp=" + joinOp);
return null;
}
/**
* Class consumes a chunk of binding set executing a nested indexed join
* until canceled, interrupted, or all the binding sets are exhausted.
* For each {@link IBindingSet} in the chunk, an {@link AccessPathTask}
* is created which will consume that {@link IBindingSet}. The
* {@link AccessPathTask}s are sorted based on their
* <i>fromKey</i> so as to order the execution of those tasks in a
* manner that will maximize the efficiency of index reads. The ordered
* {@link AccessPathTask}s are then submitted to the caller's
* {@link Executor} or run in the caller's thread if the executor is
* <code>null</code>.
*
* @author Bryan Thompson
*/
protected class BindingSetConsumerTask implements Callable<Void> {
private final Executor executor;
private final IBindingSet[] chunk;
/**
*
* @param executor
* The service that will execute the generated
* {@link AccessPathTask}s -or- <code>null</code> IFF you
* want the {@link AccessPathTask}s to be executed in the
* caller's thread.
* @param chunk
* A chunk of binding sets from the upstream producer.
*/
public BindingSetConsumerTask(final Executor executor,
final IBindingSet[] chunk) {
if (chunk == null)
throw new IllegalArgumentException();
this.executor = executor;
this.chunk = chunk;
}
/**
* Read chunks from one or more sources until canceled, interrupted,
* or all sources are exhausted and submits {@link AccessPathTask}s
* to the caller's {@link ExecutorService} -or- executes those tasks
* in the caller's thread if no {@link ExecutorService} was provided
* to the ctor.
*
* Note: When running with an {@link ExecutorService}, the caller is
* responsible for waiting on that {@link ExecutorService} until the
* {@link AccessPathTask}s complete and must verify that all tasks
* completed successfully.
*
* @return <code>null</code>
*
* @throws BufferClosedException
* if there is an attempt to output a chunk of
* {@link IBindingSet}s or {@link ISolution}s and the
* output buffer is an {@link IBlockingBuffer} (true for
* all join dimensions except the lastJoin and also
* true for query on the lastJoin) and that
* {@link IBlockingBuffer} has been closed.
*/
@Override
public Void call() throws Exception {
try {
if (chunk.length == 1) {
// fast path if the chunk has a single binding set.
runOneTask();
return null;
}
/*
* Generate (and optionally coalesce) the access path tasks.
*/
final AccessPathTask[] tasks = generateAccessPaths(chunk);
/*
* Reorder those tasks for better index read performance.
*/
if (reorderAccessPaths)
reorderTasks(tasks);
/*
* Execute the tasks (either in the caller's thread or on
* the supplied service).
*/
executeTasks(tasks);
return null;
} catch (Throwable t) {
// ensure query halts.
halt(t);
if (getCause() != null) {
// abnormal termination.
// log.error("Halting join (abnormal termination): t="+t+" : cause="+getCause());
// throw new RuntimeException("Halting join: " + t, t);
throw new RuntimeException(t);
}
// normal termination - ignore exception.
if (log.isDebugEnabled())
log.debug("Caught and ignored exception: " + t);
return null;
}
}
/**
* There is exactly one {@link IBindingSet} in the chunk, so run
* exactly one {@link AccessPathTask}.
*
* @throws Exception
*/
private void runOneTask() throws Exception {
if (chunk.length != 1)
throw new AssertionError();
final IBindingSet bindingSet = chunk[0];
// constrain the predicate to the given bindings.
IPredicate<E> asBound = predicate.asBound(bindingSet);
if (asBound == null) {
/*
* This can happen for a SIDS mode join if some of the
* (s,p,o,[c]) and SID are bound on entry and they can not
* be unified. For example, the s position might be
* inconsistent with the Subject that can be decoded from
* the SID binding.
*
* @see #815 (RDR query does too much work)
*/
return;
}
if (partitionId != -1) {
/*
* Constrain the predicate to the desired index partition.
*
* Note: we do this for scale-out joins since the access
* path will be evaluated by a JoinTask dedicated to this
* index partition, which is part of how we allow the
* JoinTask to gain access to the local index object for an
* index partition.
*/
asBound = asBound.setPartitionId(partitionId);
}
new JoinTask.AccessPathTask(asBound, Arrays.asList(chunk))
.call();
}
/**
* Generate (and optionally coalesce) the {@link AccessPathTask}s
* for the chunk.
*
* @param chunk
* The chunk.
*
* @return The tasks to process that chunk.
*/
protected AccessPathTask[] generateAccessPaths(
final IBindingSet[] chunk) {
final AccessPathTask[] tasks;
if (coalesceAccessPaths) {
/*
* Aggregate the source bindingSets that license the same
* asBound predicate. The predicates in the keys of this map
* as "as-bound".
*/
final Map<IPredicate<E>, Collection<IBindingSet>> map = combineBindingSets(chunk);
/*
* Generate an AccessPathTask from each distinct asBound
* predicate that will consume all of the source bindingSets
* in the chunk which resulted in the same asBound
* predicate.
*/
tasks = getAccessPathTasks(map);
} else {
/*
* Do not coalesce access paths.
*/
final List<AccessPathTask> tmp = new LinkedList<AccessPathTask>();
for (int i = 0; i < chunk.length; i++) {
final IBindingSet bindingSet = chunk[i];
// constrain the predicate to the given bindings.
IPredicate<E> asBound = predicate.asBound(bindingSet);
if (asBound == null) {
/*
* This can happen for a SIDS mode join if some of the
* (s,p,o,[c]) and SID are bound on entry and they can not
* be unified. For example, the s position might be
* inconsistent with the Subject that can be decoded from
* the SID binding.
*
* @see #815 (RDR query does too much work)
*/
continue;
}
if (partitionId != -1) {
/*
* Constrain the predicate to the desired index
* partition.
*
* Note: we do this for scale-out joins since the
* access path will be evaluated by a JoinTask
* dedicated to this index partition, which is part
* of how we allow the JoinTask to gain access to the
* local index object for an index partition.
*/
asBound = asBound.setPartitionId(partitionId);
}
tmp.add(new AccessPathTask(asBound, Collections
.singletonList(bindingSet)));
}
// Exact fit array.
tasks = tmp
.toArray(new JoinTask.AccessPathTask[tmp.size()]);
}
return tasks;
}
/**
* Populates a map of asBound predicates paired to a set of
* bindingSets.
*
* Note: The {@link AccessPathTask} will apply each bindingSet to
* each element visited by the {@link IAccessPath} obtained for the
* asBound {@link IPredicate}. This has the natural consequence of
* eliminating subqueries within the chunk.
*
* @param chunk
* A chunk of bindingSets from the source join dimension.
*
* @return A map which pairs the distinct asBound predicates to the
* bindingSets in the chunk from which the predicate was
* generated.
*/
protected Map<IPredicate<E>, Collection<IBindingSet>> combineBindingSets(
final IBindingSet[] chunk) {
if (log.isDebugEnabled())
log.debug("chunkSize=" + chunk.length);
/*
* Note: HashMap is used in preference to LinkedHashMap for
* better speed. We do not need to maintain order in this
* collection.
*/
final Map<IPredicate<E>, Collection<IBindingSet>> map = new HashMap<IPredicate<E>, Collection<IBindingSet>>(
chunk.length);
for (IBindingSet bindingSet : chunk) {
halted();
// constrain the predicate to the given bindings.
IPredicate<E> asBound = predicate.asBound(bindingSet);
if (asBound == null) {
/*
* This can happen for a SIDS mode join if some of the
* (s,p,o,[c]) and SID are bound on entry and they can not
* be unified. For example, the s position might be
* inconsistent with the Subject that can be decoded from
* the SID binding.
*
* @see #815 (RDR query does too much work)
*/
continue;
}
if (partitionId != -1) {
/*
* Constrain the predicate to the desired index
* partition.
*
* Note: we do this for scale-out joins since the access
* path will be evaluated by a JoinTask dedicated to
* this index partition, which is part of how we allow
* the JoinTask to gain access to the local index object
* for an index partition.
*/
asBound = asBound.setPartitionId(partitionId);
}
// lookup the asBound predicate in the map.
// final HashedPredicate hashedPred = new HashedPredicate(
// asBound);
final IPredicate<E> hashedPred = asBound;
Collection<IBindingSet> values = map.get(hashedPred);
if (values == null) {
/*
* This is the first bindingSet for this asBound
* predicate. We create a collection of bindingSets to
* be paired with that predicate and put the collection
* into the map using that predicate as the key.
*/
values = new LinkedList<IBindingSet>();
map.put(hashedPred, values);
} else {
// more than one bindingSet will use the same access
// path.
stats.accessPathDups.increment();
}
/*
* Add the bindingSet to the collection of bindingSets
* paired with the asBound predicate.
*/
values.add(bindingSet);
}
if (log.isDebugEnabled())
log.debug("chunkSize=" + chunk.length
+ ", #distinct predicates=" + map.size());
return map;
}
/**
* Creates an {@link AccessPathTask} for each distinct asBound
* predicate in the given map.
*
* @param map
* A map pairing each distinct asBound predicate with the
* {@link IBindingSet}s in the chunk from which that
* predicate was generated.
*
* @return A chunk of {@link AccessPathTask} in a desirable
* execution order.
*
* @throws Exception
*/
protected AccessPathTask[] getAccessPathTasks(
final Map<IPredicate<E>, Collection<IBindingSet>> map) {
final int n = map.size();
if (log.isDebugEnabled())
log.debug("#distinct predicates=" + n);
final AccessPathTask[] tasks = new JoinTask.AccessPathTask[n];
final Iterator<Map.Entry<IPredicate<E>, Collection<IBindingSet>>> itr = map
.entrySet().iterator();
int i = 0;
while (itr.hasNext()) {
halted();
final Map.Entry<IPredicate<E>, Collection<IBindingSet>> entry = itr
.next();
tasks[i++] = new AccessPathTask(entry.getKey(), entry
.getValue());
}
return tasks;
}
/**
* The tasks are ordered based on the fromKey for the
* associated {@link IAccessPath} as licensed by each
* {@link IBindingSet}. This order tends to focus the reads on the
* same parts of the index partitions with a steady progression in
* the fromKey as we process a chunk of {@link IBindingSet}s.
*
* @param tasks
* The tasks.
* @see "#1192 (RDR query fails with ArrayIndexOutOfBoundsException)"
*/
protected void reorderTasks(final AccessPathTask[] tasks) {
if (tasks.length == 0) {
// See #1192.
return;
}
// @todo layered access paths do not expose a fromKey.
if (tasks[0].accessPath instanceof AccessPath<?>) {
// reorder the tasks.
Arrays.sort(tasks);
}
}
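// Editor's note: the sort above relies on the natural ordering of
// AccessPathTask (it implements Comparable). Presumably that ordering
// compares the unsigned byte[] fromKeys, along the lines of:
//
// public int compareTo(final AccessPathTask o) {
//     return BytesUtil.compareBytes(getFromKey(), o.getFromKey());
// }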
/**
* Either execute the tasks in the caller's thread or schedule them
* for execution on the supplied service.
*
* @param tasks
* The tasks.
*
* @throws Exception
*/
protected void executeTasks(final AccessPathTask[] tasks)
throws Exception {
if (executor == null) {
/*
* No Executor, so run each task in the caller's thread.
*/
for (AccessPathTask task : tasks) {
task.call();
}
return;
}
/*
* Build list of FutureTasks. This list is used to check all
* tasks for errors and ensure that any running tasks are
* cancelled.
*/
final List<FutureTask<Void>> futureTasks = new LinkedList<FutureTask<Void>>();
for (AccessPathTask task : tasks) {
final FutureTask<Void> ft = new FutureTaskMon<Void>(task);
futureTasks.add(ft);
}
try {
/*
* Execute all tasks.
*/
for (FutureTask<Void> ft : futureTasks) {
halted();
// Queue for execution.
executor.execute(ft);
} // next task.
/*
* Wait for each task. If any task throws an exception, then
* [halt] will become true and any running tasks will error
* out quickly. Once [halt := true], we do not wait for any
* more tasks, but proceed to cancel all tasks in the
* finally {} clause below.
*/
for (FutureTask<Void> ft : futureTasks) {
// Wait for a task.
if (!isDone())
ft.get();
}
} finally {
/*
* Ensure that all tasks are cancelled, regardless of
* whether they were started or have already finished.
*/
for (FutureTask<Void> ft : futureTasks) {
ft.cancel(true/* mayInterruptIfRunning */);
}
}
}
}
/**
* Accepts an asBound {@link IPredicate} and a (non-empty) collection of
* {@link IBindingSet}s each of which licenses the same asBound
* predicate for the current join dimension. The task obtains the
* corresponding {@link IAccessPath} and delegates each chunk visited on
* that {@link IAccessPath} to a {@link ChunkTask}. Note that optionals
* are also handled by this task.
*
* @author Bryan Thompson
*/
protected class AccessPathTask implements Callable<Void>,
Comparable<AccessPathTask> {
/**
* The {@link IBindingSet}s from the source join dimension to be
* combined with each element visited on the {@link #accessPath}. If
* there is only a single source {@link IBindingSet} in a given
* chunk of source {@link IBindingSet}s that results in the same
* asBound {@link IPredicate} then this will be an array with a
* single element. However, if multiple source {@link IBindingSet}s
* result in the same asBound {@link IPredicate} within the same
* chunk then those are aggregated and appear together in this
* array.
*
* Note: An array is used for thread-safe traversal.
*/
final private IBindingSet[] bindingSets;
/**
* An array correlated with the {@link #bindingSets} whose values
* are the #of solutions generated for each of the source binding
* sets consumed by this {@link AccessPathTask}. This array is used
* to determine whether or not any solutions were produced for a
* given {@link IBindingSet}. When the join is optional and no
* solutions were produced for a given {@link IBindingSet}, the
* {@link IBindingSet} is output anyway.
*/
final private int[] naccepted;
/**
* The {@link IAccessPath} corresponding to the asBound
* {@link IPredicate} for this join dimension. The asBound
* {@link IPredicate} is {@link IAccessPath#getPredicate()}.
*/
final private IAccessPath<E> accessPath;
/**
* Return the fromKey for the {@link IAccessPath} generated
* from the {@link IBindingSet} for this task.
*
* @todo Layered access paths do not expose a fromKey, but the
* information we need is available
* {@link IKeyOrder#getFromKey(IKeyBuilder, IPredicate)}.
*
* @see "ClassCastException in SIDs mode query"
*/
protected byte[] getFromKey() {
if (accessPath instanceof EmptyAccessPath) {
// EmptyAccessPath does not extend AccessPath.
return BytesUtil.EMPTY;
}
return ((AccessPath<E>) accessPath).getFromKey();
}
@Override
public int hashCode() {
return super.hashCode();
}
/**
* Return <code>true</code> iff the tasks are equivalent (same as
* bound predicate). This test may be used to eliminate duplicates
* that arise when different source {@link JoinTask}s generate the
* same {@link IBindingSet}.
*
* @param o
* Another task.
*
* @return <code>true</code> iff the as-bound predicates are equals().
*/
@Override
public boolean equals(final Object o) {
if (this == o)
return true;
if (!(o instanceof JoinTask.AccessPathTask))
return false;
return accessPath.getPredicate().equals(
((AccessPathTask) o).accessPath.getPredicate());
}
/**
* Evaluate an {@link IBindingSet} for the join dimension. When the
* task runs, it will pair each element visited on the
* {@link IAccessPath} with the asBound {@link IPredicate}. For each
* element visited, if the binding is acceptable for the constraints
* on the asBound {@link IPredicate}, then the task will emit one
* {@link IBindingSet} for each source {@link IBindingSet}.
*
* @param predicate
* The asBound {@link IPredicate}.
* @param bindingSets
* A collection of {@link IBindingSet}s from the source
* join dimension that all result in the same asBound
* {@link IPredicate}.
*/
public AccessPathTask(final IPredicate<E> predicate,
final Collection<IBindingSet> bindingSets) {
if (predicate == null)
throw new IllegalArgumentException();
if (bindingSets == null)
throw new IllegalArgumentException();
/*
* Note: this needs to be the access path for the local index
* partition. We handle this by (a) constraining the predicate
* to the desired index partition; (b) using an IJoinNexus that
* is initialized once the JoinTask starts to execute inside of
* the ConcurrencyManager; and (c) declaring and (d) using the
* index partition name NOT the scale-out index name.
*/
final int n = bindingSets.size();
if (n == 0)
throw new IllegalArgumentException();
this.accessPath = context.getAccessPath(relation, predicate);
if (log.isDebugEnabled()) {
log.debug("joinOp=" + joinOp);
log.debug("#bindingSets=" + n);
log.debug("accessPath=" + accessPath);
}
// convert to array for thread-safe traversal.
this.bindingSets = bindingSets.toArray(new IBindingSet[n]);
this.naccepted = new int[n];
}
@Override
public String toString() {
return JoinTask.this.getClass().getSimpleName() + "{ joinOp="
+ joinOp + ", #bindingSets=" + bindingSets.length + "}";
}
/**
* Evaluate the {@link #accessPath} against the {@link #bindingSets}
* . If nothing is accepted and {@link IPredicate#isOptional()} then
* the {@link #bindingSets} is output anyway (this implements the
* semantics of OPTIONAL).
*
* @return <code>null</code>.
*
* @throws BufferClosedException
* if there is an attempt to output a chunk of
* {@link IBindingSet}s or {@link ISolution}s and the
* output buffer is an {@link IBlockingBuffer} (true for
* all join dimensions except the lastJoin and also
* true for query on the lastJoin) and that
* {@link IBlockingBuffer} has been closed.
*/
@Override
public Void call() throws Exception {
halted();
if (limit != Long.MAX_VALUE && exactOutputCount.get() > limit) {
// break query @ limit.
if (log.isInfoEnabled())
log.info("Breaking query @ limit: limit=" + limit
+ ", exactOutputCount="
+ exactOutputCount.get());
halt((Void) null);
return null;
}
// range count of the as-bound access path (should be cached).
final long rangeCount = accessPath
.rangeCount(false/* exact */);
if (log.isDebugEnabled()) {
log.debug("range count: " + rangeCount);
}
stats.accessPathCount.increment();
stats.accessPathRangeCount.add(rangeCount);
if (accessPath.getPredicate() instanceof IStarJoin<?>) {
/*
* Star join. This has not yet proven to be more efficient.
* Presumably we wind up with sufficiently good IO locality
* with a normal pipeline join that the star join does not
* improve matters further.
*/
handleStarJoin();
} else {
if (accessPath instanceof IBindingSetAccessPath) {
// Handle join in terms of an IBindingSet iterator.
handleJoin2();
} else {
// Handle join in terms of an IElement[] iterator.
handleJoin();
}
}
return null;
}
/**
* A vectored pipeline join (chunk at a time processing) for
* {@link IElement}s.
*/
@Deprecated // by handleJoin2()
protected void handleJoin() {
final long cutoffLimit = predicate.getProperty(
IPredicate.Annotations.CUTOFF_LIMIT,
IPredicate.Annotations.DEFAULT_CUTOFF_LIMIT);
// Obtain the iterator for the current join dimension.
final IChunkedOrderedIterator<E> itr = accessPath.iterator(0,
cutoffLimit, 0/* capacity */);
try {
// Each thread gets its own buffer.
final AbstractUnsynchronizedArrayBuffer<IBindingSet> unsyncBuffer = threadLocalBufferFactory
.get();
// #of input solutions consumed (pre-increment).
stats.inputSolutions.add(bindingSets.length);
while (itr.hasNext()) {
halted();
final Object[] chunk = itr.nextChunk();
stats.accessPathChunksIn.increment();
// System.err.println("#chunks="+stats.accessPathChunksIn+", chunkSize="+chunk.length);
// process the chunk in the caller's thread.
new ChunkTask(bindingSets, naccepted, unsyncBuffer,
chunk).call();
} // next chunk.
if (optional) {
/*
* Note: when NO binding sets were accepted AND the
* predicate is OPTIONAL then we output the _original_
* binding set(s) to the sink join task(s). The
* CONSTRAINT(s) are NOT applied for the optional
* solutions.
*/
// Thread-local buffer iff optional sink is in use.
final AbstractUnsynchronizedArrayBuffer<IBindingSet> unsyncBuffer2 = threadLocalBufferFactory2 == null ? null
: threadLocalBufferFactory2.get();
for (int bindex = 0; bindex < bindingSets.length; bindex++) {
if (naccepted[bindex] > 0)
continue;
final IBindingSet bs = bindingSets[bindex];
// if (constraints != null) {
// if(!BOpUtility.isConsistent(constraints, bs)) {
// // Failed by the constraint on the join.
// continue;
// }
// }
if (log.isTraceEnabled())
log.trace("Passing on solution which fails an optional join: "
+ bs);
if (limit != Long.MAX_VALUE
&& exactOutputCount.incrementAndGet() > limit) {
// break query @ limit.
if (log.isInfoEnabled())
log.info("Breaking query @ limit: limit=" + limit
+ ", exactOutputCount="
+ exactOutputCount.get());
halt((Void) null);
break;
}
if (unsyncBuffer2 == null) {
// use the default sink.
unsyncBuffer.add(bs);
} else {
// use the alternative sink.
unsyncBuffer2.add(bs);
}
stats.outputSolutions.increment();
}
}
return;
} catch (Throwable t) {
// ensure query halts.
halt(t);
if (getCause() != null) {
// abnormal termination.
throw new RuntimeException(t);
}
// normal termination - ignore exception.
} finally {
itr.close();
}
}
/**
* A vectored pipeline join (chunk at a time processing) based on
* the visitation of {@link IBindingSet}s
*/
protected void handleJoin2() {
final long cutoffLimit = predicate.getProperty(
IPredicate.Annotations.CUTOFF_LIMIT,
IPredicate.Annotations.DEFAULT_CUTOFF_LIMIT);
// Obtain the iterator for the current join dimension.
final ICloseableIterator<IBindingSet[]> itr = ((IBindingSetAccessPath<?>) accessPath)
.solutions(context, cutoffLimit, stats);
try {
// Each thread gets its own buffer.
final AbstractUnsynchronizedArrayBuffer<IBindingSet> unsyncBuffer = threadLocalBufferFactory
.get();
// #of input solutions consumed (pre-increment).
stats.inputSolutions.add(bindingSets.length);
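/*
 * Editor's note: the nested loops below are the vectored join kernel.
 * Each rightChunk drained from the access path is joined against every
 * upstream solution in bindingSets[]; BOpContext.bind() returns the
 * combined solution iff the two binding sets are consistent and the
 * constraints are satisfied, and null otherwise.
 */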
while (itr.hasNext()) {
halted();
int naccepted = 0;
final IBindingSet[] rightChunk = itr.next(); // AP solution chunk.
for(IBindingSet right : rightChunk) { // next solution from AP.
int bindex = 0; // index 1:1 with bindingSets[].
for (IBindingSet left : bindingSets) { // upstream pipeline solutions.
// join solutions.
final IBindingSet bset = BOpContext.bind(left, right,
// true/* leftIsPipeline */,
constraints,
variablesToKeep);
if (bset != null) {
// solutions joined.
if (limit != Long.MAX_VALUE
&& exactOutputCount.incrementAndGet() > limit) {
// break query @ limit.
if (log.isInfoEnabled())
log.info("Breaking query @ limit: limit=" + limit
+ ", exactOutputCount="
+ exactOutputCount.get());
halt((Void) null);
break;
}
// Accept this binding set.
unsyncBuffer.add(bset);
// #of binding sets accepted.
naccepted++;
// #of elements accepted for this binding set.
this.naccepted[bindex]++;
// #of output solutions generated.
stats.outputSolutions.increment();
}
if (bindex++ % 50 == 0) {
// Periodically check for an interrupt.
if (Thread.interrupted())
throw new InterruptedException();
}
} // next left (upstream) solution from bindingSets.
if (log.isDebugEnabled())
if (naccepted == 0) {
log.debug("Rejected solution: " + right);
} else {
log.debug("Accepted solution for " + naccepted
+ " of " + bindingSets.length
+ " possible bindingSet combinations: "
+ right);
}
} // next right (AP) solution from rightChunk.
} // next rightChunk drained from the access path.
if (optional) {
/*
* Note: when NO binding sets were accepted AND the
* predicate is OPTIONAL then we output the _original_
* binding set(s) to the sink join task(s). The
* CONSTRAINT(s) are NOT applied for the optional
* solutions.
*/
// Thread-local buffer iff optional sink is in use.
final AbstractUnsynchronizedArrayBuffer<IBindingSet> unsyncBuffer2 = threadLocalBufferFactory2 == null ? null
: threadLocalBufferFactory2.get();
for (int bindex = 0; bindex < bindingSets.length; bindex++) {
if (naccepted[bindex] > 0)
continue;
final IBindingSet bs = bindingSets[bindex];
if (log.isTraceEnabled())
log.trace("Passing on solution which fails an optional join: "
+ bs);
if (limit != Long.MAX_VALUE
&& exactOutputCount.incrementAndGet() > limit) {
// break query @ limit.
if (log.isInfoEnabled())
log.info("Breaking query @ limit: limit=" + limit
+ ", exactOutputCount="
+ exactOutputCount.get());
halt((Void) null);
break;
}
if (unsyncBuffer2 == null) {
// use the default sink.
unsyncBuffer.add(bs);
} else {
// use the alternative sink.
unsyncBuffer2.add(bs);
}
stats.outputSolutions.increment();
}
}
return;
} catch (Throwable t) {
// ensure query halts.
halt(t);
if (getCause() != null) {
// abnormal termination.
throw new RuntimeException(t);
}
// normal termination - ignore exception.
} finally {
itr.close();
}
}
protected void handleStarJoin() {
IBindingSet[] solutions = this.bindingSets;
final IStarJoin<E> starJoin = (IStarJoin<E>) accessPath
.getPredicate();
/*
* FIXME The star join does not handle the alternative sink yet.
* See the ChunkTask for the normal join.
*/
final AbstractUnsynchronizedArrayBuffer<IBindingSet> unsyncBuffer = threadLocalBufferFactory
.get();
// Obtain the iterator for the current join dimension.
final IChunkedOrderedIterator<E> itr = accessPath.iterator();
// The actual #of elements scanned.
int numElements = 0;
try {
/*
* Note: The fast range count would give us an upper bound,
* unless expanders are used, in which case there can be
* more elements visited.
*/
final Object[] elements;
{
/*
* First, gather all chunks.
*/
int nchunks = 0;
final List