/**
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved.
Contact:
SYSTAP, LLC DBA Blazegraph
2501 Calvert ST NW #106
Washington, DC 20008
[email protected]
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
* Created on Aug 30, 2011
*/
package com.bigdata.bop.join;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.UUID;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicReference;
import org.apache.log4j.Logger;
import com.bigdata.bop.BOpContext;
import com.bigdata.bop.Constant;
import com.bigdata.bop.HTreeAnnotations;
import com.bigdata.bop.IBindingSet;
import com.bigdata.bop.IConstant;
import com.bigdata.bop.IConstraint;
import com.bigdata.bop.IVariable;
import com.bigdata.bop.IndexAnnotations;
import com.bigdata.bop.PipelineOp;
import com.bigdata.bop.controller.INamedSolutionSetRef;
import com.bigdata.bop.engine.BOpStats;
import com.bigdata.btree.Checkpoint;
import com.bigdata.btree.DefaultTupleSerializer;
import com.bigdata.btree.HTreeIndexMetadata;
import com.bigdata.btree.ITuple;
import com.bigdata.btree.ITupleIterator;
import com.bigdata.btree.ITupleSerializer;
import com.bigdata.btree.IndexMetadata;
import com.bigdata.btree.keys.ASCIIKeyBuilderFactory;
import com.bigdata.btree.keys.IKeyBuilder;
import com.bigdata.btree.raba.codec.FrontCodedRabaCoderDupKeys;
import com.bigdata.btree.raba.codec.SimpleRabaCoder;
import com.bigdata.counters.CAT;
import com.bigdata.htree.HTree;
import com.bigdata.io.ByteArrayBuffer;
import com.bigdata.rawstore.IRawStore;
import com.bigdata.rdf.internal.IV;
import com.bigdata.rdf.internal.IVCache;
import com.bigdata.rdf.internal.encoder.IBindingSetDecoder;
import com.bigdata.rdf.internal.encoder.IVBindingSetEncoderWithIVCache;
import com.bigdata.rdf.internal.impl.literal.XSDBooleanIV;
import com.bigdata.rdf.model.BigdataValue;
import com.bigdata.relation.accesspath.BufferClosedException;
import com.bigdata.relation.accesspath.IBuffer;
import com.bigdata.rwstore.sector.IMemoryManager;
import com.bigdata.rwstore.sector.MemStore;
import com.bigdata.rwstore.sector.MemoryManagerClosedException;
import com.bigdata.util.Bytes;
import com.bigdata.util.BytesUtil;
import com.bigdata.util.InnerCause;
import cutthecrap.utils.striterators.Expander;
import cutthecrap.utils.striterators.ICloseableIterator;
import cutthecrap.utils.striterators.IStriterator;
import cutthecrap.utils.striterators.Resolver;
import cutthecrap.utils.striterators.SingleValueIterator;
import cutthecrap.utils.striterators.Striterator;
import cutthecrap.utils.striterators.Visitor;
/**
* Utility methods to support hash index builds and hash index joins using
* scalable native memory data structures.
*
* Vectoring and IV encoding
*
* In order to provide efficient encoding and persistence of solutions on the
* {@link HTree}, this class is written directly to the RDF data model. Rather
* than POJO serialization, solutions are encoded as logical {@link IV}[]s in a
* manner very similar to how we represent the keys of the statement indices.
*
* Since this encoding does not persist the {@link IV#getValue() cache}, a
* separate mapping must be maintained from {@link IV} to {@link BigdataValue}
* for those {@link IV}s which have a materialized {@link BigdataValue}.
*
* TODO Do a 64-bit hash version which could be used for hash indices having
* more than 500M distinct join variable combinations. Note that at 500M
* distinct join variable combinations we have a 1 in 4 chance of a hash
* collision. Whether or not that turns into a cost really depends on the
* cardinality of the solutions per distinct combination of the join variables.
* If there is only one solution per join variable combination, then those
* collisions will cause basically no increase in the work to be done. However,
* if there are 50,000 solutions per distinct combination of the join variables
* then we would be better off using a 64-bit hash code.
*
* FIXME Vector resolution of ivCache. Various methods use
* {@link IVBindingSetEncoderWithIVCache#resolveCachedValues(IBindingSet)}
*
* @author Bryan Thompson
* @version $Id: HTreeHashJoinUtility.java 5568 2011-11-07 19:39:12Z thompsonbry $
*/
public class HTreeHashJoinUtility implements IHashJoinUtility {
static private final transient Logger log = Logger
.getLogger(HTreeHashJoinUtility.class);
/**
* Singleton {@link IHashJoinUtilityFactory} that can be used to create a
* new {@link HTreeHashJoinUtility}.
*/
static public final IHashJoinUtilityFactory factory =
new IHashJoinUtilityFactory() {
private static final long serialVersionUID = 1L;
public IHashJoinUtility create(//
final BOpContext context,//
final INamedSolutionSetRef namedSetRef,//
final PipelineOp op,//
final JoinTypeEnum joinType//
) {
return new HTreeHashJoinUtility(
context.getMemoryManager(namedSetRef.getQueryId()),
op, joinType);
}
};
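/*
* Example: a minimal sketch of how the factory above is typically driven.
* The [context], [namedSetRef], [op], [itr] and [stats] variables are
* illustrative placeholders supplied by the query engine, not part of
* this class.
*
*   final IHashJoinUtility state = HTreeHashJoinUtility.factory.create(
*           context, namedSetRef, op, JoinTypeEnum.Normal);
*   try {
*       state.acceptSolutions(itr, stats); // build the hash index.
*   } finally {
*       state.release(); // always release the native memory.
*   }
*/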
/**
* Note: If joinVars is an empty array, then the solutions will all hash to
* ONE (1).
*/
private static final int ONE = 1;
/**
* Return the hash code which will be used as the key given the ordered
* as-bound values for the join variables.
*
* @param joinVars
* The join variables.
* @param bset
* The bindings whose as-bound hash code for the join variables
* will be computed.
* @param ignoreUnboundVariables
* If a variable without a binding should be silently ignored.
*
* @return The hash code.
*
* @throws JoinVariableNotBoundException
* if there is no binding for a join variable.
*
* FIXME Does anything actually rely on the
* {@link JoinVariableNotBoundException}? It would seem that
* this exception could only be thrown if the joinvars[] was
* incorrectly formulated as it should only include
* "known bound" variables. (I think that this is related to
* incorrectly passing along empty solutions for named subquery
* hash joins.)
*/
private static int hashCode(final IVariable<?>[] joinVars,
final IBindingSet bset, final boolean ignoreUnboundVariables)
throws JoinVariableNotBoundException {
int h = ONE;
for (IVariable<?> v : joinVars) {
final IConstant<?> c = bset.get(v);
if (c == null) {
if(ignoreUnboundVariables)
continue;
// Reject any solution which does not have a binding for a join
// variable.
throw new JoinVariableNotBoundException(v.getName());
}
// Works Ok.
h = 31 * h + c.hashCode();
/*
* TODO Martyn's version. Also works Ok. Compare rate of hash
* collisions and impact on join performance. Also compare use of
* 64-bit hash codes and impact on join performance (there should be
* fewer hash collisions).
*/
// @see http://burtleburtle.net/bob/hash/integer.html
//
// final int hc = c.hashCode();
// h += ~(hc<<15);
// h ^= (hc>>10);
// h += (hc<<3);
// h ^= (hc>>6);
}
if (log.isTraceEnabled())
log.trace("hashCode=" + h + ", joinVars="
+ Arrays.toString(joinVars) + " : " + bset);
return h;
}
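/*
* Worked example for the accumulation above: with joinVars = {x, y} and
* as-bound values c1 and c2, the computed key is
*
*   h = 31 * (31 * 1 + c1.hashCode()) + c2.hashCode()
*
* Solutions with pairwise-equal hash codes on the join variables always
* map to the same key; the converse can fail (hash collisions), which is
* why the join below re-verifies the bindings via BOpContext.bind().
*/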
protected AtomicBoolean getOpen() {
return open;
}
protected IVBindingSetEncoderWithIVCache getEncoder() {
return encoder;
}
protected long getNoJoinVarsLimit() {
return noJoinVarsLimit;
}
protected boolean getOutputDistinctJVs() {
return outputDistinctJVs;
}
/**
* {@code true} until the state is discarded by {@link #release()}.
*/
private final AtomicBoolean open = new AtomicBoolean(true);
/**
* The operator whose annotations are used to initialize this object.
*
* Note: This was added to support the DISTINCT FILTER in
* {@link #outputSolutions(IBuffer)}.
*/
private final PipelineOp op;
// /**
// * This basically controls the vectoring of the hash join.
// */
// private final int chunkSize = 1000;//ChunkedWrappedIterator.DEFAULT_CHUNK_SIZE;
/**
* Utility class for compactly and efficiently encoding and decoding
* {@link IBindingSet}s.
*/
private final IVBindingSetEncoderWithIVCache encoder;
/**
* The type of join to be performed.
*/
private final JoinTypeEnum joinType;
/**
* {@code true} iff the join is OPTIONAL.
*/
private final boolean optional;
/**
* {@code true} iff this is a DISTINCT filter.
*/
private final boolean filter;
// /**
// * The operator which was used to construct the {@link IHashJoinUtility}
// * state.
// *
// * Note: This is NOT necessarily the operator which is currently executing.
// * Hash indices are often built by one operator and then consumed by
// * other(s).
// */
// private final PipelineOp op;
/**
* @see HashJoinAnnotations#ASK_VAR
*/
private final IVariable<?> askVar;
/**
* The join variables.
*/
private final IVariable<?>[] joinVars;
/**
* The variables to be retained (optional, all variables are retained if
* not specified).
*/
private final IVariable<?>[] selectVars;
/**
* True if the hash join utility class is to output the distinct join
* variables.
*/
private boolean outputDistinctJVs = false;
/**
* The join constraints (optional).
*/
private final IConstraint[] constraints;
/**
* The backing {@link IRawStore}.
*/
private final IRawStore store;
/**
* The hash index. The keys are int32 hash codes built from the join
* variables. The values are an {@link IV}[], similar to the encoding in
* the statement indices. The mapping from the index positions in the
* {@link IV}s to the variables is managed by the {@link #encoder}.
*/
private final AtomicReference<HTree> rightSolutions = new AtomicReference<HTree>();
/**
* The set of distinct source solutions which joined. This set is maintained
* iff the join is optional and is {@code null} otherwise.
*/
private final AtomicReference<HTree> joinSet = new AtomicReference<HTree>();
/**
* The maximum #of (left,right) solution joins that will be considered
* before failing the join. This is used IFF there are no join variables.
*
* TODO HINTS: Annotation and query hint for this. Probably on
* {@link HashJoinAnnotations}.
*/
private final long noJoinVarsLimit = HashJoinAnnotations.DEFAULT_NO_JOIN_VARS_LIMIT;
/**
* The #of left solutions considered for a join.
*/
protected final CAT nleftConsidered = new CAT();
/**
* The #of right solutions considered for a join.
*/
protected final CAT nrightConsidered = new CAT();
/**
* The #of solution pairs considered for a join.
*/
protected final CAT nJoinsConsidered = new CAT();
/**
* The hash index.
*/
protected HTree getRightSolutions() {
return rightSolutions.get();
}
/**
* The set of distinct source solutions which joined. This set is
* maintained iff the join is optional and is {@code null} otherwise.
*/
protected HTree getJoinSet() {
return joinSet.get();
}
/**
* Human readable representation of the {@link IHashJoinUtility} metadata
* (but not the solutions themselves).
*/
@Override
public String toString() {
final StringBuilder sb = new StringBuilder();
sb.append(getClass().getSimpleName());
sb.append("{open=" + open);
sb.append(",joinType=" + joinType);
// sb.append(",chunkSize=" + chunkSize);
// sb.append(",optional=" + optional);
// sb.append(",filter=" + filter);
if (askVar != null)
sb.append(",askVar=" + askVar);
sb.append(",joinVars=" + Arrays.toString(joinVars));
sb.append(",outputDistinctJVs=" + outputDistinctJVs);
if (selectVars != null)
sb.append(",selectVars=" + Arrays.toString(selectVars));
if (constraints != null)
sb.append(",constraints=" + Arrays.toString(constraints));
sb.append(",size=" + getRightSolutionCount());
sb.append(",considered(left=" + nleftConsidered + ",right="
+ nrightConsidered + ",joins=" + nJoinsConsidered + ")");
if (joinSet.get() != null)
sb.append(",joinSetSize=" + getJoinSetSize());
// sb.append(",encoder="+encoder);
sb.append("}");
return sb.toString();
}
@Override
public boolean isEmpty() {
return getRightSolutionCount() == 0;
}
@Override
public long getRightSolutionCount() {
final HTree htree = getRightSolutions();
if (htree != null) {
return htree.getEntryCount();
}
return 0L;
}
protected long getJoinSetSize() {
final HTree htree = getJoinSet();
if (htree != null) {
return htree.getEntryCount();
}
return 0L;
}
@Override
public JoinTypeEnum getJoinType() {
return joinType;
}
@Override
public IVariable<?> getAskVar() {
return askVar;
}
@Override
public IVariable<?>[] getJoinVars() {
return joinVars;
}
@Override
public IVariable<?>[] getSelectVars() {
return selectVars;
}
@Override
public boolean isOutputDistinctJoinVars() {
return outputDistinctJVs;
}
@Override
public IConstraint[] getConstraints() {
return constraints;
}
/**
* Setup the {@link IndexMetadata} for {@link #rightSolutions} or
* {@link #joinSet}.
*/
protected static HTreeIndexMetadata getIndexMetadata(final PipelineOp op) {
final HTreeIndexMetadata metadata = new HTreeIndexMetadata(
UUID.randomUUID());
final int addressBits = op.getProperty(HTreeAnnotations.ADDRESS_BITS,
HTreeAnnotations.DEFAULT_ADDRESS_BITS);
// final int branchingFactor = 1 << addressBits;
final int ratio = 32; // TODO Config/tune.
metadata.setAddressBits(addressBits);
metadata.setRawRecords(op.getProperty(//
HTreeAnnotations.RAW_RECORDS,
HTreeAnnotations.DEFAULT_RAW_RECORDS));
metadata.setMaxRecLen(op.getProperty(//
HTreeAnnotations.MAX_RECLEN,
HTreeAnnotations.DEFAULT_MAX_RECLEN));
metadata.setWriteRetentionQueueCapacity(op.getProperty(
IndexAnnotations.WRITE_RETENTION_QUEUE_CAPACITY,
IndexAnnotations.DEFAULT_WRITE_RETENTION_QUEUE_CAPACITY));
metadata.setKeyLen(Bytes.SIZEOF_INT); // int32 hash code keys.
@SuppressWarnings("rawtypes")
final ITupleSerializer<?, ?> tupleSer = new DefaultTupleSerializer(
new ASCIIKeyBuilderFactory(Bytes.SIZEOF_INT),
// new FrontCodedRabaCoder(ratio),// keys : TODO Optimize for int32!
new FrontCodedRabaCoderDupKeys(ratio),// keys : TODO Optimize for int32!
new SimpleRabaCoder() // vals
);
metadata.setTupleSerializer(tupleSer);
return metadata;
}
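/*
* Note (sketch): the HTree fan-out is 2^addressBits, so, e.g., 10 address
* bits give directory pages with 1 << 10 == 1024 slots. A hypothetical
* override through the operator's annotations (NV is com.bigdata.bop.NV;
* the surrounding op construction is elided):
*
*   new NV(HTreeAnnotations.ADDRESS_BITS, 8) // fan-out of 256.
*/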
/**
*
* @param mmgr
* The IMemoryManager which will back the named solution set.
* @param op
* The operator whose annotations inform the construction of the
* hash index. The {@link HTreeAnnotations} may be specified for
* this operator and will control the initialization of the
* various {@link HTree} instances.
* @param joinType
* The type of join to be performed.
*
* @see HTreeHashJoinAnnotations
*/
public HTreeHashJoinUtility(final IMemoryManager mmgr, final PipelineOp op,
final JoinTypeEnum joinType) {
if (mmgr == null)
throw new IllegalArgumentException();
if (op == null)
throw new IllegalArgumentException();
if(joinType == null)
throw new IllegalArgumentException();
this.op = op;
this.joinType = joinType;
this.optional = joinType == JoinTypeEnum.Optional;
this.filter = joinType == JoinTypeEnum.Filter;
// Optional variable used for (NOT) EXISTS.
this.askVar = (IVariable<?>) op
.getProperty(HashJoinAnnotations.ASK_VAR);
// The join variables (required).
this.joinVars = (IVariable<?>[]) op
.getRequiredProperty(HashJoinAnnotations.JOIN_VARS);
// The projected variables (optional and equal to the join variables iff
// this is a DISTINCT filter).
this.outputDistinctJVs =
op.getProperty(
HashIndexOp.Annotations.OUTPUT_DISTINCT_JVs, false);
this.selectVars = filter ? joinVars : (IVariable<?>[]) op
.getProperty(JoinAnnotations.SELECT);
/*
* This wraps an efficient raw store interface around a child memory
* manager created from the IMemoryManager which will back the named
* solution set.
*/
store = new MemStore(mmgr.createAllocationContext());
// Setup the encoder. The ivCache will be backed by the memory manager.
this.encoder = new IVBindingSetEncoderWithIVCache(store, filter, op);
/*
* Note: This is not necessary. We will encounter the join variables in
* the solutions as they are processed and they will automatically
* become part of the schema maintained by the encoder.
*/
// // Initialize the schema with the join variables.
// encoder.updateSchema(joinVars);
// The join constraints (optional).
this.constraints = (IConstraint[]) op
.getProperty(JoinAnnotations.CONSTRAINTS);
// Will support incremental eviction and persistence.
rightSolutions.set(HTree.create(store, getIndexMetadata(op)));
switch (joinType) {
case Optional:
case Exists:
case NotExists:
// The join set is used to handle optionals.
joinSet.set(HTree.create(store, getIndexMetadata(op)));
break;
}
}
/**
* The backing {@link IRawStore}.
*/
public IRawStore getStore() {
return store;
}
/**
* {@inheritDoc}
*
* This implementation checkpoints the {@link HTree} instance(s) used to
* buffer the source solutions ({@link #rightSolutions} and the ivCache
* maintained by the {@link #encoder}) and then re-loads them in
* read-only mode from their checkpoint(s).
* This exposes a view of the {@link HTree} which is safe for concurrent
* readers.
*/
@Override
public void saveSolutionSet() {
if (!open.get())
throw new IllegalStateException();
checkpointHTree(rightSolutions);
encoder.saveSolutionSet();
/*
* Note: DO NOT checkpoint the joinSet here. That index is not even
* written upon until we begin to evaluate the joins, which happens
* after we checkpoint the source solutions.
*/
}
// /**
// * Checkpoint the join set (used to buffer the optional solutions).
// *
// * Note: Since we always output the solutions which did not join from a
// * single thread as part of last pass evaluation there is no need to
// * checkpoint the {@link #joinSet}.
// */
// public void checkpointJoinSet() {
//
// if (!open.get())
// throw new IllegalStateException();
//
// checkpointHTree(joinSet);
//
// }
private void checkpointHTree(final AtomicReference<HTree> ref) {
final HTree tmp = ref.get();
if (tmp != null) {
// Checkpoint the HTree.
final Checkpoint checkpoint = tmp.writeCheckpoint2();
if (log.isInfoEnabled())
log.info(checkpoint.toString());
// Get a read-only view of the HTree.
final HTree readOnly = HTree.load(store,
checkpoint.getCheckpointAddr(), true/* readOnly */);
if (!ref.compareAndSet(tmp/* expect */, readOnly)) {
throw new IllegalStateException();
}
}
}
@Override
public void release() {
if (!open.compareAndSet(true/* expect */, false/* update */)) {
// Already closed.
return;
}
encoder.release();
HTree tmp = rightSolutions.getAndSet(null/* newValue */);
if (tmp != null) {
tmp.close();
}
tmp = joinSet.getAndSet(null/* newValue */);
if (tmp != null) {
tmp.close();
}
store.close();
}
@Override
public long acceptSolutions(final ICloseableIterator<IBindingSet[]> itr,
final BOpStats stats) {
if (!open.get())
throw new IllegalStateException();
if (itr == null)
throw new IllegalArgumentException();
if (stats == null)
throw new IllegalArgumentException();
try {
long naccepted = 0L;
final HTree htree = getRightSolutions();
final IKeyBuilder keyBuilder = htree.getIndexMetadata()
.getKeyBuilder();
// Note: We no longer re-chunk here.
final ICloseableIterator<IBindingSet[]> it = itr;
try {
final AtomicInteger vectorSize = new AtomicInteger();
while (it.hasNext()) {
// Vector a chunk of solutions.
final BS[] a = vector(it.next(), joinVars,
null/* selectVars */,
false/* ignoreUnboundVariables */, vectorSize);
final int n = vectorSize.get();
stats.chunksIn.increment();
stats.unitsIn.add(a.length);
// Insert solutions into HTree in key order.
for (int i = 0; i < n; i++) {
final BS tmp = a[i];
// Encode the key.
final byte[] key = keyBuilder.reset().append(tmp.hashCode)
.getKey();
// Encode the solution.
final byte[] val = encoder.encodeSolution(tmp.bset);
//log.warn("insert: key="+BytesUtil.toString(key));
// Insert binding set under hash code for that key.
htree.insert(key, val);
}
naccepted += a.length;
// Vectored update of the IV Cache.
// encoder.updateIVCache(cache);
encoder.flush();
}
} finally {
it.close();
}
if (log.isInfoEnabled())
log.info("naccepted=" + naccepted + ", nright="
+ htree.getEntryCount());
return naccepted;
} catch(Throwable t) {
throw launderThrowable(t);
}
}
@Override
public long filterSolutions(final ICloseableIterator<IBindingSet[]> itr,
final BOpStats stats, final IBuffer<IBindingSet> sink) {
if (itr == null)
throw new IllegalArgumentException();
if (stats == null)
throw new IllegalArgumentException();
try {
long naccepted = 0L;
final HTree htree = getRightSolutions();
final IKeyBuilder keyBuilder = htree.getIndexMetadata().getKeyBuilder();
// Note: We no longer rechunk here.
final Iterator<IBindingSet[]> it = itr;
final AtomicInteger vectorSize = new AtomicInteger();
while (it.hasNext()) {
// Vector a chunk of solutions.
final BS[] a = vector(it.next(), joinVars, selectVars,
true/* ignoreUnboundVariables */, vectorSize);
final int n = vectorSize.get();
stats.chunksIn.increment();
stats.unitsIn.add(a.length);
for (int i = 0; i < n; i++) {
final BS tmp = a[i];
// Encode the key.
final byte[] key = keyBuilder.reset().append(tmp.hashCode)
.getKey();
/*
* Encode the solution. Do not update the cache since we are
* only encoding so we can probe the hash index.
*/
final byte[] val = encoder
.encodeSolution(tmp.bset, false/* updateCache */);
/*
* Search the hash index for a match.
*
* TODO VECTOR: This does not take explicit advantage of the
* fact that different source solutions will fall into the
* same hash bucket in the HTree. The solutions are ordered
* by hashCode by vector() above, but we are using one lookupAll()
* invocation per source solution here rather than recognizing that
* multiple source solutions will hit the same hash bucket.
*/
boolean found = false;
final ITupleIterator<?> titr = htree.lookupAll(key);
while (titr.hasNext()) {
final ITuple<?> t = titr.next();
final ByteArrayBuffer tb = t.getValueBuffer();
if (0 == BytesUtil.compareBytesWithLenAndOffset(
0/* aoff */, val.length/* alen */, val,//
0/* boff */, tb.limit()/* blen */, tb.array()/* b */
)) {
found = true;
break;
}
}
if (!found) {
// Add to the hash index.
htree.insert(key, val);
// Write onto the sink.
sink.add(tmp.bset);
naccepted++;
}
}
// Note: The IV=>Value cache is NOT maintained for DISTINCT.
// encoder.flush();
// updateIVCache(cache, ivCache.get());
}
return naccepted;
} catch(Throwable t) {
throw launderThrowable(t);
}
}
/**
* Decode a solution from an encoded {@link IV}[].
*
* Note: The {@link IVCache} associations are NOT resolved by this method. The
* resolution step is relatively expensive since it must do lookups in
* persistence capable data structures. The caller MUST use
* {@link IBindingSetDecoder#resolveCachedValues(IBindingSet)} to resolve
* the {@link IVCache} associations once they decide that the decoded
* solution can join.
*
* Note: This instance method is required by the MERGE JOIN logic which
* associates the schema with the first {@link IHashJoinUtility} instance.
*
* @param t
* A tuple whose value is an encoded {@link IV}[].
*
* @return The decoded {@link IBindingSet}.
*/
protected IBindingSet decodeSolution(final ITuple<?> t) {
final ByteArrayBuffer b = t.getValueBuffer();
return encoder
.decodeSolution(b.array(), 0, b.limit(), false/* resolveCachedValues */);
}
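/*
* Example: the decode/resolve pairing expected of callers, per the note
* above. The tuple [t] and the joins() test are illustrative placeholders.
*
*   final IBindingSet bset = decodeSolution(t); // cheap, no IVCache lookups.
*   if (joins(bset)) { // hypothetical caller-side join test.
*       encoder.resolveCachedValues(bset); // now pay for materialization.
*       outputBuffer.add(bset);
*   }
*/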
/**
* Glue class for hash code and binding set used when the hash code is for
* just the join variables rather than the entire binding set.
*/
public static class BS implements Comparable<BS> {
final int hashCode;
final IBindingSet bset;
BS(final int hashCode, final IBindingSet bset) {
this.hashCode = hashCode;
this.bset = bset;
}
@Override
public int compareTo(final BS o) {
if (this.hashCode < o.hashCode)
return -1;
if (this.hashCode > o.hashCode)
return 1;
return 0;
}
@Override
public String toString() {
return getClass().getName() + "{hashCode=" + hashCode + ",bset="
+ bset + "}";
}
}
/**
* Glue class for hash code and encoded binding set used when we already
* have the binding set encoded.
*/
static class BS2 implements Comparable<BS2> {
final int hashCode;
final byte[] value;
BS2(final int hashCode, final byte[] value) {
this.hashCode = hashCode;
this.value = value;
}
@Override
public int compareTo(final BS2 o) {
if (this.hashCode < o.hashCode)
return -1;
if (this.hashCode > o.hashCode)
return 1;
return 0;
}
@Override
public String toString() {
return getClass().getName() + "{hashCode=" + hashCode + ",value="
+ BytesUtil.toString(value) + "}";
}
}
@Override
public void hashJoin(//
final ICloseableIterator<IBindingSet[]> leftItr,//
final BOpStats stats,
final IBuffer<IBindingSet> outputBuffer//
) {
hashJoin2(leftItr, stats, outputBuffer, constraints);
}
/*
* The hash join is vectored. We compute the hashCode for each source
* solution from the leftItr and then sort those left solutions. This gives
* us an ordered progression through the hash buckets for the HTree.
* Further, since we know that any left solution having the same hash code
* will read on the same hash bucket, we probe that hash bucket once for all
* left solutions that hash into the same bucket.
*/
@Override
public void hashJoin2(//
final ICloseableIterator<IBindingSet[]> leftItr,//
final BOpStats stats,//
final IBuffer<IBindingSet> outputBuffer,//
final IConstraint[] constraints//
) {
if (!open.get())
throw new IllegalStateException();
// Note: We no longer rechunk in this method.
final Iterator<IBindingSet[]> it;
it = leftItr;// incremental.
/*
* Note: This forces all source chunks into a single chunk. This could
* improve vectoring, but was really added for debugging.
*/
// it = new SingletonIterator(BOpUtility.toArray(leftItr, null/*stats*/));
try {
final HTree rightSolutions = this.getRightSolutions();
if (log.isInfoEnabled()) {
log.info("rightSolutions: #nnodes="
+ rightSolutions.getNodeCount() + ",#leaves="
+ rightSolutions.getLeafCount() + ",#entries="
+ rightSolutions.getEntryCount());
}
final IKeyBuilder keyBuilder = rightSolutions.getIndexMetadata()
.getKeyBuilder();
// true iff there are no join variables.
final boolean noJoinVars = joinVars.length == 0;
final AtomicInteger vectorSize = new AtomicInteger();
while (it.hasNext()) {
final BS[] a; // vectored solutions.
final int n; // #of valid elements in a[].
{
// Next chunk of solutions from left.
final IBindingSet[] b = it.next();
if (stats != null) {
stats.chunksIn.increment();
stats.unitsIn.add(b.length);
}
// Vector a chunk of solutions, ordering by hashCode.
a = vector(b, joinVars, null/* selectVars */,
false/* ignoreUnboundVariables */, vectorSize);
// The size of that vector.
n = vectorSize.get();
nleftConsidered.add(n);
}
int fromIndex = 0;
while (fromIndex < n) {
/*
* Figure out how many left solutions in the current chunk
* have the same hash code. We will use the same iterator
* over the right solutions for that hash code against the
* HTree.
*/
// The next hash code to be processed.
final int hashCode = a[fromIndex].hashCode;
// scan for the first hash code which is different.
int toIndex = n; // assume upper bound.
for (int i = fromIndex + 1; i < n; i++) {
if (a[i].hashCode != hashCode) {
toIndex = i;
break;
}
}
// #of left solutions having the same hash code.
final int bucketSize = toIndex - fromIndex;
if (log.isTraceEnabled())
log.trace("hashCode=" + hashCode + ": #left="
+ bucketSize + ", vectorSize=" + n
+ ", firstLeft=" + a[fromIndex]);
/*
* Note: all source solutions in [fromIndex:toIndex) have
* the same hash code. They will be vectored together.
*/
// All solutions which join for that collision bucket
final LinkedList<BS2> joined;
switch (joinType) {
case Optional:
case Exists:
case NotExists:
joined = new LinkedList<BS2>();
break;
default:
joined = null;
break;
}
// #of solutions which join for that collision bucket.
int njoined = 0;
// #of solutions which did not join for that collision bucket.
int nrejected = 0;
{
final byte[] key = keyBuilder.reset().append(hashCode)
.getKey();
/**
* Visit all source solutions having the same hash code.
*
* @see <a href="http://sourceforge.net/apps/trac/bigdata/ticket/764">
* Stochastic results with Analytic Query Mode</a>
*
* FIXME This appears to be the crux of the problem
* for #764. If you replace lookupAll(key) with
* rangeIterator() then the hash join is correct.
* Of course, it is also scanning all tuples each
* time so it is very inefficient. The root cause
* is the FrontCodedRabaCoder. It is doing a binary
* search on the BucketPage. However, the
* FrontCodedRabaCoder was not developed to deal
* with duplicates on the page. Therefore it is
* returning an offset into the middle of a run of
* duplicate keys when it does its binary search.
* We will either need to modify this IRabaCoder to
* handle this case (where duplicate keys are
* allowed) or write a new IRabaCoder that is smart
* about duplicates.
*/
final ITupleIterator<?> titr;
if (true) {// scan just the hash bucket for that key.
//log.warn(" probe: key="+BytesUtil.toString(key));
titr = rightSolutions.lookupAll(key);
} else { // do a full scan on the HTree.
titr = rightSolutions.rangeIterator();
}
long sameHashCodeCount = 0;
while (titr.hasNext()) {
sameHashCodeCount++;
final ITuple<?> t = titr.next();
/*
* Note: The map entries must be the full source
* binding set, not just the join variables, even
* though the key and equality in the key is defined
* in terms of just the join variables.
*
* Note: Solutions which have the same hash code but
* whose bindings are inconsistent will be rejected
* by bind() below.
*/
final IBindingSet rightSolution = decodeSolution(t);
nrightConsidered.increment();
for (int i = fromIndex; i < toIndex; i++) {
final IBindingSet leftSolution = a[i].bset;
// Join.
final IBindingSet outSolution = BOpContext
.bind(leftSolution, rightSolution,
constraints,
selectVars);
nJoinsConsidered.increment();
if (noJoinVars
&& nJoinsConsidered.get() == noJoinVarsLimit) {
if (nleftConsidered.get() > 1
&& nrightConsidered.get() > 1) {
throw new UnconstrainedJoinException();
}
}
if (outSolution == null) {
nrejected++;
if (log.isTraceEnabled())
log.trace("Does not join"//
+": hashCode="+ hashCode//
+ ", sameHashCodeCount="+ sameHashCodeCount//
+ ", #left=" + bucketSize//
+ ", #joined=" + njoined//
+ ", #rejected=" + nrejected//
+ ", left=" + leftSolution//
+ ", right=" + rightSolution//
);
} else {
njoined++;
if (log.isDebugEnabled())
log.debug("JOIN"//
+ ": hashCode=" + hashCode//
+ ", sameHashCodeCount="+ sameHashCodeCount//
+ ", #left="+ bucketSize//
+ ", #joined=" + njoined//
+ ", #rejected=" + nrejected//
+ ", solution=" + outSolution//
);
}
switch(joinType) {
case Normal:
case Optional: {
if (outSolution == null) {
// Join failed.
continue;
}
// Resolve against ivCache.
encoder.resolveCachedValues(outSolution);
// Output this solution.
outputBuffer.add(outSolution);
if (optional) {
// Accumulate solutions to vector into
// the joinSet.
joined.add(new BS2(rightSolution
.hashCode(), t.getValue()));
}
break;
}
case Exists: {
/*
* The right solution is output iff there is
* at least one left solution which joins
* with that right solution. Each right
* solution is output at most one time. This
* amounts to outputting the joinSet after
* we have run the entire join. As long as
* the joinSet does not allow duplicates it
* will contain the solutions that we
* want.
*/
if (outSolution != null) {
// Accumulate solutions to vector into
// the joinSet.
joined.add(new BS2(rightSolution
.hashCode(), t.getValue()));
}
break;
}
case NotExists: {
/*
* The right solution is output iff there
* does not exist any left solution which
* joins with that right solution. This
* is basically an optional join where the
* solutions which join are not output.
*/
if (outSolution != null) {
// Accumulate solutions to vector into
// the joinSet.
joined.add(new BS2(rightSolution
.hashCode(), t.getValue()));
}
break;
}
default:
throw new AssertionError();
}
} // next left in the same bucket.
} // next rightSolution with the same hash code.
if (joined != null && !joined.isEmpty()) {
/*
* Vector the inserts into the [joinSet].
*/
final BS2[] a2 = joined.toArray(new BS2[njoined]);
Arrays.sort(a2, 0, njoined);
for (int i = 0; i < njoined; i++) {
final BS2 tmp = a2[i];
saveInJoinSet(tmp.hashCode, tmp.value);
}
}
} // end block of leftSolutions having the same hash code.
fromIndex = toIndex;
} // next slice of source solutions with the same hash code.
} // while(itr.hasNext()
if (log.isInfoEnabled())
log.info("done: " + toString());
} catch(Throwable t) {
throw launderThrowable(t);
} finally {
leftItr.close();
}
} // handleJoin
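/*
* Example: the build-then-probe protocol for this utility, assuming the
* enclosing operators drive it in this order. All variables are
* illustrative placeholders.
*
*   state.acceptSolutions(rightItr, stats); // build the hash index.
*   state.saveSolutionSet();                // publish a read-only view.
*   state.hashJoin(leftItr, stats, out);    // probe with left solutions.
*   if (state.getJoinType() == JoinTypeEnum.Optional)
*       state.outputOptionals(out);         // last pass: non-joining rights.
*   state.release();                        // free the native memory.
*/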
/**
* Vector a chunk of solutions.
*
* @param leftSolutions
* The solutions.
* @param joinVars
* The variables on which the hash code will be computed.
* @param selectVars
* When non-{@code null}, all other variables are dropped.
* (This is used when we are modeling a DISTINCT solutions filter
* since we need to drop anything which is not part of the
* DISTINCT variables list.)
* @param ignoreUnboundVariables
* When {@code true}, an unbound variable will not cause a
* {@link JoinVariableNotBoundException} to be thrown.
* @param vectorSize
* The vector size (set by side-effect). This will be LTE the
* number of solutions in {@code leftSolutions}. (If some
* solutions are eliminated because they lack a binding for a
* required join variable, then vectorSize is LT the number of
* {@code leftSolutions}.)
*
* @return The vectored chunk of solutions ordered by hash code.
*/
protected BS[] vector(final IBindingSet[] leftSolutions,
final IVariable<?>[] joinVars,
final IVariable<?>[] selectVars,
final boolean ignoreUnboundVariables,
final AtomicInteger vectorSize) {
final BS[] a = new BS[leftSolutions.length];
int n = 0; // The #of non-dropped source solutions.
int ndropped = 0; // The #of dropped solutions.
for (int i = 0; i < a.length; i++) {
/*
* Note: If this is a DISTINCT FILTER, then we need to drop the
* variables which are not being considered immediately. Those
* variables MUST NOT participate in the computed hash code.
*/
final IBindingSet bset = selectVars == null ? leftSolutions[i]
: leftSolutions[i].copy(selectVars);
// Compute hash code from bindings on the join vars.
int hashCode = ONE;
try {
hashCode = HTreeHashJoinUtility.hashCode(joinVars,
bset, ignoreUnboundVariables);
} catch (JoinVariableNotBoundException ex) {
if (!optional) {// Drop solution
if (log.isTraceEnabled())
log.trace(ex);
ndropped++;
continue;
}
}
a[n++] = new BS(hashCode, bset);
}
/*
* Sort by the computed hash code. This not only orders the accesses
* into the HTree but it also allows us to handle all source solutions
* which have the same hash code with a single scan of the appropriate
* collision bucket in the HTree.
*/
Arrays.sort(a, 0, n);
// Indicate the actual vector size to the caller via a side-effect.
vectorSize.set(n);
if (log.isTraceEnabled())
log.trace("Vectoring chunk for HTree locality: naccepted=" + n
+ ", ndropped=" + ndropped);
return a;
}
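/*
* Example: consuming the hash-code ordered vector. Runs of equal hash
* codes are contiguous, so a caller probes the HTree once per run (this
* is the pattern used by hashJoin2() above). [chunk] is a placeholder.
*
*   final AtomicInteger vectorSize = new AtomicInteger();
*   final BS[] a = vector(chunk, joinVars, null, false, vectorSize);
*   final int n = vectorSize.get();
*   int from = 0;
*   while (from < n) {
*       int to = from + 1;
*       while (to < n && a[to].hashCode == a[from].hashCode)
*           to++;
*       // Probe once for a[from].hashCode; join a[from..to) against it.
*       from = to;
*   }
*/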
/**
* Add to 2nd hash tree of all solutions which join.
*
* Note: the hash key is based on the entire solution (not just the join
* variables). The values are the full encoded {@link IBindingSet}.
*/
protected void saveInJoinSet(final int joinSetHashCode, final byte[] val) {
final HTree joinSet = this.getJoinSet();
if (true) {
/*
* Do not insert if there is already an entry for that solution in
* the join set.
*
* Note: EXISTS depends on this to have the correct cardinality. If
* EXISTS allows duplicate solutions into the join set then having
* multiple left solutions which satisfy the EXISTS filter will
* cause multiple copies of the right solution to be output! If you
* change the joinSet to allow duplicates, then it MUST NOT allow
* them for EXISTS!
*/
final IKeyBuilder keyBuilder = joinSet.getIndexMetadata()
.getKeyBuilder();
final byte[] key = keyBuilder.reset().append(joinSetHashCode)
.getKey();
// visit all joinSet solutions having the same hash code
final ITupleIterator<?> xitr = joinSet.lookupAll(key);
while (xitr.hasNext()) {
final ITuple<?> xt = xitr.next();
final ByteArrayBuffer b = xt.getValueBuffer();
if (0 == BytesUtil.compareBytesWithLenAndOffset(0/* aoff */,
val.length/* alen */, val/* a */, 0/* boff */,
b.limit()/* blen */, b.array())) {
return;
}
}
}
joinSet.insert(joinSetHashCode, val);
}
@Override
public void outputOptionals(final IBuffer<IBindingSet> outputBuffer) {
if (!open.get())
throw new IllegalStateException();
try {
@SuppressWarnings({ "rawtypes", "unchecked" })
final Constant f = askVar == null ? null : new Constant(
XSDBooleanIV.FALSE);
if (log.isInfoEnabled()) {
final HTree htree = this.getRightSolutions();
log.info("rightSolutions: #nnodes=" + htree.getNodeCount()
+ ",#leaves=" + htree.getLeafCount() + ",#entries="
+ htree.getEntryCount());
final HTree joinSet = this.getJoinSet();
log.info("joinSet: #nnodes=" + joinSet.getNodeCount()
+ ",#leaves=" + joinSet.getLeafCount() + ",#entries="
+ joinSet.getEntryCount());
}
final HTree joinSet = getJoinSet();
final IKeyBuilder keyBuilder = joinSet.getIndexMetadata()
.getKeyBuilder();
// Visit all source solutions.
final ITupleIterator<?> sitr = getRightSolutions().rangeIterator();
while (sitr.hasNext()) {
final ITuple<?> t = sitr.next();
final ByteArrayBuffer tb = t.getValueBuffer();
/*
* Note: This MUST be treated as effectively immutable since we
* may have to output multiple solutions for each rightSolution.
* Those output solutions MUST NOT side-effect [rightSolutions].
*/
final IBindingSet rightSolution = decodeSolution(t);
// The hash code is based on the entire solution for the
// joinSet.
final int hashCode = rightSolution.hashCode();
final byte[] key = keyBuilder.reset().append(hashCode).getKey();
// Probe the join set for this source solution.
final ITupleIterator<?> jitr = joinSet.lookupAll(key);
boolean found = false;
while (jitr.hasNext()) {
// Note: Compare full solutions, not just the hash code!
final ITuple<?> xt = jitr.next();
final ByteArrayBuffer xb = xt.getValueBuffer();
if (0 == BytesUtil.compareBytesWithLenAndOffset(
0/* aoff */, tb.limit()/* alen */,
tb.array()/* a */, 0/* boff */,
xb.limit()/* blen */, xb.array())) {
found = true;
break;
}
}
if (!found) {
/*
* Since the source solution is not in the join set, output
* it as an optional solution.
*/
IBindingSet bs = rightSolution;
if (selectVars != null) {
// Drop variables which are not projected.
bs = bs.copy(selectVars);
}
encoder.resolveCachedValues(bs);
if (f != null) {
if (bs == rightSolution)
bs = rightSolution.clone();
bs.set(askVar, f);
}
outputBuffer.add(bs);
}
}
} catch (Throwable t) {
throw launderThrowable(t);
}
} // outputOptionals.
@SuppressWarnings("unchecked")
@Override
public ICloseableIterator<IBindingSet> indexScan() {
final HTree rightSolutions = getRightSolutions();
if (log.isInfoEnabled()) {
log.info("rightSolutions: #nnodes="
+ rightSolutions.getNodeCount() + ",#leaves="
+ rightSolutions.getLeafCount() + ",#entries="
+ rightSolutions.getEntryCount());
}
// source.
final ITupleIterator<?> solutionsIterator = rightSolutions
.rangeIterator();
IStriterator itr = new Striterator(solutionsIterator);
/**
* Add resolution step.
*/
itr = itr.addFilter(new Resolver(){
private static final long serialVersionUID = 1L;
@Override
protected Object resolve(Object obj) {
final ITuple<?> t = (ITuple<?>) obj;
// Decode the solution.
IBindingSet bset = decodeSolution(t);
// if (selectVars != null) {
//
// // Drop variables which are not projected.
// bset = bset.copy(selectVars);
//
// }
// Resolve ivCache associations.
encoder.resolveCachedValues(bset);
return bset;
}
});
return (ICloseableIterator<IBindingSet>) itr;
}
@Override
public void outputSolutions(final IBuffer<IBindingSet> out) {
if (!open.get())
throw new IllegalStateException();
try {
final HTree rightSolutions = getRightSolutions();
if (log.isInfoEnabled()) {
log.info("rightSolutions: #nnodes="
+ rightSolutions.getNodeCount() + ",#leaves="
+ rightSolutions.getLeafCount() + ",#entries="
+ rightSolutions.getEntryCount());
}
/*
* Used to impose distinct JV on solutions having the same hash
* code. Together with lastHashCode, used to decide when we enter a
* new hash bucket.
*/
HashSet<IBindingSet> distinctSet = null;
int lastHashCode = -1;
// source.
final ITupleIterator<?> solutionsIterator = rightSolutions
.rangeIterator();
while (solutionsIterator.hasNext()) {
final ITuple<?> t = solutionsIterator.next();
IBindingSet bset = decodeSolution(t);
if (outputDistinctJVs) {
// Drop any bindings that are not in the join variables.
bset = bset.copy(joinVars);
final int newHashCode =
hashCode(joinVars, bset, true/* ignoreUnboundVariables */);
final boolean newBucket = distinctSet == null
|| newHashCode != lastHashCode;
if (newBucket) {
// New bucket? New DISTINCT set.
// TODO This is not on the native heap. But it only
// handles a single bucket. Still, it is possible for
// a bucket to get very large.
distinctSet = new HashSet<IBindingSet>();
lastHashCode = newHashCode;
}
if (!distinctSet.add(bset)) {
// Duplicate solution on JVs within current bucket.
continue;
}
// if (distinctFilter != null) {
//
// if ((bset = distinctFilter.accept(bset)) == null) {
//
// // Drop duplicate solutions.
// continue;
//
// }
} else if (selectVars != null) {
/*
* FIXME We should be using projectedInVars here since
* outputSolutions() is used to stream solutions into
* the child join group (at least for some kinds of
* joins, but there might be exceptions for joining with
* a named solution set).
*/
// Drop variables which are not projected.
bset = bset.copy(selectVars);
}
encoder.resolveCachedValues(bset);
out.add(bset);
}
} catch (Throwable t) {
throw launderThrowable(t);
}
} // outputSolutions
@Override
public void outputJoinSet(final IBuffer<IBindingSet> out) {
try {
@SuppressWarnings({ "rawtypes", "unchecked" })
final Constant t = askVar == null ? null : new Constant(
XSDBooleanIV.TRUE);
final HTree joinSet = getJoinSet();
if (log.isInfoEnabled()) {
log.info("joinSet: #nnodes="
+ joinSet.getNodeCount() + ",#leaves="
+ joinSet.getLeafCount() + ",#entries="
+ joinSet.getEntryCount());
}
// source.
final ITupleIterator<?> solutionsIterator = joinSet
.rangeIterator();
while (solutionsIterator.hasNext()) {
IBindingSet bset = decodeSolution(solutionsIterator.next());
if (selectVars != null) {
// Drop variables which are not projected.
bset = bset.copy(selectVars);
}
if (t != null) {
if (selectVars == null)
bset = bset.clone();
bset.set(askVar, t);
}
encoder.resolveCachedValues(bset);
out.add(bset);
}
} catch (Throwable t) {
throw launderThrowable(t);
}
} // outputJoinSet
/**
* {@inheritDoc}
*
* Note: For the {@link HTree}, the entries are in key order. Those keys are
* hash codes computed from the solutions using the join variables. Since
* the keys are hash codes and not the join variable bindings, each hash
* code identifies a collision bucket from the perspective of the merge join
* algorithm. Of course, from the perspective of the {@link HTree} those
* solutions are just consecutive tuples readily identified using
* {@link HTree#lookupAll(int)}.
*
* FIXME Either always project everything or raise [select] into a parameter
* for this method. We DO NOT want to only project whatever was projected by
* the first source.
*/
@Override
public void mergeJoin(
//
final IHashJoinUtility[] others,
final IBuffer<IBindingSet> outputBuffer,
final IConstraint[] constraints, final boolean optional) {
try {
/*
* Validate arguments.
*/
if (others == null)
throw new IllegalArgumentException();
if (others.length == 0)
throw new IllegalArgumentException();
if (outputBuffer == null)
throw new IllegalArgumentException();
final HTreeHashJoinUtility[] all = new HTreeHashJoinUtility[others.length + 1];
{
all[0] = this;
for (int i = 0; i < others.length; i++) {
final HTreeHashJoinUtility o = (HTreeHashJoinUtility) others[i];
if (o == null)
throw new IllegalArgumentException();
if (!Arrays.equals(joinVars, o.joinVars)) {
// Must have the same join variables.
throw new IllegalArgumentException();
}
all[i + 1] = o;
}
}
if (isEmpty()) {
// NOP
return;
}
/*
* Combine constraints for each source with the given constraints.
*/
final IConstraint[] c = JVMHashJoinUtility.combineConstraints(
constraints, all);
/*
* MERGE JOIN
*
* We follow the iterator on the first source. For each hash code which
* it visits, we synchronize iterators against the remaining sources. If
* the join is optional, then the iterator will be null for a source
* which does not have that hash code. Otherwise false is returned if
* any source lacks tuples for the current hash code.
*/
long njoined = 0, nrejected = 0;
{
// if optional then if there are no solutions don't try and
// expand further, we need a place-holder object
final Object NULL_VALUE = "NULL";
final int nsources = all.length;
final ITuple<?>[] set = new ITuple<?>[nsources + 1];
// Visit everything in the first source.
final Striterator sols0 = new Striterator(all[0].getRightSolutions()
.rangeIterator());
{
sols0.addFilter(new Visitor() {
private static final long serialVersionUID = 1L;
/**
* Set the tuple for the first source each time it advances.
*
* @param obj
* The tuple.
*/
@Override
protected void visit(final Object obj) {
set[0] = (ITuple<?>) obj;
}
});
// now add in Expanders and Visitors for each remaining source.
for (int i = 1; i < nsources; i++) {
// Final variables used inside inner classes.
final int slot = i;
final HTree thisTree = all[slot].getRightSolutions();
sols0.addFilter(new Expander() {
private static final long serialVersionUID = 1L;
/**
* Expansion pattern gives solutions for source @ slot.
*
* @param obj
* The tuple in set[slot-1].
*/
@Override
protected Iterator<?> expand(final Object obj) {
if (obj == NULL_VALUE) {
assert optional;
return new SingleValueIterator(NULL_VALUE);
}
// Sync itr for this source to key for prior src.
final byte[] key2 = ((ITuple<?>) obj).getKey();
final ITupleIterator<?> ret = thisTree.lookupAll(key2);
if (optional && !ret.hasNext()) {
/*
* Nothing for that key from this source. Return
* a single marker value so we can proceed to
* the remaining sources rather than halting.
*/
return new SingleValueIterator(NULL_VALUE);
} else {
/*
* Iterator visiting solutions from this source
* for the current key in the prior source.
*/
return ret;
}
}
});
sols0.addFilter(new Visitor() {
private static final long serialVersionUID = 1L;
/**
* Assign tuple to set[slot].
*
* Note: If [obj==NULL_VALUE] then no solutions for that
* slot.
*/
@Override
protected void visit(final Object obj) {
set[slot] = (ITuple<?>) (obj == NULL_VALUE ? null : obj);
}
});
}
} // end of striterator setup.
/*
* This will visit after all expansions. That means that we will
* observe the cross product of the solutions from the remaining
* sources having the same hash for each from the first source.
*
* Each time we visit something, set[] is the tuple[] which
* describes a specific set of solutions from that cross product.
*
* TODO Lift out the decodeSolution() for all slots into the
* expander pattern.
*/
while (sols0.hasNext()) {
sols0.next();
IBindingSet in = all[0].decodeSolution(set[0]);
// FIXME apply constraint to source[0] (JVM version also).
for (int i = 1; i < set.length; i++) {
// See if the solutions join.
final IBindingSet left = in;
if (set[i] != null) {
final IBindingSet right = all[i].decodeSolution(set[i]);
in = BOpContext.bind(//
left,//
right,//
c,// TODO constraint[][]
null//
);
}
if (in == null) {
// if(optional) {
// in = left;
// continue;
// }
// Join failed.
break;
}
}
// Accept this binding set.
if (in != null) {
if (log.isDebugEnabled())
log.debug("Output solution: " + in);
encoder.resolveCachedValues(in);
outputBuffer.add(in);
}
// // now clear set!
// for (int i = 1; i < set.length; i++) {
// set[i] = null;
// }
}
}
} catch(Throwable t) {
throw launderThrowable(t);
}
}
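/*
* Example: invoking a merge join across hash indices built with the SAME
* join variables. [first], [others], [out] and [constraints] are
* illustrative placeholders; the final argument disables the optional
* semantics.
*
*   final HTreeHashJoinUtility first = ...; // this instance.
*   final IHashJoinUtility[] others = ...;  // the remaining sources.
*   first.mergeJoin(others, out, constraints, false);
*/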
/**
* Adds metadata about the {@link IHashJoinUtility} state to the stack
* trace.
*
* @param t
* The thrown error.
*
* @return The laundered exception.
*
* @see http://sourceforge.net/apps/trac/bigdata/ticket/508 (LIMIT causes
* hash join utility to log errors)
* @see BLZG-1658 MemoryManager should know when it has been closed
*/
private RuntimeException launderThrowable(final Throwable t) {
final String msg = "cause=" + t + ", state=" + toString();
/*
* Note: Per BLZG-1658, the MemoryManager.close() is invoked when a
* query is done. Thus, any exception having a root cause indicating
* that the MemoryManager is closed may be taken as direct evidence that
* the query is done. Thus, if an attempt by the HTree or BTree to read
* on the backing store (the MemoryManager) fails because the store is
* closed, we interpret this as a concurrent termination of the query
* for some other root cause and ignore the exception here (this is
* just like ignoring InterruptedException or BufferClosedException).
*/
if (!InnerCause.isInnerCause(t, InterruptedException.class)
&& !InnerCause.isInnerCause(t, BufferClosedException.class)
&& !InnerCause.isInnerCause(t, MemoryManagerClosedException.class)) {
/*
* Some sort of unexpected exception.
*/
log.error(msg, t);
}
return new RuntimeException(msg, t);
}
}