/**
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved.
Contact:
SYSTAP, LLC DBA Blazegraph
2501 Calvert ST NW #106
Washington, DC 20008
[email protected]
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
* Created on Aug 30, 2011
*/
package com.bigdata.bop.join;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.UUID;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicReference;
import org.apache.log4j.Logger;
import com.bigdata.bop.BOpContext;
import com.bigdata.bop.Constant;
import com.bigdata.bop.HTreeAnnotations;
import com.bigdata.bop.IBindingSet;
import com.bigdata.bop.IConstant;
import com.bigdata.bop.IConstraint;
import com.bigdata.bop.IVariable;
import com.bigdata.bop.IndexAnnotations;
import com.bigdata.bop.PipelineOp;
import com.bigdata.bop.controller.INamedSolutionSetRef;
import com.bigdata.bop.engine.BOpStats;
import com.bigdata.btree.Checkpoint;
import com.bigdata.btree.DefaultTupleSerializer;
import com.bigdata.btree.HTreeIndexMetadata;
import com.bigdata.btree.ITuple;
import com.bigdata.btree.ITupleIterator;
import com.bigdata.btree.ITupleSerializer;
import com.bigdata.btree.IndexMetadata;
import com.bigdata.btree.keys.ASCIIKeyBuilderFactory;
import com.bigdata.btree.keys.IKeyBuilder;
import com.bigdata.btree.raba.codec.FrontCodedRabaCoderDupKeys;
import com.bigdata.btree.raba.codec.SimpleRabaCoder;
import com.bigdata.counters.CAT;
import com.bigdata.htree.HTree;
import com.bigdata.io.ByteArrayBuffer;
import com.bigdata.rawstore.IRawStore;
import com.bigdata.rdf.internal.IV;
import com.bigdata.rdf.internal.IVCache;
import com.bigdata.rdf.internal.encoder.IBindingSetDecoder;
import com.bigdata.rdf.internal.encoder.IVBindingSetEncoderWithIVCache;
import com.bigdata.rdf.internal.impl.literal.XSDBooleanIV;
import com.bigdata.rdf.model.BigdataValue;
import com.bigdata.relation.accesspath.BufferClosedException;
import com.bigdata.relation.accesspath.IBuffer;
import com.bigdata.rwstore.sector.IMemoryManager;
import com.bigdata.rwstore.sector.MemStore;
import com.bigdata.rwstore.sector.MemoryManagerClosedException;
import com.bigdata.util.Bytes;
import com.bigdata.util.BytesUtil;
import com.bigdata.util.InnerCause;
import cutthecrap.utils.striterators.Expander;
import cutthecrap.utils.striterators.ICloseableIterator;
import cutthecrap.utils.striterators.IStriterator;
import cutthecrap.utils.striterators.Resolver;
import cutthecrap.utils.striterators.SingleValueIterator;
import cutthecrap.utils.striterators.Striterator;
import cutthecrap.utils.striterators.Visitor;
/**
* Utility methods to support hash index builds and hash index joins using
* scalable native memory data structures.
*
* Vectoring and IV encoding
*
* In order to provide efficient encoding and persistence of solutions on the
* {@link HTree}, this class is written directly to the RDF data model. Rather
* than POJO serialization, solutions are encoded as logical {@link IV}[]s in a
* manner very similar to how we represent the keys of the statement indices.
*
* Since this encoding does not persist the {@link IV#getValue() cache}, a
* separate mapping must be maintained from {@link IV} to {@link BigdataValue}
* for those {@link IV}s which have a materialized {@link BigdataValue}.
*
* TODO Do a 64-bit hash version which could be used for hash indices having
* more than 500M distinct join variable combinations. Note that at 500M
* distinct join variable combinations we have a 1 in 4 chance of a hash
* collision. Whether or not that turns into a cost really depends on the
* cardinality of the solutions per distinct combination of the join variables.
* If there is only one solution per join variable combination, then those
* collisions will cause basically no increase in the work to be done. However,
* if there are 50,000 solutions per distinct combination of the join variables
* then we would be better off using a 64-bit hash code.
*
* FIXME Vector resolution of ivCache. Various methods use
* {@link IVBindingSetEncoderWithIVCache#resolveCachedValues(IBindingSet)}
*
* @author Bryan Thompson
* @version $Id: HTreeHashJoinUtility.java 5568 2011-11-07 19:39:12Z thompsonbry $
*/
public class HTreeHashJoinUtility implements IHashJoinUtility {
static private final transient Logger log = Logger
.getLogger(HTreeHashJoinUtility.class);
/**
* Singleton {@link IHashJoinUtilityFactory} that can be used to create a
* new {@link HTreeHashJoinUtility}.
*/
static public final IHashJoinUtilityFactory factory =
new IHashJoinUtilityFactory() {
private static final long serialVersionUID = 1L;
public IHashJoinUtility create(//
final BOpContext context,//
final INamedSolutionSetRef namedSetRef,//
final PipelineOp op,//
final JoinTypeEnum joinType//
) {
return new HTreeHashJoinUtility(
context.getMemoryManager(namedSetRef.getQueryId()),
op, joinType);
}
};
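/*
* Example: a minimal sketch of how the factory above is typically driven.
* The [context], [namedSetRef], [op], [itr] and [stats] variables are
* illustrative placeholders supplied by the query engine, not part of
* this class.
*
*   final IHashJoinUtility state = HTreeHashJoinUtility.factory.create(
*           context, namedSetRef, op, JoinTypeEnum.Normal);
*   try {
*       state.acceptSolutions(itr, stats); // build the hash index.
*   } finally {
*       state.release(); // always release the native memory.
*   }
*/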
/**
* Note: If joinVars is an empty array, then the solutions will all hash to
* ONE (1).
*/
private static final int ONE = 1;
/**
* Return the hash code which will be used as the key given the ordered
* as-bound values for the join variables.
*
* @param joinVars
* The join variables.
* @param bset
* The bindings whose as-bound hash code for the join variables
* will be computed.
* @param ignoreUnboundVariables
* If a variable without a binding should be silently ignored.
*
* @return The hash code.
*
* @throws JoinVariableNotBoundException
* if there is no binding for a join variable.
*
* FIXME Does anything actually rely on the
* {@link JoinVariableNotBoundException}? It would seem that
* this exception could only be thrown if the joinvars[] was
* incorrectly formulated as it should only include
* "known bound" variables. (I think that this is related to
* incorrectly passing along empty solutions for named subquery
* hash joins.)
*/
private static int hashCode(final IVariable<?>[] joinVars,
final IBindingSet bset, final boolean ignoreUnboundVariables)
throws JoinVariableNotBoundException {
int h = ONE;
for (IVariable<?> v : joinVars) {
final IConstant<?> c = bset.get(v);
if (c == null) {
if(ignoreUnboundVariables)
continue;
// Reject any solution which does not have a binding for a join
// variable.
throw new JoinVariableNotBoundException(v.getName());
}
// Works Ok.
h = 31 * h + c.hashCode();
/*
* TODO Martyn's version. Also works Ok. Compare rate of hash
* collisions and impact on join performance. Also compare use of
* 64-bit hash codes and impact on join performance (there should be
* fewer hash collisions).
*/
// @see http://burtleburtle.net/bob/hash/integer.html
//
// final int hc = c.hashCode();
// h += ~(hc<<15);
// h ^= (hc>>10);
// h += (hc<<3);
// h ^= (hc>>6);
}
if (log.isTraceEnabled())
log.trace("hashCode=" + h + ", joinVars="
+ Arrays.toString(joinVars) + " : " + bset);
return h;
}
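/*
* Worked example for the accumulation above: with joinVars = {x, y} and
* as-bound values c1 and c2, the computed key is
*
*   h = 31 * (31 * 1 + c1.hashCode()) + c2.hashCode()
*
* Solutions with pairwise-equal hash codes on the join variables always
* map to the same key; the converse can fail (hash collisions), which is
* why the join below re-verifies the bindings via BOpContext.bind().
*/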
protected AtomicBoolean getOpen() {
return open;
}
protected IVBindingSetEncoderWithIVCache getEncoder() {
return encoder;
}
protected long getNoJoinVarsLimit() {
return noJoinVarsLimit;
}
protected boolean getOutputDistinctJVs() {
return outputDistinctJVs;
}
/**
* {@code true} until the state is discarded by {@link #release()}.
*/
private final AtomicBoolean open = new AtomicBoolean(true);
/**
* The operator whose annotations are used to initialize this object.
*
* Note: This was added to support the DISTINCT FILTER in
* {@link #outputSolutions(IBuffer)}.
*/
private final PipelineOp op;
// /**
// * This basically controls the vectoring of the hash join.
// */
// private final int chunkSize = 1000;//ChunkedWrappedIterator.DEFAULT_CHUNK_SIZE;
/**
* Utility class for compactly and efficiently encoding and decoding
* {@link IBindingSet}s.
*/
private final IVBindingSetEncoderWithIVCache encoder;
/**
* The type of join to be performed.
*/
private final JoinTypeEnum joinType;
/**
* {@code true} iff the join is OPTIONAL.
*/
private final boolean optional;
/**
* {@code true} iff this is a DISTINCT filter.
*/
private final boolean filter;
// /**
// * The operator which was used to construct the {@link IHashJoinUtility}
// * state.
// *
// * Note: This is NOT necessarily the operator which is currently executing.
// * Hash indices are often built by one operator and then consumed by
// * other(s).
// */
// private final PipelineOp op;
/**
* @see HashJoinAnnotations#ASK_VAR
*/
private final IVariable<?> askVar;
/**
* The join variables.
*/
private final IVariable<?>[] joinVars;
/**
* The variables to be retained (optional, all variables are retained if
* not specified).
*/
private final IVariable<?>[] selectVars;
/**
* True if the hash join utility class is to output the distinct join
* variables.
*/
private boolean outputDistinctJVs = false;
/**
* The join constraints (optional).
*/
private final IConstraint[] constraints;
/**
* The backing {@link IRawStore}.
*/
private final IRawStore store;
/**
* The hash index. The keys are int32 hash codes built from the join
* variables. The values are an {@link IV}[], similar to the encoding in
* the statement indices. The mapping from the index positions in the
* {@link IV}s to the variables is managed by the {@link #encoder}.
*/
private final AtomicReference<HTree> rightSolutions = new AtomicReference<HTree>();
/**
* The set of distinct source solutions which joined. This set is maintained
* iff the join is optional and is {@code null} otherwise.
*/
private final AtomicReference<HTree> joinSet = new AtomicReference<HTree>();
/**
* The maximum #of (left,right) solution joins that will be considered
* before failing the join. This is used IFF there are no join variables.
*
* TODO HINTS: Annotation and query hint for this. Probably on
* {@link HashJoinAnnotations}.
*/
private final long noJoinVarsLimit = HashJoinAnnotations.DEFAULT_NO_JOIN_VARS_LIMIT;
/**
* The #of left solutions considered for a join.
*/
protected final CAT nleftConsidered = new CAT();
/**
* The #of right solutions considered for a join.
*/
protected final CAT nrightConsidered = new CAT();
/**
* The #of solution pairs considered for a join.
*/
protected final CAT nJoinsConsidered = new CAT();
/**
* The hash index.
*/
protected HTree getRightSolutions() {
return rightSolutions.get();
}
/**
* The set of distinct source solutions which joined. This set is
* maintained iff the join is optional and is {@code null} otherwise.
*/
protected HTree getJoinSet() {
return joinSet.get();
}
/**
* Human readable representation of the {@link IHashJoinUtility} metadata
* (but not the solutions themselves).
*/
@Override
public String toString() {
final StringBuilder sb = new StringBuilder();
sb.append(getClass().getSimpleName());
sb.append("{open=" + open);
sb.append(",joinType=" + joinType);
// sb.append(",chunkSize=" + chunkSize);
// sb.append(",optional=" + optional);
// sb.append(",filter=" + filter);
if (askVar != null)
sb.append(",askVar=" + askVar);
sb.append(",joinVars=" + Arrays.toString(joinVars));
sb.append(",outputDistinctJVs=" + outputDistinctJVs);
if (selectVars != null)
sb.append(",selectVars=" + Arrays.toString(selectVars));
if (constraints != null)
sb.append(",constraints=" + Arrays.toString(constraints));
sb.append(",size=" + getRightSolutionCount());
sb.append(",considered(left=" + nleftConsidered + ",right="
+ nrightConsidered + ",joins=" + nJoinsConsidered + ")");
if (joinSet.get() != null)
sb.append(",joinSetSize=" + getJoinSetSize());
// sb.append(",encoder="+encoder);
sb.append("}");
return sb.toString();
}
@Override
public boolean isEmpty() {
return getRightSolutionCount() == 0;
}
@Override
public long getRightSolutionCount() {
final HTree htree = getRightSolutions();
if (htree != null) {
return htree.getEntryCount();
}
return 0L;
}
protected long getJoinSetSize() {
final HTree htree = getJoinSet();
if (htree != null) {
return htree.getEntryCount();
}
return 0L;
}
@Override
public JoinTypeEnum getJoinType() {
return joinType;
}
@Override
public IVariable<?> getAskVar() {
return askVar;
}
@Override
public IVariable<?>[] getJoinVars() {
return joinVars;
}
@Override
public IVariable<?>[] getSelectVars() {
return selectVars;
}
@Override
public boolean isOutputDistinctJoinVars() {
return outputDistinctJVs;
}
@Override
public IConstraint[] getConstraints() {
return constraints;
}
/**
* Setup the {@link IndexMetadata} for {@link #rightSolutions} or
* {@link #joinSet}.
*/
protected static HTreeIndexMetadata getIndexMetadata(final PipelineOp op) {
final HTreeIndexMetadata metadata = new HTreeIndexMetadata(
UUID.randomUUID());
final int addressBits = op.getProperty(HTreeAnnotations.ADDRESS_BITS,
HTreeAnnotations.DEFAULT_ADDRESS_BITS);
// final int branchingFactor = 1 << addressBits;
final int ratio = 32; // TODO Config/tune.
metadata.setAddressBits(addressBits);
metadata.setRawRecords(op.getProperty(//
HTreeAnnotations.RAW_RECORDS,
HTreeAnnotations.DEFAULT_RAW_RECORDS));
metadata.setMaxRecLen(op.getProperty(//
HTreeAnnotations.MAX_RECLEN,
HTreeAnnotations.DEFAULT_MAX_RECLEN));
metadata.setWriteRetentionQueueCapacity(op.getProperty(
IndexAnnotations.WRITE_RETENTION_QUEUE_CAPACITY,
IndexAnnotations.DEFAULT_WRITE_RETENTION_QUEUE_CAPACITY));
metadata.setKeyLen(Bytes.SIZEOF_INT); // int32 hash code keys.
@SuppressWarnings("rawtypes")
final ITupleSerializer<?, ?> tupleSer = new DefaultTupleSerializer(
new ASCIIKeyBuilderFactory(Bytes.SIZEOF_INT),
// new FrontCodedRabaCoder(ratio),// keys : TODO Optimize for int32!
new FrontCodedRabaCoderDupKeys(ratio),// keys : TODO Optimize for int32!
new SimpleRabaCoder() // vals
);
metadata.setTupleSerializer(tupleSer);
return metadata;
}
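/*
* Note (sketch): the HTree fan-out is 2^addressBits, so, e.g., 10 address
* bits give directory pages with 1 << 10 == 1024 slots. A hypothetical
* override through the operator's annotations (NV is com.bigdata.bop.NV;
* the surrounding op construction is elided):
*
*   new NV(HTreeAnnotations.ADDRESS_BITS, 8) // fan-out of 256.
*/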
/**
*
* @param mmgr
* The IMemoryManager which will back the named solution set.
* @param op
* The operator whose annotations inform the construction of the
* hash index. The {@link HTreeAnnotations} may be specified for
* this operator and will control the initialization of the
* various {@link HTree} instances.
* @param joinType
* The type of join to be performed.
*
* @see HTreeHashJoinAnnotations
*/
public HTreeHashJoinUtility(final IMemoryManager mmgr, final PipelineOp op,
final JoinTypeEnum joinType) {
if (mmgr == null)
throw new IllegalArgumentException();
if (op == null)
throw new IllegalArgumentException();
if(joinType == null)
throw new IllegalArgumentException();
this.op = op;
this.joinType = joinType;
this.optional = joinType == JoinTypeEnum.Optional;
this.filter = joinType == JoinTypeEnum.Filter;
// Optional variable used for (NOT) EXISTS.
this.askVar = (IVariable<?>) op
.getProperty(HashJoinAnnotations.ASK_VAR);
// The join variables (required).
this.joinVars = (IVariable<?>[]) op
.getRequiredProperty(HashJoinAnnotations.JOIN_VARS);
// The projected variables (optional and equal to the join variables iff
// this is a DISTINCT filter).
this.outputDistinctJVs =
op.getProperty(
HashIndexOp.Annotations.OUTPUT_DISTINCT_JVs, false);
this.selectVars = filter ? joinVars : (IVariable<?>[]) op
.getProperty(JoinAnnotations.SELECT);
/*
* This wraps an efficient raw store interface around a child memory
* manager created from the IMemoryManager which will back the named
* solution set.
*/
store = new MemStore(mmgr.createAllocationContext());
// Setup the encoder. The ivCache will be backed by the memory manager.
this.encoder = new IVBindingSetEncoderWithIVCache(store, filter, op);
/*
* Note: This is not necessary. We will encounter the join variables in
* the solutions as they are processed and they will automatically
* become part of the schema maintained by the encoder.
*/
// // Initialize the schema with the join variables.
// encoder.updateSchema(joinVars);
// The join constraints (optional).
this.constraints = (IConstraint[]) op
.getProperty(JoinAnnotations.CONSTRAINTS);
// Will support incremental eviction and persistence.
rightSolutions.set(HTree.create(store, getIndexMetadata(op)));
switch (joinType) {
case Optional:
case Exists:
case NotExists:
// The join set is used to handle optionals.
joinSet.set(HTree.create(store, getIndexMetadata(op)));
break;
}
}
/**
* The backing {@link IRawStore}.
*/
public IRawStore getStore() {
return store;
}
/**
* {@inheritDoc}
*
* This implementation checkpoints the {@link HTree} instance(s) used to
* buffer the source solutions ({@link #rightSolutions} and the ivCache
* maintained by the {@link #encoder}) and then re-loads them in
* read-only mode from their checkpoint(s).
* This exposes a view of the {@link HTree} which is safe for concurrent
* readers.
*/
@Override
public void saveSolutionSet() {
if (!open.get())
throw new IllegalStateException();
checkpointHTree(rightSolutions);
encoder.saveSolutionSet();
/*
* Note: DO NOT checkpoint the joinSet here. That index is not even
* written upon until we begin to evaluate the joins, which happens
* after we checkpoint the source solutions.
*/
}
// /**
// * Checkpoint the join set (used to buffer the optional solutions).
// *
// * Note: Since we always output the solutions which did not join from a
// * single thread as part of last pass evaluation there is no need to
// * checkpoint the {@link #joinSet}.
// */
// public void checkpointJoinSet() {
//
// if (!open.get())
// throw new IllegalStateException();
//
// checkpointHTree(joinSet);
//
// }
private void checkpointHTree(final AtomicReference<HTree> ref) {
final HTree tmp = ref.get();
if (tmp != null) {
// Checkpoint the HTree.
final Checkpoint checkpoint = tmp.writeCheckpoint2();
if (log.isInfoEnabled())
log.info(checkpoint.toString());
// Get a read-only view of the HTree.
final HTree readOnly = HTree.load(store,
checkpoint.getCheckpointAddr(), true/* readOnly */);
if (!ref.compareAndSet(tmp/* expect */, readOnly)) {
throw new IllegalStateException();
}
}
}
@Override
public void release() {
if (!open.compareAndSet(true/* expect */, false/* update */)) {
// Already closed.
return;
}
encoder.release();
HTree tmp = rightSolutions.getAndSet(null/* newValue */);
if (tmp != null) {
tmp.close();
}
tmp = joinSet.getAndSet(null/* newValue */);
if (tmp != null) {
tmp.close();
}
store.close();
}
@Override
public long acceptSolutions(final ICloseableIterator<IBindingSet[]> itr,
final BOpStats stats) {
if (!open.get())
throw new IllegalStateException();
if (itr == null)
throw new IllegalArgumentException();
if (stats == null)
throw new IllegalArgumentException();
try {
long naccepted = 0L;
final HTree htree = getRightSolutions();
final IKeyBuilder keyBuilder = htree.getIndexMetadata()
.getKeyBuilder();
// Note: We no longer re-chunk here.
final ICloseableIterator<IBindingSet[]> it = itr;
try {
final AtomicInteger vectorSize = new AtomicInteger();
while (it.hasNext()) {
// Vector a chunk of solutions.
final BS[] a = vector(it.next(), joinVars,
null/* selectVars */,
false/* ignoreUnboundVariables */, vectorSize);
final int n = vectorSize.get();
stats.chunksIn.increment();
stats.unitsIn.add(a.length);
// Insert solutions into HTree in key order.
for (int i = 0; i < n; i++) {
final BS tmp = a[i];
// Encode the key.
final byte[] key = keyBuilder.reset().append(tmp.hashCode)
.getKey();
// Encode the solution.
final byte[] val = encoder.encodeSolution(tmp.bset);
//log.warn("insert: key="+BytesUtil.toString(key));
// Insert binding set under hash code for that key.
htree.insert(key, val);
}
naccepted += a.length;
// Vectored update of the IV Cache.
// encoder.updateIVCache(cache);
encoder.flush();
}
} finally {
it.close();
}
if (log.isInfoEnabled())
log.info("naccepted=" + naccepted + ", nright="
+ htree.getEntryCount());
return naccepted;
} catch(Throwable t) {
throw launderThrowable(t);
}
}
@Override
public long filterSolutions(final ICloseableIterator<IBindingSet[]> itr,
final BOpStats stats, final IBuffer<IBindingSet> sink) {
if (itr == null)
throw new IllegalArgumentException();
if (stats == null)
throw new IllegalArgumentException();
try {
long naccepted = 0L;
final HTree htree = getRightSolutions();
final IKeyBuilder keyBuilder = htree.getIndexMetadata().getKeyBuilder();
// Note: We no longer rechunk here.
final Iterator<IBindingSet[]> it = itr;
final AtomicInteger vectorSize = new AtomicInteger();
while (it.hasNext()) {
// Vector a chunk of solutions.
final BS[] a = vector(it.next(), joinVars, selectVars,
true/* ignoreUnboundVariables */, vectorSize);
final int n = vectorSize.get();
stats.chunksIn.increment();
stats.unitsIn.add(a.length);
for (int i = 0; i < n; i++) {
final BS tmp = a[i];
// Encode the key.
final byte[] key = keyBuilder.reset().append(tmp.hashCode)
.getKey();
/*
* Encode the solution. Do not update the cache since we are
* only encoding so we can probe the hash index.
*/
final byte[] val = encoder
.encodeSolution(tmp.bset, false/* updateCache */);
/*
* Search the hash index for a match.
*
* TODO VECTOR: This does not take explicit advantage of the
* fact that different source solutions will fall into the
* same hash bucket in the HTree. The solutions are ordered
* by hashCode by vector() above, but we are using one lookupAll()
* invocation per source solution here rather than recognizing that
* multiple source solutions will hit the same hash bucket.
*/
boolean found = false;
final ITupleIterator<?> titr = htree.lookupAll(key);
while (titr.hasNext()) {
final ITuple<?> t = titr.next();
final ByteArrayBuffer tb = t.getValueBuffer();
if (0 == BytesUtil.compareBytesWithLenAndOffset(
0/* aoff */, val.length/* alen */, val,//
0/* boff */, tb.limit()/* blen */, tb.array()/* b */
)) {
found = true;
break;
}
}
if (!found) {
// Add to the hash index.
htree.insert(key, val);
// Write onto the sink.
sink.add(tmp.bset);
naccepted++;
}
}
// Note: The IV=>Value cache is NOT maintained for DISTINCT.
// encoder.flush();
// updateIVCache(cache, ivCache.get());
}
return naccepted;
} catch(Throwable t) {
throw launderThrowable(t);
}
}
/**
* Decode a solution from an encoded {@link IV}[].
*
* Note: The {@link IVCache} associations are NOT resolved by this method. The
* resolution step is relatively expensive since it must do lookups in
* persistence capable data structures. The caller MUST use
* {@link IBindingSetDecoder#resolveCachedValues(IBindingSet)} to resolve
* the {@link IVCache} associations once they decide that the decoded
* solution can join.
*
* Note: This instance method is required by the MERGE JOIN logic which
* associates the schema with the first {@link IHashJoinUtility} instance.
*
* @param t
* A tuple whose value is an encoded {@link IV}[].
*
* @return The decoded {@link IBindingSet}.
*/
protected IBindingSet decodeSolution(final ITuple<?> t) {
final ByteArrayBuffer b = t.getValueBuffer();
return encoder
.decodeSolution(b.array(), 0, b.limit(), false/* resolveCachedValues */);
}
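/*
* Example: the decode/resolve pairing expected of callers, per the note
* above. The tuple [t] and the joins() test are illustrative placeholders.
*
*   final IBindingSet bset = decodeSolution(t); // cheap, no IVCache lookups.
*   if (joins(bset)) { // hypothetical caller-side join test.
*       encoder.resolveCachedValues(bset); // now pay for materialization.
*       outputBuffer.add(bset);
*   }
*/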
/**
* Glue class for hash code and binding set used when the hash code is for
* just the join variables rather than the entire binding set.
*/
public static class BS implements Comparable<BS> {
final int hashCode;
final IBindingSet bset;
BS(final int hashCode, final IBindingSet bset) {
this.hashCode = hashCode;
this.bset = bset;
}
@Override
public int compareTo(final BS o) {
if (this.hashCode < o.hashCode)
return -1;
if (this.hashCode > o.hashCode)
return 1;
return 0;
}
@Override
public String toString() {
return getClass().getName() + "{hashCode=" + hashCode + ",bset="
+ bset + "}";
}
}
/**
* Glue class for hash code and encoded binding set used when we already
* have the binding set encoded.
*/
static class BS2 implements Comparable<BS2> {
final int hashCode;
final byte[] value;
BS2(final int hashCode, final byte[] value) {
this.hashCode = hashCode;
this.value = value;
}
@Override
public int compareTo(final BS2 o) {
if (this.hashCode < o.hashCode)
return -1;
if (this.hashCode > o.hashCode)
return 1;
return 0;
}
@Override
public String toString() {
return getClass().getName() + "{hashCode=" + hashCode + ",value="
+ BytesUtil.toString(value) + "}";
}
}
@Override
public void hashJoin(//
final ICloseableIterator<IBindingSet[]> leftItr,//
final BOpStats stats,
final IBuffer<IBindingSet> outputBuffer//
) {
hashJoin2(leftItr, stats, outputBuffer, constraints);
}
/*
* The hash join is vectored. We compute the hashCode for each source
* solution from the leftItr and then sort those left solutions. This gives
* us an ordered progression through the hash buckets for the HTree.
* Further, since we know that any left solution having the same hash code
* will read on the same hash bucket, we probe that hash bucket once for all
* left solutions that hash into the same bucket.
*/
@Override
public void hashJoin2(//
final ICloseableIterator<IBindingSet[]> leftItr,//
final BOpStats stats,//
final IBuffer<IBindingSet> outputBuffer,//
final IConstraint[] constraints//
) {
if (!open.get())
throw new IllegalStateException();
// Note: We no longer rechunk in this method.
final Iterator<IBindingSet[]> it;
it = leftItr;// incremental.
/*
* Note: This forces all source chunks into a single chunk. This could
* improve vectoring, but was really added for debugging.
*/
// it = new SingletonIterator(BOpUtility.toArray(leftItr, null/*stats*/));
try {
final HTree rightSolutions = this.getRightSolutions();
if (log.isInfoEnabled()) {
log.info("rightSolutions: #nnodes="
+ rightSolutions.getNodeCount() + ",#leaves="
+ rightSolutions.getLeafCount() + ",#entries="
+ rightSolutions.getEntryCount());
}
final IKeyBuilder keyBuilder = rightSolutions.getIndexMetadata()
.getKeyBuilder();
// true iff there are no join variables.
final boolean noJoinVars = joinVars.length == 0;
final AtomicInteger vectorSize = new AtomicInteger();
while (it.hasNext()) {
final BS[] a; // vectored solutions.
final int n; // #of valid elements in a[].
{
// Next chunk of solutions from left.
final IBindingSet[] b = it.next();
if (stats != null) {
stats.chunksIn.increment();
stats.unitsIn.add(b.length);
}
// Vector a chunk of solutions, ordering by hashCode.
a = vector(b, joinVars, null/* selectVars */,
false/* ignoreUnboundVariables */, vectorSize);
// The size of that vector.
n = vectorSize.get();
nleftConsidered.add(n);
}
int fromIndex = 0;
while (fromIndex < n) {
/*
* Figure out how many left solutions in the current chunk
* have the same hash code. We will use the same iterator
* over the right solutions for that hash code against the
* HTree.
*/
// The next hash code to be processed.
final int hashCode = a[fromIndex].hashCode;
// scan for the first hash code which is different.
int toIndex = n; // assume upper bound.
for (int i = fromIndex + 1; i < n; i++) {
if (a[i].hashCode != hashCode) {
toIndex = i;
break;
}
}
// #of left solutions having the same hash code.
final int bucketSize = toIndex - fromIndex;
if (log.isTraceEnabled())
log.trace("hashCode=" + hashCode + ": #left="
+ bucketSize + ", vectorSize=" + n
+ ", firstLeft=" + a[fromIndex]);
/*
* Note: all source solutions in [fromIndex:toIndex) have
* the same hash code. They will be vectored together.
*/
// All solutions which join for that collision bucket
final LinkedList<BS2> joined;
switch (joinType) {
case Optional:
case Exists:
case NotExists:
joined = new LinkedList<BS2>();
break;
default:
joined = null;
break;
}
// #of solutions which join for that collision bucket.
int njoined = 0;
// #of solutions which did not join for that collision bucket.
int nrejected = 0;
{
final byte[] key = keyBuilder.reset().append(hashCode)
.getKey();
/**
* Visit all source solutions having the same hash code.
*
* @see <a href="http://sourceforge.net/apps/trac/bigdata/ticket/764">
* Stochastic results with Analytic Query Mode</a>
*
* FIXME This appears to be the crux of the problem
* for #764. If you replace lookupAll(key) with
* rangeIterator() then the hash join is correct.
* Of course, it is also scanning all tuples each
* time so it is very inefficient. The root cause
* is the FrontCodedRabaCoder. It is doing a binary
* search on the BucketPage. However, the
* FrontCodedRabaCoder was not developed to deal
* with duplicates on the page. Therefore it is
* returning an offset into the middle of a run of
* duplicate keys when it does its binary search.
* We will either need to modify this IRabaCoder to
* handle this case (where duplicate keys are
* allowed) or write a new IRabaCoder that is smart
* about duplicates.
*/
final ITupleIterator<?> titr;
if (true) {// scan just the hash bucket for that key.
//log.warn(" probe: key="+BytesUtil.toString(key));
titr = rightSolutions.lookupAll(key);
} else { // do a full scan on the HTree.
titr = rightSolutions.rangeIterator();
}
long sameHashCodeCount = 0;
while (titr.hasNext()) {
sameHashCodeCount++;
final ITuple<?> t = titr.next();
/*
* Note: The map entries must be the full source
* binding set, not just the join variables, even
* though the key and equality in the key is defined
* in terms of just the join variables.
*
* Note: Solutions which have the same hash code but
* whose bindings are inconsistent will be rejected
* by bind() below.
*/
final IBindingSet rightSolution = decodeSolution(t);
nrightConsidered.increment();
for (int i = fromIndex; i < toIndex; i++) {
final IBindingSet leftSolution = a[i].bset;
// Join.
final IBindingSet outSolution = BOpContext
.bind(leftSolution, rightSolution,
constraints,
selectVars);
nJoinsConsidered.increment();
if (noJoinVars
&& nJoinsConsidered.get() == noJoinVarsLimit) {
if (nleftConsidered.get() > 1
&& nrightConsidered.get() > 1) {
throw new UnconstrainedJoinException();
}
}
if (outSolution == null) {
nrejected++;
if (log.isTraceEnabled())
log.trace("Does not join"//
+": hashCode="+ hashCode//
+ ", sameHashCodeCount="+ sameHashCodeCount//
+ ", #left=" + bucketSize//
+ ", #joined=" + njoined//
+ ", #rejected=" + nrejected//
+ ", left=" + leftSolution//
+ ", right=" + rightSolution//
);
} else {
njoined++;
if (log.isDebugEnabled())
log.debug("JOIN"//
+ ": hashCode=" + hashCode//
+ ", sameHashCodeCount="+ sameHashCodeCount//
+ ", #left="+ bucketSize//
+ ", #joined=" + njoined//
+ ", #rejected=" + nrejected//
+ ", solution=" + outSolution//
);
}
switch(joinType) {
case Normal:
case Optional: {
if (outSolution == null) {
// Join failed.
continue;
}
// Resolve against ivCache.
encoder.resolveCachedValues(outSolution);
// Output this solution.
outputBuffer.add(outSolution);
if (optional) {
// Accumulate solutions to vector into
// the joinSet.
joined.add(new BS2(rightSolution
.hashCode(), t.getValue()));
}
break;
}
case Exists: {
/*
* The right solution is output iff there is
* at least one left solution which joins
* with that right solution. Each right
* solution is output at most one time. This
* amounts to outputting the joinSet after
* we have run the entire join. As long as
* the joinSet does not allow duplicates it
* will contain the solutions that we
* want.
*/
if (outSolution != null) {
// Accumulate solutions to vector into
// the joinSet.
joined.add(new BS2(rightSolution
.hashCode(), t.getValue()));
}
break;
}
case NotExists: {
/*
* The right solution is output iff there
* does not exist any left solution which
* joins with that right solution. This
* is basically an optional join where the
* solutions which join are not output.
*/
if (outSolution != null) {
// Accumulate solutions to vector into
// the joinSet.
joined.add(new BS2(rightSolution
.hashCode(), t.getValue()));
}
break;
}
default:
throw new AssertionError();
}
} // next left in the same bucket.
} // next rightSolution with the same hash code.
if (joined != null && !joined.isEmpty()) {
/*
* Vector the inserts into the [joinSet].
*/
final BS2[] a2 = joined.toArray(new BS2[njoined]);
Arrays.sort(a2, 0, njoined);
for (int i = 0; i < njoined; i++) {
final BS2 tmp = a2[i];
saveInJoinSet(tmp.hashCode, tmp.value);
}
}
} // end block of leftSolutions having the same hash code.
fromIndex = toIndex;
} // next slice of source solutions with the same hash code.
} // while(itr.hasNext()
if (log.isInfoEnabled())
log.info("done: " + toString());
} catch(Throwable t) {
throw launderThrowable(t);
} finally {
leftItr.close();
}
} // handleJoin
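/*
* Example: the build-then-probe protocol for this utility, assuming the
* enclosing operators drive it in this order. All variables are
* illustrative placeholders.
*
*   state.acceptSolutions(rightItr, stats); // build the hash index.
*   state.saveSolutionSet();                // publish a read-only view.
*   state.hashJoin(leftItr, stats, out);    // probe with left solutions.
*   if (state.getJoinType() == JoinTypeEnum.Optional)
*       state.outputOptionals(out);         // last pass: non-joining rights.
*   state.release();                        // free the native memory.
*/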
/**
* Vector a chunk of solutions.
*
* @param leftSolutions
* The solutions.
* @param joinVars
* The variables on which the hash code will be computed.
* @param selectVars
* When non-{@code null}, all other variables are dropped.
* (This is used when we are modeling a DISTINCT solutions filter
* since we need to drop anything which is not part of the
* DISTINCT variables list.)
* @param ignoreUnboundVariables
* When {@code true}, an unbound variable will not cause a
* {@link JoinVariableNotBoundException} to be thrown.
* @param vectorSize
* The vector size (set by side-effect). This will be LTE the
* number of solutions in {@code leftSolutions}. (If some
* solutions are eliminated because they lack a binding for a
* required join variable, then vectorSize is LT the number of
* {@code leftSolutions}.)
*
* @return The vectored chunk of solutions ordered by hash code.
*/
protected BS[] vector(final IBindingSet[] leftSolutions,
final IVariable<?>[] joinVars,
final IVariable<?>[] selectVars,
final boolean ignoreUnboundVariables,
final AtomicInteger vectorSize) {
final BS[] a = new BS[leftSolutions.length];
int n = 0; // The #of non-dropped source solutions.
int ndropped = 0; // The #of dropped solutions.
for (int i = 0; i < a.length; i++) {
/*
* Note: If this is a DISTINCT FILTER, then we need to drop the
* variables which are not being considered immediately. Those
* variables MUST NOT participate in the computed hash code.
*/
final IBindingSet bset = selectVars == null ? leftSolutions[i]
: leftSolutions[i].copy(selectVars);
// Compute hash code from bindings on the join vars.
int hashCode = ONE;
try {
hashCode = HTreeHashJoinUtility.hashCode(joinVars,
bset, ignoreUnboundVariables);
} catch (JoinVariableNotBoundException ex) {
if (!optional) {// Drop solution
if (log.isTraceEnabled())
log.trace(ex);
ndropped++;
continue;
}
}
a[n++] = new BS(hashCode, bset);
}
/*
* Sort by the computed hash code. This not only orders the accesses
* into the HTree but it also allows us to handle all source solutions
* which have the same hash code with a single scan of the appropriate
* collision bucket in the HTree.
*/
Arrays.sort(a, 0, n);
// Indicate the actual vector size to the caller via a side-effect.
vectorSize.set(n);
if (log.isTraceEnabled())
log.trace("Vectoring chunk for HTree locality: naccepted=" + n
+ ", ndropped=" + ndropped);
return a;
}
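/*
* Example: consuming the hash-code ordered vector. Runs of equal hash
* codes are contiguous, so a caller probes the HTree once per run (this
* is the pattern used by hashJoin2() above). [chunk] is a placeholder.
*
*   final AtomicInteger vectorSize = new AtomicInteger();
*   final BS[] a = vector(chunk, joinVars, null, false, vectorSize);
*   final int n = vectorSize.get();
*   int from = 0;
*   while (from < n) {
*       int to = from + 1;
*       while (to < n && a[to].hashCode == a[from].hashCode)
*           to++;
*       // Probe once for a[from].hashCode; join a[from..to) against it.
*       from = to;
*   }
*/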
/**
* Add to 2nd hash tree of all solutions which join.
*
* Note: the hash key is based on the entire solution (not just the join
* variables). The values are the full encoded {@link IBindingSet}.
*/
protected void saveInJoinSet(final int joinSetHashCode, final byte[] val) {
final HTree joinSet = this.getJoinSet();
if (true) {
/*
* Do not insert if there is already an entry for that solution in
* the join set.
*
* Note: EXISTS depends on this to have the correct cardinality. If
* EXISTS allows duplicate solutions into the join set then having
* multiple left solutions which satisfy the EXISTS filter will
* cause multiple copies of the right solution to be output! If you
* change the joinSet to allow duplicates, then it MUST NOT allow
* them for EXISTS!
*/
final IKeyBuilder keyBuilder = joinSet.getIndexMetadata()
.getKeyBuilder();
final byte[] key = keyBuilder.reset().append(joinSetHashCode)
.getKey();
// visit all joinSet solutions having the same hash code
final ITupleIterator<?> xitr = joinSet.lookupAll(key);
while (xitr.hasNext()) {
final ITuple<?> xt = xitr.next();
final ByteArrayBuffer b = xt.getValueBuffer();
if (0 == BytesUtil.compareBytesWithLenAndOffset(0/* aoff */,
val.length/* alen */, val/* a */, 0/* boff */,
b.limit()/* blen */, b.array())) {
return;
}
}
}
joinSet.insert(joinSetHashCode, val);
}
@Override
public void outputOptionals(final IBuffer<IBindingSet> outputBuffer) {
if (!open.get())
throw new IllegalStateException();
try {
@SuppressWarnings({ "rawtypes", "unchecked" })
final Constant f = askVar == null ? null : new Constant(
XSDBooleanIV.FALSE);
if (log.isInfoEnabled()) {
final HTree htree = this.getRightSolutions();
log.info("rightSolutions: #nnodes=" + htree.getNodeCount()
+ ",#leaves=" + htree.getLeafCount() + ",#entries="
+ htree.getEntryCount());
final HTree joinSet = this.getJoinSet();
log.info("joinSet: #nnodes=" + joinSet.getNodeCount()
+ ",#leaves=" + joinSet.getLeafCount() + ",#entries="
+ joinSet.getEntryCount());
}
final HTree joinSet = getJoinSet();
final IKeyBuilder keyBuilder = joinSet.getIndexMetadata()
.getKeyBuilder();
// Visit all source solutions.
final ITupleIterator<?> sitr = getRightSolutions().rangeIterator();
while (sitr.hasNext()) {
final ITuple<?> t = sitr.next();
final ByteArrayBuffer tb = t.getValueBuffer();
/*
* Note: This MUST be treated as effectively immutable since we
* may have to output multiple solutions for each rightSolution.
* Those output solutions MUST NOT side-effect [rightSolutions].
*/
final IBindingSet rightSolution = decodeSolution(t);
// The hash code is based on the entire solution for the
// joinSet.
final int hashCode = rightSolution.hashCode();
final byte[] key = keyBuilder.reset().append(hashCode).getKey();
// Probe the join set for this source solution.
final ITupleIterator<?> jitr = joinSet.lookupAll(key);
boolean found = false;
while (jitr.hasNext()) {
// Note: Compare full solutions, not just the hash code!
final ITuple<?> xt = jitr.next();
final ByteArrayBuffer xb = xt.getValueBuffer();
if (0 == BytesUtil.compareBytesWithLenAndOffset(
0/* aoff */, tb.limit()/* alen */,
tb.array()/* a */, 0/* boff */,
xb.limit()/* blen */, xb.array())) {
found = true;
break;
}
}
if (!found) {
/*
* Since the source solution is not in the join set, output
* it as an optional solution.
*/
IBindingSet bs = rightSolution;
if (selectVars != null) {
// Drop variables which are not projected.
bs = bs.copy(selectVars);
}
encoder.resolveCachedValues(bs);
if (f != null) {
if (bs == rightSolution)
bs = rightSolution.clone();
bs.set(askVar, f);
}
outputBuffer.add(bs);
}
}
} catch (Throwable t) {
throw launderThrowable(t);
}
} // outputOptionals.
@SuppressWarnings("unchecked")
@Override
public ICloseableIterator<IBindingSet> indexScan() {
final HTree rightSolutions = getRightSolutions();
if (log.isInfoEnabled()) {
log.info("rightSolutions: #nnodes="
+ rightSolutions.getNodeCount() + ",#leaves="
+ rightSolutions.getLeafCount() + ",#entries="
+ rightSolutions.getEntryCount());
}
// source.
final ITupleIterator<?> solutionsIterator = rightSolutions
.rangeIterator();
IStriterator itr = new Striterator(solutionsIterator);
/**
* Add resolution step.
*/
itr = itr.addFilter(new Resolver(){
private static final long serialVersionUID = 1L;
@Override
protected Object resolve(Object obj) {
final ITuple<?> t = (ITuple<?>) obj;
// Decode the solution.
IBindingSet bset = decodeSolution(t);
// if (selectVars != null) {
//
// // Drop variables which are not projected.
// bset = bset.copy(selectVars);
//
// }
// Resolve ivCache associations.
encoder.resolveCachedValues(bset);
return bset;
}
});
return (ICloseableIterator<IBindingSet>) itr;
}
@Override
public void outputSolutions(final IBuffer<IBindingSet> out) {
if (!open.get())
throw new IllegalStateException();
try {
final HTree rightSolutions = getRightSolutions();
if (log.isInfoEnabled()) {
log.info("rightSolutions: #nnodes="
+ rightSolutions.getNodeCount() + ",#leaves="
+ rightSolutions.getLeafCount() + ",#entries="
+ rightSolutions.getEntryCount());
}
/*
* Used to impose distinct JV on solutions having the same hash
* code. Together with lastHashCode, used to decide when we enter a
* new hash bucket.
*/
HashSet<IBindingSet> distinctSet = null;
int lastHashCode = -1;
// source.
final ITupleIterator<?> solutionsIterator = rightSolutions
.rangeIterator();
while (solutionsIterator.hasNext()) {
final ITuple<?> t = solutionsIterator.next();
IBindingSet bset = decodeSolution(t);
if (outputDistinctJVs) {
// Drop any bindings that are not in the join variables.
bset = bset.copy(joinVars);
final int newHashCode =
hashCode(joinVars, bset, true/* ignoreUnboundVariables */);
final boolean newBucket = distinctSet == null
|| newHashCode != lastHashCode;
if (newBucket) {
// New bucket? New DISTINCT set.
// TODO This is not on the native heap. But it only
// handles a single bucket. Still, it is possible for
// a bucket to get very large.
distinctSet = new HashSet<IBindingSet>();
lastHashCode = newHashCode;
}
if (!distinctSet.add(bset)) {
// Duplicate solution on JVs within current bucket.
continue;
}
// if (distinctFilter != null) {
//
// if ((bset = distinctFilter.accept(bset)) == null) {
//
// // Drop duplicate solutions.
// continue;
//
// }
} else if (selectVars != null) {
/*
* FIXME We should be using projectedInVars here since
* outputSolutions() is used to stream solutions into
* the child join group (at least for some kinds of
* joins, but there might be exceptions for joining with
* a named solution set).
*/
// Drop variables which are not projected.
bset = bset.copy(selectVars);
}
encoder.resolveCachedValues(bset);
out.add(bset);
}
} catch (Throwable t) {
throw launderThrowable(t);
}
} // outputSolutions
@Override
public void outputJoinSet(final IBuffer<IBindingSet> out) {
try {
@SuppressWarnings({ "rawtypes", "unchecked" })
final Constant t = askVar == null ? null : new Constant(
XSDBooleanIV.TRUE);
final HTree joinSet = getJoinSet();
if (log.isInfoEnabled()) {
log.info("joinSet: #nnodes="
+ joinSet.getNodeCount() + ",#leaves="
+ joinSet.getLeafCount() + ",#entries="
+ joinSet.getEntryCount());
}
// source.
final ITupleIterator<?> solutionsIterator = joinSet
.rangeIterator();
while (solutionsIterator.hasNext()) {
IBindingSet bset = decodeSolution(solutionsIterator.next());
if (selectVars != null) {
// Drop variables which are not projected.
bset = bset.copy(selectVars);
}
if (t != null) {
if (selectVars == null)
bset = bset.clone();
bset.set(askVar, t);
}
encoder.resolveCachedValues(bset);
out.add(bset);
}
} catch (Throwable t) {
throw launderThrowable(t);
}
} // outputJoinSet
/**
* {@inheritDoc}
*
* Note: For the {@link HTree}, the entries are in key order. Those keys are
* hash codes computed from the solutions using the join variables. Since
* the keys are hash codes and not the join variable bindings, each hash
* code identifies a collision bucket from the perspective of the merge join
* algorithm. Of course, from the perspective of the {@link HTree} those
* solutions are just consecutive tuples readily identified using
* {@link HTree#lookupAll(int)}.
*
* FIXME Either always project everything or raise [select] into a parameter
* for this method. We DO NOT want to only project whatever was projected by
* the first source.
*/
@Override
public void mergeJoin(
//
final IHashJoinUtility[] others,
final IBuffer<IBindingSet> outputBuffer,
final IConstraint[] constraints, final boolean optional) {
try {
/*
* Validate arguments.
*/
if (others == null)
throw new IllegalArgumentException();
if (others.length == 0)
throw new IllegalArgumentException();
if (outputBuffer == null)
throw new IllegalArgumentException();
final HTreeHashJoinUtility[] all = new HTreeHashJoinUtility[others.length + 1];
{
all[0] = this;
for (int i = 0; i < others.length; i++) {
final HTreeHashJoinUtility o = (HTreeHashJoinUtility) others[i];
if (o == null)
throw new IllegalArgumentException();
if (!Arrays.equals(joinVars, o.joinVars)) {
// Must have the same join variables.
throw new IllegalArgumentException();
}
all[i + 1] = o;
}
}
if (isEmpty()) {
// NOP
return;
}
/*
* Combine constraints for each source with the given constraints.
*/
final IConstraint[] c = JVMHashJoinUtility.combineConstraints(
constraints, all);
/*
* MERGE JOIN
*
* We follow the iterator on the first source. For each hash code which
* it visits, we synchronize iterators against the remaining sources. If
* the join is optional, then the iterator will be null for a source
* which does not have that hash code. Otherwise false is returned if
* any source lacks tuples for the current hash code.
*/
long njoined = 0, nrejected = 0;
{
// if optional then if there are no solutions don't try and
// expand further, we need a place-holder object
final Object NULL_VALUE = "NULL";
final int nsources = all.length;
final ITuple<?>[] set = new ITuple<?>[nsources + 1];
// Visit everything in the first source.
final Striterator sols0 = new Striterator(all[0].getRightSolutions()
.rangeIterator());
{
sols0.addFilter(new Visitor() {
private static final long serialVersionUID = 1L;
/**
* Set the tuple for the first source each time it advances.
*
* @param obj
* The tuple.
*/
@Override
protected void visit(final Object obj) {
set[0] = (ITuple<?>) obj;
}
});
// now add in Expanders and Visitors for each remaining source.
for (int i = 1; i < nsources; i++) {
// Final variables used inside inner classes.
final int slot = i;
final HTree thisTree = all[slot].getRightSolutions();
sols0.addFilter(new Expander() {
private static final long serialVersionUID = 1L;
/**
* Expansion pattern gives solutions for source @ slot.
*
* @param obj
* The tuple in set[slot-1].
*/
@Override
protected Iterator<?> expand(final Object obj) {
if (obj == NULL_VALUE) {
assert optional;
return new SingleValueIterator(NULL_VALUE);
}
// Sync itr for this source to key for prior src.
final byte[] key2 = ((ITuple<?>) obj).getKey();
final ITupleIterator<?> ret = thisTree.lookupAll(key2);
if (optional && !ret.hasNext()) {
/*
* Nothing for that key from this source. Return
* a single marker value so we can proceed to
* the remaining sources rather than halting.
*/
return new SingleValueIterator(NULL_VALUE);
} else {
/*
* Iterator visiting solutions from this source
* for the current key in the prior source.
*/
return ret;
}
}
});
sols0.addFilter(new Visitor() {
private static final long serialVersionUID = 1L;
/**
* Assign tuple to set[slot].
*
* Note: If [obj==NULL_VALUE] then no solutions for that
* slot.
*/
@Override
protected void visit(final Object obj) {
set[slot] = (ITuple<?>) (obj == NULL_VALUE ? null : obj);
}
});
}
} // end of striterator setup.
/*
* This will visit after all expansions. That means that we will
* observe the cross product of the solutions from the remaining
* sources having the same hash for each from the first source.
*
* Each time we visit something, set[] is the tuple[] which
* describes a specific set of solutions from that cross product.
*
* TODO Lift out the decodeSolution() for all slots into the
* expander pattern.
*/
while (sols0.hasNext()) {
sols0.next();
IBindingSet in = all[0].decodeSolution(set[0]);
// FIXME apply constraint to source[0] (JVM version also).
for (int i = 1; i < set.length; i++) {
// See if the solutions join.
final IBindingSet left = in;
if (set[i] != null) {
final IBindingSet right = all[i].decodeSolution(set[i]);
in = BOpContext.bind(//
left,//
right,//
c,// TODO constraint[][]
null//
);
}
if (in == null) {
// if(optional) {
// in = left;
// continue;
// }
// Join failed.
break;
}
}
// Accept this binding set.
if (in != null) {
if (log.isDebugEnabled())
log.debug("Output solution: " + in);
encoder.resolveCachedValues(in);
outputBuffer.add(in);
}
// // now clear set!
// for (int i = 1; i < set.length; i++) {
// set[i] = null;
// }
}
}
} catch(Throwable t) {
throw launderThrowable(t);
}
}
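/*
* Example: invoking a merge join across hash indices built with the SAME
* join variables. [first], [others], [out] and [constraints] are
* illustrative placeholders; the final argument disables the optional
* semantics.
*
*   final HTreeHashJoinUtility first = ...; // this instance.
*   final IHashJoinUtility[] others = ...;  // the remaining sources.
*   first.mergeJoin(others, out, constraints, false);
*/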
/**
* Adds metadata about the {@link IHashJoinUtility} state to the stack
* trace.
*
* @param t
* The thrown error.
*
* @return The laundered exception.
*
* @see http://sourceforge.net/apps/trac/bigdata/ticket/508 (LIMIT causes
* hash join utility to log errors)
* @see BLZG-1658 MemoryManager should know when it has been closed
*/
private RuntimeException launderThrowable(final Throwable t) {
final String msg = "cause=" + t + ", state=" + toString();
/*
* Note: Per BLZG-1658, the MemoryManager.close() is invoked when a
* query is done. Thus, any exception having a root cause indicating
* that the MemoryManager is closed may be taken as direct evidence that
* the query is done. Thus, if an attempt by the HTree or BTree to read
* on the backing store (the MemoryManager) fails because the store is
* closed, we interpret this as a concurrent termination of the query
* for some other root cause and ignore the exception here (this is
* just like ignoring InterruptedException or BufferClosedException).
*/
if (!InnerCause.isInnerCause(t, InterruptedException.class)
&& !InnerCause.isInnerCause(t, BufferClosedException.class)
&& !InnerCause.isInnerCause(t, MemoryManagerClosedException.class)) {
/*
* Some sort of unexpected exception.
*/
log.error(msg, t);
}
return new RuntimeException(msg, t);
}
}