
/**
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved.
Contact:
SYSTAP, LLC DBA Blazegraph
2501 Calvert ST NW #106
Washington, DC 20008
licenses@blazegraph.com
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
* Created on Aug 30, 2011
*/
package com.bigdata.bop.join;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.FutureTask;
import com.bigdata.bop.BOp;
import com.bigdata.bop.BOpContext;
import com.bigdata.bop.BOpUtility;
import com.bigdata.bop.IBindingSet;
import com.bigdata.bop.IConstraint;
import com.bigdata.bop.IQueryAttributes;
import com.bigdata.bop.IVariable;
import com.bigdata.bop.NV;
import com.bigdata.bop.PipelineOp;
import com.bigdata.bop.controller.HTreeNamedSubqueryOp;
import com.bigdata.bop.controller.INamedSolutionSetRef;
import com.bigdata.bop.controller.INamedSubqueryOp;
import com.bigdata.bop.controller.JVMNamedSubqueryOp;
import com.bigdata.bop.controller.NamedSetAnnotations;
import com.bigdata.bop.engine.IRunningQuery;
import com.bigdata.htree.HTree;
import com.bigdata.relation.accesspath.AbstractUnsynchronizedArrayBuffer;
import com.bigdata.relation.accesspath.IBlockingBuffer;
import com.bigdata.relation.accesspath.UnsyncLocalOutputBuffer;
/**
* Operator joins a solution set modeled as a hash index into the pipeline. The
* solution set may be modeled by an {@link HTree} or a JVM {@link HashMap}.
* While this JOIN requires the RHS hash index to be fully materialized,
* evaluation of the LHS source solutions is pipelined.
*
* Parallel evaluation of source chunks is permitted, but the RHS hash index
* must have been checkpointed before this operator begins evaluation (the
* read-only {@link HTree} is thread-safe for concurrent readers). The
* checkpoint is a NOP for the thread-safe JVM {@link ConcurrentHashMap}
* collection class.
*
* A {@link HTree} solution set must be constructed by a
* {@link HTreeNamedSubqueryOp} or a {@link HTreeHashIndexOp}. A JVM solution
* set must be constructed by a {@link JVMNamedSubqueryOp} or
* {@link JVMHashIndexOp}.
*
*
* <h2>Handling OPTIONAL, EXISTS, and NOT-EXISTS</h2>
*
* {@link PipelineOp.Annotations#LAST_PASS} evaluation MUST be requested for any
* join type other than {@link JoinTypeEnum#Normal}. See
* {@link ChunkTask#doLastPass(UnsyncLocalOutputBuffer)} for details.
*
* TODO This class could be made concrete. There is no logic in it that is
* specific to either the {@link HTree} or the JVM hash join operation.
*
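* <p>
* A minimal construction sketch. The concrete subclass
* (<code>JVMSolutionSetHashJoinOp</code>), the upstream operator
* <code>lastOp</code>, the operator identifier, and the pre-built
* <code>namedSetRef</code> are illustrative assumptions, not requirements of
* this class beyond what the shallow copy constructor verifies:
* <pre>
* // Reference under which the hash index was published by the operator
* // that built it (e.g., a hash index or named subquery operator).
* final INamedSolutionSetRef namedSetRef = ...;
*
* final PipelineOp join = new JVMSolutionSetHashJoinOp(
*         new BOp[] { lastOp }, // LHS (pipelined) source operator
*         new NV(BOp.Annotations.BOP_ID, 10),
*         new NV(SolutionSetHashJoinOp.Annotations.NAMED_SET_REF, namedSetRef),
*         // LAST_PASS is required because RELEASE defaults to true.
*         new NV(PipelineOp.Annotations.LAST_PASS, true),
*         new NV(SolutionSetHashJoinOp.Annotations.RELEASE, true));
* </pre>
*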
* @see INamedSubqueryOp
* @see HashIndexOp
*
* @author Bryan Thompson
* @version $Id: NamedSubqueryIncludeOp.java 5178 2011-09-12 19:09:23Z
* thompsonbry $
*/
abstract public class SolutionSetHashJoinOp extends PipelineOp {
/**
*
*/
private static final long serialVersionUID = 1L;
public interface Annotations extends PipelineOp.Annotations,
NamedSetAnnotations {
/**
* An {@link IConstraint}[] to be applied to solutions when they are
* joined (optional).
*/
final String CONSTRAINTS = JoinAnnotations.CONSTRAINTS;
/**
* When <code>true</code>, the hash index identified by
* {@link #NAMED_SET_REF} will be released when this operator is done
* (default {@value #DEFAULT_RELEASE}).
*
* Note: Whether or not the hash index can be released depends on
* whether or not the hash index will be consumed by more than one
* operator in the query plan. For example, a named solution set can be
* consumed by more than one operator and thus must not be released
* until all such operators are done.
*
* TODO Alternatively, we could specify the #of different locations in
* the query plan where the named solution set will be consumed. This
* could be part of the {@link HTreeHashJoinUtility} state, in which
* case it would only be set as an annotation on the operator which
* generates the hash index.
*
* Note: Any memory associated with the {@link IRunningQuery} will be
* released no later than when {@link IRunningQuery#isDone()} becomes true. This
* only provides a means to release data as soon as it is known that the
* data will not be referenced again during the query.
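*
* A hedged sketch of the two configurations (illustrative only; the
* surrounding query plan and the number of consumers are assumptions):
* <pre>
* // Intermediate INCLUDE of a named set that is consumed again later:
* // keep the hash index alive for the downstream consumer.
* new NV(SolutionSetHashJoinOp.Annotations.RELEASE, false)
*
* // Final consumer: release the index after the last source chunk is seen.
* new NV(SolutionSetHashJoinOp.Annotations.RELEASE, true),
* new NV(PipelineOp.Annotations.LAST_PASS, true)
* </pre>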
*/
final String RELEASE = SolutionSetHashJoinOp.class + ".release";
final boolean DEFAULT_RELEASE = true;
}
/**
* Deep copy constructor.
*/
public SolutionSetHashJoinOp(final SolutionSetHashJoinOp op) {
super(op);
}
/**
* Shallow copy constructor.
*
* @param args
* @param annotations
*/
public SolutionSetHashJoinOp(final BOp[] args,
final Map<String, Object> annotations) {
super(args, annotations);
if (getProperty(Annotations.RELEASE, Annotations.DEFAULT_RELEASE)
&& !isLastPassRequested()) {
/*
* In order to release the hash index, this operator needs to be
* notified when no more source solutions will become available.
*/
throw new IllegalArgumentException(Annotations.RELEASE
+ " requires " + Annotations.LAST_PASS);
}
// The RHS annotation must be specified.
getRequiredProperty(Annotations.NAMED_SET_REF);
}
public SolutionSetHashJoinOp(final BOp[] args, final NV... annotations) {
this(args, NV.asMap(annotations));
}
@Override
public BaseJoinStats newStats() {
return new BaseJoinStats();
}
@Override
public FutureTask<Void> eval(final BOpContext<IBindingSet> context) {
return new FutureTask<Void>(new ChunkTask(context, this));
}
/**
* Task executing on the node.
*/
private static class ChunkTask implements Callable<Void> {
private final BOpContext<IBindingSet> context;
private final SolutionSetHashJoinOp op;
private final IHashJoinUtility state;
private final IConstraint[] constraints;
private final boolean release;
private final BaseJoinStats stats;
private final IBlockingBuffer<IBindingSet[]> sink;
private final IBlockingBuffer<IBindingSet[]> sink2;
public ChunkTask(final BOpContext<IBindingSet> context,
final SolutionSetHashJoinOp op) {
this.context = context;
this.stats = (BaseJoinStats) context.getStats();
this.release = op.getProperty(Annotations.RELEASE,
Annotations.DEFAULT_RELEASE);
this.sink = context.getSink();
this.sink2 = context.getSink2();
this.op = op;
// The name of the attribute used to discover the solution set.
final INamedSolutionSetRef namedSetRef = (INamedSolutionSetRef) op
.getRequiredProperty(Annotations.NAMED_SET_REF);
/*
* Lookup the attributes for the query on which we will hang the
* solution set. See BLZG-1493 (if queryId is null, use the query
* attributes for this running query).
*/
final IQueryAttributes attrs = context.getQueryAttributes(namedSetRef.getQueryId());
state = (IHashJoinUtility) attrs.get(namedSetRef);
if (state == null) {
// The solution set was not found!
throw new RuntimeException("Not found: " + namedSetRef);
}
if (!state.getJoinType().isNormal() && !op.isLastPassRequested()) {
/*
* Anything but a Normal join requires that we observe all solutions
* and then do some final reporting. This is necessary for Optional,
* Exists, and NotExists.
*/
throw new UnsupportedOperationException(state.getJoinType()
+ " requires " + Annotations.LAST_PASS);
}
/*
* Combine the original constraints (if any) with those attached to
* this operator (if any).
*
* Note: The solution set hash join is used to join in a hash index
* generated by some other part of the query plan. Since it is also
* used for named subqueries, which can be included in more than one
* location, it is necessary that we can override/expand on the join
* constraints for this operator.
*/
this.constraints = BOpUtility.concat(
(IConstraint[]) op.getProperty(Annotations.CONSTRAINTS),
state.getConstraints());
}
@Override
public Void call() throws Exception {
try {
doHashJoin();
// Done.
return null;
} finally {
if (release && context.isLastInvocation()) {
/*
* Note: It is possible to INCLUDE the named temporary
* solution set multiple times within a query. If we want to
* release() the hash tree then we need to know how many
* times the temporary solution set is being included and
* decrement a counter each time. When the counter reaches
* zero, we can release the hash index.
*/
state.release();
}
sink.close();
if (sink2 != null)
sink2.close();
}
}
/**
* Do a hash join of the buffered solutions with the access path.
*/
private void doHashJoin() {
if (state.isEmpty())
return;
stats.accessPathCount.increment();
stats.accessPathRangeCount.add(state.getRightSolutionCount());
final UnsyncLocalOutputBuffer<IBindingSet> unsyncBuffer = new UnsyncLocalOutputBuffer<IBindingSet>(
op.getChunkCapacity(), sink);
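/*
* Join the source (left) solutions flowing through the pipeline against
* the pre-built hash index (right), applying the combined constraints,
* and buffer the join results for the default sink.
*/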
state.hashJoin2(context.getSource(), stats, unsyncBuffer,
constraints);
if (context.isLastInvocation()) {
doLastPass(unsyncBuffer);
}
unsyncBuffer.flush();
sink.flush();
}
/**
* This method handles {@link JoinTypeEnum} values other than
* {@link JoinTypeEnum#Normal}. {@link PipelineOp.Annotations#LAST_PASS}
* evaluation MUST be requested for any other than a
* {@link JoinTypeEnum#Normal}.
*/
private void doLastPass(
final UnsyncLocalOutputBuffer<IBindingSet> unsyncBuffer) {
switch (state.getJoinType()) {
case Normal:
/*
* Nothing to do.
*/
break;
case Optional:
case NotExists: {
/*
* Output the optional solutions.
*/
// where to write the optional solutions.
final AbstractUnsynchronizedArrayBuffer<IBindingSet> unsyncBuffer2 = sink2 == null ? unsyncBuffer
: new UnsyncLocalOutputBuffer<IBindingSet>(
op.getChunkCapacity(), sink2);
state.outputOptionals(unsyncBuffer2);
unsyncBuffer2.flush();
if (sink2 != null)
sink2.flush();
break;
}
case Exists: {
/*
* Output the join set.
*
* Note: This has special hooks to support (NOT) EXISTS
* graph patterns, which must bind the "ASK_VAR" depending
* on whether or not the graph pattern is satisfied.
*/
final IVariable<?> askVar = state.getAskVar();
// askVar := true
state.outputJoinSet(unsyncBuffer);
if (askVar != null) {
// askVar := false;
state.outputOptionals(unsyncBuffer);
}
break;
}
default:
throw new AssertionError();
}
}
} // class ChunkTask
}