
/**
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved.
Contact:
SYSTAP, LLC DBA Blazegraph
2501 Calvert ST NW #106
Washington, DC 20008
licenses@blazegraph.com
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
* Created on Aug 30, 2011
*/
package com.bigdata.bop.join;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.FutureTask;
import com.bigdata.bop.BOp;
import com.bigdata.bop.BOpContext;
import com.bigdata.bop.BOpUtility;
import com.bigdata.bop.IBindingSet;
import com.bigdata.bop.IConstraint;
import com.bigdata.bop.IQueryAttributes;
import com.bigdata.bop.IVariable;
import com.bigdata.bop.NV;
import com.bigdata.bop.PipelineOp;
import com.bigdata.bop.controller.HTreeNamedSubqueryOp;
import com.bigdata.bop.controller.INamedSolutionSetRef;
import com.bigdata.bop.controller.INamedSubqueryOp;
import com.bigdata.bop.controller.JVMNamedSubqueryOp;
import com.bigdata.bop.controller.NamedSetAnnotations;
import com.bigdata.bop.engine.IRunningQuery;
import com.bigdata.htree.HTree;
import com.bigdata.relation.accesspath.AbstractUnsynchronizedArrayBuffer;
import com.bigdata.relation.accesspath.IBlockingBuffer;
import com.bigdata.relation.accesspath.UnsyncLocalOutputBuffer;
/**
* Operator joins a solution set modeled as a hash index into the pipeline. The
* solution set may be modeled by an {@link HTree} or a JVM {@link HashMap}.
* While this JOIN requires the RHS hash index to be fully materialized,
* evaluation of the LHS source solutions is pipelined.
*
* Parallel evaluation of source chunks is permitted, but the RHS hash index
* must have been checkpointed before this operator begins evaluation (the
* read-only {@link HTree} is thread-safe for concurrent readers). The
* checkpoint is a NOP for the thread-safe JVM {@link ConcurrentHashMap}
* collection class.
*
* A {@link HTree} solution set must be constructed by a
* {@link HTreeNamedSubqueryOp} or a {@link HTreeHashIndexOp}. A JVM solution
* set must be constructed by a {@link JVMNamedSubqueryOp} or
* {@link JVMHashIndexOp}.
*
*
* <h2>Handling OPTIONAL, EXISTS, and NOT-EXISTS</h2>
*
* {@link PipelineOp.Annotations#LAST_PASS} evaluation MUST be requested for any
* join type other than {@link JoinTypeEnum#Normal}. See
* {@link ChunkTask#doLastPass(UnsyncLocalOutputBuffer)} for details.
*
* TODO This class could be made concrete. There is no logic in it that is
* specific to either the {@link HTree} or the JVM hash join operation.
*
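* <p>
* A minimal construction sketch. The concrete subclass
* (<code>JVMSolutionSetHashJoinOp</code>), the upstream operator
* <code>lastOp</code>, the operator identifier, and the pre-built
* <code>namedSetRef</code> are illustrative assumptions, not requirements of
* this class beyond what the shallow copy constructor verifies:
* <pre>
* // Reference under which the hash index was published by the operator
* // that built it (e.g., a hash index or named subquery operator).
* final INamedSolutionSetRef namedSetRef = ...;
*
* final PipelineOp join = new JVMSolutionSetHashJoinOp(
*         new BOp[] { lastOp }, // LHS (pipelined) source operator
*         new NV(BOp.Annotations.BOP_ID, 10),
*         new NV(SolutionSetHashJoinOp.Annotations.NAMED_SET_REF, namedSetRef),
*         // LAST_PASS is required because RELEASE defaults to true.
*         new NV(PipelineOp.Annotations.LAST_PASS, true),
*         new NV(SolutionSetHashJoinOp.Annotations.RELEASE, true));
* </pre>
*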
* @see INamedSubqueryOp
* @see HashIndexOp
*
* @author Bryan Thompson
* @version $Id: NamedSubqueryIncludeOp.java 5178 2011-09-12 19:09:23Z
* thompsonbry $
*/
abstract public class SolutionSetHashJoinOp extends PipelineOp {
/**
*
*/
private static final long serialVersionUID = 1L;
public interface Annotations extends PipelineOp.Annotations,
NamedSetAnnotations {
/**
* An {@link IConstraint}[] to be applied to solutions when they are
* joined (optional).
*/
final String CONSTRAINTS = JoinAnnotations.CONSTRAINTS;
/**
* When <code>true</code>, the hash index identified by
* {@link #NAMED_SET_REF} will be released when this operator is done
* (default {@value #DEFAULT_RELEASE}).
*
* Note: Whether or not the hash index can be released depends on
* whether or not the hash index will be consumed by more than one
* operator in the query plan. For example, a named solution set can be
* consumed by more than one operator and thus must not be released
* until all such operators are done.
*
* TODO Alternatively, we could specify the #of different locations in
* the query plan where the named solution set will be consumed. This
* could be part of the {@link HTreeHashJoinUtility} state, in which
* case it would only be set as an annotation on the operator which
* generates the hash index.
*
* Note: Any memory associated with the {@link IRunningQuery} will be
* released no later than when {@link IRunningQuery#isDone()} becomes true. This
* only provides a means to release data as soon as it is known that the
* data will not be referenced again during the query.
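*
* A hedged sketch of the two configurations (illustrative only; the
* surrounding query plan and the number of consumers are assumptions):
* <pre>
* // Intermediate INCLUDE of a named set that is consumed again later:
* // keep the hash index alive for the downstream consumer.
* new NV(SolutionSetHashJoinOp.Annotations.RELEASE, false)
*
* // Final consumer: release the index after the last source chunk is seen.
* new NV(SolutionSetHashJoinOp.Annotations.RELEASE, true),
* new NV(PipelineOp.Annotations.LAST_PASS, true)
* </pre>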
*/
final String RELEASE = SolutionSetHashJoinOp.class + ".release";
final boolean DEFAULT_RELEASE = true;
}
/**
* Deep copy constructor.
*/
public SolutionSetHashJoinOp(final SolutionSetHashJoinOp op) {
super(op);
}
/**
* Shallow copy constructor.
*
* @param args
* @param annotations
*/
public SolutionSetHashJoinOp(final BOp[] args,
final Map<String, Object> annotations) {
super(args, annotations);
if (getProperty(Annotations.RELEASE, Annotations.DEFAULT_RELEASE)
&& !isLastPassRequested()) {
/*
* In order to release the hash index, this operator needs to be
* notified when no more source solutions will become available.
*/
throw new IllegalArgumentException(Annotations.RELEASE
+ " requires " + Annotations.LAST_PASS);
}
// The RHS annotation must be specified.
getRequiredProperty(Annotations.NAMED_SET_REF);
}
public SolutionSetHashJoinOp(final BOp[] args, final NV... annotations) {
this(args, NV.asMap(annotations));
}
@Override
public BaseJoinStats newStats() {
return new BaseJoinStats();
}
@Override
public FutureTask<Void> eval(final BOpContext<IBindingSet> context) {
return new FutureTask<Void>(new ChunkTask(context, this));
}
/**
* Task executing on the node.
*/
private static class ChunkTask implements Callable<Void> {
private final BOpContext<IBindingSet> context;
private final SolutionSetHashJoinOp op;
private final IHashJoinUtility state;
private final IConstraint[] constraints;
private final boolean release;
private final BaseJoinStats stats;
private final IBlockingBuffer<IBindingSet[]> sink;
private final IBlockingBuffer<IBindingSet[]> sink2;
public ChunkTask(final BOpContext<IBindingSet> context,
final SolutionSetHashJoinOp op) {
this.context = context;
this.stats = (BaseJoinStats) context.getStats();
this.release = op.getProperty(Annotations.RELEASE,
Annotations.DEFAULT_RELEASE);
this.sink = context.getSink();
this.sink2 = context.getSink2();
this.op = op;
// The name of the attribute used to discover the solution set.
final INamedSolutionSetRef namedSetRef = (INamedSolutionSetRef) op
.getRequiredProperty(Annotations.NAMED_SET_REF);
/*
* Lookup the attributes for the query on which we will hang the
* solution set. See BLZG-1493 (if queryId is null, use the query
* attributes for this running query).
*/
final IQueryAttributes attrs = context.getQueryAttributes(namedSetRef.getQueryId());
state = (IHashJoinUtility) attrs.get(namedSetRef);
if (state == null) {
// The solution set was not found!
throw new RuntimeException("Not found: " + namedSetRef);
}
if (!state.getJoinType().isNormal() && !op.isLastPassRequested()) {
/*
* Anything but a Normal join requires that we observe all solutions
* and then do some final reporting. This is necessary for Optional,
* Exists, and NotExists.
*/
throw new UnsupportedOperationException(state.getJoinType()
+ " requires " + Annotations.LAST_PASS);
}
/*
* Combine the original constraints (if any) with those attached to
* this operator (if any).
*
* Note: The solution set hash join is used to join in a hash index
* generated by some other part of the query plan. Since it is also
* used for named subqueries, which can be included in more than one
* location, it is necessary that we can override/expand on the join
* constraints for this operator.
*/
this.constraints = BOpUtility.concat(
(IConstraint[]) op.getProperty(Annotations.CONSTRAINTS),
state.getConstraints());
}
@Override
public Void call() throws Exception {
try {
doHashJoin();
// Done.
return null;
} finally {
if (release && context.isLastInvocation()) {
/*
* Note: It is possible to INCLUDE the named temporary
* solution set multiple times within a query. If we want to
* release() the hash tree then we need to know how many
* times the temporary solution set is being included and
* decrement a counter each time. When the counter reaches
* zero, we can release the hash index.
*/
state.release();
}
sink.close();
if (sink2 != null)
sink2.close();
}
}
/**
* Do a hash join of the buffered solutions with the access path.
*/
private void doHashJoin() {
if (state.isEmpty())
return;
stats.accessPathCount.increment();
stats.accessPathRangeCount.add(state.getRightSolutionCount());
final UnsyncLocalOutputBuffer<IBindingSet> unsyncBuffer = new UnsyncLocalOutputBuffer<IBindingSet>(
op.getChunkCapacity(), sink);
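/*
* Join the source (left) solutions flowing through the pipeline against
* the pre-built hash index (right), applying the combined constraints,
* and buffer the join results for the default sink.
*/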
state.hashJoin2(context.getSource(), stats, unsyncBuffer,
constraints);
if (context.isLastInvocation()) {
doLastPass(unsyncBuffer);
}
unsyncBuffer.flush();
sink.flush();
}
/**
* This method handles {@link JoinTypeEnum} values other than
* {@link JoinTypeEnum#Normal}. {@link PipelineOp.Annotations#LAST_PASS}
* evaluation MUST be requested for any other than a
* {@link JoinTypeEnum#Normal}.
*/
private void doLastPass(
final UnsyncLocalOutputBuffer<IBindingSet> unsyncBuffer) {
switch (state.getJoinType()) {
case Normal:
/*
* Nothing to do.
*/
break;
case Optional:
case NotExists: {
/*
* Output the optional solutions.
*/
// where to write the optional solutions.
final AbstractUnsynchronizedArrayBuffer<IBindingSet> unsyncBuffer2 = sink2 == null ? unsyncBuffer
: new UnsyncLocalOutputBuffer<IBindingSet>(
op.getChunkCapacity(), sink2);
state.outputOptionals(unsyncBuffer2);
unsyncBuffer2.flush();
if (sink2 != null)
sink2.flush();
break;
}
case Exists: {
/*
* Output the join set.
*
* Note: This has special hooks to support (NOT) EXISTS
* graph patterns, which must bind the "ASK_VAR" depending
* on whether or not the graph pattern is satisfied.
*/
final IVariable<?> askVar = state.getAskVar();
// askVar := true
state.outputJoinSet(unsyncBuffer);
if (askVar != null) {
// askVar := false;
state.outputOptionals(unsyncBuffer);
}
break;
}
default:
throw new AssertionError();
}
}
} // class ChunkTask
}