// com.bigdata.bop.join.HashJoinOp Maven / Gradle / Ivy
/**
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved.
Contact:
SYSTAP, LLC DBA Blazegraph
2501 Calvert ST NW #106
Washington, DC 20008
licenses@blazegraph.com
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
* Created on Nov 14, 2011
*/
package com.bigdata.bop.join;
import java.util.Map;
import java.util.concurrent.Callable;
import java.util.concurrent.FutureTask;
import org.apache.log4j.Logger;
import com.bigdata.bop.BOp;
import com.bigdata.bop.BOpContext;
import com.bigdata.bop.BOpEvaluationContext;
import com.bigdata.bop.IBindingSet;
import com.bigdata.bop.IPredicate;
import com.bigdata.bop.IQueryAttributes;
import com.bigdata.bop.IShardwisePipelineOp;
import com.bigdata.bop.IVariable;
import com.bigdata.bop.NV;
import com.bigdata.bop.PipelineOp;
import com.bigdata.bop.controller.INamedSolutionSetRef;
import com.bigdata.bop.controller.NamedSetAnnotations;
import com.bigdata.relation.IRelation;
import com.bigdata.relation.accesspath.AbstractUnsynchronizedArrayBuffer;
import com.bigdata.relation.accesspath.IAccessPath;
import com.bigdata.relation.accesspath.IBindingSetAccessPath;
import com.bigdata.relation.accesspath.IBlockingBuffer;
import com.bigdata.relation.accesspath.UnsyncLocalOutputBuffer;
import cutthecrap.utils.striterators.ICloseableIterator;
/**
* Abstract base class for both JVM and native memory hash join against an
* {@link IAccessPath}. The source solutions from the pipeline are buffered on a
* hash index. Depending on the implementation, the hash index may have a
* threshold that will trigger an evaluation pass of the hash join. If not, then
* the hash join will run exactly once. When the hash join runs, the access path
* is scanned and the hash index (of intermediate solutions from the pipeline)
* is probed for each solution read from the {@link IAccessPath}. Solutions
* which join are output.
*
* @author Bryan Thompson
*/
abstract public class HashJoinOp extends PipelineOp implements
IShardwisePipelineOp {
/** Logger shared by this operator and its nested task class. */
private static final transient Logger log = Logger.getLogger(HashJoinOp.class);

/** Serialization version identifier. */
private static final long serialVersionUID = 1L;
/**
 * Annotations for this operator. This interface declares nothing itself; it
 * only combines {@link AccessPathJoinAnnotations},
 * {@link NamedSetAnnotations} and {@link HashJoinAnnotations}.
 */
public interface Annotations
        extends AccessPathJoinAnnotations, NamedSetAnnotations, HashJoinAnnotations {
    // No additional annotations are declared here.
}
/**
 * Constructor which hands an existing operator to the superclass
 * constructor.
 *
 * @param op
 *            The operator passed through to the superclass.
 */
public HashJoinOp(final HashJoinOp op) {

    super(op);

}
/**
 * Convenience constructor accepting the annotations as name/value pairs.
 * The pairs are converted with {@link NV#asMap(NV...)} and the work is
 * delegated to the map-based constructor, which performs the validation.
 *
 * @param args
 *            The operator arguments.
 * @param annotations
 *            The operator annotations as name/value pairs.
 */
public HashJoinOp(final BOp[] args, final NV... annotations) {

    this(args, NV.asMap(annotations));

}
/**
 * Primary constructor. The arguments and annotations are passed to the
 * superclass and then the requirements common to all concrete
 * implementations of this operator are validated.
 *
 * @param args
 *            The operator arguments (passed through to the superclass).
 * @param annotations
 *            The operator annotations (passed through to the superclass).
 *
 * @throws UnsupportedOperationException
 *             if the evaluation context is anything other than
 *             CONTROLLER, SHARDED or HASHED.
 * @throws IllegalArgumentException
 *             if the {@code Annotations.JOIN_VARS} array contains a
 *             {@code null} element.
 */
public HashJoinOp(final BOp[] args, final Map annotations) {
    super(args, annotations);
    /*
     * Validate common requirements for all concrete implementations of this
     * operator.
     */
    switch (getEvaluationContext()) {
    case CONTROLLER:
    case SHARDED:
    case HASHED:
        break;
    default:
        throw new UnsupportedOperationException(
                Annotations.EVALUATION_CONTEXT + "="
                        + getEvaluationContext());
    }
    // Predicate for the access path must be specified.
    getPredicate();
    // The named solution set reference must be specified.
    getRequiredProperty(Annotations.NAMED_SET_REF);
    // Join variables must be specified.
    final IVariable<?>[] joinVars = (IVariable<?>[]) getRequiredProperty(Annotations.JOIN_VARS);
    /*
     * Note: An empty join variable array is accepted (the length check is
     * intentionally not enforced here). Only null elements are rejected.
     */
    for (IVariable<?> var : joinVars) {
        if (var == null)
            throw new IllegalArgumentException(Annotations.JOIN_VARS);
    }
}
/**
 * {@inheritDoc}
 * <p>
 * The predicate is read from the required
 * {@code Annotations.PREDICATE} annotation.
 *
 * @see Annotations#PREDICATE
 */
@Override
@SuppressWarnings("unchecked")
public IPredicate getPredicate() {
    final Object value = getRequiredProperty(Annotations.PREDICATE);
    return (IPredicate) value;
}
/**
 * Return {@code true} iff the predicate associated with the join is
 * optional.
 *
 * @return The value reported by the predicate's {@code isOptional()}.
 *
 * @see IPredicate.Annotations#OPTIONAL
 */
protected boolean isOptional() {
    final IPredicate predicate = getPredicate();
    return predicate.isOptional();
}
/**
 * Create the statistics object used by this operator.
 *
 * @return A new {@link BaseJoinStats} instance.
 */
@Override
public BaseJoinStats newStats() {
    final BaseJoinStats joinStats = new BaseJoinStats();
    return joinStats;
}
/**
 * Return the instance of the {@link IHashJoinUtility} to be used by this
 * operator. The evaluation task invokes this method only when no
 * {@link IHashJoinUtility} is found on the {@link IQueryAttributes} under
 * the named solution set reference. The returned object is then attached
 * to those attributes and looked up there on subsequent evaluation passes.
 *
 * @param context
 *            The operator evaluation context.
 * @param namedSetRef
 *            Metadata to identify the named solution set.
 * @param joinType
 *            The type of join.
 *
 * @return The {@link IHashJoinUtility} that will buffer the solutions.
 */
abstract protected IHashJoinUtility newState(
        final BOpContext context,
        final INamedSolutionSetRef namedSetRef,
        final JoinTypeEnum joinType);
/**
* Return true
if {@link ChunkTask#doHashJoin()} should be
* executed in a given operator {@link ChunkTask} invocation.
*
* @param context
* The operator evaluation context.
* @param state
* The {@link IHashJoinUtility} instance.
*/
abstract protected boolean runHashJoin(final BOpContext> context,
final IHashJoinUtility state);
/**
 * Wrap a new {@link ChunkTask} for the given evaluation context in a
 * {@link FutureTask}. The returned task is not started here.
 *
 * @param context
 *            The operator evaluation context.
 *
 * @return The {@link FutureTask} wrapping the {@link ChunkTask}.
 */
@Override
public FutureTask eval(final BOpContext context) {
    final ChunkTask chunkTask = new ChunkTask(context, this);
    return new FutureTask(chunkTask);
}
/**
* Task executing on the node.
*/
private static class ChunkTask implements Callable {
/** The evaluation context supplied to the constructor. */
private final BOpContext context;

/** The operator on whose behalf this task runs. */
private final HashJoinOp op;

/** The predicate reported by the operator. */
private final IPredicate pred;

/** The relation which the context resolves for {@link #pred}. */
private final IRelation relation;

/** The statistics object obtained from the context. */
private final BaseJoinStats stats;

/** The hash join state (looked up on the query attributes or created). */
private final IHashJoinUtility state;

/** The sink obtained from the context. */
private final IBlockingBuffer sink;

/** The alternate sink obtained from the context (may be null). */
private final IBlockingBuffer sink2;
/**
 * Set up the task: capture the stats, predicate, relation and sinks from
 * the context and operator, then resolve the {@link IHashJoinUtility}
 * for the named solution set, creating and publishing it when it does
 * not exist yet.
 *
 * @param context
 *            The operator evaluation context.
 * @param op
 *            The hash join operator being evaluated.
 */
public ChunkTask(final BOpContext context,
        final HashJoinOp op) {

    this.context = context;
    this.op = op;

    this.stats = (BaseJoinStats) context.getStats();

    final IPredicate predicate = op.getPredicate();
    this.pred = predicate;
    this.relation = context.getRelation(predicate);

    this.sink = context.getSink();
    this.sink2 = context.getSink2();

    // Identifies the named solution set under which the state is published.
    final INamedSolutionSetRef namedSetRef = (INamedSolutionSetRef) op
            .getRequiredProperty(Annotations.NAMED_SET_REF);

    /*
     * Lookup the attributes for the query on which we will hang the
     * solution set. See BLZG-1493 (if queryId is null, use the query
     * attributes for this running query).
     */
    final IQueryAttributes attrs = context
            .getQueryAttributes(namedSetRef.getQueryId());

    /*
     * First, see if the hash join state already exists.
     *
     * Note: Since the operator is not thread-safe, we do not need to use
     * a putIfAbsent pattern here.
     *
     * Note: Publishing the state as a query attribute provides visibility
     * into the hash join against the access path even for implementations
     * (such as the JVMHashJoinOp) where the entire operation will occur
     * within a single evaluation pass.
     */
    IHashJoinUtility hashState = (IHashJoinUtility) attrs.get(namedSetRef);

    if (hashState == null) {

        // Not found: create it with the join type implied by the predicate.
        final JoinTypeEnum joinType;
        if (op.isOptional()) {
            joinType = JoinTypeEnum.Optional;
        } else {
            joinType = JoinTypeEnum.Normal;
        }

        hashState = op.newState(context, namedSetRef, joinType);

        attrs.put(namedSetRef, hashState);

    }

    this.state = hashState;

}
/**
 * Run one evaluation pass: buffer the source solutions on the hash index
 * and, when the operator says so, run the hash join against the access
 * path. The sinks are always closed on exit. Cleanup is nested so that a
 * failure while releasing the state, or while closing the first sink,
 * cannot leave a sink open.
 *
 * @return Always {@code null}.
 *
 * @throws Exception
 *             if the evaluation or the cleanup fails.
 */
@Override
public Void call() throws Exception {
    boolean didRun = false;
    try {
        acceptSolutions();
        if (op.runHashJoin(context, state)) {
            // Set before the join runs so the state is released even if
            // the join itself fails.
            didRun = true;
            doHashJoin();
        }
        // Done.
        return null;
    } finally {
        try {
            if (didRun) {
                /*
                 * The state needs to be released each time this operator
                 * runs in order to discard the intermediate solutions
                 * buffered on the hash index that were just joined against
                 * the access path. If we do not discard the state after
                 * processing the intermediate solutions, then they will
                 * continue to accumulate and we will over-report joins
                 * (duplicate solutions will be output for things already in
                 * the hash index the next time we evaluate the hash join
                 * against the access path).
                 */
                state.release();
            }
        } finally {
            // Close both sinks even if release() or the first close()
            // throws.
            try {
                sink.close();
            } finally {
                if (sink2 != null)
                    sink2.close();
            }
        }
    }
}
/**
 * Buffer the intermediate solutions: the source of the evaluation
 * context is handed, together with the stats, to the hash join state.
 */
private void acceptSolutions() {

    state.acceptSolutions(context.getSource(), stats);

}
/**
* Return the access path that to be scanned. Solutions read from this
* access path will be used to probe the hash index to identify
* solutions that can join.
*/
private IBindingSetAccessPath> getAccessPath() {
return (IBindingSetAccessPath>) context.getAccessPath(relation,
pred);
}
/**
* Do a hash join of the buffered solutions with the access path.
*/
private void doHashJoin() {
if (state.isEmpty())
return;
final IBindingSetAccessPath> accessPath = getAccessPath();
if (log.isInfoEnabled())
log.info("accessPath=" + accessPath);
stats.accessPathCount.increment();
stats.accessPathRangeCount.add(accessPath
.rangeCount(false/* exact */));
final UnsyncLocalOutputBuffer unsyncBuffer = new UnsyncLocalOutputBuffer(
op.getChunkCapacity(), sink);
final long cutoffLimit = pred.getProperty(
IPredicate.Annotations.CUTOFF_LIMIT,
IPredicate.Annotations.DEFAULT_CUTOFF_LIMIT);
// Obtain the iterator for the current join dimension.
final ICloseableIterator itr = accessPath
.solutions(context, cutoffLimit, stats);
/*
* Note: The [stats] are NOT passed in here since the chunksIn and
* unitsIn were updated when the pipeline solutions were accepted
* into the hash index. If we passed in stats here, they would be
* double counted when we executed the hash join against the access
* path.
*/
state.hashJoin(
itr,// left
null, // stats
unsyncBuffer// out
);
switch (state.getJoinType()) {
case Normal:
/*
* Nothing to do.
*/
break;
case Optional:
case NotExists: {
/*
* Output the optional solutions.
*/
// where to write the optional solutions.
final AbstractUnsynchronizedArrayBuffer unsyncBuffer2 = sink2 == null ? unsyncBuffer
: new UnsyncLocalOutputBuffer(
op.getChunkCapacity(), sink2);
state.outputOptionals(unsyncBuffer2);
unsyncBuffer2.flush();
if (sink2 != null)
sink2.flush();
break;
}
case Exists: {
/*
* Output the join set.
*/
state.outputJoinSet(unsyncBuffer);
break;
}
default:
throw new AssertionError();
}
unsyncBuffer.flush();
sink.flush();
}
} // class ChunkTask
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy