// Source listing of com.bigdata.bop.controller.SubqueryOp, as published by a Maven repository
// JAR search and dependency download site (Maven / Gradle / Ivy).
/**
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved.
Contact:
SYSTAP, LLC DBA Blazegraph
2501 Calvert ST NW #106
Washington, DC 20008
licenses@blazegraph.com
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
* Created on Aug 18, 2010
*/
package com.bigdata.bop.controller;
import java.util.Arrays;
import java.util.Map;
import java.util.concurrent.Callable;
import java.util.concurrent.FutureTask;
import org.apache.log4j.Logger;
import com.bigdata.bop.BOp;
import com.bigdata.bop.BOpContext;
import com.bigdata.bop.BOpUtility;
import com.bigdata.bop.Constant;
import com.bigdata.bop.IBindingSet;
import com.bigdata.bop.IConstraint;
import com.bigdata.bop.IVariable;
import com.bigdata.bop.NV;
import com.bigdata.bop.PipelineOp;
import com.bigdata.bop.aggregate.IAggregate;
import com.bigdata.bop.engine.AbstractRunningQuery;
import com.bigdata.bop.engine.IRunningQuery;
import com.bigdata.bop.engine.QueryEngine;
import com.bigdata.bop.join.JoinAnnotations;
import com.bigdata.bop.join.JoinTypeEnum;
import com.bigdata.rdf.internal.IV;
import com.bigdata.rdf.internal.impl.literal.XSDBooleanIV;
import com.bigdata.rdf.model.BigdataLiteral;
import com.bigdata.relation.accesspath.IBlockingBuffer;
import cutthecrap.utils.striterators.ICloseableIterator;
/**
* Pipelined join with subquery.
*
* <p>
* For each binding set presented, this operator executes a subquery. Any
* solutions produced by the subquery are copied to the default sink. If no
* solutions are produced and the {@link Annotations#JOIN_TYPE} is
* {@link JoinTypeEnum#Optional}, then the original binding set is copied to
* the alternate sink when one is configured and otherwise to the default sink
* (optional join semantics). Each subquery is run as a separate query but will
* be cancelled if the parent query is cancelled.
*
* This operator does not use internal parallelism, but it is thread-safe and
* multiple instances of this operator may be run in parallel by the query
* engine for parallel evaluation of different binding set chunks flowing
* through the pipeline. However, there are much more efficient query plan
* patterns for most use cases. E.g., (a) creating a hash index with all source
* solutions, (b) flooding a sub-section of the query plan with the source
* solutions from the hash index; and (c) hash joining the solutions from the
* sub-section of the query plan back against the hash index to reunite the
* solutions from the subquery with those in the parent context.
*
*
* <h3>Usage Notes</h3>
*
* If there are no shared variables which must already be bound in the caller,
* then subquery join is (or may be if there are some "might" be bound
* variables) the full cross product (constraints are still applied and optional
* solutions must be reported if a constraint fails and the join is optional).
* Such subqueries should be run as named subqueries instead so they run once,
* rather than once per binding set.
*
* <p>
* If there are variables in scope in the parent query which are not projected
* by the subquery but which appear in the subquery as well, then such variables
* in the subquery are effectively distinct from those having the same name
* which appear in the parent query. Correct bottom-up evaluation semantics
* under these conditions are obtained by "projecting" into the subquery only
* those variables which it will project out.
*
*
* <h3>Efficiency</h3>
*
* This non-vectored operator issues one sub-query per source solution flowing
* into the operator. In general, it is MUCH more efficient to vector the
* solutions into a sub-plan. The latter is accomplished by building a hash
* index over the source solutions, flooding them into the sub-plan, and then
* executing the appropriate hash join back against the source solutions after
* the sub-plan.
*
* There are a few cases where it may make sense to use the non-vectored
* operator. For example, for EXISTS where LIMIT ONE can be imposed on the
* subquery. However, there can still be cases where the vectored sub-plan is
* more efficient.
*
* @see AbstractSubqueryOp
* @see JVMNamedSubqueryOp
* @see HTreeNamedSubqueryOp
*
* @author Bryan Thompson
*
* @see "bad performance for FILTER EXISTS (issue report; the original hyperlink was not preserved in this copy)"
*/
public class SubqueryOp extends PipelineOp {
private static final Logger log = Logger.getLogger(SubqueryOp.class);
/**
*
*/
private static final long serialVersionUID = 1L;
public interface Annotations extends SubqueryJoinAnnotations {
/**
* When non-null
, the {@link IVariable} which will be bound
* to true
iff there is at least one solution for the
* subquery. When specified, {@link #SELECT} SHOULD be null
* in order to project all bindings from the parent's context into the
* subquery. However, bindings in the subquery WILL NOT be projected
* back into the parent.
*
* Note: This supports EXISTS and NOT EXISTS semantics.
*/
String ASK_VAR = Annotations.class.getName() + ".askVar";
/**
* The {@link IVariable}[] projected by the subquery (optional).
*
* Rule: A variable within a subquery is distinct from the same name
* variable outside of the subquery unless the variable is projected
* from the subquery.
*
* When this option is given, only the variables projected by the
* subquery will be visible during the subquery (this models SPARQL 1.1
* subquery variable scope semantics). A variable having the same name
* in the parent context and the subquery which is not projected by the
* subquery is a distinct variable. This constraint is enforced by
* passing only the projected variables into the subquery and then
* merging the unprojected variables from the source solution into each
* result produced by the subquery for a given source solution.
*
* When this option is not given, all variables are passed into the
* subquery (basically, the semantics are those of SELECT *
* ).
*
* Note: This overrides the semantics of the same named annotation on
* the {@link JoinAnnotations} interface.
*/
String SELECT = SubqueryJoinAnnotations.SELECT;
/**
* Boolean annotation should be true
if the subquery is an
* aggregate (default {@value #DEFAULT_IS_AGGREGATE}).
*
* Note: We need to have distinct {@link IAggregate} objects in each
* subquery issued since they have internal state in order to prevent
* inappropriate sharing of state across invocations of the subquery.
*/
String IS_AGGREGATE = Annotations.class.getName() + ".isAggregate";
boolean DEFAULT_IS_AGGREGATE = false;
}
/**
* Deep copy constructor.
*/
public SubqueryOp(final SubqueryOp op) {
super(op);
}
/**
* Shallow copy constructor.
*
* @param args
* @param annotations
*/
public SubqueryOp(final BOp[] args,
final Map annotations) {
super(args, annotations);
getRequiredProperty(Annotations.SUBQUERY);
final IVariable>[] selectVars = (IVariable>[]) getProperty(Annotations.SELECT);
if (selectVars != null && selectVars.length == 0) {
/*
* An empty array suggests that nothing is being projected. Either
* we should reject that here or we should treat it instead as
* "SELECT *".
*/
throw new IllegalArgumentException(Annotations.SELECT
+ " is optional, but may not be empty.");
}
final JoinTypeEnum joinType = (JoinTypeEnum) getRequiredProperty(Annotations.JOIN_TYPE);
switch (joinType) {
case Normal:
case Optional:
break;
default:
throw new UnsupportedOperationException(Annotations.JOIN_TYPE + "="
+ joinType);
}
}
public SubqueryOp(final BOp[] args, NV... annotations) {
this(args, NV.asMap(annotations));
}
@Override
public FutureTask eval(final BOpContext context) {
return new FutureTask(new ControllerTask(this, context));
}
/**
* Evaluates the subquery for each source binding set. If the controller
* operator is interrupted, then the subqueries are cancelled. If a subquery
* fails, then all subqueries are cancelled.
*/
private static class ControllerTask implements Callable {
private final BOpContext context;
/** The type of join. */
private final JoinTypeEnum joinType;
/** true
if the subquery is an aggregate. */
private final boolean aggregate;
/** The subquery which is evaluated for each input binding set. */
private final PipelineOp subquery;
/** Bound to true or false depending on whether or not there are solutions for the subquery (optional). */
private final IVariable> askVar;
/** The projected variables (select *
) if missing. */
private final IVariable>[] selectVars;
/** The optional constraints on the join. */
private final IConstraint[] constraints;
public ControllerTask(final SubqueryOp controllerOp,
final BOpContext context) {
if (controllerOp == null)
throw new IllegalArgumentException();
if (context == null)
throw new IllegalArgumentException();
this.context = context;
// this.optional = controllerOp.getProperty(
// Annotations.OPTIONAL,
// Annotations.DEFAULT_OPTIONAL);
joinType = (JoinTypeEnum) controllerOp
.getRequiredProperty(Annotations.JOIN_TYPE);
this.aggregate = controllerOp.getProperty(
Annotations.IS_AGGREGATE,
Annotations.DEFAULT_IS_AGGREGATE);
this.subquery = (PipelineOp) controllerOp
.getRequiredProperty(Annotations.SUBQUERY);
this.askVar = (IVariable>) controllerOp
.getProperty(Annotations.ASK_VAR);
this.selectVars = (IVariable>[]) controllerOp
.getProperty(Annotations.SELECT);
this.constraints = (IConstraint[]) controllerOp
.getProperty(Annotations.CONSTRAINTS);
}
/**
* Evaluate the subquery.
*/
@Override
public Void call() throws Exception {
try {
final ICloseableIterator sitr = context
.getSource();
while(sitr.hasNext()) {
final IBindingSet[] chunk = sitr.next();
for(IBindingSet bset : chunk) {
final IRunningQuery runningSubquery = new SubqueryTask(
bset, subquery, context).call();
if (!runningSubquery.isDone()) {
throw new AssertionError("Future not done: "
+ runningSubquery.toString());
}
}
}
// Now that we know the subqueries ran Ok, flush the sink.
context.getSink().flush();
// Done.
return null;
} finally {
context.getSource().close();
context.getSink().close();
if (context.getSink2() != null)
context.getSink2().close();
}
}
/**
* Run a subquery.
*
* @author Bryan
* Thompson
*/
private class SubqueryTask implements Callable {
/**
* The evaluation context for the parent query.
*/
private final BOpContext parentContext;
/**
* The source binding set. This will be copied to the output if
* there are no solutions for the subquery (optional join
* semantics).
*/
private final IBindingSet parentSolutionIn;
/**
* The root operator for the subquery.
*/
private final PipelineOp subQueryOp;
public SubqueryTask(final IBindingSet bset, final PipelineOp subQuery,
final BOpContext parentContext) {
this.parentSolutionIn = bset;
if (aggregate) {
/*
* Note: We need to have distinct IAggregates in subqueries
* since they have internal state. This makes a copy of the
* subquery in which each IAggregate function is a distinct
* instance. This prevents inappropriate sharing of state
* across invocations of the subquery.
*/
this.subQueryOp = BOpUtility
.makeAggregateDistinct(subQuery);
} else {
this.subQueryOp = subQuery;
}
this.parentContext = parentContext;
}
public IRunningQuery call() throws Exception {
/*
* Binding set in which only the projected variables are
* visible. (if selectVars is empty, then all variables remain
* visible.).
*/
final IBindingSet childSolutionIn = parentSolutionIn
.copy(selectVars);
// The subquery
IRunningQuery runningSubquery = null;
// The iterator draining the subquery
ICloseableIterator subquerySolutionItr = null;
try {
final QueryEngine queryEngine = parentContext
.getRunningQuery().getQueryEngine();
if(log.isDebugEnabled())
log.debug("\nRunning subquery:" //
+ "\n selectVars: "
+ Arrays.toString(selectVars) //
+ "\nparentSolution(in): "
+ parentSolutionIn //
+ "\n childSolution(in): "
+ childSolutionIn//
);
// System.out.println("Running subquery" //
// + ": selectVars: "
// + Arrays.toString(selectVars) //
// + ", parentSolution(in): "
// + parentSolutionIn //
// + ", childSolution(in): "
// + childSolutionIn//
// );
runningSubquery = queryEngine.eval(subQueryOp,
childSolutionIn);
long ncopied = 0L;
try {
// Declare the child query to the parent.
((AbstractRunningQuery) parentContext.getRunningQuery())
.addChild(runningSubquery);
// Iterator visiting the subquery solutions.
subquerySolutionItr = runningSubquery.iterator();
if (askVar != null) {
/*
* For an ASK style subquery, we are only interested
* in whether or not at least one solution exists.
*/
final IV success = subquerySolutionItr
.hasNext() ? XSDBooleanIV.TRUE
: XSDBooleanIV.FALSE;
// System.err
// .println("in="
// + childSolutionIn
// + ", out="
// + (subquerySolutionItr.hasNext() ? subquerySolutionItr
// .next() : "N/A")
// + ", askVar=" + success);
//
parentSolutionIn.set(askVar,
new Constant>(
success));
parentContext.getSink().add(
new IBindingSet[] { parentSolutionIn });
// halt the subquery.
runningSubquery.cancel(true/*mayInterruptIfRunning*/);
ncopied = 1;
} else {
// Copy solutions from the subquery to the query.
ncopied = BOpUtility.copy(//
subquerySolutionItr,// subquery solutions.
parentContext.getSink(), //
null, // sink2
parentSolutionIn,// original bindings from
// parent query.
selectVars, // variables projected by
// subquery.
constraints, //
parentContext.getStats()//
);
}
// wait for the subquery to halt / test for errors.
runningSubquery.get();
} catch (InterruptedException ex) {
// this thread was interrupted, so cancel the subquery.
runningSubquery
.cancel(true/* mayInterruptIfRunning */);
// rethrow the exception.
throw ex;
}
if (ncopied == 0L && joinType.isOptional()) {
/*
* Since there were no solutions for the subquery, copy
* the original binding set to the appropriate sink and
* do NOT apply the constraints.
*/
final IBlockingBuffer optionalSink = parentContext
.getSink2() != null ? parentContext.getSink2()
: parentContext.getSink();
optionalSink
.add(new IBindingSet[] { parentSolutionIn });
}
// done.
return runningSubquery;
} catch (Throwable t) {
if (runningSubquery == null
|| runningSubquery.getCause() != null) {
/*
* If things fail before we start the subquery, or if a
* subquery fails (due to abnormal termination), then
* propagate the error to the parent and rethrow the
* first cause error out of the subquery.
*
* Note: IHaltable#getCause() considers exceptions
* triggered by an interrupt to be normal termination.
* Such exceptions are NOT propagated here and WILL NOT
* cause the parent query to terminate.
*/
throw new RuntimeException(ControllerTask.this.context
.getRunningQuery().halt(
runningSubquery == null ? t
: runningSubquery.getCause()));
}
return runningSubquery;
} finally {
try {
// ensure subquery is halted.
if (runningSubquery != null)
runningSubquery
.cancel(true/* mayInterruptIfRunning */);
} finally {
// ensure the subquery solution iterator is closed.
if (subquerySolutionItr != null)
subquerySolutionItr.close();
}
}
}
} // SubqueryTask
} // ControllerTask
}