// com.bigdata.bop.controller.AbstractSubqueryOp Maven / Gradle / Ivy
/**
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved.
Contact:
SYSTAP, LLC DBA Blazegraph
2501 Calvert ST NW #106
Washington, DC 20008
[email protected]
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
* Created on Aug 18, 2010
*/
package com.bigdata.bop.controller;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.Callable;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Executor;
import java.util.concurrent.FutureTask;
import com.bigdata.bop.BOp;
import com.bigdata.bop.BOpContext;
import com.bigdata.bop.BOpUtility;
import com.bigdata.bop.IBindingSet;
import com.bigdata.bop.PipelineOp;
import com.bigdata.bop.bset.Tee;
import com.bigdata.bop.engine.IRunningQuery;
import com.bigdata.bop.engine.QueryEngine;
import com.bigdata.util.concurrent.LatchedExecutor;
import cutthecrap.utils.striterators.ICloseableIterator;
/**
* Executes each of the operands as a subquery. The operands are evaluated in
* the order given and with the annotated parallelism. Each subquery is run as a
* separate query but is linked to the parent query in which the operator is
* being evaluated. The subqueries receive bindings from the pipeline and may be
* executed independently. By default, the subqueries are run with unlimited
* parallelism. Since the #of subqueries is generally small (2), this means that
* the subqueries run in parallel.
*
* Note: This operator must execute on the query controller.
*
* If you want to route intermediate results from other computations into
* subqueries, then consider a {@link Tee} pattern instead.
*
* For example:
*
* <pre>
* SLICE[1](
*   UNION[2]([...],{subqueries=[a,b,c]})
* )
* </pre>
*
* Will run the subqueries a, b, and c in parallel. Each
* subquery will be run once for each source {@link IBindingSet}.
*
* @author Bryan Thompson
* @version $Id$
*
* @todo There is relatively little difference between this class and SubqueryOp
* and we should consider converging them into a single concrete subquery
* operator with specializations for UNION and STEPS. The main difference
* is that the SubqueryOp can not run multiple subqueries.
*/
abstract public class AbstractSubqueryOp extends PipelineOp {
/**
*
*/
private static final long serialVersionUID = 1L;
/**
* Annotations understood by {@link AbstractSubqueryOp}, declared in addition
* to those inherited from {@link PipelineOp.Annotations}.
*/
public interface Annotations extends PipelineOp.Annotations {
/**
* The ordered {@link BOp}[] of subqueries to be evaluated for each
* binding set presented (required). The annotation name is the fully
* qualified name of {@link AbstractSubqueryOp} followed by
* <code>.subqueries</code>.
*/
String SUBQUERIES = AbstractSubqueryOp.class.getName() + ".subqueries";
/**
* The maximum parallelism with which the subqueries will be evaluated
* (default is unlimited, see {@link #DEFAULT_MAX_PARALLEL_SUBQUERIES}).
*/
String MAX_PARALLEL_SUBQUERIES = AbstractSubqueryOp.class.getName() + ".maxParallelSubqueries";
/**
* Default for {@link #MAX_PARALLEL_SUBQUERIES}: {@link Integer#MAX_VALUE},
* i.e. no practical limit on the number of subqueries run concurrently.
*/
int DEFAULT_MAX_PARALLEL_SUBQUERIES = Integer.MAX_VALUE;
}
/**
 * Reports the upper bound on how many subqueries may be evaluated
 * concurrently by this operator.
 *
 * @return The value looked up for the
 *         {@link Annotations#MAX_PARALLEL_SUBQUERIES} annotation, with
 *         {@link Annotations#DEFAULT_MAX_PARALLEL_SUBQUERIES} supplied as
 *         the default.
 *
 * @see Annotations#MAX_PARALLEL_SUBQUERIES
 */
public int getMaxParallelSubqueries() {

    final int maxParallel = getProperty(
            Annotations.MAX_PARALLEL_SUBQUERIES,
            Annotations.DEFAULT_MAX_PARALLEL_SUBQUERIES);

    return maxParallel;

}
/**
* Deep copy constructor. Delegates to the {@link PipelineOp} constructor
* which accepts the operator being copied.
*
* @param op
*            The operator to copy.
*/
public AbstractSubqueryOp(final AbstractSubqueryOp op) {
super(op);
}
/**
 * Shallow copy constructor. After handing the operands and annotations to
 * the {@link PipelineOp} constructor, it verifies that the
 * {@link Annotations#SUBQUERIES} annotation names at least one subquery.
 *
 * @param args
 *            The operands of this operator.
 * @param annotations
 *            The annotations of this operator; must provide
 *            {@link Annotations#SUBQUERIES}.
 *
 * @throws IllegalArgumentException
 *             if the {@link Annotations#SUBQUERIES} annotation is an empty
 *             array.
 */
public AbstractSubqueryOp(final BOp[] args,
        final Map annotations) {

    super(args, annotations);

    /*
     * Disabled checks (kept for reference): the operator was once required
     * to be annotated for evaluation on the query controller.
     */
    // if (!getEvaluationContext().equals(BOpEvaluationContext.CONTROLLER))
    // throw new IllegalArgumentException(Annotations.EVALUATION_CONTEXT
    // + "=" + getEvaluationContext());
    // if (!getProperty(Annotations.CONTROLLER, Annotations.DEFAULT_CONTROLLER))
    // throw new IllegalArgumentException(Annotations.CONTROLLER);

    // The subqueries annotation is required and may not be empty.
    final BOp[] declaredSubqueries = (BOp[]) getRequiredProperty(Annotations.SUBQUERIES);

    final boolean noSubqueries = declaredSubqueries.length == 0;

    if (noSubqueries) {
        throw new IllegalArgumentException(Annotations.SUBQUERIES);
    }

    /*
     * Disabled check (kept for reference): an operand was not allowed to
     * target this operator as its sink.
     */
    // // The id of this operator (if any).
    // final Integer thisId = (Integer)getProperty(Annotations.BOP_ID);
    //
    // for(BOp op : args) {
    //
    // final Integer sinkId = (Integer) op
    // .getRequiredProperty(Annotations.SINK_REF);
    //
    // if(sinkId.equals(thisId))
    // throw new RuntimeException("Operand may not target ")
    //
    // }

}
/**
 * Wraps the evaluation of this operator in a {@link FutureTask}. The task is
 * only constructed here; this method neither submits nor runs it.
 * <p>
 * The task's result type is {@link Void} because the wrapped
 * <code>ControllerTask</code> returns <code>null</code> from its
 * <code>call()</code> method.
 *
 * @param context
 *            The evaluation context for this operator (required).
 *
 * @return A new, not yet started task which evaluates the subqueries when
 *         run.
 *
 * @throws IllegalArgumentException
 *             if <i>context</i> is <code>null</code> (raised by the
 *             <code>ControllerTask</code> constructor).
 */
public FutureTask<Void> eval(final BOpContext context) {

    // NOTE(review): BOpContext is left raw because its type parameter is not
    // visible in this file - confirm against the PipelineOp declaration.
    return new FutureTask<Void>(new ControllerTask(this, context));

}
/**
* Evaluates the arguments of the operator as subqueries. The arguments are
* evaluated in order. An {@link Executor} with limited parallelism is used to
* evaluate the arguments. If the controller operator is interrupted, then
* the subqueries are cancelled. If a subquery fails, then all subqueries
* are cancelled.
*/
private static class ControllerTask implements Callable {
private final AbstractSubqueryOp controllerOp;
private final BOp[] subqueries;
private final BOpContext context;
private final int nparallel;
private final Executor executor;
public ControllerTask(final AbstractSubqueryOp controllerOp,
final BOpContext context) {
if (controllerOp == null)
throw new IllegalArgumentException();
if (context == null)
throw new IllegalArgumentException();
this.controllerOp = controllerOp;
this.context = context;
this.subqueries = (BOp[]) controllerOp
.getRequiredProperty(Annotations.SUBQUERIES);
this.nparallel = controllerOp.getProperty(Annotations.MAX_PARALLEL_SUBQUERIES,
Annotations.DEFAULT_MAX_PARALLEL_SUBQUERIES);
this.executor = new LatchedExecutor(context.getIndexManager()
.getExecutorService(), nparallel);
}
/**
 * Evaluate the subqueries with limited parallelism.
 * <p>
 * Each chunk is read from the source of the evaluation context and the
 * subqueries are run once for every binding set in that chunk. When the
 * source is exhausted without error the sink is flushed. The source, the
 * sink and (when present) the alternate sink are always closed, whether or
 * not evaluation succeeded.
 *
 * @return Always <code>null</code>.
 *
 * @throws Exception
 *             if a subquery fails, if this task is interrupted while
 *             waiting on the subqueries, or if flushing or closing a
 *             resource fails.
 */
@Override
public Void call() throws Exception {

    // Typed so that next() yields a chunk of binding sets.
    final ICloseableIterator<IBindingSet[]> source = context.getSource();

    try {

        while (source.hasNext()) {

            final IBindingSet[] chunk = source.next();

            for (IBindingSet bset : chunk) {

                consumeBindingSet(bset);

            }

        }

        // Now that we know the subqueries ran Ok, flush the sink.
        context.getSink().flush();

        // Done.
        return null;

    } finally {

        /*
         * Close each resource in its own finally clause so that a failure
         * to close one of them can not prevent the others from being
         * closed.
         */
        try {
            // Close the source.
            source.close();
        } finally {
            try {
                context.getSink().close();
            } finally {
                if (context.getSink2() != null)
                    context.getSink2().close();
            }
        }

    }

}
private void consumeBindingSet(final IBindingSet bset)
throws InterruptedException, ExecutionException {
final List> tasks = new LinkedList>();
try {
final CountDownLatch latch = new CountDownLatch(
subqueries.length);
/*
* Create FutureTasks for each subquery. The futures are not
* submitted to the Executor yet. That happens in call(). By
* deferring the evaluation until call() we gain the ability to
* cancel all subqueries if any subquery fails.
*/
for (BOp op : subqueries) {
/*
* Task runs subquery and cancels all subqueries in [tasks]
* if it fails.
*/
tasks.add(new FutureTask(new SubqueryTask(
op, context, bset)) {
/*
* Hook future to count down the latch when the task is
* done.
*/
public void run() {
try {
super.run();
} finally {
latch.countDown();
}
}
});
}
/*
* Run subqueries with limited parallelism.
*/
for (FutureTask ft : tasks) {
executor.execute(ft);
}
/*
* Wait for all subqueries to complete.
*/
latch.await();
/*
* Get the futures, throwing out any errors.
*/
for (FutureTask ft : tasks)
ft.get();
} finally {
// Cancel any tasks which are still running.
for (FutureTask ft : tasks)
ft.cancel(true/* mayInterruptIfRunning */);
}
}
/**
* Run a subquery.
*
* @author Bryan
* Thompson
*/
private class SubqueryTask implements Callable {
/**
* The evaluation context for the parent query.
*/
private final BOpContext parentContext;
/**
* The root operator for the subquery.
*/
private final BOp subQueryOp;
/**
* The input for this invocation of the subquery.
*/
private final IBindingSet bset;
public SubqueryTask(final BOp subQuery,
final BOpContext parentContext,
final IBindingSet bset) {
this.subQueryOp = subQuery;
this.parentContext = parentContext;
this.bset = bset;
}
public IRunningQuery call() throws Exception {
IRunningQuery runningSubquery = null;
ICloseableIterator subquerySolutionItr = null;
try {
final QueryEngine queryEngine = parentContext.getRunningQuery()
.getQueryEngine();
runningSubquery = queryEngine.eval(subQueryOp, bset);
// Iterator visiting the subquery solutions.
subquerySolutionItr = runningSubquery.iterator();
// Copy solutions from the subquery to the query.
BOpUtility.copy(//
subquerySolutionItr, //
parentContext.getSink(), //
null, // sink2
null, // merge solution (parent's source solution).
null, // selectVars (subquery projection).
null, // constraints
null // stats
);
// wait for the subquery.
runningSubquery.get();
// done.
return runningSubquery;
} catch (Throwable t) {
if (runningSubquery == null
|| runningSubquery.getCause() != null) {
/*
* If things fail before we start the subquery, or if a
* subquery fails (due to abnormal termination), then
* propagate the error to the parent and rethrow the
* first cause error out of the subquery.
*
* Note: IHaltable#getCause() considers exceptions
* triggered by an interrupt to be normal termination.
* Such exceptions are NOT propagated here and WILL NOT
* cause the parent query to terminate.
*/
throw new RuntimeException(ControllerTask.this.context
.getRunningQuery().halt(
runningSubquery == null ? t
: runningSubquery.getCause()));
}
return runningSubquery;
} finally {
if (subquerySolutionItr != null)
subquerySolutionItr.close();
}
}
} // SubqueryTask
} // ControllerTask
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy