/**
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved.
Contact:
SYSTAP, LLC DBA Blazegraph
2501 Calvert ST NW #106
Washington, DC 20008
[email protected]
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
* Created on Dec 30, 2010
*/
package com.bigdata.bop.engine;
import java.nio.ByteBuffer;
import java.nio.channels.ClosedByInterruptException;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.UUID;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.atomic.AtomicReference;
import java.util.concurrent.locks.ReentrantLock;
import org.apache.log4j.Logger;
import com.bigdata.bop.BOp;
import com.bigdata.bop.BOpEvaluationContext;
import com.bigdata.bop.BOpUtility;
import com.bigdata.bop.DefaultQueryAttributes;
import com.bigdata.bop.IBindingSet;
import com.bigdata.bop.IQueryAttributes;
import com.bigdata.bop.PipelineOp;
import com.bigdata.bop.bset.EndOp;
import com.bigdata.bop.engine.RunState.RunStateEnum;
import com.bigdata.bop.fed.EmptyChunkMessage;
import com.bigdata.bop.solutions.SliceOp;
import com.bigdata.io.DirectBufferPool;
import com.bigdata.io.DirectBufferPoolAllocator;
import com.bigdata.journal.IIndexManager;
import com.bigdata.journal.ITx;
import com.bigdata.rdf.sparql.ast.QueryHints;
import com.bigdata.relation.accesspath.IAsynchronousIterator;
import com.bigdata.relation.accesspath.IBlockingBuffer;
import com.bigdata.rwstore.sector.IMemoryManager;
import com.bigdata.rwstore.sector.MemoryManager;
import com.bigdata.service.IBigdataFederation;
import com.bigdata.util.InnerCause;
import com.bigdata.util.concurrent.Haltable;
import com.bigdata.util.concurrent.IHaltable;
import cutthecrap.utils.striterators.ICloseableIterator;
/**
* Abstract base class for various {@link IRunningQuery} implementations. The
* purpose of this class is to isolate aspects common to different designs for
* managing resources for a running query and make it easier to realize
* different strategies for managing the resources allocated to a running query.
*
* There are common requirements for the {@link IRunningQuery}, but a variety of
* ways in which those requirements can be met. Among the common requirements
* are a means to manage tradeoffs in the allocation of various resources to the
* operators in each query. Some of the more important tradeoffs are the #of
* threads to allocate to each operator (threads bounds IO for Java 6 since we
* are using a synchronous IO model) and the amount of RAM allocated to each
* operator (including RAM on the JVM heap and RAM on the native Java process
* heap). If the #of threads is too restrictive, then queries will progress
* slowly due to insufficient IO level parallelism. If the query buffers too
* much data on the JVM heap, then it can cause GC overhead problems that can
* drastically reduce the responsiveness and throughput of the JVM. Data can be
* moved off of the JVM heap onto the Java process heap by serializing it into
* direct {@link ByteBuffer}s. This can be very efficient in
* combination with hash joins at the expense of increasing the latency to the
* first result when compared with pipelined evaluation.
*
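* A minimal life-cycle sketch (informal, assuming a concrete subclass acting
* as the query controller): {@link #startQuery(IChunkMessage)} registers the
* initial chunk with the {@link RunState}; {@link #acceptChunk(IChunkMessage)}
* makes solutions available to operator tasks; {@link #startOp(IStartOpMessage)}
* and {@link #haltOp(IHaltOpMessage)} perform per-operator accounting and may
* halt the query; {@link #cancel(boolean)} tears down the query and releases
* its resources.
*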
* @author Bryan Thompson
*/
abstract public class AbstractRunningQuery implements IRunningQuery {
/**
* Error message used when an operation which must be performed on the query
* controller is attempted on some other {@link IQueryPeer}.
*/
protected static final String ERR_NOT_CONTROLLER = "Operator only permitted on the query controller";
/**
* Error message used when a request is made after the query has stopped
* executing.
*/
protected static final String ERR_QUERY_DONE = "Query is no longer running";
/**
* Error message used when a request is addressed to an operator other than
* the head of the pipeline in a context where the request must be addressed
* to the operator at the head of the pipeline (e.g., when presenting the
* initial binding sets to get the query moving.)
*/
protected static final String ERR_NOT_PIPELINE_START = "Not pipeline start";
/**
* Error message used when no operator can be found for a given
* {@link BOp.Annotations#BOP_ID}.
*/
protected static final String ERR_NO_SUCH_BOP = "No such bop: id=";
/**
* Error message used when two operators have the same
* {@link BOp.Annotations#BOP_ID}.
*/
protected static final String ERR_DUPLICATE_IDENTIFIER = "Duplicate identifier: id=";
private final static transient Logger log = Logger
.getLogger(AbstractRunningQuery.class);
/**
* The class executing the query on this node.
*/
final private QueryEngine queryEngine;
/** The unique identifier for this query. */
final private UUID queryId;
/**
* Stats associated with static analysis
*/
private StaticAnalysisStats saStats = null;
// /**
// * The query deadline. The value is the system clock time in milliseconds
// * when the query is due and {@link Long#MAX_VALUE} if there is no deadline.
// * In order to have a guarantee of a consistent clock, the deadline is
// * interpreted by the query controller.
// */
// final private AtomicLong deadline = new AtomicLong(Long.MAX_VALUE);
/**
* The timestamp (ms) when the query begins to execute.
*/
final private AtomicLong startTime = new AtomicLong(System
.currentTimeMillis());
/**
* The timestamp (ms) when the query is done executing and ZERO (0L) if the
* query is not done.
*/
final private AtomicLong doneTime = new AtomicLong(0L);
/**
* <code>true</code> iff the outer {@link QueryEngine} is the controller for
* this query.
*/
final private boolean controller;
/**
* The client executing this query (aka the query controller).
*
* Note: The proxy is primarily for light weight RMI messages used to
* coordinate the distributed query evaluation. Ideally, all large objects
* will be transferred among the nodes of the cluster using NIO buffers.
*/
final private IQueryClient clientProxy;
/**
* The original message which kicked off this query on the query controller.
* This is NOT required when the query is materialized on another node and
* MAY be <code>null</code>, but the original message used to kick off the
* query on the query controller MUST be provided so we can ensure that the
* source iteration is always closed when the query is cancelled.
*/
final private IChunkMessage<IBindingSet> realSource;
/** The query. */
final private PipelineOp query;
/**
* An index from the {@link BOp.Annotations#BOP_ID} to the {@link BOp}. This
* index is generated by the constructor. It is immutable and thread-safe.
*/
private final Map<Integer/* bopId */, BOp> bopIndex;
/**
* The run state of the query and the result of the computation iff it
* completes execution normally (without being interrupted, cancelled, etc).
*/
final private Haltable<Void> future = new Haltable<Void>();
/**
* The {@link Future} of this query.
*
* Note: This is exposed to the {@link QueryEngine} to let it cache the
* {@link Future} for recently finished queries.
*/
final protected IHaltable<Void> getFuture() {
return future;
}
/**
* The runtime statistics for each {@link BOp} in the query and
* <code>null</code> unless this is the query controller.
*/
final private ConcurrentHashMap<Integer/* bopId */, BOpStats> statsMap;
/**
* The buffer used for the overall output of the query pipeline.
*
* Note: This only exists on the query controller, and then only when the
* top-level operator is not a mutation. In order to ensure that the results
* are transferred to the query controller in scale-out, the top-level
* operator in the query plan must specify
* {@link BOpEvaluationContext#CONTROLLER}. For example, {@link SliceOp} or
* {@link EndOp} both require this {@link BOpEvaluationContext}.
*/
final private IBlockingBuffer<IBindingSet[]> queryBuffer;
/**
* The iterator draining the {@link #queryBuffer} and <code>null</code> iff
* the {@link #queryBuffer} is <code>null</code>.
*/
final private ICloseableIterator<IBindingSet[]> queryIterator;
// /**
// * The #of solutions delivered to the {@link #queryBuffer}.
// */
// public long getSolutionCount() {
//
// if (queryBuffer != null) {
//
// ((BlockingBufferWithStats>) queryBuffer).getElementsAddedCount();
//
// }
//
// return 0L;
//
// }
//
// /**
// * The #of solution chunks delivered to the {@link #queryBuffer}.
// */
// public long getSolutionChunkCount() {
//
// if (queryBuffer != null) {
//
// ((BlockingBufferWithStats>) queryBuffer).getChunksAddedCount();
//
// }
//
// return 0L;
//
// }
/**
* A lock guarding various state changes. This guards changes to the
* internal state of the {@link #runState} object. It is also used to
* serialize requests to {@link #acceptChunk(IChunkMessage)} and
* {@link #cancel(boolean)} and make atomic decision concerning whether to
* attach a new {@link IChunkMessage} to an operator task which is already
* running or to start a new task for that message.
*
* @see RunState
*/
protected final ReentrantLock lock = new ReentrantLock();
/**
* The run state of this query and <code>null</code> unless this is the
* query controller.
*/
final private RunState runState;
/**
* Flag used to prevent retriggering of query tear down activities in
* {@link #cancel(boolean)}.
*/
private final AtomicBoolean didQueryTearDown = new AtomicBoolean(false);
// /**
// * A collection reporting on whether or not a given operator has been torn
// * down. This collection is used to provide the guarantee that an operator
// * is torn down exactly once, regardless of the #of invocations of the
// * operator or the #of errors which might occur during query processing.
// *
// * @see PipelineOp#tearDown()
// */
// private final Map tornDown = new LinkedHashMap();
/**
* Set the query deadline. The query will be cancelled when the deadline is
* passed. If the deadline is passed, the query is immediately cancelled.
*
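* A minimal usage sketch (hypothetical caller and values): the deadline is an
* absolute system clock time in milliseconds, e.g.
* <pre>
* // Cancel the query if it is still running 30 seconds from now.
* runningQuery.setDeadline(System.currentTimeMillis() + 30000);
* </pre>
*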
* @param deadline
* The deadline.
* @throws IllegalArgumentException
* if the deadline is non-positive.
* @throws IllegalStateException
* if the deadline was already set.
* @throws UnsupportedOperationException
* unless node is the query controller.
*/
final public void setDeadline(final long deadline) {
if (!controller)
throw new UnsupportedOperationException(ERR_NOT_CONTROLLER);
try {
/*
* Attempt to set the deadline.
*/
runState.setDeadline(deadline);
queryEngine.addQueryToDeadlineQueue(this);
} catch (QueryTimeoutException e) {
/*
* Deadline is expired, so halt the query.
*/
halt(e);
}
}
/**
* If the query deadline has expired, then halt the query.
*
* @throws QueryTimeoutException
* if the query deadline has expired.
*
* @see Query timeout only checked at operator start/stop.
*/
final protected void checkDeadline() {
if (isDone()) {
// already terminated.
return;
}
try {
// if (log.isTraceEnabled())
// log.trace("Checking " + deadline);
runState.checkDeadline();
} catch (QueryTimeoutException ex) {
halt(ex);
/*
* Note: The exception is not rethrown when the query halts for a
* deadline. See startOp() and haltOp() for the standard behavior.
*/
}
}
@Override
final public long getDeadline() {
return runState.getDeadline();
}
@Override
final public long getStartTime() {
return startTime.get();
}
@Override
final public long getDoneTime() {
return doneTime.get();
}
@Override
final public long getElapsed() {
long mark = doneTime.get();
if (mark == 0L)
mark = System.currentTimeMillis();
return mark - startTime.get();
}
/**
* Return the buffer used for the overall output of the query pipeline and
* <code>null</code> if this is not the query controller.
*/
final protected IBlockingBuffer<IBindingSet[]> getQueryBuffer() {
return queryBuffer;
}
@Override
public QueryEngine getQueryEngine() {
return queryEngine;
}
@Override
final public IQueryClient getQueryController() {
return clientProxy;
}
@Override
final public UUID getQueryId() {
return queryId;
}
@Override
final public PipelineOp getQuery() {
return query;
}
/**
* Return <code>true</code> iff this is the query controller.
*/
final public boolean isController() {
return controller;
}
@Override
final public Map<Integer/* bopId */, BOpStats> getStats() {
return Collections.unmodifiableMap(statsMap);
}
/**
* Return the {@link BOpStats} instance associated with the given
* {@link BOp} identifier.
*
* @param bopId
* The {@link BOp} identifier.
*
* @return The associated {@link BOpStats} object -or- <code>null</code> if
* there is no entry for that {@link BOp} identifier.
*
* @throws IllegalArgumentException
* if the argument is <code>null</code>.
*/
final public BOpStats getStats(final Integer bopId) {
if (bopId == null)
throw new IllegalArgumentException();
if (statsMap == null)
throw new IllegalStateException("bopId=" + bopId + ", query="
+ BOpUtility.toString(query));
return statsMap.get(bopId);
}
@Override
final public Map<Integer, BOp> getBOpIndex() {
return bopIndex;
}
/**
* Return the {@link BOp} having the specified id.
*
* @param bopId
* The {@link BOp} identifier.
*
* @return The {@link BOp}.
*
* @throws IllegalArgumentException
* if there is no {@link BOp} with that identifier declared in
* this query.
*/
final public BOp getBOp(final int bopId) {
final BOp bop = getBOpIndex().get(bopId);
if (bop == null) {
throw new IllegalArgumentException("Not found: id=" + bopId
+ ", query=" + query);
}
return bop;
}
/**
* @param queryEngine
* The {@link QueryEngine} on which the query is running. In
* scale-out, a query is typically instantiated on many
* {@link QueryEngine}s.
* @param queryId
* The identifier for that query.
* @param controller
* <code>true</code> iff the {@link QueryEngine} is the query
* controller for this query (the {@link QueryEngine} which will
* coordinate the query evaluation).
* @param clientProxy
* The query controller. In standalone, this is the same as the
* queryEngine. In scale-out, this is an RMI proxy for the
* query controller whenever the query is instantiated on a node
* other than the query controller itself.
* @param query
* The query.
* @param realSource
* The original message which kicked off this query on the query
* controller. This is NOT required when the query is
* materialized on another node and MAY be <code>null</code>, but
* the original message used to kick off the query on the query
* controller MUST be provided so we can ensure that the source
* iteration is always closed when the query is cancelled.
*
* @throws IllegalArgumentException
* if any argument is <code>null</code>.
* @throws IllegalArgumentException
* if the readTimestamp is {@link ITx#UNISOLATED}
* (queries may not read on the unisolated indices).
* @throws IllegalArgumentException
* if the writeTimestamp is neither
* {@link ITx#UNISOLATED} nor a read-write transaction
* identifier.
*/
public AbstractRunningQuery(final QueryEngine queryEngine,
final UUID queryId, final boolean controller,
final IQueryClient clientProxy, final PipelineOp query,
final IChunkMessage<IBindingSet> realSource) {
if (queryEngine == null)
throw new IllegalArgumentException();
if (queryId == null)
throw new IllegalArgumentException();
if (clientProxy == null)
throw new IllegalArgumentException();
if (query == null)
throw new IllegalArgumentException();
this.queryEngine = queryEngine;
this.queryId = queryId;
this.controller = controller;
this.clientProxy = clientProxy;
this.query = query;
this.realSource = realSource;
this.bopIndex = BOpUtility.getIndex(query);
/*
* Setup the BOpStats object for each pipeline operator in the query.
*/
if (controller) {
runState = new RunState(this);
statsMap = new ConcurrentHashMap<Integer/* bopId */, BOpStats>();
populateStatsMap(query);
/*
* FIXME Review the concept of mutation queries. It used to be that
* queries could only either read or write. Now we have access paths
* which either read or write and each query could use zero or more
* such access paths.
*/
if (true/* !query.isMutation() */) {
// read-only query.
final BOpStats queryStats = statsMap.get(query.getId());
queryBuffer = newQueryBuffer(query, queryStats);
queryIterator = new QueryResultIterator<IBindingSet[]>(this,
queryBuffer.iterator());
// } else {
//
// // Note: Not used for mutation queries.
// queryBuffer = null;
// queryIterator = null;
}
} else {
runState = null; // Note: only on the query controller.
statsMap = null; // Note: only on the query controller.
queryBuffer = null; // Note: only on the query controller.
queryIterator = null; // Note: only when queryBuffer is defined.
}
}
/**
* Return the buffer that will be used to absorb solutions. The solutions
* will be drained from the buffer using its iterator.
*
* @param query
* The root of the query plan.
* @param queryStats
* Used to track statistics on the solutions to the query (#of
* chunks, #of units).
*
* @return The buffer.
*/
final protected IBlockingBuffer<IBindingSet[]> newQueryBuffer(
final PipelineOp query, final BOpStats queryStats) {
return new BlockingBufferWithStats<IBindingSet[]>(query, queryStats);
}
/**
* Pre-populate a map with {@link BOpStats} objects for the query. Only the
* child operands are visited. Operators in subqueries are not visited since
* they will be assigned {@link BOpStats} objects when they are run as a
* subquery.
*
* @see BOp.Annotations#CONTROLLER
*/
private void populateStatsMap(final BOp op) {
if (!(op instanceof PipelineOp))
return;
final PipelineOp bop = (PipelineOp) op;
final int bopId = bop.getId();
final BOpStats stats = bop.newStats();
statsMap.put(bopId, stats);
// log.warn("bopId=" + bopId + ", stats=" + stats);
/*
* Visit children.
*
* Note: The CONTROLLER concept has its subquery expressed through an
* annotation, not through its arguments. We always want to visit the
* child arguments of a pipeline operator. We just do not want to visit
* the operators in its sub-query plan.
*/
final Iterator<BOp> itr = op.argIterator();
while (itr.hasNext()) {
final BOp t = itr.next();
// visit children (recursion)
populateStatsMap(t);
}
}
/**
* Message provides notice that the query has started execution and will
* consume some specific number of binding set chunks.
*
* @param msg
* The initial message presented to the query. The message is
* used to update the query {@link RunState}. However, the
* message will not be consumed until it is presented to
* {@link #acceptChunk(IChunkMessage)} by the {@link QueryEngine}.
*
* @throws UnsupportedOperationException
* If this node is not the query coordinator.
*/
final protected void startQuery(final IChunkMessage<IBindingSet> msg) {
if (!controller)
throw new UnsupportedOperationException(ERR_NOT_CONTROLLER);
if (msg == null)
throw new IllegalArgumentException();
if (!queryId.equals(msg.getQueryId()))
throw new IllegalArgumentException();
lock.lock();
try {
runState.startQuery(msg);
// lifeCycleSetUpQuery();
} catch (TimeoutException ex) {
halt(ex);
} finally {
lock.unlock();
}
}
/**
* Message provides notice that the operator has started execution and will
* consume some specific number of binding set chunks.
*
* @param msg
* The {@link IStartOpMessage}.
*
* @throws UnsupportedOperationException
* If this node is not the query coordinator.
*/
final protected void startOp(final IStartOpMessage msg) {
if (!controller)
throw new UnsupportedOperationException(ERR_NOT_CONTROLLER);
if (msg == null)
throw new IllegalArgumentException();
if (!queryId.equals(msg.getQueryId()))
throw new IllegalArgumentException();
lock.lock();
try {
if(log.isTraceEnabled())
log.trace(msg.toString());
if (future.isDone()) // BLZG-1418
throw new RuntimeException("Query is done");
runState.startOp(msg);
} catch (TimeoutException ex) {
halt(ex);
/*
* Note: The exception is not rethrown when the query halts for a
* deadline.
*/
} finally {
lock.unlock();
}
}
/**
* Message provides notice that the operator has ended execution. The
* termination conditions for the query are checked. (For scale-out, the
* node controlling the query needs to be involved for each operator
* start/stop in order to make the termination decision atomic).
*
* @param msg
* The {@link IHaltOpMessage}
*
* @throws UnsupportedOperationException
* If this node is not the query coordinator.
*/
protected void haltOp(final IHaltOpMessage msg) {
if (!controller)
throw new UnsupportedOperationException(ERR_NOT_CONTROLLER);
if (msg == null)
throw new IllegalArgumentException();
if (!queryId.equals(msg.getQueryId()))
throw new IllegalArgumentException();
lock.lock();
try {
if(log.isTraceEnabled())
log.trace(msg.toString());
// update per-operator statistics.
{
// Data race on insert into CHM.
BOpStats tmp = statsMap.putIfAbsent(msg.getBOpId(),
msg.getStats());
/**
* Combine stats, but do not combine a stats object with itself.
*
* @see Query Statistics do not update correctly on cluster
*/
if (tmp == null) {
// won the data race.
tmp = msg.getStats();
} else {
// lost the data race.
if (tmp != msg.getStats()) {
tmp.add(msg.getStats());
}
}
/**
* Post-increment now that we know who won the data race.
*
* @see Explain reports incorrect value for opCount
*/
tmp.opCount.increment();
// log.warn("bop=" + getBOp(msg.getBOpId()).toShortString()
// + " : stats=" + tmp);
}
switch (runState.haltOp(msg)) {
case Running:
case RunningLastPass:
return;
case StartLastPass: {
@SuppressWarnings("rawtypes")
final Set doneOn = runState.getDoneOn(msg.getBOpId());
doLastPass(msg.getBOpId(), doneOn);
return;
}
case AllDone:
/*
* Operator is all done.
*/
triggerOperatorsAwaitingLastPass();
// Release any native buffers.
releaseNativeMemoryForOperator(msg.getBOpId());
// Check to see if the query is also all done.
if (runState.isAllDone()) {
if (log.isInfoEnabled())
log.info("Query reports all done: bopId=" + msg.getBOpId()
+ ", msg=" + msg + ", runState=" + runState);
// Normal termination.
halt((Void) null);
}
return;
default:
throw new AssertionError();
}
} catch (Throwable t) {
halt(t);
/*
* Note: The exception is not rethrown when the query halts for a
* deadline.
*/
} finally {
lock.unlock();
}
}
/**
* Method handles the case where a downstream operator awaiting last pass
* or at-once evaluation is not re-triggered by the last
* {@link IChunkMessage} output from an upstream operator. If this situation
* arises the query will just sit there waiting for a trigger to kick off
* last pass evaluation. This method works around that by sending an empty
* {@link IChunkMessage} if the operator would not otherwise have been
* triggered.
*
* @see COUNT(DISTINCT) returns no rows rather than ZERO.
*/
private void triggerOperatorsAwaitingLastPass() {
/*
* Examine all downstream operators. Find any at-once operators that
* can no longer be triggered and which have not yet executed. Then
* trigger them with an empty chunk message so they will run once
* and only once.
*/
// Consider the operators which require at-once evaluation.
for (Integer bopId : runState.getAtOnceRequired()) {
if (runState.getOperatorRunState(bopId) == RunStateEnum.StartLastPass) {
if (log.isInfoEnabled())
log.info("Triggering at-once (no solutions in): " + bopId);
/*
* Since evaluation is purely local, we specify -1 as the shardId.
*/
final IChunkMessage<IBindingSet> emptyMessage = new EmptyChunkMessage<IBindingSet>(
getQueryController(), queryId, bopId, -1/* shardId */, true/* lastInvocation */);
acceptChunk(emptyMessage);
}
}
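// If no operators still require a last evaluation pass, there is nothing left to trigger.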
if (runState.getTotalLastPassRemainingCount() == 0) {
return;
}
// Consider the operators which require last pass evaluation.
for (Integer bopId : runState.getLastPassRequested()) {
if (runState.getOperatorRunState(bopId) == RunStateEnum.StartLastPass) {
@SuppressWarnings("rawtypes")
final Set doneOn = runState.getDoneOn(bopId);
if (log.isInfoEnabled())
log.info("Triggering last pass: " + bopId);
doLastPass(bopId, doneOn);
}
}
}
/**
* Queue empty {@link IChunkMessage}s to trigger the last evaluation pass
* for an operator which can not be re-triggered by any upstream operator or
* by {@link IChunkMessage}s which have already been buffered.
*
* Note: If the queue for accepting new chunks could block then this could
* deadlock. We work around that by using the same lock for the
* AbstractRunningQuery and the queue of accepted messages. If the queue
* blocks, this thread will yield the lock and another thread may make
* progress.
*
* @param bopId
* The operator identifier.
* @param doneOn
* The collection of shards or services on which the operator
* needs to receive a last evaluation pass message.
*/
@SuppressWarnings("rawtypes")
protected void doLastPass(final int bopId, final Set doneOn) {
if (!lock.isHeldByCurrentThread())
throw new IllegalMonitorStateException();
if (doneOn == null) {
/*
* This operator was never started on anything and we do not need to
* generate any last pass messages.
*/
throw new AssertionError("doneOn is null? : bopId=" + bopId
+ ", runState=" + runState);
}
if (doneOn.isEmpty()) {
/*
* The operator has received all last evaluation pass notices so
* this method should not have been called (RunStateEnum should be
* AllDone).
*/
throw new AssertionError("doneOn is empty? : bopId=" + bopId
+ ", runState=" + runState);
}
if (doneOn.size() != 1) {
/*
* This base class can only handle purely local queries for which
* there will only be a single element in the doneOn set (either the
* shardId -1 or the serviceId for the query controller). This
* method needs to be overridden to handle doneOn in a cluster.
*/
throw new AssertionError("doneOn set not single element? : bopId="
+ bopId + ", runState=" + runState + ", doneOn=" + doneOn);
}
if (log.isInfoEnabled())
log.info("Triggering last pass: " + bopId);
/*
* Since evaluation is purely local, we specify -1 as the shardId.
*/
final IChunkMessage emptyMessage = new EmptyChunkMessage(
getQueryController(), queryId, bopId, -1/* shardId */, true/* lastInvocation */);
acceptChunk(emptyMessage);
}
/**
* Return <code>true</code> iff the preconditions have been satisfied for
* the "at-once" invocation of the specified operator (no predecessors are
* running or could be triggered and the operator has not been evaluated).
*
* @param bopId
* Some operator identifier.
*
* @return <code>true</code> iff the "at-once" evaluation of the operator
* may proceed.
*/
protected boolean isAtOnceReady(final int bopId) {
lock.lock();
try {
// if (isDone()) {
// // The query has already halted.
// throw new InterruptedException();
// }
return runState.isAtOnceReady(bopId);
} finally {
lock.unlock();
}
}
/**
* Return the {@link RunStateEnum} for an operator.
*
* @param bopId
* The operator.
*
* @return Its {@link RunStateEnum}.
*/
protected RunStateEnum getRunState(final int bopId) {
lock.lock();
try {
// if (isDone()) {
// // The query has already halted.
// throw new InterruptedException();
// }
return runState.getOperatorRunState(bopId);
} finally {
lock.unlock();
}
}
/**
* Attempt to return the {@link RunStateEnum} for an operator
* (non-blocking).
*
* Note: This method is intended for use in contexts where it is desirable,
* but not critical, to have the {@link RunStateEnum} for the operator. For
* example, in log messages. The implementation is non-blocking and will
* barge in if the lock is available and return the {@link RunStateEnum} of
* the operator. If the lock is not available, it will return
* <code>null</code>.
*
* @param bopId
* The operator.
*
* @return Its {@link RunStateEnum} and <code>null</code> if the lock could
* not be acquired.
*/
protected RunStateEnum tryGetRunState(final int bopId) {
if (lock.tryLock()) {
try {
// if (isDone()) {
// // The query has already halted.
// throw new InterruptedException();
// }
return runState.getOperatorRunState(bopId);
} finally {
lock.unlock();
}
} else {
return null;
}
}
/**
* Release native memory associated with this operator, if any (NOP, but
* overridden in scale-out to release NIO buffers used to move solutions
* around in the cluster).
*
* Note: Operators are responsible for releasing their child
* {@link IMemoryManager} context, if any, when they terminate and should
* specify the {@link PipelineOp.Annotations#LAST_PASS} annotation to
* receive notice in the form of a final evaluation pass over an empty
* {@link IChunkMessage}. If they do NOT release an {@link IMemoryManager}
* context which is a child of the {@link #getMemoryManager() query's
* context}, then their child {@link IMemoryManager} context will be
* retained until the termination of the query, at which point the query's
* {@link IMemoryManager} context will be released, and all child contexts
* will be released automatically along with it.
*
* @param bopId
*
* @see #releaseNativeMemoryForQuery()
*/
protected void releaseNativeMemoryForOperator(final int bopId) {
// NOP
}
/**
* Release native memory associated with this query, if any.
*
* FIXME This could cause direct buffers to be released back to the pool
* before the operator tasks have terminated. That is NOT safe as the
* buffers could then be reissued to other threads while existing threads
* still have references to the buffers. Really, the same problem exists
* with the allocation contexts used for NIO transfers of IBindingSet[]s.
*
* We will have to be very careful to wait until each operator's Future
* isDone() before calling clear() on the IMemoryManager to release the
* native buffers back to the pool. If we release a buffer while an operator
* is still running, then we will get data corruption arising from the
* recycling of the buffer to another native buffer user.
*
* AbstractRunningQuery.cancel(...) is where we need to handle this, more
* specifically cancelRunningOperators(). Right now it is not waiting for
* those operators to terminate.
*
* Making this work is tricky. AbstractRunningQuery is holding a lock. The
* operator tasks do not actually require that lock to terminate, but they
* are wrapped by a ChunkWrapperTask, which handles reporting back to the
* AbstractRunningQuery and *does* need the lock, and also by a
* ChunkFutureTask. Since we actually do call ChunkFutureTask.get(), we are
* going to deadlock if we invoke that while holding the
* AbstractRunningQuery's lock.
*
* The alternative is to handle the tear down of the native buffers for a
* query asynchronously after the query has been cancelled, deferring the
* release of the native buffers back to the direct buffer pool until all
* tasks for the query are known to be done.
*
* @see BLZG-1658 MemoryManager should know when it has been closed
*
* FIXME We need to have distinct events for the query evaluation life cycle
* and the query results life cycle. Really, this means that temporary
* solution sets are scoped to the parent query. This is a matter of the
* scope of the allocation context for the {@link DirectBufferPoolAllocator}
* and releasing that scope when the parent query is done (in cancel()).
* [Also consider scoping the temporary solution sets to a transaction or an
* HTTP session, e.g., by an integration with the NSS using traditional
* session concepts.]
*/
protected void releaseNativeMemoryForQuery() {
assert lock.isHeldByCurrentThread();
// clear reference, returning old value.
final MemoryManager memoryManager = this.memoryManager.getAndSet(null);
if (memoryManager != null) {
// release resources. See BLZG-1658
memoryManager.close();
}
}
/**
* Make a chunk of binding sets available for consumption by the query.
*
* Note: this is invoked by {@link QueryEngine#acceptChunk(IChunkMessage)}
*
* @param msg
* The chunk.
*
* @return <code>true</code> if the message was accepted.
*
* @todo Reconcile {@link #acceptChunk(IChunkMessage)} and
* {@link #consumeChunk()}. Why is {@link #consumeChunk()} also used
* by the {@link QueryEngine}?
*/
abstract protected boolean acceptChunk(final IChunkMessage<IBindingSet> msg);
/**
* Instruct the {@link IRunningQuery} to consume an {@link IChunkMessage}
* already on its input queue.
*/
abstract protected void consumeChunk();
@Override
final public ICloseableIterator<IBindingSet[]> iterator() {
if (!controller)
throw new UnsupportedOperationException(ERR_NOT_CONTROLLER);
if (queryIterator == null)
throw new UnsupportedOperationException();
return queryIterator;
}
@Override
final public void halt(final Void v) {
lock.lock();
try {
// signal normal completion.
future.halt((Void) v);
// interrupt anything which is running.
cancel(true/* mayInterruptIfRunning */);
} finally {
lock.unlock();
}
}
@Override
final public <T extends Throwable> T halt(final T t) {
if (t == null)
throw new IllegalArgumentException();
lock.lock();
try {
try {
// halt the query, return [t].
return future.halt(t);
} finally {
// interrupt anything which is running.
cancel(true/* mayInterruptIfRunning */);
}
} finally {
lock.unlock();
}
}
/**
* {@inheritDoc}
*
* Cancelled queries:
*
* - must reject new chunks
* - must cancel any running operators
* - must not begin to evaluate operators
* - must release all of their resources
* - must not cause the solutions to be discarded before the client can
* consume them.
*
*/
@Override
final public boolean cancel(final boolean mayInterruptIfRunning) {
/*
* Set if we notice an interrupt during clean up of the query and then
* propagated to the caller in the finally {} clause.
*/
boolean interrupted = false;
lock.lock();
try {
// halt the query.
boolean cancelled = future.cancel(mayInterruptIfRunning);
if (didQueryTearDown
.compareAndSet(false/* expect */, true/* update */)) {
/*
* Do additional cleanup exactly once.
*/
if (realSource != null)
realSource.release();
// close() IAsynchronousIterators for accepted messages.
releaseAcceptedMessages();
/*
* Cancel any running operators for this query on this node.
*
* Note: This can interrupt *this* thread. E.g., when SLICE
* calls halt().
*/
cancelled |= cancelRunningOperators(mayInterruptIfRunning);
/*
* Test and clear the interrupt status.
*
* Note: This prevents a thread from interrupting itself during
* the query tear down. If we do not do this then the interrupt
* tends to get "noticed" by the next lock acquisition, which
* happens to be the one where we release the native memory
* buffers.
*
* TODO It may be possible for interrupts to be thrown inside of
* these methods after we have tested and cleared the interrupt
* status of the Thread. That would result in a wrapped
* exception and the cancelQueryOnPeers() or queryBuffer.close()
* might not be processed properly.
*/
interrupted |= Thread.interrupted();
if (controller) {
// cancel query on other peers.
cancelled |= cancelQueryOnPeers(future.getCause(),
runState.getServiceIds());
}
if (queryBuffer != null) {
/*
* Close the query buffer so the iterator draining the query
* results will recognize that no new results will become
* available. Failure to do this will cause the iterator to
* hang waiting for more results.
*/
queryBuffer.close();
}
// release native buffers.
releaseNativeMemoryForQuery();
// mark done time.
doneTime.set(System.currentTimeMillis());
// log summary statistics for the query.
if (isController())
QueryLog.log(this);
// final String tag = getQuery().getProperty(QueryHints.TAG,
// QueryHints.DEFAULT_TAG);
// final Counters c = tag == null ? null : queryEngine
// .getCounters(tag);
// track #of done queries.
queryEngine.counters.queryDoneCount.increment();
// if (c != null)
// c.doneCount.increment();
// track elapsed run time of done queries.
final long elapsed = getElapsed();
queryEngine.counters.elapsedMillis.add(elapsed);
// if (c != null)
// c.elapsedMillis.add(elapsed);
if (future.getCause() != null) {
// track #of queries with abnormal termination.
queryEngine.counters.queryErrorCount.increment();
// if (c != null)
// c.errorCount.increment();
}
// remove from the collection of running queries.
queryEngine.halt(this);
}
// true iff we cancelled something.
return cancelled;
} finally {
lock.unlock();
if(interrupted) {
// Propagate the interrupt.
Thread.currentThread().interrupt();
}
}
}
/**
* Cancel any running operators for this query on this node (internal API).
*
* @return <code>true</code> if any operators were cancelled.
*/
abstract protected boolean cancelRunningOperators(
final boolean mayInterruptIfRunning);
/**
* Close the {@link IAsynchronousIterator} for any {@link IChunkMessage}s
* which have been accepted for this query on this node (internal
* API).
*
* Note: This must be invoked while holding a lock which is exclusive with
* the lock used to hand off {@link IChunkMessage}s to operator tasks,
* otherwise we could wind up invoking {@link IAsynchronousIterator#close()}
* on an {@link IAsynchronousIterator} running in a different thread.
* That would cause visibility problems in the close() semantics unless the
* {@link IAsynchronousIterator} is thread-safe for close (e.g., volatile
* write, synchronized, etc.). The appropriate lock for this is
* {@link AbstractRunningQuery#lock}. This method is only invoked out of
* {@link AbstractRunningQuery#cancel(boolean)} which owns that lock.
*/
abstract protected void releaseAcceptedMessages();
// {
// boolean cancelled = false;
//
// final Iterator> fitr =
// operatorFutures.values().iterator();
//
// while (fitr.hasNext()) {
//
// final ConcurrentHashMap set =
// fitr.next();
//
// for(ChunkFutureTask f : set.keySet()) {
//
// if (f.cancel(mayInterruptIfRunning))
// cancelled = true;
//
// }
//
// }
//
// return cancelled;
//
// }
/**
* Cancel the query on each node where it is known to be running.
*
* Note: The default implementation verifies that the caller is holding the
* {@link #lock} but is otherwise a NOP. This is overridden for scale-out.
*
* @param cause
* When non-<code>null</code>, the cause.
*
* @return <code>true</code> iff something was cancelled.
*
* @throws IllegalMonitorStateException
* unless the {@link #lock} is held by the current thread.
* @throws UnsupportedOperationException
* unless this is the query controller.
*/
protected boolean cancelQueryOnPeers(final Throwable cause,
final Set<UUID/* serviceIds */> startedOn) {
if (!controller)
throw new UnsupportedOperationException(ERR_NOT_CONTROLLER);
if (!lock.isHeldByCurrentThread())
throw new IllegalMonitorStateException();
return false;
}
@Override
final public Void get() throws InterruptedException, ExecutionException {
return future.get();
}
@Override
final public Void get(final long arg0, final TimeUnit arg1)
throws InterruptedException, ExecutionException, TimeoutException {
return future.get(arg0, arg1);
}
@Override
final public boolean isCancelled() {
return future.isCancelled();
}
@Override
final public boolean isDone() {
return future.isDone();
}
@Override
final public Throwable getCause() {
return future.getCause();
}
@Override
final public Throwable getAsThrownCause() {
return future.getAsThrownCause();
}
@Override
public IBigdataFederation<?> getFederation() {
return queryEngine.getFederation();
}
@Override
public IIndexManager getLocalIndexManager() {
return queryEngine.getIndexManager();
}
/**
* Return the #of instances of the operator which are concurrently
* executing.
*/
protected long getRunningCount(final int bopId) {
// Note: lock is NOT required.
return runState.getRunningCount(bopId);
}
/**
* Return the #of shards or nodes on which the operator has started
* evaluation. This is basically a measure of the fan out of the operator
* across the cluster. For example, it will report the #of shards on which a
* sharded join has read based on the solutions being mapped across that
* join. The units are shards if the operator is sharded and nodes if the
* operator is hash partitioned.
*
* @param bopId
* The operator identifier.
*
* @return The #of shards or nodes on which the operator has started.
*/
protected int getStartedOnCount(final int bopId) {
// Note: lock is NOT required.
return runState.getStartedOnCount(bopId);
}
@Override
public IMemoryManager getMemoryManager() {
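// Double-checked lazy initialization: test outside the lock, then re-test
// while holding the query lock so at most one MemoryManager is allocated.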
MemoryManager memoryManager = this.memoryManager.get();
if (memoryManager == null) {
lock.lock();
try {
memoryManager = this.memoryManager.get();
if (memoryManager == null) {
this.memoryManager.set(memoryManager = newMemoryManager());
}
} finally {
lock.unlock();
}
}
return memoryManager;
}
private final AtomicReference<MemoryManager> memoryManager = new AtomicReference<MemoryManager>();
/**
* Allocate a memory manager for the query.
*
* @see QueryHints#ANALYTIC_MAX_MEMORY_PER_QUERY
*
* @see QueryHints#DEFAULT_ANALYTIC_MAX_MEMORY_PER_QUERY
*
* @see Per query memory limit for analytic query mode.
*/
private MemoryManager newMemoryManager() {
// The native memory pool that will be used by this query.
final DirectBufferPool pool = DirectBufferPool.INSTANCE;
// Figure out how much memory may be allocated by this query.
long maxMemoryBytesPerQuery = QueryHints.DEFAULT_ANALYTIC_MAX_MEMORY_PER_QUERY;
if (maxMemoryBytesPerQuery < 0) {
// Ignore illegal values.
maxMemoryBytesPerQuery = 0L;
}
final boolean blocking;
final int nsectors;
if (maxMemoryBytesPerQuery == 0) {
/*
* Allocations are blocking IFF there is no bound on the memory for
* the query.
*/
blocking = true; // block until allocation is satisfied.
nsectors = Integer.MAX_VALUE; // no limit
} else {
/*
* Allocations do not block if we run out of native memory for this
* query. Instead a memory allocation exception will be thrown and
* the query will break.
*
* The #of sectors is computed by dividing through by the size of
* the backing native ByteBuffers and then rounding up.
*/
blocking = false; // throw exception if query uses too much RAM.
// The capacity of the buffers in this pool.
final int bufferCapacity = pool.getBufferCapacity();
// Figure out the maximum #of buffers (rounding up).
nsectors = (int) Math.ceil(maxMemoryBytesPerQuery
/ (double) bufferCapacity);
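// E.g. (hypothetical numbers): a 10 MiB per-query limit over 1 MiB pool
// buffers gives nsectors = ceil(10485760 / 1048576.0) = 10.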
}
return new MemoryManager(pool, nsectors, blocking, null/* properties */);
}
@Override
final public IQueryAttributes getAttributes() {
return queryAttributes;
}
private final IQueryAttributes queryAttributes = new DefaultQueryAttributes();
/**
* Report a snapshot of the known (declared) child {@link IRunningQuery}s
* for this {@link IRunningQuery} and (recursively) for any children of this
* {@link IRunningQuery}.
*
* @return An array providing a snapshot of the known child
* {@link IRunningQuery}s and never <code>null</code>.
*/
final public IRunningQuery[] getChildren() {
synchronized (children) {
if (children.isEmpty()) {
// Fast path if no children.
return EMPTY_ARRAY;
}
// Add in all direct child queries.
final List<IRunningQuery> tmp = new LinkedList<IRunningQuery>(
children.values());
// Note: Do not iterate over [tmp] to avoid concurrent modification.
for (IRunningQuery c : children.values()) {
// Recursive for each child.
tmp.addAll(Arrays.asList(((AbstractRunningQuery) c)
.getChildren()));
}
// Convert to array.
return tmp.toArray(new IRunningQuery[tmp.size()]);
}
}
private static final IRunningQuery[] EMPTY_ARRAY = new IRunningQuery[0];
/**
* Attach a child query.
*
* Queries as submitted do not know about parent/child relationships.
*
* @param childQuery
* The child query.
*
* @return <code>true</code> if the child query was not already declared.
*/
final public boolean addChild(final IRunningQuery childQuery) {
synchronized(children) {
final UUID childId = childQuery.getQueryId();
if (children.containsKey(childId)) {
return false;
}
if (future.isDone()) { // BLZG-1418
childQuery.cancel(true/* mayInterruptIfRunning */);
throw new RuntimeException("Query is done");
}
children.put(childId, childQuery);
return true;
}
}
final private LinkedHashMap<UUID, IRunningQuery> children = new LinkedHashMap<UUID, IRunningQuery>();
/**
* Return the textual representation of the {@link RunState} of this query.
*
* Note: Exposed for log messages in derived classes since {@link #runState}
* is private.
*/
protected String runStateString() {
lock.lock();
try {
return runState.toString();
} finally {
lock.unlock();
}
}
@Override
public String toString() {
final StringBuilder sb = new StringBuilder(getClass().getName());
sb.append("{queryId=" + queryId);
/*
* Note: Obtaining the lock here is required to avoid concurrent
* modification exception in RunState's toString() when there is a
* concurrent change in the RunState. It also makes the isDone() and
* isCancelled() reporting atomic.
*/
lock.lock();
try {
sb.append(",elapsed=" + getElapsed());
sb.append(",deadline=" + runState.getDeadline());
sb.append(",isDone=" + isDone());
sb.append(",isCancelled=" + isCancelled());
sb.append(",runState=" + runState);
} finally {
lock.unlock();
}
sb.append(",controller=" + controller);
sb.append(",clientProxy=" + clientProxy);
sb.append(",query=" + query);
sb.append("}");
return sb.toString();
}
// abstract protected IChunkHandler getChunkHandler();
/**
* Return <code>true</code> iff the root cause of the {@link Throwable} was
* an interrupt. This checks for any of the different kinds of exceptions
* which can be thrown when an interrupt is encountered.
*
* @param t
* The throwable.
* @return <code>true</code> iff the root cause was an interrupt.
*
* TODO This could be optimized by checking once at each level for any of
* the indicated exceptions.
*/
static public boolean isRootCauseInterrupt(final Throwable t) {
if (InnerCause.isInnerCause(t, InterruptedException.class)) {
return true;
} else if (InnerCause.isInnerCause(t, ClosedByInterruptException.class)) {
return true;
} else if (InnerCause.isInnerCause(t, InterruptedException.class)) {
return true;
}
return false;
}
@Override
public void setStaticAnalysisStats(StaticAnalysisStats saStats) {
this.saStats = saStats;
}
@Override
public StaticAnalysisStats getStaticAnalysisStats() {
return saStats;
}
}