com.bigdata.bop.engine.RunState Maven / Gradle / Ivy
/**
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved.
Contact:
SYSTAP, LLC DBA Blazegraph
2501 Calvert ST NW #106
Washington, DC 20008
[email protected]
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
* Created on Sep 15, 2010
*/
package com.bigdata.bop.engine;
import java.text.DateFormat;
import java.util.Arrays;
import java.util.Collections;
import java.util.Date;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.Map;
import java.util.Set;
import java.util.UUID;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.log4j.Logger;
import com.bigdata.bop.BOp;
import com.bigdata.bop.BOpEvaluationContext;
import com.bigdata.bop.BOpUtility;
import com.bigdata.bop.NoBOpIdException;
import com.bigdata.bop.NoSuchBOpException;
import com.bigdata.bop.PipelineOp;
import com.bigdata.bop.join.PipelineJoinStats;
/**
* The run state for an {@link IRunningQuery}. This class is thread-safe.
*
* @author Bryan Thompson
* @version $Id$
*/
class RunState {
static private final Logger log = Logger.getLogger(RunState.class);
/**
* True iff the {@link #log} level is INFO or less.
*/
final static private boolean INFO = log.isInfoEnabled();
/**
* True iff the {@link #log} level is DEBUG or less.
*/
final static private boolean DEBUG = log.isDebugEnabled();
/**
* True iff the {@link #log} level is TRACE or less.
*/
final static private boolean TRACE = log.isTraceEnabled();
/**
* Inner class provides a 2nd logger used for tabular representations.
*/
static private class TableLog {
static private final Logger tableLog = Logger.getLogger(TableLog.class);
final static private boolean INFO = tableLog.isInfoEnabled();
}
/**
* Message if the query has already started evaluation.
*/
static private final transient String ERR_QUERY_STARTED = "Query already running.";
/**
* Message if query evaluation has already halted.
*/
static private final transient String ERR_QUERY_HALTED = "Query already halted.";
/**
* Message if an operator addressed by a {@link IHaltOpMessage} was never started.
*/
static private final transient String ERR_OP_NOT_STARTED = "Operator never ran.";
/**
* Message if an operator addressed by a message has been halted.
*/
static private final transient String ERR_OP_HALTED = "Operator is not running.";
/**
* Message if a query deadline has been exceeded.
*/
static private final transient String ERR_DEADLINE = "Query deadline is expired.";
/**
* The state of the {@link RunState} object. By factoring this into an inner
* class referenced by a private field we are able to write unit tests which
* directly manipulate the internal state of the {@link RunState} object
* while protecting its state from modification.
*/
static class InnerState {
/**
* Constructor.
*/
InnerState(final BOp query, final UUID queryId, final long deadline,
final long begin, final Map bopIndex) {
if (query == null)
throw new IllegalArgumentException();
if (queryId == null)
throw new IllegalArgumentException();
if (deadline <= 0L)
throw new IllegalArgumentException();
if (begin <= 0L)
throw new IllegalArgumentException();
if (bopIndex == null)
throw new IllegalArgumentException();
this.query = query;
this.queryId = queryId;
this.deadline = new AtomicLong(deadline);
this.bopIndex = bopIndex;
this.begin = begin;
}
/**
* The query.
*/
private final BOp query;
/**
* An index from {@link BOp.Annotations#BOP_ID} to {@link BOp} for the
* {@link #query}.
*/
final Map bopIndex;
/**
* The query identifier.
*/
final UUID queryId;
/**
* The timestamp when the {@link RunState} was created (milliseconds
* since the epoch).
*/
final long begin;
/**
* The query deadline (milliseconds since the epoch).
*
* @see BOp.Annotations#TIMEOUT
* @see IRunningQuery#getDeadline()
*/
final AtomicLong deadline;
/**
* Set to true
iff the query evaluation has begun.
*
* @see #startQuery(IChunkMessage)
*/
final AtomicBoolean started = new AtomicBoolean(false);
/**
* Set to true
iff the query evaluation is complete due to
* normal termination.
*
* @see #haltOp(IHaltOpMessage)
*/
final AtomicBoolean allDone = new AtomicBoolean(false);
/**
* The #of run state transitions which have occurred for this query.
*/
final AtomicLong stepCount = new AtomicLong();
/**
* The #of tasks for this query which have started but not yet halted.
*/
final AtomicLong totalRunningCount = new AtomicLong();
/**
* The #of {@link IChunkMessage} for the query which a running task has made
* available but which have not yet been accepted for processing by another
* task.
*
* Note: {@link ChunkedRunningQuery} drops {@link IChunkMessage}s on the
* {@link QueryEngine} as soon as they are generated. A
* {@link IChunkMessage} MAY be taken for evaluation as soon as it is
* published to the {@link QueryEngine}. This means that the operator task
* which will consume that {@link IChunkMessage} can begin to execute
* before {@link ChunkedRunningQuery#haltOp(IHaltOpMessage)} is invoked to
* indicate the end of the operator task which produced that
* {@link IChunkMessage}. Due to the potential overlap in these schedules
* {@link RunState#totalAvailableCount} may be transiently negative
* . This is the expected behavior.
*/
final AtomicLong totalAvailableCount = new AtomicLong();
/**
* The #of operators which have requested last pass evaluation and whose
* last pass evaluation phase is not yet complete. This is incremented
* each time we observe a {@link IStartOpMessage} for an operator not yet
* found in {@link #lastPassRequested}. When last pass evaluation begins
* for an operator, its {@link #startedOn} set is copied and set as its
* {@link #doneOn} set. The entries in the {@link #doneOn} set are
* removed as we observe {@link IHaltOpMessage}s for that operator during
* last pass evaluation. The {@link #totalLastPassRemainingCount} is
* decremented each time we count the {@link #doneOn} set for an
* operator becomes empty.
*/
final AtomicLong totalLastPassRemainingCount = new AtomicLong();
/**
* A map reporting the #of {@link IChunkMessage} available for each operator
* in the pipeline. The total #of {@link IChunkMessage}s available across
* all operators in the pipeline is reported by {@link #totalAvailableCount}
* .
*
* The movement of the intermediate binding set chunks forms an acyclic
* directed graph. This map is used to track the #of chunks available for
* each {@link BOp} in the pipeline. When a {@link BOp} has no more incoming
* chunks, we send an asynchronous message to all nodes on which that
* {@link BOp} had executed informing the {@link QueryEngine} on that node
* that it should immediately release all resources associated with that
* {@link BOp}.
*
* Note: Since the map contains {@link AtomicLong}s it can not be readily
* exposed as {@link Map} object. If we were to expose the map, it would
* have to be via a get(key) style interface. Even an immutable map does
* not help because of the mutable {@link AtomicLong}s.
*/
final Map availableMap = new LinkedHashMap();
/**
* A collection reporting on the #of instances of a given {@link BOp} which
* are concurrently executing. An entry in this map indicates that the
* operator has been evaluated at least once.
*
* Note: Since the map contains {@link AtomicLong}s it can not be readily
* exposed as {@link Map} object. If we were to expose the map, it would
* have to be via a get(key) style interface. Even an immutable map does
* not help because of the mutable {@link AtomicLong}s.
*/
final Map runningMap = new LinkedHashMap();
/**
* The set of services on which this query has started an operator. This
* will be just the query controller for purely local evaluation. In
* scale-out, this will include each service on which the query has been
* evaluated. This collection is maintained based on the
* {@link #startQuery(IChunkMessage)} and {@link #startOp(IStartOpMessage)}
* messages.
*/
final Set serviceIds = new LinkedHashSet();
/**
* A map associating each operator for which evaluation has begun with the
* the set of distinct nodes or shards where the operator has been submitted
* for evaluation. The {@link Set} associated with each operator will be a
* Set<serviceId>
or Set<shardId>
(iff
* sharded evaluation). Whether the set stores serviceIds or shardIds
* depends on the {@link BOpEvaluationContext} of the operator. (For purely
* local invocations we will either store the {@link UUID} of the query
* controller service or the special shardId -1
.)
*
* If {@link #getOperatorRunState(int, boolean)} reports
* {@link RunStateEnum#StartLastPass}, then that a copy of that
* Set<serviceId>
or Set<shardId>
is
* put on {@link #doneOn}. The {@link #doneOn} set for each operator is made
* available to the {@link IRunningQuery} so it can send out the appropriate
* messages for the last evaluation pass.
*
* {@link #doneOn} is used to count off lastPass() invocations for an
* operator which requires them. The operator is not really and truly done
* until doneOn.get(bopId) either returns null
(the operator
* did not request lastPass() invocation) or it returns an empty set (the
* operator requested lastPass() invocation and we have seen the
* {@link IHaltOpMessage} for each node or shard on which the operator ran).
*
* {@link #startQuery(IChunkMessage)} and {@link #startOp(IStartOpMessage)}
* are responsible for maintaining the {@link #startedOn} state while
* {@link #_haltOp(int)} is responsible for maintaining the {@link #doneOn}
* state.
*
* {@link #getOperatorRunState(int, boolean)} considers {@link #doneSet} in
* addition to the other run state metadata to decide whether an upstream
* operator is still running. {@link RunStateEnum#StartLastPass} is
* recognized when {@link #doneOn} is null
for an operator
* which can not be otherwise triggered and whose last evaluation pass has
* been requested. Once the {@link #startedOn} set has been propagated to
* the {@link #doneOn} set for an operator, it is recognized as being in the
* {@link RunStateEnum#RunningLastPass} state until its {@link #doneOn} set
* becomes empty.
*/
final Map> startedOn = new LinkedHashMap>();
/**
* A collection for each operator which has executed at least once and for
* which the optional final evaluation phase was requested. Together with
* {@link #startedOn}, {@link #doneOn} captures the state change from when
* an operator can not be triggered by external inputs to when it can no
* longer be triggered by the final evaluation pass. The criteria for an
* operator being "done" therefore includes (a) it has run at least once;
* (b) it can not be retriggered by input from other operators; and (c) its
* optional last evaluation pass either has been evaluated or will not be
* evaluated.
*
* @see #startedOn
*/
@SuppressWarnings("rawtypes")
final Map doneOn = new LinkedHashMap();
/**
* The set of operators for which a last evaluation pass was requested.
*
* @see #startOp(IStartOpMessage, boolean)
*/
final Set lastPassRequested = new LinkedHashSet();
/**
* The set of operators for which at-once evaluation is required. The operator
* is removed from this set as soon as it is started.
*
* @see #startOp(IStartOpMessage, boolean)
* @see COUNT(DISTINCT) returns no rows rather than ZERO.
*/
final Set atOnceRequired = new LinkedHashSet();
@Override
public String toString() {
return toString(new StringBuilder()).toString();
}
StringBuilder toString(final StringBuilder sb) {
sb.append("{nsteps=" + stepCount);
sb.append(",allDone=" + allDone);
sb.append(",totalRunning=" + totalRunningCount);
sb.append(",totalAvailable=" + totalAvailableCount);
sb.append(",totalLastPassRemaining=" + totalLastPassRemainingCount);
sb.append(",services=" + serviceIds);
sb.append(",startedOn=" + startedOn);
sb.append(",doneOn=" + doneOn);
sb.append(",running=" + runningMap);
sb.append(",available=" + availableMap);
sb.append(",lastPassRequested=" + lastPassRequested);
sb.append(",atOnceRequired=" + atOnceRequired);
sb.append("}");
return sb;
}
}
// Note: This MUST be private to protect the internal run state.
final private InnerState innerState;
/**
* Return the query identifier specified to the constructor.
*/
final public UUID getQueryId() {
return innerState.queryId;
}
/**
* Return the deadline specified to the constructor.
*/
final public long getDeadline() {
return innerState.deadline.get();
}
/**
* Set the deadline for the query.
*
* @param deadline
* The deadline.
*
* @throws IllegalArgumentException
* if the deadline is non-positive.
* @throws IllegalStateException
* if the deadline has already been set.
* @throws QueryTimeoutException
* if the deadline is already expired.
*/
final public void setDeadline(final long deadline) throws QueryTimeoutException {
if (deadline <= 0)
throw new IllegalArgumentException();
// set the deadline.
if (!innerState.deadline.compareAndSet(Long.MAX_VALUE/* expect */,
deadline/* update */)) {
// the deadline is already set.
throw new IllegalStateException();
}
if (deadline < System.currentTimeMillis()) {
// deadline has already expired.
throw new QueryTimeoutException();
}
}
/**
* Return true
if evaluation of the query has been initiated
* using {@link #startQuery(IChunkMessage)}.
*/
final public boolean isStarted() {
return innerState.started.get();
}
/**
* Return true
if the query is known to be completed based on
* the {@link #haltOp(IHaltOpMessage)}.
*/
final public boolean isAllDone() {
return innerState.allDone.get();
}
/**
* The #of run state transitions which have occurred for this query.
*/
final public long getStepCount() {
return innerState.stepCount.get();
}
/**
* The #of tasks for this query which have started but not yet halted.
*/
final public long getTotalRunningCount() {
return innerState.totalRunningCount.get();
}
/**
* The #of {@link IChunkMessage} for the query which a running task has made
* available but which have not yet been accepted for processing by another
* task.
*/
final public long getTotalAvailableCount() {
return innerState.totalAvailableCount.get();
}
/**
* The number of operators which have requested last pass evaluation and
* whose last pass evaluation phase is not yet complete.
*/
final public long getTotalLastPassRemainingCount() {
return innerState.totalLastPassRemainingCount.get();
}
/**
* Return an unmodifiable set containing the service UUID of each service on
* which the query has been started.
*/
final Set getServiceIds() {
return Collections.unmodifiableSet(innerState.serviceIds);
}
/**
* Return the #of instances of this operator which are concurrently
* executing.
*
* @param bopId
* The operator.
*
* @return The #of instances of the operator which are currently executing.
*/
final long getRunningCount(final int bopId) {
final AtomicLong runningCount = innerState.runningMap.get(bopId);
if(runningCount == null)
return 0L;
return runningCount.get();
}
/**
* Return the #of shards or nodes on which the operator has been started.
* This is a measure of how much the operator has "fanned out" across the
* shards or the cluster. The units will be shards if the operator is
* sharded and nodes if the operator is hash partitioned.
*
* @param bopId
* The operator.
*
* @return The #of shards or nodes on which the operator has been started.
*/
final int getStartedOnCount(final int bopId) {
final Set> startedOn = innerState.startedOn.get(bopId);
if(startedOn == null)
return 0;
return startedOn.size();
}
/**
* Return the set of either shardIds or the set of serviceIds on which the
* operator had been started and has not yet received its last pass
* invocation. The query engine is responsible for generating the last pass
* messages for the operator on each of these shards or services.
*
* @param bopId
* The operator.
*
* @return The set (either of shardIds or serviceIds, depending on the
* {@link BOpEvaluationContext} of the operator) -or-
* null
if no operator having that bopId has entered
* into its last pass evaluation stage.
*/
@SuppressWarnings({ "rawtypes", "unchecked" })
final Set/*shardOrServiceId*/ getDoneOn(final int bopId) {
final Set set = innerState.doneOn.get(bopId);
if(set == null)
return null;
return Collections.unmodifiableSet(set);
}
/**
* Return the set of bopIds for operators which have requested last pass
* evaluation.
*/
final Set getLastPassRequested() {
return Collections.unmodifiableSet(innerState.lastPassRequested);
}
/**
* Return the set of bopIds for operators that must be evaluated once and
* only once.
*/
final Set getAtOnceRequired() {
return Collections.unmodifiableSet(innerState.atOnceRequired);
}
/**
* Constructor.
*
* @param query
* The {@link IRunningQuery}.
*/
public RunState(final IRunningQuery query) {
/*
* Note: The deadline is Long.MAX_VALUE until set. query.getDeadline()
* delegates back to RunState so we can not use it to setup our own
* state.
*/
this(new InnerState(query.getQuery(), query.getQueryId(),
Long.MAX_VALUE/* query.getDeadline() */,
System.currentTimeMillis(), query.getBOpIndex()));
}
/**
* Constructor used by unit tests.
*/
// Note: package private to expose to the unit tests.
RunState(final InnerState innerState) {
if (innerState == null)
throw new IllegalArgumentException();
this.innerState = innerState;
}
/**
* Update the {@link RunState} to indicate that the query evaluation will
* begin with one initial {@link IChunkMessage} available for consumption.
*
* @param msg
* The message.
*
* @throws IllegalArgumentException
* if the argument is null
.
* @throws IllegalStateException
* if the query is already running.
* @throws IllegalStateException
* if the query is already done.
* @throws QueryTimeoutException
* if the deadline for the query has passed.
*/
synchronized
public void startQuery(final IChunkMessage> msg) throws QueryTimeoutException {
if (msg == null)
throw new IllegalArgumentException();
if (innerState.allDone.get())
throw new IllegalStateException(ERR_QUERY_HALTED);
checkDeadline();
if (!innerState.started.compareAndSet(false/* expect */, true/* update */))
throw new IllegalStateException(ERR_QUERY_STARTED);
innerState.stepCount.incrementAndGet();
messagesProduced(msg.getBOpId(), 1/* nmessages */);
/*
* Any operator requesting last pass evaluation on the controller needs
* to be added to the last pass evaluation or at-once evaluation set now. This provides the
* guarantee that such operators will be run at least once (assuming
* that the query terminates normally).
*
* @see https://sourceforge.net/apps/trac/bigdata/ticket/377#comment:12
* @see http://trac.blazegraph.com/ticket/868
*/
final UUID controllerId = msg.getQueryControllerId();
{
final Iterator itr = BOpUtility.preOrderIterator(innerState.query);
while(itr.hasNext()) {
final BOp op = itr.next();
if (op.getEvaluationContext() != BOpEvaluationContext.CONTROLLER)
continue;
final boolean lastPassRequested = ((PipelineOp) op).isLastPassRequested();
final boolean atOnceRequired = ((PipelineOp) op).isAtOnceEvaluation();
if(atOnceRequired || lastPassRequested) {
final Integer id = (Integer) op.getProperty(BOp.Annotations.BOP_ID);
if (id == null)
throw new NoBOpIdException(op.toString());
boolean didAdd = false;
if (atOnceRequired)
didAdd = innerState.atOnceRequired.add(id);
if (lastPassRequested)
if(didAdd = innerState.lastPassRequested.add(id))
innerState.totalLastPassRemainingCount.incrementAndGet();
if (didAdd) {
/*
* Mock an evaluation pass for this operator. It will look
* as if the operator has been evaluated (various
* collections have entries for that bopId) but that it was
* evaluated ZERO (0) times (various counters are zero).
*/
innerState.runningMap.put(id.intValue(), new AtomicLong());
@SuppressWarnings("rawtypes")
final Set set = new LinkedHashSet();
set.add(controllerId);
innerState.startedOn.put(id, set);
messagesConsumed(id.intValue(), 0/*nmessages*/);
}
}
}
}
// /*
// * Note: RunState is only used by the query controller so this will not
// * do an RMI and the RemoteException will not be thrown.
// */
// final UUID serviceId;
// try {
// serviceId = msg.getQueryController().getServiceUUID();
// } catch (RemoteException ex) {
// throw new AssertionError(ex);
// }
innerState.serviceIds.add(controllerId);
if (TableLog.INFO) {
// TableLog.tableLog.info("\n\nqueryId=" + queryId + "\n");
TableLog.tableLog.info(getTableHeader());
TableLog.tableLog.info(getTableRow("startQ", controllerId, msg
.getBOpId(), -1/* shardId */, 1/* fanIn */,
null/* cause */, null/* stats */));
}
if(INFO)
log.info("startQ : " + toString());
if (TRACE)
log.trace(msg.toString());
}
/**
* Update the {@link RunState} to indicate that the operator identified in
* the {@link IStartOpMessage} will execute and will consume the one or more
* {@link IChunkMessage}s.
*
* @return true
if this is the first time we will evaluate the
* op.
*
* @throws IllegalArgumentException
* if the argument is null
.
* @throws QueryTimeoutException
* if the deadline for the query has passed.
*/
synchronized
public boolean startOp(final IStartOpMessage msg) throws QueryTimeoutException {
if (msg == null)
throw new IllegalArgumentException();
if (innerState.allDone.get())
throw new IllegalStateException(ERR_QUERY_HALTED);
checkDeadline();
innerState.stepCount.incrementAndGet();
final boolean firstTime = _startOp(msg);
messagesConsumed(msg.getBOpId(), msg.getChunkMessageCount());
final Integer bopId = msg.getBOpId();
final PipelineOp bop = (PipelineOp) innerState.bopIndex.get(bopId);
// Note: This is an effective NOP since we init the last pass data structures in startQuery().
if (bop.isLastPassRequested())
if (innerState.lastPassRequested.add(bopId))
innerState.totalLastPassRemainingCount.incrementAndGet();
innerState.serviceIds.add(msg.getServiceId());
if (TableLog.INFO) {
TableLog.tableLog.info(getTableRow("startOp", msg.getServiceId(),
msg.getBOpId(), msg.getPartitionId(), msg.getChunkMessageCount(),
null/* cause */, null/* stats */));
}
if (INFO)
log.info("startOp: " + toString() + " : bop=" + msg.getBOpId());
if (TRACE)
log.trace(msg.toString());
return firstTime;
}
/**
* Type safe enumeration of the summary run state for an operator as
* reported in response to a {@link IHaltOpMessage}. This information
* is maintained by the query controller in the {@link RunState} for
* the {@link IRunningQuery}.
*/
static enum RunStateEnum {
/**
* The operator can be re-triggered either by upstream operators in the
* pipeline which are not yet {@link #AllDone} or by
* buffered {@link IChunkMessage}(s).
*/
Running,
/**
* No instance of the operator is running and the operator can no longer
* be re-triggered by upstream operators or buffered
* {@link IChunkMessage}(s). The optional last evaluation pass for the
* operator should be executed.
*/
StartLastPass,
/**
* The operator awaiting last pass evaluation messages on each node or
* shard where it was started.
*/
RunningLastPass,
/**
* No instance of the operator is running and optional last evaluation
* pass either has been executed or will not run. Any resources
* associated with that operator should be released.
*/
AllDone;
} // RunStateEnum
/**
* Check the query to see whether its deadline has expired.
*
* @throws QueryTimeoutException
* if the query deadline has expired.
*/
protected void checkDeadline() throws QueryTimeoutException {
if (innerState.deadline.get() < System.currentTimeMillis())
throw new QueryTimeoutException(ERR_DEADLINE);
}
/**
* Update the {@link RunState} to reflect the post-condition of the
* evaluation of an operator against one or more {@link IChunkMessage}s,
* adjusting the #of messages available for consumption by the operator
* accordingly.
*
* Note: If the query terminated normally then {@link #allDone} is set to
* true
as a side effect.
*
* @return true
if the operator life cycle is over.
*
* @throws IllegalArgumentException
* if the argument is null
.
* @throws IllegalStateException
* if the query is not running.
* @throws IllegalStateException
* if the operator addressed by the message is not running.
* @throws QueryTimeoutException
* if the deadline has expired.
* @throws ExecutionException
* if the {@link IHaltOpMessage#cause} was non-null
* , if which case it wraps {@link IHaltOpMessage#cause}.
*/
synchronized
public RunStateEnum haltOp(final IHaltOpMessage msg) throws QueryTimeoutException,
ExecutionException {
if (msg == null)
throw new IllegalArgumentException();
if (innerState.allDone.get())
throw new IllegalStateException(ERR_QUERY_HALTED);
checkDeadline();
innerState.stepCount.incrementAndGet();
final PipelineOp bop = (PipelineOp) innerState.bopIndex.get(msg
.getBOpId());
if (bop == null)
throw new IllegalArgumentException();
if (msg.getSinkMessagesOut() != 0) {
/*
* Note: The sinkId is not a required PipelineOp annotation. It
* defaults to the parent of the operator, which has the semantics
* of being the operator which is "downstream" from this operator.
* It might be worthwhile to pre-compile this information into the
* query so we can avoid the search for the parent. Alternatively,
* the search for the parent could be moved into
* getEffectiveDefaultSink such that it was only performed when the
* SINK_REF was not explicitly given as an annotation.
*/
// parent (null if this is the root of the operator tree).
final BOp p = BOpUtility.getParent(innerState.query, bop);
final Integer sinkId = BOpUtility.getEffectiveDefaultSink(bop, p);
// final Integer sinkId = msg.getSinkId();
if (sinkId != null) {
/*
* Note: The sink will be null for the last operator in the
* query plan. That operator is understood to target the buffer
* from which the solutions are drained on the query controller.
*/
messagesProduced(sinkId, msg.getSinkMessagesOut());
}
}
if (msg.getAltSinkMessagesOut() != 0) {
final Integer altSinkId = (Integer) bop
.getRequiredProperty(PipelineOp.Annotations.ALT_SINK_REF);
messagesProduced(altSinkId, msg.getAltSinkMessagesOut());
}
_haltOp(msg);
/*
* Figure out the current RunStateEnum for this operator.
*/
final RunStateEnum state = getOperatorRunState(msg.getBOpId());
// if (RunStateEnum.StartLastPass == state) {
// // should not be triggered unless requested.
// assert innerState.lastPassRequested.contains(msg.bopId);
// if (innerState.doneOn.get(msg.bopId) == null) {
// /*
// * When we convert into the LastPass phase for this operator we
// * copy the [startedOn] set for the operator into the [doneOn]
// * set and then increment [totalLastPassRemainingCount] by the
// * size of the copied set. That sets up the criteria that we
// * need to count off the last pass evaluations of the operator,
// * which are essentially self-triggered.
// *
// * Note: Both availableCount and the availableMap counter for
// * this bopId have to be bumped by the #of expected messages,
// * otherwise those messages will not be "expected" when they
// * arrive.
// */
// final Set> set = innerState.startedOn.get(msg.bopId);
// if (set != null) {
// final int nexpected = set.size();
// innerState.doneOn.put(msg.bopId, new LinkedHashSet(set)); // Copy!
// innerState.totalAvailableCount.addAndGet(nexpected);
// innerState.availableMap.get(msg.bopId).addAndGet(nexpected);
// }
// }
// }
// true iff the entire query is done.
final boolean isAllDone = innerState.totalRunningCount.get() == 0
&& innerState.totalAvailableCount.get() == 0
&& innerState.totalLastPassRemainingCount.get() == 0
&& innerState.atOnceRequired.isEmpty()
;
if (isAllDone)
innerState.allDone.set(true);
/*
* Note: It is possible for the per-operator availableMap counters to
* have not been updated yet but [isAllDone] to be true. Since the query
* is defacto done when [isAllDone] is satisfied, this tests for that
* condition first and then for isOperatorDone().
*/
// final boolean isOpDone = isAllDone || state == RunStateEnum.AllDone;
// if (isAllDone && !isOpDone)
// throw new RuntimeException("Whoops!: "+this);
if (TableLog.INFO) {
final int fanOut = msg.getSinkMessagesOut() + msg.getAltSinkMessagesOut();
TableLog.tableLog.info(getTableRow("haltOp", msg.getServiceId(),
msg.getBOpId(), msg.getPartitionId(), fanOut, msg.getCause(),
msg.getStats()));
}
if (INFO)
log.info("haltOp : " + toString() + " : bop=" + msg.getBOpId()
+ ",opRunState=" + state + ",queryAllDone=" + isAllDone);
if (TRACE)
log.trace(msg.toString());
if (msg.getCause() != null) {
/*
* Note: just wrap and throw rather than logging since this class
* does not have enough insight into non-error exceptions while
* Haltable does.
*/
throw new ExecutionException(msg.getCause());
}
return state;
}
/**
* Return the {@link RunStateEnum} for the operator given the
* {@link RunState} of the {@link IRunningQuery}.
*
* The movement of the intermediate binding set chunks during query
* processing forms an acyclic directed graph. We can decide whether or not
* a {@link BOp} in the query plan can be triggered by the current activity
* pattern by inspecting {@link RunState} for the {@link BOp} and its
* operands recursively. For the purposes of this method, only
* {@link BOp#args() operands} are considered as descendants (this is the
* query pipeline).
*
* Operators which specify the {@link PipelineOp.Annotations#LAST_PASS}
* annotation will self-trigger once per service or shard on which they were
* evaluated. This self-triggering phase provides the last pass evaluation
* semantics.
*
* Note: The caller MUST hold a lock across this operation in order for it
* to be atomic with respect to the concurrent evaluation of other operators
* for the same query.
*
* Note: While could also report whether or not the operator has ever
* started here, this method is only invoked when processing a
* {@link IHaltOpMessage} and we already know in that context (and can
* readily verify) that the operator had started evaluation.
*
* @param bopId
* The identifier for an operator which appears in the query
* plan.
*
* @return The current {@link RunStateEnum} for the operator.
*
* @throws IllegalArgumentException
* if any argument is null
.
* @throws NoSuchBOpException
* if bopId is not found in the query index.
*/
RunStateEnum getOperatorRunState(final int bopId) {
final BOp op = innerState.bopIndex.get(bopId);
if (op == null)
throw new NoSuchBOpException(bopId);
// look at the operators that could feed chunks into this operator.
final Iterator itr = BOpUtility.preOrderIterator(op);
while (itr.hasNext()) {
final BOp t = itr.next();
final Integer id = (Integer) t.getProperty(BOp.Annotations.BOP_ID);
if (id == null)
throw new NoBOpIdException(t.toString());
{
/*
* If the operator is running then it is, defacto, "not done."
*
* If any descendants of the operator are running, then they
* could cause the operator to be re-triggered and it is "not
* done."
*/
final AtomicLong runningCount = innerState.runningMap.get(id);
if (runningCount != null && runningCount.get() != 0) {
if (DEBUG)
log.debug("Operator can be triggered: op=" + op
+ ", possible trigger=" + t + " is running.");
return RunStateEnum.Running;
}
}
{
/*
* Any chunks available for the operator in question or any of
* its descendants could cause that operator to be triggered.
*/
final AtomicLong availableChunkCount = innerState.availableMap
.get(id);
if (availableChunkCount != null
&& availableChunkCount.get() != 0) {
if (DEBUG)
log.debug("Operator can be triggered: op=" + op
+ ", possible trigger=" + t + " has "
+ availableChunkCount + " chunks available.");
return RunStateEnum.Running;
}
}
if (bopId != id.intValue()) {
if (innerState.atOnceRequired.contains(id)) {
/*
* Any predecessor of the operator (but not the operator
* itself) requires at-once evaluation and has not yet run.
*/
if (DEBUG)
log.debug("Operator can be triggered: op=" + op
+ ", possible trigger=" + t
+ " awaiting at-once evaluation of predecessor.");
return RunStateEnum.Running;
}
if (innerState.lastPassRequested.contains(id)) {
/*
* Any predecessor of the operator (but not the operator itself)
* requested a last pass evaluation phase and that last pass
* evaluation phase is still in progress (as recognized by a
* non-null entry in [doneOn] for that operator).
*/
// Examine the doneSet for this operator.
final Set doneSet = innerState.doneOn.get(id);
// If null, then has not started lastPass evaluation.
if (doneSet == null) {
if (DEBUG)
log.debug("Operator can be triggered: op=" + op
+ ", possible trigger=" + t
+ " has not started last pass evaluation.");
return RunStateEnum.Running;
}
// If non-empty, then has not finished lastPass evaluation.
if (!doneSet.isEmpty()) {
if (DEBUG)
log.debug("Operator can be triggered: op=" + op
+ ", possible trigger=" + t
+ " awaiting last pass evaluation for doneSet="
+ doneSet);
return RunStateEnum.Running;
}
}
}
}
/*
* The operator can not be triggered by upstream operators or messages
* already generated by upstream operators.
*/
// handle at-once operator.
final boolean atOnceRequired = innerState.atOnceRequired.contains(bopId);
// handle pipeline operator with last-pass evaluation requested.
final boolean lastPassRequest = innerState.lastPassRequested.contains(bopId);
if ((atOnceRequired || lastPassRequest)
&& innerState.runningMap.containsKey(bopId)) {
/*
* The operator has requested either (last pass evaluation -or-
* at-once evaluation) AND (either has in fact been evaluated (the
* running count map has an entry for this operator) -OR- is an
* operator which runs on the query controller).
*
* Now we need to determine whether or not the operator should start
* its last pass evaluation phase or if it is waiting for the last
* pass evaluation phase to terminate.
*/
// Examine the doneSet for this operator.
final Set doneSet = innerState.doneOn.get(bopId);
// If null, then has not started lastPass eval.
if (doneSet == null) {
if (DEBUG)
log.debug("Operator will self-trigger last pass evaluation: op="
+ op);
/*
* When we convert into the LastPass phase for this operator we
* copy the [startedOn] set for the operator into the [doneOn]
* set and then increment [totalLastPassRemainingCount] by the
* size of the copied set. That sets up the criteria that we
* need to count off the last pass evaluations of the operator,
* which are essentially self-triggered.
*
* Note: Both availableCount and the availableMap counter for
* this bopId have to be bumped by the #of expected messages,
* otherwise those messages will not be "expected" when they
* arrive.
*/
final Set> set = innerState.startedOn.get(bopId);
if (set != null) {
final int nexpected = set.size();
innerState.doneOn.put(bopId, new LinkedHashSet(set)); // Copy!
innerState.totalAvailableCount.addAndGet(nexpected);
innerState.availableMap.get(bopId).addAndGet(nexpected);
}
return RunStateEnum.StartLastPass;
}
// If non-empty, then has not finished lastPass eval.
if (!doneSet.isEmpty()) {
if (DEBUG)
log.debug("Operator will self-trigger last pass evaluation: op="
+ op
+ ", awaiting last pass evaluation for doneSet="
+ doneSet);
return RunStateEnum.RunningLastPass;
}
}
if (INFO)
log.info("Operator can not be triggered: op=" + op);
return RunStateEnum.AllDone;
}
/**
* Return true
iff the running query state is such that the
* "at-once" evaluation of the specified operator may proceed (no
* predecessors are running or could be triggered and the operator has not
* been evaluated).
*
* Note: The specific requirements are: (a) the operator is not running and
* has not been started; (b) no predecessor in the pipeline is running; and
* (c) no predecessor in the pipeline can be triggered.
*
* @param bopId
* The identifier for an operator which appears in the query
* plan.
*
* @return true
iff the "at-once" evaluation of the operator
* may proceed.
*
* @throws IllegalArgumentException
* if any argument is null
.
* @throws NoSuchBOpException
* if bopId is not found in the query index.
*/
boolean isAtOnceReady(final int bopId) {
final BOp op = innerState.bopIndex.get(bopId);
if (op == null)
throw new NoSuchBOpException(bopId);
final AtomicLong counter = innerState.runningMap.get(bopId);
final boolean didStart = counter != null && counter.get() != 0L;
if (didStart) {
// Evaluation has already run (or begun) for this operator.
if (INFO)
log.info("Already ran/running: " + bopId);
return false;
}
final Iterator itr = BOpUtility.preOrderIterator(op);
while (itr.hasNext()) {
final BOp t = itr.next();
final Integer id = (Integer) t.getProperty(BOp.Annotations.BOP_ID);
if (id == null)
throw new NoBOpIdException(t.toString());
if (bopId == id.intValue()) {
// Ignore self.
continue;
}
{
/*
* If any descendants (aka predecessors) of the operator are
* running, then they could cause produce additional solutions
* so the operator is not ready for "at-once" evaluation.
*/
final AtomicLong runningCount = innerState.runningMap.get(id);
if (runningCount != null && runningCount.get() != 0) {
if (DEBUG)
log.debug("Predecessor running: predecessorId=" + id
+ ", predecessorRunningCount=" + runningCount);
return false;
}
}
{
/*
* Any chunks available for a descendant (aka predecessor) of
* the operator could produce additional solutions as inputs to
* the operator so it is not ready for "at-once" evaluation.
*/
final AtomicLong availableChunkCount = innerState.availableMap
.get(id);
if (availableChunkCount != null
&& availableChunkCount.get() != 0) {
/*
* We are looking at some other predecessor of the specified
* operator.
*/
if (DEBUG)
log.debug("Predecessor can be triggered: predecessorId="
+ id + " has " + availableChunkCount
+ " chunks available.");
return false;
}
}
}
// Success.
if (INFO)
log.info("Ready for 'at-once' evaluation: " + bopId);
return true;
}
/**
* Update the {@link RunState} to reflect that fact that a new evaluation
* phase has begun for an operator.
*
* @param bopId
* The operator identifier.
*
* @return true
iff this is the first time that operator is
* being evaluated within the context of this query {@link RunState}
* .
*/
@SuppressWarnings({ "unchecked", "rawtypes" })
private boolean _startOp(final IStartOpMessage msg) {
final long running = innerState.totalRunningCount.incrementAndGet();
assert running >= 1 : "running=" + running + " :: runState=" + this;
final boolean firstTime;
{
final Integer bopId = msg.getBOpId();
AtomicLong n = innerState.runningMap.get(bopId);
if (n == null)
innerState.runningMap.put(bopId, n = new AtomicLong());
final long tmp = n.incrementAndGet();
assert tmp >= 0 : "runningCount=" + tmp + " for bopId=" + bopId
+ " :: runState=" + this;
Set set = innerState.startedOn.get(bopId);
if (set == null) {
set = new LinkedHashSet();
innerState.startedOn.put(bopId, set);
firstTime = true;
} else {
firstTime = false;
}
final BOp bop = innerState.bopIndex.get(bopId);
switch (bop.getEvaluationContext()) {
case ANY:
case CONTROLLER:
case HASHED:
set.add(msg.getServiceId());
break;
case SHARDED:
set.add(Integer.valueOf(msg.getPartitionId()));
break;
}
}
return firstTime;
}
/**
* Update the {@link RunState} to reflect the fact that an operator
* execution phase is finished.
*
* @param msg
*/
private void _haltOp(final IHaltOpMessage msg) {
final Integer bopId = msg.getBOpId();
// one less task is running.
final long running = innerState.totalRunningCount.decrementAndGet();
assert running >= 0 : "running=" + running + " :: runState=" + this;
{
final AtomicLong n = innerState.runningMap.get(bopId);
if (n == null) {
/*
* I have observed this exception once. It may have been caused
* by the interrupt of an RMI during the handling of a startOp
* message on a cluster node.
*/
throw new IllegalArgumentException(ERR_OP_NOT_STARTED);
}
if (n.get() <= 0)
throw new IllegalArgumentException(ERR_OP_HALTED);
n.decrementAndGet();
}
//if it is halt for at-once operator than we need to remove it from
//the set of at-once operators that we still need to run
if(innerState.atOnceRequired.contains(bopId)) {
innerState.atOnceRequired.remove(bopId);
}
/*
* If we are processing our doneSet, then handle that first so the
* changes will be reflected when we decide the new run state for the
* operator.
*/
{
final Set> set = innerState.doneOn.get(bopId);
if (set != null) {
/*
* Remove the shardId or serviceId from our doneSet.
*/
if (!set.remove(msg.getPartitionId()) && !set.remove(msg.getServiceId())) {
throw new RuntimeException("Not in doneSet: msg=" + msg
+ ", doneSet=" + set);
}
if (set.isEmpty()) {
if(innerState.lastPassRequested.contains(bopId)) {
// End of last pass evaluation for this operator.
innerState.totalLastPassRemainingCount.decrementAndGet();
}
}
}
}
}
/**
* Update the {@link RunState} to reflect that the operator has consumed
* some number of {@link IChunkMessage}s.
*
* @param bopId
* The operator identifier.
* @param nmessages
* The #of messages which were consumed by the operator.
*/
private void messagesConsumed(final int bopId, final int nmessages) {
innerState.totalAvailableCount.addAndGet(-nmessages);
AtomicLong n = innerState.availableMap.get(bopId);
if (n == null)
innerState.availableMap.put(bopId, n = new AtomicLong());
n.addAndGet(-nmessages);
}
/**
* Update the {@link RunState} to reflect that some operator has generated
* some number of {@link IChunkMessage}s which are available to be consumed
* by the specified target operator.
*
* @param targetId
* The target operator.
* @param nmessages
* The #of of messages which were made available to that
* operator.
*/
private void messagesProduced(final Integer targetId, final int nmessages) {
if(targetId == null)
throw new IllegalArgumentException();
if (nmessages < 0)
throw new IllegalArgumentException();
if (nmessages == 0) {
// NOP
return;
}
if (!innerState.bopIndex.containsKey(targetId))
throw new IllegalArgumentException();
innerState.totalAvailableCount.addAndGet(nmessages);
AtomicLong n = innerState.availableMap.get(targetId);
if (n == null)
innerState.availableMap.put(targetId, n = new AtomicLong());
n.addAndGet(nmessages);
}
/*
* Human readable representations of the query run state.
*/
/**
* Human readable summary of the current {@link RunState}.
*
* Note: You must holding the lock guarding the {@link RunState} to
* guarantee that will return a consistent representation.
*/
@Override
public String toString() {
final StringBuilder sb = new StringBuilder();
sb.append(getClass().getName());
innerState.toString(sb);
return sb.toString();
}
private String getTableHeader() {
final StringBuilder sb = new StringBuilder();
final Integer[] bopIds = innerState.bopIndex.keySet().toArray(new Integer[0]);
Arrays.sort(bopIds);
sb.append("queryId");
sb.append("\tbegin");
sb.append("\telapsed");
sb.append("\tstep");
sb.append("\tlabel");
sb.append("\tbopId");
sb.append("\tserviceId");
sb.append("\tevalContext");
sb.append("\tcontroller");
sb.append("\tcause");
sb.append("\tbop");
sb.append("\tshardId");
sb.append("\tfanIO");
sb.append("\tnservices");
sb.append("\tnavail(query)");
sb.append("\tnrun(query)");
sb.append("\tnlastPassRemaining");
sb.append("\tallDone");
for (int i = 0; i < bopIds.length; i++) {
final Integer id = bopIds[i];
final BOp bop = innerState.bopIndex.get(id);
if(!(bop instanceof PipelineOp))
continue; // skip non-pipeline operators.
sb.append("\tnavail(id=" + id + ")");
sb.append("\tnrun(id=" + id + ")");
sb.append("\tnstartedOn(id=" + id + ")");
sb.append("\tndoneOn(id=" + id + ")");
}
sb.append("\telapsed");
sb.append("\tchunksIn");
sb.append("\tunitsIn");
sb.append("\tchunksOut");
sb.append("\tunitsOut");
sb.append("\taccessPathDups");
sb.append("\taccessPathCount");
sb.append("\taccessPathRangeCount");
sb.append("\taccessPathChunksIn");
sb.append("\taccessPathUnitsIn");
//{chunksIn=1,unitsIn=100,chunksOut=4,unitsOut=313,accessPathDups=0,accessPathCount=100,chunkCount=100,elementCount=313}
sb.append('\n');
return sb.toString();
}
/**
* Return a tabular representation of the query {@link RunState}.
*
* Note: You must holding the lock guarding the {@link RunState} to
* guarantee this method will return a consistent representation.
*
* @param label
* The state change level (startQ, startOp, haltOp).
* @param serviceId
* The node on which the operator will be / was executed.
* @param bopId
* The identifier for the bop which will be / was executed.
* @param shardId
* The index partition against which the operator was running and
* -1
if the operator was not evaluated against a
* specific index partition.
* @param fanIO
* This is the #of {@link IChunkMessage}s accepted by the
* operator ("fanIn" for startQ,startOp) or output by the
* operator ("fanOut" for haltOp). (Termination is decided by
* counting {@link IChunkMessage}s rather than by counting
* solutions or chunks of solutions.)
* @param cause
* The {@link Throwable} in a {@link IHaltOpMessage} and
* null
for other messages or if the
* {@link Throwable} was null.
* @param stats
* The statistics from the operator evaluation and
* null
unless {@link #haltOp(IHaltOpMessage)} is
* the invoker.
*/
private String getTableRow(final String label, final UUID serviceId,
final int bopId, final int shardId, final int fanIO,
final Throwable cause, final BOpStats stats) {
final StringBuilder sb = new StringBuilder();
final DateFormat dateFormat = DateFormat.getDateTimeInstance(
DateFormat.FULL, DateFormat.FULL);
final long elapsed = System.currentTimeMillis() - innerState.begin;
sb.append(innerState.queryId);
sb.append('\t');
sb.append(dateFormat.format(new Date(innerState.begin)));
sb.append('\t');
sb.append(elapsed);
sb.append('\t');
sb.append(Long.toString(innerState.stepCount.get()));
sb.append('\t');
sb.append(label);
sb.append('\t');
sb.append(Integer.toString(bopId));
// the serviceId : will be null unless scale-out.
sb.append('\t');
sb.append(serviceId == null ? "N/A" : serviceId.toString());
{
final BOp bop = innerState.bopIndex.get(bopId);
sb.append('\t');
sb.append(bop.getEvaluationContext());
sb.append('\t');
sb.append(bop.getProperty(BOp.Annotations.CONTROLLER,
BOp.Annotations.DEFAULT_CONTROLLER));
}
// the thrown cause.
sb.append('\t');
if (cause != null)
sb.append(cause.getLocalizedMessage());
// the operator.
sb.append('\t');
if (innerState.stepCount.get() == 1) {
/*
* For the startQ row @ nsteps==1, show the entire query. This is
* the only way people will be able to see the detailed annotations
* on predicates used in joins. New line characters are translated
* out to keep things in the table format.
*/
sb.append(BOpUtility.toString(innerState.query).replace('\n', ' '));
} else {
// Otherwise how just this bop.
sb.append(innerState.bopIndex.get(bopId).toString());
}
sb.append('\t');
sb.append(Integer.toString(shardId));
sb.append('\t');
sb.append(Integer.toString(fanIO));
sb.append('\t');
sb.append(innerState.serviceIds.size());
sb.append('\t');
sb.append(Long.toString(innerState.totalAvailableCount.get()));
sb.append('\t');
sb.append(Long.toString(innerState.totalRunningCount.get()));
sb.append('\t');
sb.append(Long.toString(innerState.totalLastPassRemainingCount.get()));
sb.append('\t');
sb.append(innerState.allDone.get());
final Integer[] bopIds = innerState.bopIndex.keySet().toArray(new Integer[0]);
Arrays.sort(bopIds);
for (int i = 0; i < bopIds.length; i++) {
final Integer id = bopIds[i];
final BOp bop = innerState.bopIndex.get(id);
if(!(bop instanceof PipelineOp))
continue; // skip non-pipeline operators.
final AtomicLong nrunning = innerState.runningMap.get(id);
final AtomicLong navailable = innerState.availableMap.get(id);
final Set> startedSet = innerState.startedOn.get(id);
final Set> doneSet = innerState.doneOn.get(id);
sb.append("\t" + (navailable == null ? "N/A" : navailable.get()));
sb.append("\t" + (nrunning == null ? "N/A" : nrunning.get()));
sb.append("\t" + (startedSet == null ? "N/A" : startedSet.size()));
sb.append("\t" + (doneSet == null ? "N/A" : doneSet.size()));
}
/*
* The statistics. This is at the end to keep the table pretty.
* Different kinds of operators may have additional statistics. They
* have to be explicitly handled here to format them into a table.
*/
if (stats != null) {
sb.append('\t');
sb.append(stats.elapsed.get());
sb.append('\t');
sb.append(stats.chunksIn.get());
sb.append('\t');
sb.append(stats.unitsIn.get());
sb.append('\t');
sb.append(stats.chunksOut.get());
sb.append('\t');
sb.append(stats.unitsOut.get());
if (stats instanceof PipelineJoinStats) {
final PipelineJoinStats t = (PipelineJoinStats) stats;
sb.append('\t');
sb.append(t.accessPathDups.get());
sb.append('\t');
sb.append(t.accessPathCount.get());
sb.append('\t');
sb.append(t.accessPathRangeCount.get());
sb.append('\t');
sb.append(t.accessPathChunksIn.get());
sb.append('\t');
sb.append(t.accessPathUnitsIn.get());
}
}
sb.append('\n');
return sb.toString();
}
} // class RunState