/**
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved.
Contact:
SYSTAP, LLC DBA Blazegraph
2501 Calvert ST NW #106
Washington, DC 20008
[email protected]
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
* Created on Aug 21, 2010
*/
package com.bigdata.bop.engine;
import java.lang.ref.WeakReference;
import java.lang.reflect.Constructor;
import java.rmi.RemoteException;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.UUID;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.CopyOnWriteArraySet;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.FutureTask;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.PriorityBlockingQueue;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicReference;
import java.util.concurrent.locks.Condition;
import java.util.concurrent.locks.ReentrantLock;
import org.apache.log4j.Logger;
import org.eclipse.jetty.client.HttpClient;
import com.bigdata.bop.BOp;
import com.bigdata.bop.BOpUtility;
import com.bigdata.bop.IBindingSet;
import com.bigdata.bop.IQueryAttributes;
import com.bigdata.bop.PipelineOp;
import com.bigdata.bop.bindingSet.ListBindingSet;
import com.bigdata.bop.fed.QueryEngineFactory;
import com.bigdata.btree.BTree;
import com.bigdata.btree.IndexSegment;
import com.bigdata.btree.view.FusedView;
import com.bigdata.cache.ConcurrentWeakValueCache;
import com.bigdata.concurrent.FutureTaskMon;
import com.bigdata.counters.CounterSet;
import com.bigdata.counters.ICounterSetAccess;
import com.bigdata.journal.ConcurrencyManager;
import com.bigdata.journal.IIndexManager;
import com.bigdata.journal.Journal;
import com.bigdata.rawstore.IRawStore;
import com.bigdata.rdf.sail.webapp.client.HttpClientConfigurator;
import com.bigdata.resources.IndexManager;
import com.bigdata.service.IBigdataFederation;
import com.bigdata.service.IDataService;
import com.bigdata.util.DaemonThreadFactory;
import com.bigdata.service.geospatial.GeoSpatialCounters;
import com.bigdata.util.InnerCause;
import com.bigdata.util.concurrent.IHaltable;
/**
* A class managing execution of concurrent queries against a local
* {@link IIndexManager}.
*
*
* Design notes
*
* Much of the complexity of the current approach owes itself to having to run a
* separate task for each join for each shard in order to have the appropriate
* lock when running against the unisolated shard view. This also means that the
* join task is running inside of the concurrency manager and hence has the
* local view of the shard.
*
* The main, and perhaps the only, reason why we run unisolated rules is during
* closure, when we query against the unisolated indices and then write the
* entailments back on the unisolated indices.
*
* Supporting closure has always been complicated. This complexity is mostly
* handled by ProgramTask#executeMutation() and
* AbstractTripleStore#newJoinNexusFactory() which play games with the
* timestamps used to read and write on the database, with commit points
* designed to create visibility for tuples written by a mutation rule, and with
* the automated advance of the read timestamp for the query in each closure
* pass in order to make newly committed tuples visible to subsequent rounds of
* closure. For scale-out, we do shard-wise auto commits so we always have a
* commit point which makes each write visible and the read timestamp is
* actually a read-only transaction which prevents the historical data we need
* during a closure round from being released as we are driving updates onto the
* federation. For the RWStore, we are having a similar problem (in the HA
* branch since that is where we are working on the RWStore) where historically
* allocated records were being released as writes drove updates on the indices.
* Again, we "solved" the problem for the RWStore using a commit point followed
* by a read-only transaction reading on that commit point to hold onto the view
* on which the next closure round needs to read (this uncovered a problem with
* the RWStore and transaction service interaction which Martyn is currently
* working to resolve through a combination of shadow allocators and deferred
* deletes which are processed once the release time is advanced by the
* transaction service).
*
* The WORM does not have some of these problems with closure because we never
* delete history, so we do not need to create a commit point and a read-behind
* transaction. However, the WORM would have problems with concurrent access to
* the unisolated indices except that we hack that problem through the
* transparent use of the UnisolatedReadWriteIndex, which allows multiple
* threads to access the same unisolated index view using a read/write lock
* pattern (concurrent readers are allowed, but there is only one writer and it
* has exclusive access when it is running). This works out because we never run
* closure operations against the WORM through the concurrency manager. If we
* did, we would have to create a commit point after each mutation and use a
* read-behind transaction to prevent concurrent access to the unisolated index.
*
* The main advantage that I can see of the current complexity is that it allows
* us to do load+closure as a single operation on the WORM, resulting in a
* single commit point. This makes that operation ACID without having to use
* full read/write transactions. This is how we gain the ACID contract for the
* standalone Journal in the SAIL for the WORM. Of course, the SAIL does not
* have that contract for the RWStore because we have to do the commit and
* read-behind transaction in order to have visibility and avoid concurrent
* access to the unisolated index (by reading behind on the last commit point).
*
* I think that the reality is even one step more complicated. When doing truth
* maintenance (incremental closure), we bring the temporary graph to a fixed
* point (the rules write on the temp store) and then apply the delta in a
* single write to the database. That suggests that incremental truth
* maintenance would continue to be ACID, but that database-at-once-closure
* would be round-wise ACID.
*
* So, I would like to suggest that we break ACID for database-at-once-closure
* and always follow the pattern of (1) do a commit before each round of
* closure; and (2) create a read-behind transaction to prevent the release of
* that commit point as we drive writes onto the indices. If we follow this
* pattern then we can write on the unisolated indices without conflict and read
* on the historical views without conflict. Since there will be a commit point
* before each mutation rule runs (which corresponds to a closure round),
* database-at-once-closure will be atomic within a round, but will not be a
* single atomic operation. Per above, I think that we would retain the ACID
* property for incremental truth maintenance against a WORM or RW mode Journal.
*
*
* ----
*
*
* The advantage of this proposal (commit before each mutation rule and run
* query against a read-behind transaction) is that this could enormously
* simplify how we execute joins.
*
* Right now, we use a factory pattern to create a join task on each node for
* each shard for which that node receives binding sets for a query. The main
* reason for doing this is to gain the appropriate lock for the unisolated
* index. If we never run a query against the unisolated index then we can go
* around the concurrency manager and run a single "query manager" task for all
* joins for all shards for all queries. This has some great benefits which I
* will go into below.
*
* That "query manager" task would be responsible for accepting buffers
* containing elements or binding sets from other nodes and scheduling
* consumption of those data based on various criteria (order of arrival,
* priority, buffer resource requirements, timeout, etc.). This manager task
* could use a fork/join pool to execute lightweight operations (NIO,
* formulation of access paths from binding sets, mapping of binding sets onto
* shards, joining a chunk already read from an access path against a binding
* set, etc). Operations which touch the disk need to run in their own thread
* (until we get Java 7 async file IO, which is already available in a preview
* library). We could handle that by queuing those operations against a fixed
* size thread pool for reads.
*
* This is a radical change in how we handle distributed query execution, but I
* think that it could have a huge payoff by reducing the complexity of the join
* logic, making it significantly easier to execute different kinds of join
* operations, reducing the overhead for acquiring locks for the unisolated
* index views, reducing the #of threads consumed by joins (from one per shard
* per join per query to a fixed pool of N threads for reads), etc. It would
* centralize the management of resources on each node and make it possible for
* us to handle things like join termination by simply purging data from the
* query manager task for the terminated join.
*
* @author Bryan Thompson
*
* @todo Expander patterns will continue to exist until we handle the standalone
* backchainers in a different manner for scale-out, so add support for
* those for now.
*
* @todo There is going to be a relationship between recycling of intermediates
* (for individual {@link BOp}s or {@link BOp} tree fragments) and a
* distributed query cache which handles invalidation (for updates) and
* {@link BOp} aware reuse of result sets available in the cache. This
* sort of thing will have to be coordinated among the cache nodes.
*/
public class QueryEngine implements IQueryPeer, IQueryClient, ICounterSetAccess {
private final static transient Logger log = Logger
.getLogger(QueryEngine.class);
/**
* Error message used if a query is not running.
*/
protected static final transient String ERR_QUERY_NOT_RUNNING = "Query is not running:";
/**
* Annotations understood by the {@link QueryEngine}.
*
* @author Bryan Thompson
*/
public interface Annotations extends PipelineOp.Annotations {
/**
* Annotation may be used to impose a specific {@link UUID} for a query.
* This may be used by an external process such that it can then use
* {@link QueryEngine#getRunningQuery(UUID)} to gain access to the
* running query instance. It is an error if there is a query already
* running with the same {@link UUID}.
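*
* A minimal usage sketch (illustrative only; assumes [query] is a
* {@link PipelineOp} whose annotations map {@link #QUERY_ID} to
* [queryId]):
*
* <pre>
* queryEngine.eval(query);
* // An external process can then locate and, e.g., cancel the query:
* final IRunningQuery running = queryEngine.getRunningQuery(queryId);
* if (running != null)
*     running.cancel(true); // mayInterruptIfRunning
* </pre>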
*/
String QUERY_ID = QueryEngine.class.getName() + ".queryId";
/**
* The name of the {@link IRunningQuery} implementation class which will
* be used to evaluate a query marked by this annotation (optional). The
* specified class MUST implement {@link IRunningQuery} and MUST have a
* constructor with the following signature:
*
* <pre>
* public MyRunningQuery(QueryEngine queryEngine, UUID queryId,
*         boolean controller, IQueryClient clientProxy,
*         PipelineOp query, IChunkMessage<IBindingSet> realSource)
* </pre>
*
* Note that classes derived from {@link QueryEngine} may override
* {@link QueryEngine#newRunningQuery(QueryEngine, UUID, boolean, IQueryClient, PipelineOp, IChunkMessage, IRunningQuery)}
* in which case they might not support this option.
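*
* For example, a conforming implementation might simply extend the
* default (a sketch; the class name is hypothetical):
*
* <pre>
* public class MyRunningQuery extends ChunkedRunningQuery {
*     public MyRunningQuery(QueryEngine queryEngine, UUID queryId,
*             boolean controller, IQueryClient clientProxy,
*             PipelineOp query, IChunkMessage<IBindingSet> realSource) {
*         super(queryEngine, queryId, controller, clientProxy, query,
*                 realSource);
*     }
* }
* </pre>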
*/
String RUNNING_QUERY_CLASS = QueryEngine.class.getName()
+ ".runningQueryClass";
// String DEFAULT_RUNNING_QUERY_CLASS = StandaloneChainedRunningQuery.class.getName();
String DEFAULT_RUNNING_QUERY_CLASS = ChunkedRunningQuery.class.getName();
}
/**
* Return a {@link CounterSet} which reports various statistics for the
* {@link QueryEngine}.
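*
* A usage sketch (illustrative):
*
* <pre>
* final CounterSet root = queryEngine.getCounters();
* System.out.println(root.toString());
* </pre>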
*/
@Override
public CounterSet getCounters() {
final CounterSet root = new CounterSet();
// Note: This counter is not otherwise tracked.
counters.deadlineQueueSize.set(deadlineQueue.size());
// global counters.
root.attach(counters.getCounters());
// geospatial counters
final CounterSet geoSpatial = root.makePath("GeoSpatial");
geoSpatial.attach(geoSpatialCounters.getCounters());
// // counters per tagged query group.
// {
//
// final CounterSet groups = root.makePath("groups");
//
// final Iterator<Map.Entry<String, Counters>> itr = groupCounters
// .entrySet().iterator();
//
// while (itr.hasNext()) {
//
// final Map.Entry<String, Counters> e = itr.next();
//
// final String tag = e.getKey();
//
// final Counters counters = e.getValue();
//
// // Note: path component may not be empty!
// groups.makePath(tag == null || tag.length() == 0 ? "None" : tag)
// .attach(counters.getCounters());
//
// }
//
// }
return root;
}
/**
* Counters at the global level.
*/
final protected QueryEngineCounters counters = newCounters();
/**
* GeoSpatial counters
*/
final protected GeoSpatialCounters geoSpatialCounters = newGeoSpatialCounters();
// /**
// * Statistics for queries which are "tagged" so we can recognize their
// * instances as members of some group.
// */
// final protected ConcurrentHashMap<String, Counters> groupCounters = new ConcurrentHashMap<String, Counters>();
// /**
// * Factory for {@link Counters} instances associated with a query group. A
// * query is marked as a member of a group using {@link QueryHints#TAG}. This
// * is typically used to mark queries which are instances of the same query
// * template.
// *
// * @param tag
// * The tag identifying a query group.
// *
// * @return The {@link Counters} for that query group.
// *
// * @throws IllegalArgumentException
// * if the argument is <code>null</code>.
// */
// protected Counters getCounters(final String tag) {
//
// if(tag == null)
// throw new IllegalArgumentException();
//
// Counters c = groupCounters.get(tag);
//
// if (c == null) {
//
// c = new Counters();
//
// final Counters tmp = groupCounters.putIfAbsent(tag, c);
//
// if (tmp != null) {
//
// // someone else won the data race.
// c = tmp;
//
// }
//
// }
//
// return c;
//
// }
/**
* Extension hook for new {@link QueryEngineCounters} instances.
*/
protected QueryEngineCounters newCounters() {
return new QueryEngineCounters();
}
/**
* Extension hook for new {@link GeoSpatialCounters} instances.
*/
protected GeoSpatialCounters newGeoSpatialCounters() {
return new GeoSpatialCounters();
}
/**
* The {@link QueryEngineCounters} object for this {@link QueryEngine}.
*/
protected QueryEngineCounters getQueryEngineCounters() {
return counters;
}
/**
* The {@link GeoSpatialCounters} object for this {@link QueryEngine}.
*/
public GeoSpatialCounters getGeoSpatialCounters() {
return geoSpatialCounters;
}
/**
* Access to the local indices.
*
* Note: You MUST NOT use unisolated indices without obtaining the necessary
* locks. The {@link QueryEngine} is intended to run only against committed
* index views for which no locks are required.
*/
private final IIndexManager localIndexManager;
/**
* The {@link HttpClient} is used to make remote HTTP connections (SPARQL
* SERVICE call joins).
*/
private final AtomicReference<HttpClient> clientConnectionManagerRef = new AtomicReference<HttpClient>();
// /**
// * A pool used to service IO requests (reads on access paths).
// *
// * Note: An IO thread pool at this level must attach threads to operations
// * (access path reads) rather than to individual IO requests. In order to do
// * this at the level of individual IOs the pool would have to be integrated
// * into a lower layer, probably wrapping {@link FileChannelUtility}.
// */
// private final Executor iopool;
// /**
// * A pool for executing fork/join tasks servicing light weight tasks which
// * DO NOT block on IO. Examples of such tasks abound, including: NIO for
// * sending/receiving direct {@link ByteBuffer}s containing binding sets,
// * elements, solutions, etc; formulation of access paths from binding sets;
// * mapping of binding sets onto shards; joining a chunk already read from an
// * access path against a binding set; etc. What all of these tasks have in
// * common is that they DO NOT touch the disk. Until we get Java7 and async
// * I/O, operations which touch the disk CAN NOT be combined with the fork /
// * join model since they will trap the thread in which they are running
// * (this is not true for {@link Lock}s).
// *
// * Note: In order to enable the {@link ForkJoinPool} using Java6, you MUST
// * run java with -Xbootclasspath/p:jsr166.jar, where you
// * specify the fully qualified path of the jsr166.jar file.
// */
// private final ForkJoinPool fjpool;
@Override
public UUID getServiceUUID() {
return ((IRawStore) localIndexManager).getUUID();
}
/**
* The {@link IBigdataFederation} iff running in scale-out.
*
* Note: The {@link IBigdataFederation} is required in scale-out in order to
* perform shard locator scans when mapping binding sets across the next
* join in a query plan.
*/
public IBigdataFederation<?> getFederation() {
return null;
}
/**
* The local index manager, which provides direct access to local
* {@link BTree} and {@link IndexSegment} objects. In scale-out, this is the
* {@link IndexManager} inside the {@link IDataService} and provides direct
* access to {@link FusedView}s (aka shards).
*
* Note: You MUST NOT use unisolated indices without obtaining the necessary
* locks. The {@link QueryEngine} is intended to run only against committed
* index views for which no locks are required.
*/
public IIndexManager getIndexManager() {
return localIndexManager;
}
/**
* Return the {@link ConcurrencyManager} for the {@link #getIndexManager()
* local index manager}.
*/
public ConcurrencyManager getConcurrencyManager() {
return ((Journal) localIndexManager).getConcurrencyManager();
}
/**
* The RMI proxy for this {@link QueryEngine} when used as a query controller.
* The default implementation returns this.
*/
public IQueryClient getProxy() {
return this;
}
/**
* Return the {@link HttpClient} used to make remote SERVICE call requests.
*/
public HttpClient getClientConnectionManager() {
HttpClient cm = clientConnectionManagerRef.get();
if (cm == null) {
// Note: Deliberate use of the ref as a monitor object.
synchronized (clientConnectionManagerRef) {
cm = clientConnectionManagerRef.get();
if (cm == null) {
if (!isRunning()) {
/*
* Shutdown.
*/
throw new IllegalStateException();
}
/*
* Lazy instantiation.
*/
clientConnectionManagerRef
.set(cm = HttpClientConfigurator
.getInstance().newInstance());
}
}
}
return cm;
}
/**
* Return <code>true</code> iff running against an
* {@link IBigdataFederation}.
*/
public boolean isScaleOut() {
return false;
}
/**
* Lock used to guard register / halt of a query.
*/
private final ReentrantLock lock = new ReentrantLock();
/**
* Signaled when no queries are running.
*/
private final Condition nothingRunning = lock.newCondition();
/**
* The currently executing queries.
*/
private final ConcurrentHashMap<UUID, AbstractRunningQuery> runningQueries = new ConcurrentHashMap<UUID, AbstractRunningQuery>();
/**
* LRU cache used to handle problems with asynchronous termination of
* running queries.
*
* Note: Holding onto the query references here might pin memory retained by
* those queries. However, all we really need is the Haltable (Future) of
* that query in this map.
*
* @todo This should not be much of a hot spot even though it is not thread
* safe, but the synchronized() call could force cache stalls anyway. A
* concurrent hash map with an approximate LRU access policy might be
* a better choice.
*
* @todo The maximum cache capacity here is a SWAG. It should be large
* enough that we can not have a false cache miss on a system which is
* heavily loaded by a bunch of light queries.
*/
private final LinkedHashMap<UUID, IHaltable<Void>> doneQueries = new LinkedHashMap<UUID, IHaltable<Void>>(
16/* initialCapacity */, .75f/* loadFactor */, true/* accessOrder */) {
private static final long serialVersionUID = 1L;
@Override
protected boolean removeEldestEntry(Map.Entry<UUID, IHaltable<Void>> eldest) {
return size() > 100/* maximumCacheCapacity */;
}
};
/**
* A high concurrency cache operating as an LRU designed to close a data
* race between the asynchronous start of a submitted query or update
* operation and the explicit asynchronous CANCEL of that operation using
* its pre-assigned {@link UUID}.
*
* When a CANCEL request is received, we probe both the
* {@link #runningQueries} and the {@link #doneQueries}. If no operation is
* associated with that request, then we probe the running UPDATE
* operations. Finally, if no such operation was discovered, then the
* {@link UUID} of the operation to be cancelled is entered into this
* collection.
*
* Before a query starts, we consult the {@link #pendingCancelLRU}. If the
* {@link UUID} of the query is discovered, then the query is cancelled
* rather than run.
*
* Note: The capacity of the backing hard reference queue is quite small.
* {@link UUID}s are only entered into this collection if a CANCEL request
* is asynchronously received either (a) before; or (b) long enough after a
* query or update is executed that it is not found in either the running
* queries map or the recently done queries map.
*
* TODO There are some cases that are not covered by this. First, we do not
* have {@link UUID}s for all REST API methods and thus they can not all be
* cancelled. If we allowed an HTTP header to specify the UUID of the
* request, then we could associate a UUID with all requests. The ongoing
* refactor to support clean interrupt of NSS requests (#753) and the
* ongoing refactor to support concurrent unisolated operations against the
* same journal (#566) will provide us with the mechanisms to identify all
* such operations so we can check their assigned UUIDs and cancel them when
* requested.
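*
* To illustrate the intended protocol, a CANCEL handler might look
* roughly like this (a sketch only; the surrounding handler is
* hypothetical and lives outside this class):
*
* <pre>
* // Probe for a running operation first.
* final IRunningQuery q = queryEngine.getRunningQuery(queryId);
* if (q != null) {
*     q.cancel(true); // found: cancel it directly.
* } else {
*     // Not found: remember the CANCEL so a late-arriving query
*     // with this UUID is cancelled rather than run.
*     queryEngine.addPendingCancel(queryId);
* }
* </pre>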
*
* @see REST API Query
* Cancellation
* @see HA doLocalAbort()
* should interrupt NSS requests and AbstractTasks
* @see Concurrent unisolated
* operations against multiple KBs on the same Journal
* @see #startEval(UUID, PipelineOp, Map, IChunkMessage)
*/
private final ConcurrentWeakValueCache<UUID, UUID> pendingCancelLRU = new ConcurrentWeakValueCache<>(
50/* queueCapacity (SWAG, but see above) */);
/**
* Add a query {@link UUID} to the LRU of query identifiers for which we
* have received a CANCEL request, but were unable to find a running QUERY,
* recently done query, or running UPDATE request.
*
* @param queryId
* The UUID of the operation to be cancelled.
*
* @see REST API Query
* Cancellation
*/
public void addPendingCancel(final UUID queryId) {
if (queryId == null)
throw new IllegalArgumentException();
pendingCancelLRU.putIfAbsent(queryId, queryId);
}
/**
* Return <code>true</code> iff the {@link UUID} is in the collection of
* {@link UUID}s for which we have already received a CANCEL request.
*
* Note: The {@link UUID} is removed from the pending cancel collection as a
* side-effect.
*
* @param queryId
* The {@link UUID} of the operation.
*
* @return <code>true</code> if that operation has already been marked for
* cancellation.
*/
public boolean pendingCancel(final UUID queryId) {
if (queryId == null)
throw new IllegalArgumentException();
return pendingCancelLRU.remove(queryId) != null;
}
/**
* A queue of {@link ChunkedRunningQuery}s having binding set chunks available for
* consumption.
*
* @todo Handle priority for selective queries based on the time remaining
* until the timeout.
*
* Handle priority for unselective queries based on the order in which
* they are submitted?
*
* Be careful when testing out a {@link PriorityBlockingQueue} here.
* First, that collection is unbounded but backed by an array, so it
* can be expected to incur resize costs if the queue size becomes
* too large. Second,
* either {@link ChunkedRunningQuery} needs to implement an appropriate
* {@link Comparator} or we need to pass one into the constructor for
* the queue.
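*
* A deadline-based ordering might be sketched as follows (purely
* illustrative; not part of the current design):
*
* <pre>
* final Comparator<AbstractRunningQuery> byDeadline =
*         new Comparator<AbstractRunningQuery>() {
*     public int compare(final AbstractRunningQuery a,
*             final AbstractRunningQuery b) {
*         return Long.compare(a.getDeadline(), b.getDeadline());
*     }
* };
* // e.g., new PriorityBlockingQueue<AbstractRunningQuery>(capacity, byDeadline)
* </pre>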
*/
final private BlockingQueue<AbstractRunningQuery> priorityQueue = new LinkedBlockingQueue<AbstractRunningQuery>();
// final private BlockingQueue<AbstractRunningQuery> priorityQueue = new PriorityBlockingQueue<AbstractRunningQuery>(
// );
/**
* A queue arranged in order of increasing deadline times. Only queries with
* an explicit deadline are added to this priority queue. The head of the
* queue contains the query whose deadline will expire soonest. A thread can
* thus poll the head of the queue to determine whether the deadline would
* have passed. Such queries can be removed from the queue and their
* {@link AbstractRunningQuery#checkDeadline()} method invoked to force
* their timely termination.
*
* {@link AbstractRunningQuery#startOp(IStartOpMessage)} and
* {@link AbstractRunningQuery#haltOp(IHaltOpMessage)} check to see if the
* deadline for a query has expired. However, those methods are only invoked
* when a query plan operator starts and halts. In cases where the query is
* compute bound within a single operator (e.g., ORDER BY or an unconstrained
* cross-product JOIN), the query will not be checked for termination. This
* priority queue is used to ensure that the query deadline is tested even
* though it may be in a compute bound operator.
*
* If the deadline has expired, {@link IRunningQuery#cancel(boolean)} will
* be invoked. In order for a compute bound operator to terminate in a
* timely fashion, it MUST periodically test {@link Thread#interrupted()}.
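*
* For example, the inner loop of a compute bound operator might poll the
* interrupt status (a sketch; the work methods are hypothetical):
*
* <pre>
* while (hasMoreWork()) {
*     if (Thread.interrupted()) {
*         // Deadline expired or the query was cancelled: halt now.
*         throw new RuntimeException(new InterruptedException());
*     }
*     doSomeWork();
* }
* </pre>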
*
* Note: The deadline of a query may be set at most once. Thus, a query
* which is entered into the {@link #deadlineQueue} may not have its
* deadline modified. This means that we do not have to search the priority
* queue for an existing reference to the query. It also means that we are
* able to store an object that wraps the query with a {@link WeakReference}
* and thus can avoid pinning the query on the heap until its deadline
* expires. That means that we do not need to remove an entry from the
* deadline queue each time a query terminates, but we do need to
* periodically trim the queue to ensure that queries with distant deadlines
* do not hang around in the queue for long periods of time after their
* deadline has expired. This can be done by scanning the queue and removing
* all entries whose {@link WeakReference} has been cleared.
*
* @see Query timeout only checked at operator start/stop.
*/
final private PriorityBlockingQueue<QueryDeadline> deadlineQueue = new PriorityBlockingQueue<QueryDeadline>();
/**
* Queries with a deadline that lies significantly in the future can lie
* around in the priority queue until that deadline is reached if there are
* other queries in front of them that are not terminated and whose deadline
* has not been reached. Therefore, periodically, we need to scan the queue
* and clear out entries for terminated queries. This is done any time the
* size of the queue is at least this many elements when we examine the
* queue in {@link #checkDeadlines()}.
*/
final static private int DEADLINE_QUEUE_SCAN_SIZE = 200;
/**
* The maximum interval before we will check the deadline priority queue
* for queries that need to be terminated because their deadline has
* expired.
*/
final static private long DEADLINE_CHECK_MILLIS = 100;
/**
* Add the query to the deadline priority queue.
*
* @exception IllegalArgumentException
* if the query deadline has not been set.
*
* @see Query timeout only checked at operator start/stop.
*/
void addQueryToDeadlineQueue(final AbstractRunningQuery query) {
final long deadline = query.getDeadline();
if (deadline == Long.MAX_VALUE) {
/*
* Do not allow queries with an unbounded deadline into the priority
* queue.
*/
throw new IllegalArgumentException();
}
final long deadlineNanos = TimeUnit.MILLISECONDS.toNanos(deadline);
deadlineQueue.add(new QueryDeadline(deadlineNanos, query));
}
/**
* Scan the priority queue of queries with a specified deadline, halting any
* queries whose deadline has expired.
*/
static private void checkDeadlines(final long nowNanos,
final PriorityBlockingQueue<QueryDeadline> deadlineQueue) {
/*
* While the queue is thread safe, we want at most one thread at a time
* to be inspecting the queue for queries whose deadlines have expired.
*/
synchronized (deadlineQueue) {
/*
* Check the head of the deadline queue for any queries whose
* deadline has expired.
*/
checkHeadOfDeadlineQueue(nowNanos, deadlineQueue);
if (deadlineQueue.size() > DEADLINE_QUEUE_SCAN_SIZE) {
/*
* Scan the deadline queue, removing entries for expired
* queries.
*/
scanDeadlineQueue(nowNanos, deadlineQueue);
}
}
}
/**
* Check the head of the deadline queue for any queries whose deadline has
* expired.
*/
static private void checkHeadOfDeadlineQueue(final long nowNanos,
final PriorityBlockingQueue<QueryDeadline> deadlineQueue) {
QueryDeadline x;
// remove the element at the head of the queue.
while ((x = deadlineQueue.poll()) != null) {
// test for query done or deadline expired.
if (x.checkDeadline(nowNanos) == null) {
/*
* This query is known to be done. It was removed from the
* priority queue above. We need to check the next element in
* the priority order to see whether it is also done.
*/
continue;
}
if (x.deadlineNanos > nowNanos) {
/*
* This query has not yet reached its deadline. That means that
* no other query in the deadline queue has reached its
* deadline. Therefore we are done for now.
*/
// Put the query back on the deadline queue.
deadlineQueue.add(x);
break;
}
}
}
/**
* Queries with a deadline that lies significantly in the future can lie
* around in the priority queue until that deadline is reached if there are
* other queries in front of them that are not terminated and whose deadline
* has not been reached. Therefore, periodically, we need to scan the queue
* and clear out entries for terminated queries.
*/
static private void scanDeadlineQueue(final long nowNanos,
final PriorityBlockingQueue<QueryDeadline> deadlineQueue) {
final List<QueryDeadline> c = new ArrayList<QueryDeadline>(
DEADLINE_QUEUE_SCAN_SIZE);
// drain up to that many elements.
deadlineQueue.drainTo(c, DEADLINE_QUEUE_SCAN_SIZE);
int ndropped = 0, nrunning = 0;
for (QueryDeadline x : c) {
if (x.checkDeadline(nowNanos) != null) {
// return this query to the deadline queue.
deadlineQueue.add(x);
nrunning++;
} else {
ndropped++;
}
}
if (log.isInfoEnabled())
log.info("Scan: threshold=" + DEADLINE_QUEUE_SCAN_SIZE
+ ", ndropped=" + ndropped + ", nrunning=" + nrunning
+ ", deadlineQueueSize=" + deadlineQueue.size());
}
/**
*
* @param localIndexManager
* The local index manager.
*/
public QueryEngine(final IIndexManager localIndexManager) {
if (localIndexManager == null)
throw new IllegalArgumentException();
this.localIndexManager = localIndexManager;
// this.iopool = new LatchedExecutor(indexManager.getExecutorService(),
// nThreads);
// this.iopool = Executors.newFixedThreadPool(nThreads,
// new DaemonThreadFactory(getClass().getName()));
// this.fjpool = new ForkJoinPool();
}
/**
* Initialize the {@link QueryEngine}. It will accept binding set chunks and
* run them against running queries until it is shutdown.
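*
* A minimal lifecycle sketch (names such as [queryPlan] are illustrative;
* in practice instances are usually obtained from the
* {@link QueryEngineFactory} rather than constructed directly):
*
* <pre>
* final QueryEngine queryEngine = new QueryEngine(indexManager);
* queryEngine.init();
* try {
*     final AbstractRunningQuery q = queryEngine.eval(queryPlan);
*     q.get(); // blocks until the query is done.
* } finally {
*     queryEngine.shutdown();
* }
* </pre>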
*/
public void init() {
final FutureTask<Void> ft = new FutureTaskMon<Void>(new QueryEngineTask(
priorityQueue, deadlineQueue), (Void) null);
if (engineFuture.compareAndSet(null/* expect */, ft)) {
engineService.set(Executors
.newSingleThreadExecutor(new DaemonThreadFactory(
QueryEngine.class + ".engineService")));
engineService.get().execute(ft);
} else {
throw new IllegalStateException("Already running");
}
}
/**
* {@link QueryEngine}s are used with a singleton pattern managed by the
* {@link QueryEngineFactory}. They are torn down automatically once they
* are no longer reachable. This behavior depends on not having any hard
* references back to the {@link QueryEngine}.
*/
@Override
protected void finalize() throws Throwable {
shutdownNow();
super.finalize();
}
/**
* The service on which we run the query engine. This is started by {@link #init()}.
*/
private final AtomicReference<ExecutorService> engineService = new AtomicReference<ExecutorService>();
/**
* The {@link Future} for the query engine. This is set by {@link #init()}.
*/
private final AtomicReference<FutureTask<Void>> engineFuture = new AtomicReference<FutureTask<Void>>();
/**
* Volatile flag is set for normal termination. When set, no new queries
* will be accepted but existing queries will run to completion.
*/
private volatile boolean shutdown = false;
/**
* Assert that the query engine has been initialized and is running.
*
* @throws IllegalStateException
* if the query engine was not initialized or is shutting down.
*/
protected void assertRunning() {
if (engineFuture.get() == null)
throw new IllegalStateException("Not initialized.");
if (shutdown)
throw new IllegalStateException("Shutting down.");
}
protected boolean isRunning() {
return engineFuture.get() != null && !shutdown;
}
/**
* Executes the {@link Runnable} on the local {@link IIndexManager}'s
* {@link ExecutorService}.
*
* @param r
* The {@link Runnable}.
*/
final protected void execute(final Runnable r) {
localIndexManager.getExecutorService().execute(r);
}
/**
* This {@link Runnable} submits chunks available for evaluation against running queries.
*
* Note: This is a static inner class in order to avoid a hard reference
* back to the outer {@link QueryEngine} object. This makes it possible
* for the JVM to finalize the {@link QueryEngine} if the application no
* longer holds a hard reference to it. The {@link QueryEngine} is then
* automatically closed from within its finalizer method.
*/
static private class QueryEngineTask implements Runnable {
final private BlockingQueue<AbstractRunningQuery> priorityQueue;
final private PriorityBlockingQueue<QueryDeadline> deadlineQueue;
public QueryEngineTask(
final BlockingQueue<AbstractRunningQuery> priorityQueue,
final PriorityBlockingQueue<QueryDeadline> deadlineQueue) {
if (priorityQueue == null)
throw new IllegalArgumentException();
if (deadlineQueue == null)
throw new IllegalArgumentException();
this.priorityQueue = priorityQueue;
this.deadlineQueue = deadlineQueue;
}
@Override
public void run() {
if(log.isInfoEnabled())
log.info("Running: " + this);
try {
final long deadline = TimeUnit.MILLISECONDS
.toNanos(DEADLINE_CHECK_MILLIS);
long mark = System.nanoTime();
long remaining = deadline;
while (true) {
try {
//log.warn("Polling deadline queue: remaining="+remaining+", deadlineCheckMillis="+DEADLINE_CHECK_MILLIS);
final AbstractRunningQuery q = priorityQueue.poll(
remaining, TimeUnit.NANOSECONDS);
final long now = System.nanoTime();
if ((remaining = deadline - (now - mark)) < 0) {
//log.error("Checking deadline queue");
/*
* Check for queries whose deadline is expired.
*
* Note: We only do this every DEADLINE_CHECK_MILLIS
* and then reset [mark] and [remaining].
*
* Note: In queue.poll(), we wait only up to
* the [remaining] time so that the next
* deadline check is not delayed.
*/
checkDeadlines(now, deadlineQueue);
mark = now;
remaining = deadline;
}
// Consume chunk already on queue for this query.
if (q != null && !q.isDone())
q.consumeChunk();
} catch (InterruptedException e) {
/*
* Note: Uncomment the stack trace here if you want to
* find where the query was interrupted.
*
* Note: If you want to find out who interrupted the
* query, then you can instrument BlockingBuffer#close()
* in PipelineOp#newBuffer(stats).
*/
if (log.isInfoEnabled())
log.info("Interrupted."
// ,e
);
return;
} catch (Throwable t) {
// log and continue
log.error(t, t);
continue;
}
} // while(true)
} finally {
if (log.isInfoEnabled())
log.info("QueryEngineTask is done.");
}
}
} // QueryEngineTask
/**
* Add a chunk of intermediate results for consumption by some query. The
* chunk will be attached to the query and the query will be scheduled for
* execution.
*
* @param msg
* A chunk of intermediate results.
*
* @return <code>true</code> if the chunk was accepted. This will return
* <code>false</code> if the query is done (including cancelled) or
* the query engine is shutdown. The {@link IChunkMessage} will have
* been {@link IChunkMessage#release() released} if it was not
* accepted.
*
* @throws IllegalArgumentException
* if the chunk is <code>null</code>.
* @throws IllegalStateException
* if the chunk is not materialized.
*/
protected boolean acceptChunk(final IChunkMessage<IBindingSet> msg) {
if (msg == null)
throw new IllegalArgumentException();
if (!msg.isMaterialized())
throw new IllegalStateException();
final AbstractRunningQuery q = getRunningQuery(msg.getQueryId());
if(q == null) {
/*
* The query is not registered on this node.
*/
throw new IllegalStateException();
}
// add chunk to the query's input queue on this node.
if (!q.acceptChunk(msg)) {
// query is no longer running.
msg.release();
return false;
}
if(!isRunning()) {
// query engine is no longer running.
msg.release();
return false;
}
// add query to the engine's task queue.
priorityQueue.add(q);
return true;
}
/**
* Shutdown the {@link QueryEngine} (blocking). The {@link QueryEngine} will
* not accept new queries, but existing queries will run to completion.
*/
public void shutdown() {
// normal termination.
shutdown = true;
lock.lock();
try {
while (!runningQueries.isEmpty()) {
try {
nothingRunning.await();
} catch (InterruptedException e) {
throw new RuntimeException(e);
}
}
} finally {
lock.unlock();
}
// hook for subclasses.
didShutdown();
// stop the query engine.
final Future> f = engineFuture.get();
if (f != null) {
if(log.isInfoEnabled())
log.info("Cancelling engineFuture: "+this);
f.cancel(true/* mayInterruptIfRunning */);
}
// stop the service on which we ran the query engine.
final ExecutorService s = engineService.get();
if (s != null) {
if(log.isInfoEnabled())
log.info("Terminating engineService: "+this);
s.shutdownNow();
}
final HttpClient cm = clientConnectionManagerRef.get();
if (cm != null) {
if (log.isInfoEnabled())
log.info("Terminating HttpClient: " + this);
try {
cm.stop();
} catch (Exception e) {
log.error("Problem shutting down HttpClient", e);
}
}
// clear the queues
priorityQueue.clear();
deadlineQueue.clear();
// clear references.
engineFuture.set(null);
engineService.set(null);
clientConnectionManagerRef.set(null);
}
/**
* Hook is notified by {@link #shutdown()} when all running queries have
* terminated.
*/
protected void didShutdown() {
}
/**
* Do not accept new queries and halt any running binding set chunk tasks.
*/
public void shutdownNow() {
shutdown = true;
/*
* Stop the QueryEngineTask: this is the task that accepts chunks that
* are available for evaluation and assigns them to the
* AbstractRunningQuery.
*/
final Future> f = engineFuture.get();
if (f != null) {
if (log.isInfoEnabled())
log.info("Cancelling engineFuture: " + this);
f.cancel(true/* mayInterruptIfRunning */);
}
// stop the service on which we ran the QueryEngineTask.
final ExecutorService s = engineService.get();
if (s != null) {
if (log.isInfoEnabled())
log.info("Terminating engineService: "+this);
s.shutdownNow();
}
final HttpClient cm = clientConnectionManagerRef.get();
if (cm != null) {
if (log.isInfoEnabled())
log.info("Terminating HttpClient: " + this);
try {
cm.stop();
} catch (Exception e) {
log.error("Problem stopping HttpClient", e);
}
}
// halt any running queries.
for(AbstractRunningQuery q : runningQueries.values()) {
q.cancel(true/*mayInterruptIfRunning*/);
}
// clear the queues
priorityQueue.clear();
deadlineQueue.clear();
// clear references.
engineFuture.set(null);
engineService.set(null);
clientConnectionManagerRef.set(null);
}
/*
* IQueryPeer
*/
@Override
@Deprecated // see IQueryClient
public void declareQuery(final IQueryDecl queryDecl) throws RemoteException {
throw new UnsupportedOperationException();
}
@Override
public void bufferReady(final IChunkMessage<IBindingSet> msg) {
throw new UnsupportedOperationException();
}
/**
* {@inheritDoc}
*
* The default implementation is a NOP.
*/
@Override
public void cancelQuery(final UUID queryId, final Throwable cause) {
// NOP
}
/*
* IQueryClient
*/
@Override
public PipelineOp getQuery(final UUID queryId) {
final AbstractRunningQuery q = getRunningQuery(queryId);
if (q == null)
throw new IllegalArgumentException();
return q.getQuery();
}
@Override
public void startOp(final IStartOpMessage msg) throws RemoteException {
final AbstractRunningQuery q = getRunningQuery(msg.getQueryId());
if (q != null) {
q.startOp(msg);
}
}
@Override
public void haltOp(final IHaltOpMessage msg) throws RemoteException {
final AbstractRunningQuery q = getRunningQuery(msg.getQueryId());
if (q != null) {
q.haltOp(msg);
}
}
// /**
// * Return an {@link IAsynchronousIterator} that will read a single, empty
// * {@link IBindingSet}.
// */
// private static ThickAsynchronousIterator<IBindingSet[]> newBindingSetIterator() {
//
// return newBindingSetIterator(new ListBindingSet());
//
// }
// /**
// * Return an {@link IAsynchronousIterator} that will read a single
// * {@link IBindingSet}.
// *
// * @param bindingSet
// * the binding set.
// */
// private static ThickAsynchronousIterator<IBindingSet[]> newBindingSetIterator(
// final IBindingSet bindingSet) {
//
// return new ThickAsynchronousIterator<IBindingSet[]>(
// new IBindingSet[][] { new IBindingSet[] { bindingSet } });
//
// }
// /**
// * Return an {@link IAsynchronousIterator} that will read the source
// * {@link IBindingSet}s.
// *
// * @param bsets
// * The source binding sets.
// */
// private static ThickAsynchronousIterator<IBindingSet[]> newBindingSetIterator(
// final IBindingSet[] bsets) {
//
// return new ThickAsynchronousIterator<IBindingSet[]>(
// new IBindingSet[][] { bsets });
//
// }
/** Use a random UUID unless the UUID was specified on the query. */
private static UUID getQueryUUID(final BOp op) {
return op.getProperty(QueryEngine.Annotations.QUERY_ID,
UUID.randomUUID());
}
/**
* Return the starting point for pipeline evaluation.
*/
private int getStartId(final BOp op) {
final BOp startOp = BOpUtility.getPipelineStart(op);
final int startId = startOp.getId();
return startId;
}
private LocalChunkMessage newLocalChunkMessage(final UUID queryId,
final BOp op, final IBindingSet src) {
return new LocalChunkMessage(this/* queryEngine */, queryId,
getStartId(op), -1 /* partitionId */, src);
}
private LocalChunkMessage newLocalChunkMessage(final UUID queryId,
final BOp op, final IBindingSet[] src) {
return new LocalChunkMessage(this/* queryEngine */, queryId,
getStartId(op), -1 /* partitionId */, src);
}
private LocalChunkMessage newLocalChunkMessage(final UUID queryId,
final BOp op, final IBindingSet[][] src) {
return new LocalChunkMessage(this/* queryEngine */, queryId,
getStartId(op), -1 /* partitionId */, src);
}
// /**
// * Return a {@link LocalChunkMessage} for the query wrapping the specified
// * source.
// *
// * @param queryId
// * The query's {@link UUID}.
// * @param op
// * The query.
// * @param solutionCount
// * The #of solutions which can be drained from that source.
// * @param src
// * The source to be wrapped.
// *
// * @return The message.
// *
// * @deprecated We are trying to get the {@link IAsynchronousIterator} out
// * of the API here.
// */
// private LocalChunkMessage newLocalChunkMessage(final UUID queryId,
// final BOp op, final int solutionCount,
// final IAsynchronousIterator<IBindingSet[]> src) {
//
// return new LocalChunkMessage(this/* queryEngine */, queryId,
// getStartId(op), -1 /* partitionId */, solutionCount, src);
//
// }
/**
* Evaluate a query. This node will serve as the controller for the query.
*
* @param op
* The query to evaluate.
*
* @return The {@link IRunningQuery}.
*
* @throws IllegalStateException
* if the {@link QueryEngine} has been {@link #shutdown()}.
* @throws Exception
*/
public AbstractRunningQuery eval(final BOp op) throws Exception {
return eval(op, new ListBindingSet());
}
/**
* Evaluate a query. This node will serve as the controller for the query.
*
* @param op
* The query to evaluate.
* @param bset
* The initial binding set to present.
*
* @return The {@link IRunningQuery}.
*
* @throws IllegalStateException
* if the {@link QueryEngine} has been {@link #shutdown()}.
* @throws Exception
*/
public AbstractRunningQuery eval(final BOp op, final IBindingSet bset)
throws Exception {
final UUID queryId = getQueryUUID(op);
return eval(queryId, (PipelineOp) op, null/* attributes */,
newLocalChunkMessage(queryId, op, bset));
}
/**
* Note: Used only by the test suite.
*/
public AbstractRunningQuery eval(final UUID queryId, final BOp op,
final IBindingSet bset) throws Exception {
return eval(queryId, (PipelineOp) op, null/* attributes */,
newLocalChunkMessage(queryId, op, bset));
}
/**
* Note: Used only by the test suite.
*/
public AbstractRunningQuery eval(final UUID queryId, final BOp op,
final Map