![JAR search and dependency download from the Maven repository](/logo.png)
com.bigdata.bop.fed.FederatedRunningQuery Maven / Gradle / Ivy
/**
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved.
Contact:
SYSTAP, LLC DBA Blazegraph
2501 Calvert ST NW #106
Washington, DC 20008
[email protected]
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
* Created on Sep 6, 2010
*/
package com.bigdata.bop.fed;
import java.rmi.RemoteException;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.UUID;
import java.util.concurrent.ConcurrentHashMap;
import org.apache.log4j.Logger;
import com.bigdata.bop.BOp;
import com.bigdata.bop.BOpEvaluationContext;
import com.bigdata.bop.IBindingSet;
import com.bigdata.bop.PipelineOp;
import com.bigdata.bop.engine.ChunkedRunningQuery;
import com.bigdata.bop.engine.IChunkHandler;
import com.bigdata.bop.engine.IChunkMessage;
import com.bigdata.bop.engine.IQueryClient;
import com.bigdata.bop.engine.IQueryPeer;
import com.bigdata.bop.engine.QueryEngine;
import com.bigdata.bop.engine.StaticAnalysisStats;
import com.bigdata.io.DirectBufferPoolAllocator.IAllocationContext;
import com.bigdata.service.IBigdataFederation;
/**
* Extends {@link ChunkedRunningQuery} to provide additional state and logic required
* to support distributed query evaluation against an {@link IBigdataFederation}
* .
*
* @author Bryan Thompson
* @version $Id: FederatedRunningQuery.java 3511 2010-09-06 20:45:37Z
* thompsonbry $
*/
public class FederatedRunningQuery extends ChunkedRunningQuery {
private final static transient Logger log = Logger
.getLogger(FederatedRunningQuery.class);
/**
* The {@link UUID} of the service which is the {@link IQueryClient} running
* this query.
*/
/*private*/ final UUID queryControllerUUID;
/**
* Used to map {@link IBindingSet}s across the federation.
*/
private final IChunkHandler chunkHandler;
/**
* A map of all allocation contexts associated with this query.
*
* FIXME Separate out the life cycle of the allocation from the allocation
* context. This is necessary so we can send chunks to multiple receivers or
* retry transmission if a receiver dies.
*
* FIXME When a chunk is transferred to the receiver, its allocation(s) must
* be released on the sender. That should be done based on the life cycle of
* the allocation. This implies that we should maintain a list for each life
* cycle so they can be cleared without scanning the set of all allocations.
*
* FIXME When the query is done, those allocations MUST be released
* (excepting only allocations associated with the solutions which need to
* be released as those data are consumed).
*
*
* Query terminates: releaseAllocationContexts(queryId)
* BOp terminates: releaseAllocationContexts(queryId,bopId)
* BOp+shard terminates: releaseAllocationContexts(queryId,bopId,shardId)
* receiver takes data: releaseAllocations(IChunkMessage)
*
*
* The allocation contexts which use a bopId MUST use the bopId of the
* operator which will consume the chunk. That way the producer can release
* outputs buffered for that consumer as they are transferred to the
* consumer. [If we leave the allocations in place until the bop evaluates
* the chunk then we could retry if the node running the bop fails.]
*
* Allocations which are tied to the life cycle of a {@link BOp}, rather the
* the chunks processed for that {@link BOp}, should use (queryId,bopId).
* For example, a DHT imposing distinct on a default graph access path has a
* life cycle linked to the join reading on that access path (not the
* predicate since the predicate is not part of the pipeline and is
* evaluated by the join rather than the query engine).
*
* Allocations tied to the life cycle of a query will not be released until
* the query terminates. For example, a temporary graph containing the
* ontology for a parallel distributed closure operation.
*/
private final ConcurrentHashMap allocationContexts = new ConcurrentHashMap();
/**
* Extended to release all allocations associated with the specified
* operator.
*
* {@inheritDoc}
*/
@Override
protected void releaseNativeMemoryForOperator(final int bopId) {
final Iterator> itr = allocationContexts
.entrySet().iterator();
while (itr.hasNext()) {
final Map.Entry e = itr
.next();
if (e.getKey().hasOperatorScope(bopId)) {
e.getValue().release();
}
}
}
/**
* Extended to release all {@link IAllocationContext}s associated with the
* query when it terminates.
*
* {@inheritDoc}
*/
@Override
protected void releaseNativeMemoryForQuery() {
for(IAllocationContext ctx : allocationContexts.values()) {
ctx.release();
}
allocationContexts.clear();
super.releaseNativeMemoryForQuery();
}
public FederatedRunningQuery(final FederatedQueryEngine queryEngine,
final UUID queryId, final boolean controller,
final IQueryClient clientProxy, final UUID queryControllerId,
final PipelineOp query, final IChunkMessage realSource) {
super(queryEngine, queryId, /* begin, */controller, clientProxy, query,
realSource);
if (queryControllerId == null)
throw new IllegalArgumentException();
this.queryControllerUUID = queryControllerId;
this.chunkHandler = query.getProperty(
FederatedQueryEngine.Annotations.CHUNK_HANDLER,
FederationChunkHandler.INSTANCE);
if (!getQuery().getEvaluationContext().equals(
BOpEvaluationContext.CONTROLLER)) {
/*
* In scale-out, the top-level operator in the query plan must be
* evaluated on the query controller in order for the solutions to
* be transferred to the query controller where they may be consumed
* by the client.
*/
throw new RuntimeException(
"The top-level of a query must be evaluated on the query controller: query="
+ getQuery());
}
// peers = isController() ? new ConcurrentHashMap()
// : null;
}
@Override
public FederatedQueryEngine getQueryEngine() {
return (FederatedQueryEngine) super.getQueryEngine();
}
/**
* Overridden to make this visible to the {@link FederatedQueryEngine}.
*
* {@inheritDoc}
*/
@Override
protected boolean acceptChunk(final IChunkMessage msg) {
return super.acceptChunk(msg);
}
/**
* Resolve the proxy for an {@link IQueryPeer}. This is special cased for
* both this service (the actual reference is returned) and the query
* controller (we use an alternative path to discover the query controller
* since it might not be registered against a lookup service if it is not a
* data service).
*
* @param serviceUUID
* The service identifier for the peer.
*
* @return The proxy for the service, the actual {@link QueryEngine}
* reference if the identified service is this service, or
* null
if the service could not be discovered.
*/
public IQueryPeer getQueryPeer(final UUID serviceUUID) {
if (serviceUUID == null)
throw new IllegalArgumentException();
final IQueryPeer queryPeer;
final FederatedQueryEngine queryEngine = getQueryEngine();
if(serviceUUID.equals(queryEngine.getServiceUUID())) {
/*
* Return a hard reference to this query engine (NOT a proxy).
*
* Note: This is used to avoid RMI when the message will be consumed
* by the service which produced that message. This is a deliberate
* performance optimization which is supported by all of the data
* structures involved.
*/
queryPeer = queryEngine;
// if(log.isTraceEnabled()) log.trace("Target is self: "+serviceUUID);
} else if (serviceUUID.equals(queryControllerUUID)) {
// The target is the query controller.
queryPeer = getQueryController();
// if(log.isTraceEnabled()) log.trace("Target is controller: "+serviceUUID);
} else {
// The target is some data service.
// if(log.isTraceEnabled()) log.trace("Target is peer: "+serviceUUID);
queryPeer = queryEngine.getQueryPeer(serviceUUID);
}
return queryPeer;
}
/**
* Return the {@link IAllocationContext} for the given key.
*
* @param key
* The key.
*
* @return The allocation context.
*/
public IAllocationContext getAllocationContext(
final AllocationContextKey key) {
final IAllocationContext ctx = getQueryEngine().getResourceService()
.getAllocator().getAllocationContext(key);
// note the allocation contexts associated with this running query.
final IAllocationContext ctx2 = allocationContexts
.putIfAbsent(key, ctx);
if (ctx2 != null)
return ctx2;
return ctx;
}
/**
* Overridden to notify each peer on which the query was started.
*
* {@inheritDoc}
*/
@Override
protected boolean cancelQueryOnPeers(final Throwable cause,
final Set startedOn) {
boolean cancelled = super.cancelQueryOnPeers(cause, startedOn);
final UUID queryId = getQueryId();
for (UUID serviceId : startedOn) {
final IQueryPeer peer = getQueryPeer(serviceId);
try {
peer.cancelQuery(queryId, cause);
} catch (RemoteException e) {
/*
* If we do not manage to notify the peer then the peer will not
* release resources assigned to the query in a timely manner.
* However, assuming the peer eventually manages to send some
* data to another node which has been notified (or to the
* controller), then the peer will receive a notice that the
* query either has been cancelled or is unknown. Either way,
* the peer will eventually release its resources allocated to
* the query once it reconnects.
*/
log.error("Could not notify: " + peer, e);
}
cancelled = true;
}
return cancelled;
}
@Override
protected IChunkHandler getChunkHandler() {
return chunkHandler;
}
/**
* Overridden to broadcast to all nodes and/or shards on which the operator
* has run in scale-out. Broadcast will be to nodes if the operator is hash
* partitioned. Broadcast will be to shards if the operator is key-range
* partitioned.
*
* {@inheritDoc}
*
* @see BOpEvaluationContext
*/
@Override
protected void doLastPass(final int bopId, final Set doneOn) {
final BOp op = getBOp(bopId);
switch(op.getEvaluationContext()) {
case CONTROLLER:
super.doLastPass(bopId, doneOn);
return;
case ANY:
case HASHED: {
/*
* For both ANY and HASHED we want to send a last pass message to
* each service on which the operator was evaluated. For ANY, the
* set of such services will be discovered dynamically as the query
* is evaluated. For HASHED, the set of such services will be
* defined by the time we start evaluation of that operator.
*/
for (UUID serviceId : ((Set) doneOn)) {
final IChunkMessage emptyMessage = new EmptyChunkMessage(
getQueryController(), getQueryId(), bopId,
-1/* shardId */, true/* lastInvocation */);
try {
final IQueryPeer queryPeer = getQueryPeer(serviceId);
queryPeer.bufferReady(emptyMessage);
} catch (RemoteException e) {
throw new RuntimeException(
"Could not send message: serviceId=" + serviceId
+ " : " + e, e);
}
}
return;
}
case SHARDED: {
/*
* Generate a last pass message for each shard on which the operator
* was invoked.
*/
for (Integer shardId : ((Set) doneOn)) {
final IChunkMessage emptyMessage = new EmptyChunkMessage(
getQueryController(), getQueryId(), bopId, shardId,
true/* lastInvocation */);
acceptChunk(emptyMessage);
}
return;
}
default:
throw new AssertionError();
}
}
}