/*
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved.
Contact:
SYSTAP, LLC DBA Blazegraph
2501 Calvert ST NW #106
Washington, DC 20008
licenses@blazegraph.com
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
* Created on Apr 16, 2009
*/
package com.bigdata.service.ndx.pipeline;
import java.util.Iterator;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Executor;
import java.util.concurrent.Future;
import java.util.concurrent.FutureTask;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.locks.Condition;
import java.util.concurrent.locks.ReentrantLock;
import org.apache.log4j.Logger;
import com.bigdata.btree.keys.KVO;
import com.bigdata.mdi.PartitionLocator;
import com.bigdata.relation.accesspath.BlockingBuffer;
import com.bigdata.relation.accesspath.BufferClosedException;
import com.bigdata.relation.accesspath.IAsynchronousIterator;
import com.bigdata.resources.StaleLocatorException;
import com.bigdata.service.Split;
import com.bigdata.service.ndx.ISplitter;
import com.bigdata.util.concurrent.AbstractHaltableProcess;
/**
* Abstract base class for a master task which consumes chunks of elements
* written onto a {@link BlockingBuffer} and distributes those chunks to
* subtasks according to some abstraction which is not defined by this class.
*
* Design discussion
*
* The asynchronous write API exposes a blocking buffer to the application which
* accepts concurrent writes of {@link KVO}[] chunks, in which each {@link KVO}
* represents a tuple to be written on a scale-out index. The buffer is drained
* by a master task, which transfers chunks to sink tasks, each of which writes
* on a specific index partition.
*
* The master is provisioned with a CTOR which is used to convert the
* {@link KVO}s into unsigned byte[] keys and byte[] values for
* writes on the scale-out index. The master may be provisioned with a
* mechanism to filter out duplicate tuples.
*
* Writes on the index partitions are asynchronous with respect to the
* application. However, a {@link KVOC} / {@link KVOLatch} combination can be
* used to coordinate notification when some set of tuples of interest have been
* successfully written onto the scale-out index. This combination can also be
* used to pass back values from the write operation if they are assigned by
* side-effect onto the {@link KVO#obj} reference.
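*
* For illustration, a minimal application-side sketch of this pattern (the
* write buffer handle, the tuple construction, and the key/value encoding
* below are illustrative assumptions, not part of this class):
*
* <pre>
* // Obtain the asynchronous write buffer for the scale-out index
* // (illustrative; the exact factory method depends on the client API).
* final BlockingBuffer&lt;KVO&lt;Object&gt;[]&gt; writeBuffer = ...;
*
* // Assemble a chunk of tuples (unsigned byte[] keys, byte[] values).
* final KVO&lt;Object&gt;[] chunk = new KVO[] {
*         new KVO&lt;Object&gt;(new byte[] { 1 }, new byte[] { 10 }),
*         new KVO&lt;Object&gt;(new byte[] { 2 }, new byte[] { 20 }) };
*
* // Write the chunk; the master drains, splits, and routes it to the sinks.
* writeBuffer.add(chunk);
* </pre>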
*
* The asynchronous write implementation is divided into a master, with an input
* queue and a redirect queue, and sinks, each of which has an input queue and
* writes chunks onto a specific index partition for the scale-out index. The
* input queues for the master and the sinks are bounded. The redirect input
* queue is unbounded. The master and each sink is assigned its own worker
* thread.
*
* The master transfers chunks from its input queue to the sinks. It polls the
* redirect queue for a chunk. If that queue is empty, then it polls a chunk
* from the input queue. If no chunk is available from either queue, it polls again.
* The master stops polling the input queue when the input queue is closed, but
* it continues to drain the redirect queue until all sinks are done or the
* master is canceled.
*
* Note: The requirement for polling arises because: (a) we are not coordinating
* signals for the arrival of a chunk on the input or redirect queues; and (b) a
* chunk can be redirected at any time if there is an outstanding write by a
* sink on an index partition.
*
* The atomic decision to terminate the master is made using a {@link #lock}.
* The lock is specific to the life cycle of the sinks. The lock is held when a
* sink is created. When a sink terminates, its last action is to grab the lock
* and signal the {@link #subtaskDone} {@link Condition}. The master terminates
* when, while holding the lock, it observes that no sinks are running AND the
* redirect queue is empty. Since chunks are placed onto the redirect queue by
* sinks (and there are no sinks running) and by the master (which is not
* issuing a redirect since it is running its termination logic) these criteria
* are sufficient for termination. However, the sink must ensure that its buffer
* is closed before it terminates, even if it terminates by exception, so that
* an attempt to transfer a chunk to the sink will not block forever.
*
* Once the master is holding a chunk, it splits the chunk into a set of dense
* chunks correlated with the locators of the index partitions on which those
* chunks will be written. The split operation is NOT atomic, but it is
* consistent in the following sense. If a {@link Split} is identified based on
* old information, then the chunk will be directed to an index partition which
* no longer exists. An attempt to write on that index partition will result in
* a stale locator exception, which is handled.
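*
* As an illustration of this step, a concrete master (where the element type
* is {@link KVO} and the locator type is {@link PartitionLocator}) might
* implement handleChunk() roughly as follows. The computeSplits() helper and
* the cast of the split's partition metadata are illustrative assumptions
* standing in for the {@link ISplitter} machinery, not part of this class:
*
* <pre>
* protected void handleChunk(final KVO&lt;Object&gt;[] chunk, final boolean reopen)
*         throws InterruptedException {
*     // One Split per target index partition for the ordered chunk.
*     for (Split split : computeSplits(chunk)) {
*         // Route the dense sub-chunk to the sink for that index partition.
*         addToOutputBuffer((PartitionLocator) split.pmd, chunk,
*                 split.fromIndex, split.toIndex, reopen);
*     }
* }
* </pre>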
*
* Once the chunk has been split, the split chunks are transferred to the
* appropriate sink(s). Since the master is not holding any locks, a blocking
* put() may be used to transfer the chunk to the sink.
*
* The sink drains chunks from its input queue. If the input queue is empty and
* the idle timeout expires before a chunk is transferred to the input queue,
* then the sink will be asynchronously closed and an
* {@link IdleTimeoutException} will be set on the input queue. If the master
* attempts to transfer a chunk to the sink's input queue after the idle
* timeout, then an exception wrapping the idle timeout exception will be
* thrown. The master handles the wrapped idle timeout exception by re-opening
* the sink and will retry the transfer of the chunk to the (new) sink's input
* queue. After the sink closes its input queue by an idle timeout, it will
* continue to drain the input queue until it is empty, at which point the sink
* will terminate (this handles the case where the master concurrently
* transferred a chunk to the sink's input queue before it was closed by the
* idle time out).
*
* The sink combines chunks drained from its input queue until the target chunk
* size for a write is achieved or until the chunk timeout for the sink is
* exceeded. The sink then writes on the index partition corresponding to its
* locator. This write occurs in the thread assigned to the sink and the sink
* will block during the write request.
*
* If a stale locator exception is received by the sink in response to a write,
* it will: (a) notify the client of the stale locator exception; (b) close the
* input queue, setting the stale locator exception as the cause; (c) place the
* chunk for that write onto the master's (unbounded) redirect queue; and (d)
* drain its input queue and transfer the chunks to the master's redirect queue.
*
* If the master attempts to transfer a chunk to the input queue for a sink
* which has closed its input queue in response to a stale locator exception,
* then an exception will be thrown with the stale locator exception as the
* inner cause. The master will trap that exception and place the chunk on the
* redirect queue instead.
*
* If the sink RMI is successful, the sink will invoke the optional result
* handler and touch each tuple in the chunk using KVO#done(). These protocols
* can be used to pass results from asynchronous writes back to the application.
*
* @param <H>
* The generic type of the value returned by {@link Callable#call()}
* for the master.
* @param <E>
* The generic type of the elements in the chunks stored in the
* {@link BlockingBuffer}.
* @param <S>
* The generic type of the subtask implementation class.
* @param <L>
* The generic type of the locator object used to lookup a subtask in
* the internal map (must be unique and must implement hashCode() and
* equals() per their contracts).
*
* @author Bryan Thompson
* @version $Id$
* @see ISplitter
*
* @todo Update javadoc to reflect that the master no longer waits for a closed
* sink in {@link #getSink(Object, boolean)} but instead places the sink
* onto the {@link #finishedSubtaskQueue}.
*/
public abstract class AbstractMasterTask<//
H extends AbstractMasterStats, //
E, //
S extends AbstractSubtask, //
L>//
extends AbstractHaltableProcess implements Callable<H>, IMasterTask {
static protected transient final Logger log = Logger
.getLogger(AbstractMasterTask.class);
/**
* The top-level buffer on which the application is writing.
*/
protected final BlockingBuffer<E[]> buffer;
/**
* An unbounded queue of chunks for which a {@link StaleLocatorException} was
* received. When this buffer is not empty, it is drained by preference over
* the {@link #buffer}.
*
* This design allows us to avoid deadlocks when a sink is full and the
* master is blocked attempting to add another chunk to that sink. If a
* {@link StaleLocatorException} is thrown for the outstanding write by that
* sink, then this situation would deadlock since the {@link #lock} is
* already held and the sink is unable to drain.
*/
private final BlockingQueue<E[]> redirectQueue = new LinkedBlockingQueue<E[]>(/* unbounded */);
/**
* The #of chunks on the master's redirectQueue.
*/
public final int getRedirectQueueSize() {
return redirectQueue.size();
}
/**
* Places a chunk onto the master's redirectQueue.
*
* @param chunk
* The chunk.
*
* @throws InterruptedException
*/
protected final void redirectChunk(final E[] chunk)
throws InterruptedException {
/*
* @todo Acquiring the lock here should not be (and probably is not)
* necessary for the master's atomic termination condition.
*/
lock.lockInterruptibly();
try {
redirectQueue.put(chunk);
} finally {
lock.unlock();
}
}
public BlockingBuffer<E[]> getBuffer() {
return buffer;
}
/**
* The iterator draining the {@link #buffer}.
*
* Note: DO NOT close this iterator from within {@link #call()} as that
* would cause this task to interrupt itself!
*/
protected final IAsynchronousIterator<E[]> src;
/**
* Map from the index partition identifier to the open subtask handling
* writes bound for that index partition.
*/
private final ConcurrentHashMap<L, S> sinks;
/**
* Maps an operation across the subtasks.
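*
* A usage sketch (assuming {@link SubtaskOp} is parameterized by the sink
* type and declares the single call(sink) method used by this class):
*
* <pre>
* // Count the live sinks with a light weight operation (run from a subclass).
* final java.util.concurrent.atomic.AtomicInteger liveSinks =
*         new java.util.concurrent.atomic.AtomicInteger();
* mapOperationOverSubtasks(new SubtaskOp&lt;S&gt;() {
*     public void call(final S sink) throws Exception {
*         liveSinks.incrementAndGet();
*     }
* });
* </pre>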
*
* @param op
* The operation, which should be light weight
*
* @throws InterruptedException
* @throws ExecutionException
* if a subtask throws an exception.
*/
public void mapOperationOverSubtasks(final SubtaskOp op)
throws InterruptedException, ExecutionException {
final Iterator<S> itr = sinks.values().iterator();
while(itr.hasNext()) {
try {
op.call(itr.next());
} catch (Exception ex) {
throw new ExecutionException(ex);
}
}
}
/**
* Lock used for sink life cycle events only.
*/
private final ReentrantLock lock = new ReentrantLock();
/**
* Condition is signaled by a subtask when it is finished. This is used by
* {@link #awaitAll()} while waiting for subtasks to complete. If all
* subtasks in {@link #sinks} are complete when this signal is received
* then the master may terminate.
*/
private final Condition subtaskDone = lock.newCondition();
/**
* A queue of subtasks which have finished running. The master polls this
* queue in {@link #call()} in order to clear finished sinks from
* {@link #sinks} in a timely manner during normal operations.
* {@link #awaitAll()} and {@link #cancelAll(boolean)} both handle this in
* their own way.
*/
private final BlockingQueue<S> finishedSubtaskQueue = new LinkedBlockingQueue<S>();
/**
* Notify the master that a subtask is done. The subtask is placed onto the
* {@link #finishedSubtaskQueue} queue. The master polls that queue in
* {@link #call()} and {@link #awaitAll()} and checks the {@link Future} of
* each finished subtask using {@link #drainFutures()}. If a {@link Future}
* reports an error, then the master is halted. This is how we ensure that
* all subtasks complete normally.
*
* @param subtask
* The subtask.
*
* @throws InterruptedException
*/
protected void notifySubtaskDone(final AbstractSubtask subtask)
throws InterruptedException {
if (subtask == null)
throw new IllegalArgumentException();
if (subtask.buffer.isOpen())
throw new IllegalStateException();
lock.lockInterruptibly();
try {
// atomic transfer from [sinks] to [finishedSubtasks].
moveSinkToFinishedQueueAtomically((L) subtask.locator,
(AbstractSubtask) subtask);
// signal condition (used by awaitAll()).
subtaskDone.signalAll();
} finally {
lock.unlock();
}
}
/**
* Statistics for this master (and perhaps for other masters as well).
*/
public final H stats;
public H getStats() {
return stats;
}
/**
* The timeout in nanoseconds before closing an idle output sink.
*/
protected final long sinkIdleTimeoutNanos;
/**
* The time in nanoseconds that the {@link AbstractSubtask sink} will wait
* inside of the {@link IAsynchronousIterator} when it polls the iterator
* for a chunk. If this value is too large then the sink will block for
* noticeable lengths of time and will be less responsive to interrupts.
* Something in the 10s of milliseconds is appropriate.
*/
protected final long sinkPollTimeoutNanos;
/**
*
* @param stats
* Statistics for the master.
* @param buffer
* The buffer on which data is written by the application and
* from which it is drained by the master.
* @param sinkIdleTimeoutNanos
* The time in nanoseconds after which an idle sink will be
* closed. Any buffered writes are flushed when the sink is
* closed. This must be GTE the sinkChunkTimeout
* otherwise the sink will decide that it is idle when it was
* just waiting for enough data to prepare a full chunk.
* @param sinkPollTimeoutNanos
* The time in nanoseconds that the {@link AbstractSubtask sink}
* will wait inside of the {@link IAsynchronousIterator} when it
* polls the iterator for a chunk. If this value is too large
* then the sink will block for noticeable lengths of time and
* will be less responsive to interrupts. Something in the 10s of
* milliseconds is appropriate.
*
* @todo sinkQueueCapacity, sinkChunkSize, and sinkChunkTimeoutNanos should
* be arguments for this class and a default
* {@link #newSubtaskBuffer()} implementation should be provided. The
* unit tests for the {@link AbstractMasterTask} need to be updated
* for that change, as do the other derived classes.
*/
public AbstractMasterTask(final H stats, final BlockingBuffer<E[]> buffer,
final long sinkIdleTimeoutNanos, final long sinkPollTimeoutNanos) {
if (stats == null)
throw new IllegalArgumentException();
if (buffer == null)
throw new IllegalArgumentException();
if (sinkIdleTimeoutNanos <= 0)
throw new IllegalArgumentException();
if (sinkPollTimeoutNanos <= 0)
throw new IllegalArgumentException();
this.stats = stats;
this.buffer = buffer;
this.sinkIdleTimeoutNanos = sinkIdleTimeoutNanos;
this.sinkPollTimeoutNanos = sinkPollTimeoutNanos;
this.src = buffer.iterator();
/*
* The current access to this map is relatively limited. It is accessed
* when assembling the performance counters and by the worker thread for
* the master task. The size of the map is the #of index partitions. The
* #of index partitions does not change rapidly, so this map will not be
* resized frequently. The only time there is a rapid change in the #of
* index partitions is when we scatter-split an index.
*/
this.sinks = new ConcurrentHashMap<L, S>();
stats.addMaster(this);
}
public H call() throws Exception {
/*
* Note: If idle timeouts are allowed then we need to reopen the sink if
* it has been closed by a timeout.
*/
final boolean reopen = sinkIdleTimeoutNanos != 0;
try {
/*
* Note: This polls the master's input buffer so it can check the
* redirectQueue in a timely manner.
*/
while (true) {
halted();
drainFutures();
// drain the redirectQueue if not empty.
E[] a = redirectQueue.poll();
if (a == null) {
// poll the master's input queue.
if (src.hasNext(buffer.getChunkTimeout(),
TimeUnit.NANOSECONDS)) {
// drain chunk from the master's input queue.
a = src.next();
} else {
/*
* Nothing available right now.
*/
if (!buffer.isOpen() && buffer.isEmpty()) {
/*
* If the master's input buffer is closed and has
* been drained then we stop polling here, but we
* will continue to drain the redirectQueue in
* awaitAll().
*/
break;
} else {
// Nothing available - poll again.
continue;
}
}
} else {
if (log.isInfoEnabled())
log.info("Read chunk from redirectQueue");
}
// empty chunk?
if (a.length == 0)
continue;
synchronized (stats) {
// update the master stats.
stats.chunksIn.incrementAndGet();
stats.elementsIn.addAndGet(a.length);
}
handleChunk(a, reopen);
} // while(true)
awaitAll();
} catch (Throwable t) {
log.error("Cancelling: job=" + this + ", cause=" + t, t);
try {
cancelAll(true/* mayInterruptIfRunning */);
} catch (Throwable t2) {
log.error(t2);
}
throw new RuntimeException(t);
}
// Done.
return stats;
}
/**
* Handle the next chunk of elements from the {@link #buffer}.
*
* @param chunk
* A chunk.
* @param reopen
* When <code>false</code> it is an error if the output buffer
* has been closed. When <code>true</code> the output buffer
* will be (re-)opened as necessary. This will be
* <code>false</code> when invoked by {@link #call()} (since
* the output buffers are not closed until the master's buffer is
* closed) and should be <code>true</code> if you are handling
* redirects.
*/
abstract protected void handleChunk(E[] chunk, boolean reopen)
throws InterruptedException;
/**
* Extension hook for implementations where the clients accept work for
* asynchronous processing and notify the master as work items complete
* successfully or fail. The {@link AbstractMasterTask} will not terminate
* unless this method returns <code>true</code> when queried while holding
* the {@link #lock}. A <code>true</code> return indicates that there are no
* pending work items. The default implementation returns <code>true</code>.
*
* The work performed by the client must be idempotent (it must be safe to
* re-perform the operation). Pending work items may be in an unknown state,
* the master may submit the same work item to multiple clients (where that
* is permitted by the locator semantics), the client task may fail before
* the work item is complete, and a failed client can cause work items
* associated with that client to be posted to another client.
*
* To handle master termination, the pending set must track outstanding work
* items. Those work items should be removed from the pending set as soon as
* any client has successfully completed that work item (since the work is
* idempotent).
*
* To handle client failure, the subtask must track the pending set for its
* client. If the client dies, then the subtask must handle the client by
* placing all pending work items for that client (including any in the
* chunk for the current request) onto the {@link #redirectQueue}.
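*
* A minimal sketch of an override (the pendingSet collection is an
* illustrative assumption, not part of this class):
*
* <pre>
* private final java.util.Set&lt;Object&gt; pendingSet = java.util.Collections
*         .synchronizedSet(new java.util.HashSet&lt;Object&gt;());
*
* protected boolean nothingPending() {
*     // Work items are removed from pendingSet as soon as any client
*     // successfully completes them, so empty means nothing is pending.
*     return pendingSet.isEmpty();
* }
* </pre>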
*/
protected boolean nothingPending() {
return true;
}
/**
* Extension hook invoked when the master's buffer is exhausted by
* {@link #awaitAll()}. The default implementation is a NOP.
*/
protected void willShutdown() throws InterruptedException {
// NOP.
}
/**
* Await the completion of the writes on each index partition. The master
* will terminate when there are no active subtasks and the redirect queue
* is empty. That condition is tested atomically.
*
* @throws ExecutionException
* This will report the first cause.
* @throws InterruptedException
* If interrupted while awaiting the {@link #lock} or the child
* tasks.
*
* @todo The test suite should include a case where a set of redirects
* appear just as the sinks are processing their last chunk.
*/
private void awaitAll() throws InterruptedException, ExecutionException {
if(buffer.isOpen()) {
/*
* The buffer must be closed as a precondition otherwise the
* application can continue to write data on the master's input
* queue and the termination conditions can not be satisfied.
*/
throw new IllegalStateException();
}
/*
* Extension hook may be used to map the pending set over the available
* clients during shutdown.
*/
willShutdown();
while (true) {
halted();
/*
* Note: We need to hold this lock in order to make an atomic
* decision concerning whether or not the master's termination
* conditions have been satisfied.
*
* Note: If we poll the redirectQueue and find that there is another
* chunk to be processed, then we MUST NOT hold the lock when
* processing that chunk. Doing so will lead to a deadlock in
* AbstractSubtask#call() when it tries to grab the lock so that it
* can signal subtaskDone. Therefore we process the chunks outside
* of this code block, once we have released the lock.
*/
final E[] a;
lock.lockInterruptibly();
try {
a = redirectQueue.poll();
if (a == null) {
/*
* There is nothing available from the redirect queue.
*/
if (finishedSubtaskQueue.isEmpty() && sinks.isEmpty()
&& redirectQueue.isEmpty() && nothingPending()) {
/*
* We are done since there are no running sinks, and no
* sinks whose Future we still need to test, and the
* redirectQueue is empty.
*
* Note: We MUST be holding the lock for this
* termination condition to be atomic.
*/
if (log.isInfoEnabled())
log.info("All subtasks are done: " + this);
return;
}
/*
* Check for sinks which have finished.
*/
if (log.isDebugEnabled())
log.debug("Waiting for " + sinks.size()
+ " subtasks : " + this);
drainFutures();
if (!finishedSubtaskQueue.isEmpty()) {
/*
* Yield the lock and wait up to a timeout for a sink to
* complete. We can not wait that long because we are
* still polling the redirect queue!
*
* @todo config timeout
*/
subtaskDone.await(50, TimeUnit.MILLISECONDS);
}
}
} finally {
lock.unlock();
}
if (a != null) {
/*
* Handle a redirected chunk.
*
* Note: We DO NOT hold the [lock] here!
*/
handleChunk(a, true/* reopen */);
}
} // while(true)
}
/**
* Cancel all running tasks, discarding any buffered data.
*
* Note: This method does not wait on the canceled tasks.
*
* Note: The caller should have already invoked {@link #halt(Throwable)}.
*/
private void cancelAll(final boolean mayInterruptIfRunning)
throws InterruptedException {
log.warn("Cancelling job: " + this);
/*
* Close the buffer (nothing more may be written).
*
* Note: We DO NOT close the iterator draining the buffer since that
* would cause this task to interrupt itself!
*/
buffer.close();
// Clear the backing queue.
buffer.clear();
// cancel the futures.
for (S sink : sinks.values()) {
final Future<?> f = sink.buffer.getFuture();
if (!f.isDone()) {
f.cancel(mayInterruptIfRunning);
}
}
// wait for all sinks to complete.
for (S sink : sinks.values()) {
final Future<?> f = sink.buffer.getFuture();
try {
f.get();
} catch (InterruptedException ex) {
throw ex;
} catch (ExecutionException ex) {
/*
* Ignore exceptions here since we are halting anyway and we can
* expect a bunch of canceled tasks because we just interrupted
* all of the subtasks.
*/
log.warn("sink=" + sink + " : " + ex);
}
}
// clear references to the sinks.
sinks.clear();
// clear queue of finished sinks (we do not need to check their futures).
finishedSubtaskQueue.clear();
// clear the redirect queue.
redirectQueue.clear();
}
/**
* Return the sink for the locator. The sink is created if it does not exist
* using {@link #newSubtaskBuffer()} and
* {@link #newSubtask(Object, BlockingBuffer)}.
*
* Note: The caller is single threaded since this is invoked from the
* master's thread. This code depends on that assumption.
*
* @param locator
* The locator (unique subtask key).
* @param reopen
* <code>true</code> IFF a closed buffer should be re-opened
* (in fact, this causes a new buffer to be created and the new
* buffer will be drained by a new
* {@link IndexPartitionWriteTask}).
*
* @return The sink for that locator.
*
* @throws IllegalArgumentException
* if the argument is <code>null</code>.
* @throws InterruptedException
* if interrupted.
* @throws RuntimeException
* if {@link #halted()}
*/
protected S getSink(final L locator, final boolean reopen)
throws InterruptedException {
if (locator == null)
throw new IllegalArgumentException();
// operation not allowed if halted.
halted();
S sink = sinks.get(locator);
if (sink != null && sink.buffer.isOpen()) {
/*
* The sink is good, so return it.
*
* Note: the caller must handle the exception if the sink is
* asynchronously closed before the caller can use it.
*/
return sink;
}
/*
* @todo This lock should not be necessary (and probably is not) since
* the caller is single threaded.
*/
lock.lockInterruptibly();
try {
if (reopen && sink != null && !sink.buffer.isOpen()) {
if (log.isInfoEnabled())
log.info("Reopening sink (was closed): " + this
+ ", locator=" + locator);
/*
* Note: Instead of waiting for the sink here, the sink will
* notify the master when it is done and be placed onto the
* [finishedSubtaskQueue] queue. When its Future#isDone(), we will
* drain it from the queue and check its Future for errors.
*
* This allows the master to not block when a sink is closed but
* still awaiting an RMI. It also implies that there can be two
* active sinks for the same locator, but only one will be
* recorded in [sinks] at any given time.
*/
// remove the old sink from the map (IFF that is the reference
// found).
moveSinkToFinishedQueueAtomically(locator, sink);
// clear reference so we can re-open the sink.
sink = null;
}
if (sink == null) {
if (log.isInfoEnabled())
log.info("Creating output buffer: " + this + ", locator="
+ locator);
final BlockingBuffer<E[]> out = newSubtaskBuffer();
sink = newSubtask(locator, out);
final S oldval = sinks.put(locator, sink);
// should not be an entry in the map for that locator.
assert oldval == null : "locator=" + locator;
// if (oldval == null) {
/**
* Start subtask.
*
* @see
* BlockingBuffer.close() does not unblock threads
*/
{
// Wrap the computation as a FutureTask.
@SuppressWarnings({ "unchecked", "rawtypes" })
final FutureTask<? extends AbstractSubtaskStats> ft = new FutureTask(
sink);
// Set Future on the BlockingBuffer.
out.setFuture(ft);
// Assign a worker thread to the sink.
submitSubtask(ft);
}
stats.subtaskStartCount.incrementAndGet();
// } else {
//
// // concurrent create of the sink.
// sink = oldval;
//
// }
}
return sink;
} finally {
lock.unlock();
}
}
/**
* Factory for a new buffer for a subtask.
*/
abstract protected BlockingBuffer<E[]> newSubtaskBuffer();
/**
* Factory for a new subtask.
*
* @param locator
* The unique key for the subtask.
* @param out
* The {@link BlockingBuffer} on which the master will write for
* that subtask.
*
* @return The subtask.
*/
abstract protected S newSubtask(L locator, BlockingBuffer<E[]> out);
// /**
// * Submit the subtask to an {@link Executor}.
// *
// * @param subtask
// * The subtask.
// *
// * @return The {@link Future}.
// */
// abstract protected Future extends AbstractSubtaskStats> submitSubtask(S subtask);
/**
* Submit the subtask to an {@link Executor}.
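*
* A minimal sketch of an implementation (the executor field is an
* illustrative assumption; any {@link Executor} will do since the argument
* is a {@link FutureTask} and therefore a {@link Runnable}):
*
* <pre>
* protected void submitSubtask(
*         final FutureTask&lt;? extends AbstractSubtaskStats&gt; subtask) {
*     executor.execute(subtask);
* }
* </pre>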
*
* @param subtask
* The {@link FutureTask} used to execute the subtask.
*
* @see
* BlockingBuffer.close() does not unblock threads
*/
abstract protected void submitSubtask(
FutureTask<? extends AbstractSubtaskStats> subtask);
/**
* Drains any {@link Future}s from {@link #finishedSubtaskQueue} which are done
* and halts the master if there is an error for a {@link Future}.
*
* @throws InterruptedException
* if interrupted.
*/
private void drainFutures() throws InterruptedException, ExecutionException {
while (true) {
halted();
/*
* Poll the queue of sinks that have finished running
* (non-blocking).
*
* Note: We don't actually remove the subtask from the queue until
* its Future is available.
*/
final S sink = finishedSubtaskQueue.peek();
if (sink == null) {
// queue is empty.
return;
}
if(sink.buffer.isOpen())
throw new IllegalStateException(sink.toString());
final Future<?> f = sink.buffer.getFuture();
if (!f.isDone()) {
// Future is not available for the next sink in the queue.
return;
}
try {
// check the future.
f.get();
// remove the head of the queue.
if (sink != finishedSubtaskQueue.remove()) {
// The wrong sink is at the head of the queue.
throw new AssertionError();
}
} catch(ExecutionException ex) {
// halt on error.
throw halt(ex);
} finally {
/*
* Increment this counter immediately when the subtask is done
* regardless of the outcome. This is used by some unit tests to
* verify idle timeouts and the like.
*/
stats.subtaskEndCount.incrementAndGet();
if (log.isDebugEnabled())
log.debug("subtaskEndCount incremented: " + sink.locator);
}
}
}
/**
* Transfer a sink from {@link #sinks} to {@link #finishedSubtaskQueue}. The
* entry for the locator is removed from {@link #sinks} atomically IFF the
* given sink is associated with the given locator in that map.
*
* This is done atomically using the {@link #lock}. This is invoked both by
* {@link AbstractSubtask#call()} (when it is preparing to exit call()) and
* by {@link #getSink(Object, boolean)} (when it discovers that the sink for
* a locator is closed, but not yet finished with its work).
*
* Note: The sink MUST be atomically transferred from {@link #sinks} to
* {@link #finishedSubtaskQueue} and {@link #awaitAll()} MUST verify that
* both are empty in order for the termination condition to be atomic.
*
* @param locator
* The locator.
* @param sink
* The sink.
*
* @todo this method really should be private. It is exposed for one of the
* unit tests.
*/
protected void moveSinkToFinishedQueueAtomically(final L locator,
final AbstractSubtask sink) throws InterruptedException {
if (locator == null)
throw new IllegalArgumentException();
if (sink == null)
throw new IllegalArgumentException();
lock.lockInterruptibly();
try {
/*
* Place on queue (checked by call() and awaitAll()). It is safe to do
* this even if the sink has already been moved, in which case its
* Future will be checked twice, which is not a problem.
*/
finishedSubtaskQueue.put((S) sink);
/*
* Remove map entry IFF it is for the same reference.
*/
if (sinks.remove(locator, sink)) {
if (log.isDebugEnabled())
log.debug("Removed output buffer: " + locator);
}
} finally {
lock.unlock();
}
}
/**
* Resolves the output buffer onto which the split must be written and adds
* the data to that output buffer.
*
* @param locator
* The locator for the index partition on which the tuples in
* the {@link Split} must be written (see {@link PartitionLocator}).
* @param a
* The array of tuples. Only those tuples addressed by the
* split will be written onto the output buffer.
* @param fromIndex
* The index of the first tuple in the split (inclusive).
* @param toIndex
* The index of the first tuple beyond the split (exclusive).
* @param reopen
* <code>true</code> IFF a closed buffer should be re-opened (in
* fact, this causes a new buffer to be created and the new
* buffer will be drained by a new {@link AbstractSubtask}).
*
* @throws InterruptedException
* if the thread is interrupted.
*/
@SuppressWarnings("unchecked")
protected void addToOutputBuffer(final L locator, final E[] a,
final int fromIndex, final int toIndex, boolean reopen)
throws InterruptedException {
final int n = (toIndex - fromIndex);
if (n == 0)
return;
/*
* Make a dense chunk for this split.
*/
final E[] b = (E[]) java.lang.reflect.Array.newInstance(a.getClass()
.getComponentType(), n);
System.arraycopy(a, fromIndex, b, 0, n);
final long begin = System.nanoTime();
boolean added = false;
while (!added) {
halted();
// resolve the sink / create iff necessary.
final S sink = getSink(locator, reopen);
try {
added = sink.buffer.add(b, offerWarningTimeoutNanos,
TimeUnit.NANOSECONDS);
final long now = System.nanoTime();
if (added) {
/*
* Update timestamp of the last chunk written on that sink.
*/
sink.lastChunkNanos = now;
} else {
log.warn("Sink is slow: elapsed="
+ TimeUnit.NANOSECONDS.toMillis(now - begin)
+ "ms, sink=" + sink);
}
} catch (BufferClosedException ex) {
if (ex.getCause() instanceof StaleLocatorException) {
/*
* Note: The sink sets the exception when closing the
* buffer when handling the stale locator exception and
* transfers the outstanding and all queued chunks to the
* redirectQueue.
*
* When we trap the stale locator exception here we need to
* transfer the chunk to the redirectQueue since the buffer
* was closed (and drained) asynchronously.
*/
if (log.isInfoEnabled())
log.info("Sink closed asynchronously by stale locator exception: "
+ sink);
redirectChunk( b );
added = true;
} else if (ex.getCause() instanceof IdleTimeoutException
|| ex.getCause() instanceof MasterExhaustedException) {
/*
* Note: The sink sets the exception if it closes the input
* queue by idle timeout or because the master was
* exhausted.
*
* These exceptions are trapped here and cause the sink to
* be re-opened (simply by restarting the loop).
*/
if (log.isInfoEnabled())
log.info("Sink closed asynchronously: cause="
+ ex.getCause() + ", sink=" + sink);
// definitely re-open!
reopen = true;
} else {
// anything else is a problem.
throw ex;
}
}
}
synchronized (stats) {
stats.chunksTransferred.incrementAndGet();
stats.elementsTransferred.addAndGet(b.length);
stats.elementsOnSinkQueues.addAndGet(b.length);
stats.elapsedSinkOfferNanos += (System.nanoTime() - begin);
}
}
/**
* This timeout is used to log warning messages when a sink is slow.
*/
private final static long offerWarningTimeoutNanos = TimeUnit.MILLISECONDS
.toNanos(5000);
}