com.bigdata.service.ndx.AbstractDataServiceProcedureTask Maven / Gradle / Ivy

Go to download
package com.bigdata.service.ndx;

import java.util.LinkedList;
import java.util.List;
import java.util.concurrent.Callable;

import org.apache.log4j.Level;
import org.apache.log4j.Logger;

import com.bigdata.btree.proc.IIndexProcedure;
import com.bigdata.btree.proc.IKeyArrayIndexProcedure;
import com.bigdata.btree.proc.IResultHandler;
import com.bigdata.btree.proc.ISimpleIndexProcedure;
import com.bigdata.mdi.PartitionLocator;
import com.bigdata.resources.StaleLocatorException;
import com.bigdata.service.DataService;
import com.bigdata.service.IDataService;
import com.bigdata.service.Split;
import com.bigdata.util.InnerCause;
import com.bigdata.util.concurrent.TaskCounters;

/**
 * Helper class for submitting an {@link IIndexProcedure} to run on an
 * {@link IDataService}. The class traps {@link StaleLocatorException}s and
 * handles the redirection of requests to the appropriate {@link IDataService}.
 * When necessary, the data for an {@link IKeyArrayIndexProcedure} will be
 * re-split in order to distribute the requests to the new index partitions
 * following a split of the target index partition.
 * 
 * Note: If an index partition is moved then the key range is unchanged.
 * 

 * Note: If an index partition is split, then the key range for each new index
 * partition is a sub-range of the original key range and the total key range
 * for the new index partitions is exactly the original key range.
 * 
 * Note: If an index partition is joined with another index partition then the
 * key range of the new index partition is increased. However, we will wind up
 * submitting one request to the new index partition for each index partition
 * that we knew about at the time that the procedure was first mapped across the
 * index partitions.
 * 
 * @author Bryan Thompson
 * @version $Id$
 */
abstract class AbstractDataServiceProcedureTask implements Callable {

    /**
     * Note: Invocations of the non-batch API are logged at the WARN level since
     * they result in an application that can not scale-out efficiently.
     */
    protected static final transient Logger log = Logger
            .getLogger(AbstractDataServiceProcedureTask.class);

    /**
     * True iff the {@link #log} level is WARN or less.
     */
    final protected boolean WARN = log.getEffectiveLevel().toInt() <= Level.WARN
            .toInt();

    protected final IScaleOutClientIndex ndx;

    /**
     * The timestamp for the operation. This will be the timestamp for the view
     * (the outer class) unless the operation is read-only, in which case a
     * different timestamp may be chosen either to improve concurrency or to
     * provide globally read-consistent operations (in the latter cases this
     * will be a read-only transaction identifier).
     */
    protected final long ts;

    /**
     * The split assigned to this instance of the procedure.
     */
    protected final Split split;

    /**
     * The procedure to be executed against that {@link #split}.
     */
    protected final IIndexProcedure proc;

    /**
     * Used to aggregate results when a procedure can span more than one
     * index partition.
     */
    protected final IResultHandler resultHandler;

    private final TaskCounters taskCounters;

    //        protected final TaskCounters taskCountersByProc;
    protected final IndexSyncRPCCounters taskCountersByIndex;

    private long nanoTime_submitTask;

    private long nanoTime_beginWork;

    private long nanoTime_finishedWork;

    /**
     * If the task fails then this will be populated with an ordered list of
     * the exceptions. There will be one exception per-retry of the task.
     * For some kinds of failure this list MAY remain unbound.
     */
    protected List causes = null;

    /**
     * A human friendly representation.
     */
    public String toString() {

        return "index=" + ndx.getName() + ", ts=" + ts + ", procedure "
                + proc.getClass().getName() + " : " + split;

    }

    /**
     * Return the #of elements in the operation. This is used to update
     * {@link IndexSyncRPCCounters#elementsOut}.
     */
    abstract protected int getElementCount();
    
    /**
     * Variant used for procedures that are NOT instances of
     * {@link IKeyArrayIndexProcedure}.
     * 
     * @param ts
     * @param split
     * @param proc
     * @param resultHandler
     */
    public AbstractDataServiceProcedureTask(IScaleOutClientIndex ndx,
            final long ts, final Split split, final IIndexProcedure proc,
            final IResultHandler resultHandler) {

        if (ndx == null)
            throw new IllegalArgumentException();

        if (split.pmd == null)
            throw new IllegalArgumentException();

        if (!(split.pmd instanceof PartitionLocator)) {

            throw new IllegalArgumentException("Split does not have a locator");

        }

        if (proc == null)
            throw new IllegalArgumentException();

        this.ndx = ndx;

        this.ts = ts;

        this.split = split;

        this.proc = proc;

        this.resultHandler = resultHandler;

        this.taskCounters = ndx.getFederation().getTaskCounters();

        //            this.taskCountersByProc = fed.getTaskCounters(proc);
        //            
        this.taskCountersByIndex = ndx.getFederation().getIndexCounters(
                ndx.getName()).synchronousCounters;

        /*
         * This timestamp is set when the task is created since that is when the
         * request was submitted.
         */
        nanoTime_submitTask = System.nanoTime();

        if(proc.isReadOnly()) {
            synchronized(taskCountersByIndex) {
                taskCountersByIndex.readOnlyRequestCount++;
            }
        }
        
    }

    final public Void call() throws Exception {

        // the index partition locator.
        final PartitionLocator locator = (PartitionLocator) split.pmd;

        /*
         * Track the total inter-arrival time.
         */
        synchronized (taskCounters.lastArrivalNanoTime) {
            final long lastArrivalNanoTime = taskCounters.lastArrivalNanoTime
                    .get();
            final long now = System.nanoTime();
            final long delta = now - lastArrivalNanoTime;
            // cumulative inter-arrival time.
            taskCounters.interArrivalNanoTime.addAndGet(delta);
            // update timestamp of the last task arrival.
            taskCounters.lastArrivalNanoTime.set(now);
        }
        
        taskCounters.taskSubmitCount.incrementAndGet();
        //            taskCountersByProc.taskSubmitCount.incrementAndGet();
        taskCountersByIndex.taskSubmitCount.incrementAndGet();

        nanoTime_beginWork = System.nanoTime();
        final long queueWaitingTime = nanoTime_beginWork - nanoTime_submitTask;
        taskCounters.queueWaitingNanoTime.addAndGet(queueWaitingTime);
        //            taskCountersByProc.queueWaitingTime.addAndGet(queueWaitingTime);
        taskCountersByIndex.queueWaitingNanoTime.addAndGet(queueWaitingTime);
        synchronized (taskCountersByIndex) {
            taskCountersByIndex.elementsOut += getElementCount();
            taskCountersByIndex.requestCount++;
        }

        try {

            submit(locator);

            taskCounters.taskSuccessCount.incrementAndGet();
            //                taskCountersByProc.taskSuccessCount.incrementAndGet();
            taskCountersByIndex.taskSuccessCount.incrementAndGet();

        } catch (Exception ex) {

            taskCounters.taskFailCount.incrementAndGet();
            //                taskCountersByProc.taskFailCount.incrementAndGet();
            taskCountersByIndex.taskFailCount.incrementAndGet();

            throw ex;

        } finally {

            nanoTime_finishedWork = System.nanoTime();

            taskCounters.taskCompleteCount.incrementAndGet();
            //                taskCountersByProc.taskCompleteCount.incrementAndGet();
            taskCountersByIndex.taskCompleteCount.incrementAndGet();

            // increment by the amount of time that the task was executing.
            final long serviceNanoTime = nanoTime_finishedWork
                    - nanoTime_beginWork;
            taskCounters.serviceNanoTime.addAndGet(serviceNanoTime);
            //                taskCountersByProc.serviceNanoTime.addAndGet(serviceNanoTime);
            taskCountersByIndex.serviceNanoTime.addAndGet(serviceNanoTime);

            // increment by the total time from submit to completion.
            final long queuingNanoTime = nanoTime_finishedWork
                    - nanoTime_submitTask;
            taskCounters.queuingNanoTime.addAndGet(queuingNanoTime);
            //                taskCountersByProc.queuingNanoTime.addAndGet(queuingNanoTime);
            taskCountersByIndex.queuingNanoTime.addAndGet(queuingNanoTime);

        }

        return null;

    }

    /**
     * Submit the procedure to the {@link IDataService} identified by the
     * locator.
     * 
     * @param locator
     *            An index partition locator.
     *            
     * @throws Exception
     */
    final protected void submit(final PartitionLocator locator)
            throws Exception {

        if (locator == null)
            throw new IllegalArgumentException();

        if (Thread.interrupted())
            throw new InterruptedException();

        // resolve service UUID to data service.
        final IDataService dataService = ndx.getDataService(locator);

        if (dataService == null)
            throw new RuntimeException("DataService not found: " + locator);

        // the name of the index partition.
        final String name = DataService.getIndexPartitionName(//
                ndx.getName(), // the name of the scale-out index.
                split.pmd.getPartitionId() // the index partition identifier.
                );

        if (log.isInfoEnabled())
            log.info("Submitting task=" + this + " on " + dataService);

        try {

            submit(dataService, name);

        } catch (Exception ex) {

            if (causes == null)
                causes = new LinkedList();

            causes.add(ex);

            final StaleLocatorException cause = (StaleLocatorException) InnerCause
                    .getInnerCause(ex, StaleLocatorException.class);

            if (cause != null) {

                // notify the client so that it can refresh its cache.
                ndx.staleLocator(ts, locator, cause);

                // retry the operation.
                retry();

            } else {

                throw ex;

            }

        }

    }

    /**
     * Submit the procedure to the {@link IDataService} and aggregate the
     * result with the caller's {@link IResultHandler} (if specified).
     * 
     * @param dataService
     *            The data service on which the procedure will be executed.
     * @param name
     *            The name of the index partition on that data service.
     * 
     * @todo do not require aggregator for {@link ISimpleIndexProcedure} so
     *       make this abstract in the base class or just override for
     *       {@link ISimpleIndexProcedure}. This would also mean that
     *       {@link #call()} would return the value for
     *       {@link ISimpleIndexProcedure}.
     */
    @SuppressWarnings("unchecked")
    final private void submit(final IDataService dataService, final String name)
            throws Exception {

        /*
         * Note: The timestamp here is the one specified for the task. This
         * allows us to realize read-consistent procedures by choosing a
         * read-only transaction based on the lastCommitTime of the
         * federation for the procedure.
         */

        final Object result = dataService.submit(ts, name, proc).get();

        if (resultHandler != null) {

            resultHandler.aggregate(result, split);

        }

    }

    /**
     * Invoked when {@link StaleLocatorException} was thrown. Since the
     * procedure was being run against an index partition of some scale-out
     * index this exception indicates that the index partition locator was
     * stale. We re-cache the locator(s) for the same key range as the index
     * partition which we thought we were addressing and then re-map the
     * operation against those updated locator(s). Note that a split will go
     * from one locator to N locators for a key range while a join will go
     * from N locators for a key range to 1. A move does not change the #of
     * locators for the key range, just where that index partition is living
     * at this time.
     * 
     * @throws Exception
     */
    abstract protected void retry() throws Exception;

}