All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.hadoop.hbase.procedure2.ProcedureExecutor Maven / Gradle / Ivy

The newest version!
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.procedure2;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.locks.ReentrantLock;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.concurrent.TimeUnit;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.ProcedureInfo;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.classification.InterfaceStability;
import org.apache.hadoop.hbase.exceptions.IllegalArgumentIOException;
import org.apache.hadoop.hbase.procedure2.store.ProcedureStore;
import org.apache.hadoop.hbase.procedure2.store.ProcedureStore.ProcedureIterator;
import org.apache.hadoop.hbase.procedure2.util.StringUtils;
import org.apache.hadoop.hbase.procedure2.util.TimeoutBlockingQueue;
import org.apache.hadoop.hbase.procedure2.util.TimeoutBlockingQueue.TimeoutRetriever;
import org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos;
import org.apache.hadoop.hbase.protobuf.generated.ProcedureProtos.ProcedureState;
import org.apache.hadoop.hbase.security.User;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.ForeignExceptionUtil;
import org.apache.hadoop.hbase.util.NonceKey;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.hbase.util.Threads;

/**
 * Thread Pool that executes the submitted procedures.
 * The executor has a ProcedureStore associated.
 * Each operation is logged and on restart the pending procedures are resumed.
 *
 * Unless the Procedure code throws an error (e.g. invalid user input)
 * the procedure will complete (at some point in time). On restart the pending
 * procedures are resumed and the ones that failed will be rolled back.
 *
 * The user can add procedures to the executor via submitProcedure(proc)
 * check for the finished state via isFinished(procId)
 * and get the result via getResult(procId)
 */
@InterfaceAudience.Private
@InterfaceStability.Evolving
public class ProcedureExecutor {
  private static final Log LOG = LogFactory.getLog(ProcedureExecutor.class);

  Testing testing = null;
  /**
   * Test hooks for the executor. The flags are read through
   * {@link #shouldKillBeforeStoreUpdate()}.
   */
  public static class Testing {
    protected boolean killBeforeStoreUpdate = false;
    protected boolean toggleKillBeforeStoreUpdate = false;

    /**
     * Returns the current value of the kill flag. When toggling is enabled,
     * the flag is flipped (and the new value logged) before returning, so the
     * next call observes the opposite value.
     * @return the value the kill flag had when this method was entered
     */
    protected boolean shouldKillBeforeStoreUpdate() {
      final boolean current = killBeforeStoreUpdate;
      if (!toggleKillBeforeStoreUpdate) {
        return current;
      }
      killBeforeStoreUpdate = !current;
      LOG.warn("Toggle Kill before store update to: " + killBeforeStoreUpdate);
      return current;
    }
  }

  /**
   * Callback interface for observers of the executor. Listeners are kept in
   * the executor's listener list (see registerListener/unregisterListener).
   */
  public interface ProcedureExecutorListener {
    /**
     * Notification for a procedure restored by the load step on start.
     * @param procId the id of the loaded procedure
     */
    void procedureLoaded(long procId);

    /**
     * Notification for a newly submitted procedure.
     * @param procId the id of the added procedure
     */
    void procedureAdded(long procId);

    /**
     * Notification for a finished procedure.
     * NOTE(review): the call site is not visible in this part of the file.
     * @param procId the id of the finished procedure
     */
    void procedureFinished(long procId);
  }

  /**
   * Used by the TimeoutBlockingQueue to get the timeout interval of the procedure.
   * The retriever is bound to {@code Procedure} so that the methods below
   * actually override the TimeoutRetriever ones.
   */
  private static class ProcedureTimeoutRetriever implements TimeoutRetriever<Procedure> {
    /**
     * @param proc the queued procedure
     * @return the time remaining reported by the procedure,
     *   expressed in the unit returned by {@link #getTimeUnit(Procedure)}
     */
    @Override
    public long getTimeout(Procedure proc) {
      return proc.getTimeRemaining();
    }

    /**
     * @param proc the queued procedure (unused, the unit is the same for all)
     * @return always milliseconds
     */
    @Override
    public TimeUnit getTimeUnit(Procedure proc) {
      return TimeUnit.MILLISECONDS;
    }
  }

  /**
   * Internal cleaner that removes the completed procedure results after a TTL.
   * NOTE: This is a special case handled in timeoutLoop().
   *
   * Since the client code looks more or less like:
   *   procId = master.doOperation()
   *   while (master.getProcResult(procId) == ProcInProgress);
   * The master should not throw away the proc result as soon as the procedure is done
   * but should wait a result request from the client (see executor.removeResult(procId))
   * The client will call something like master.isProcDone() or master.getProcResult()
   * which will return the result/state to the client, and it will mark the completed
   * proc as ready to delete. note that the client may not receive the response from
   * the master (e.g. master failover) so, if we delay a bit the real deletion of
   * the proc result the client will be able to get the result the next try.
   */
  private static class CompletedProcedureCleaner
      extends ProcedureInMemoryChore {
    private static final Log LOG = LogFactory.getLog(CompletedProcedureCleaner.class);

    private static final String CLEANER_INTERVAL_CONF_KEY = "hbase.procedure.cleaner.interval";
    private static final int DEFAULT_CLEANER_INTERVAL = 30 * 1000; // 30sec

    private static final String EVICT_TTL_CONF_KEY = "hbase.procedure.cleaner.evict.ttl";
    private static final int DEFAULT_EVICT_TTL = 15 * 60000; // 15min

    private static final String EVICT_ACKED_TTL_CONF_KEY ="hbase.procedure.cleaner.acked.evict.ttl";
    private static final int DEFAULT_ACKED_EVICT_TTL = 5 * 60000; // 5min

    private final Map completed;
    private final Map nonceKeysToProcIdsMap;
    private final ProcedureStore store;
    private final Configuration conf;

    public CompletedProcedureCleaner(final Configuration conf, final ProcedureStore store,
        final Map completedMap,
        final Map nonceKeysToProcIdsMap) {
      // set the timeout interval that triggers the periodic-procedure
      super(conf.getInt(CLEANER_INTERVAL_CONF_KEY, DEFAULT_CLEANER_INTERVAL));
      this.completed = completedMap;
      this.nonceKeysToProcIdsMap = nonceKeysToProcIdsMap;
      this.store = store;
      this.conf = conf;
    }

    @Override
    protected void periodicExecute(final TEnvironment env) {
      if (completed.isEmpty()) {
        if (LOG.isTraceEnabled()) {
          LOG.trace("No completed procedures to cleanup.");
        }
        return;
      }

      final long evictTtl = conf.getInt(EVICT_TTL_CONF_KEY, DEFAULT_EVICT_TTL);
      final long evictAckTtl = conf.getInt(EVICT_ACKED_TTL_CONF_KEY, DEFAULT_ACKED_EVICT_TTL);

      final long now = EnvironmentEdgeManager.currentTime();
      final Iterator> it = completed.entrySet().iterator();
      final boolean isDebugEnabled = LOG.isDebugEnabled();
      while (it.hasNext() && store.isRunning()) {
        final Map.Entry entry = it.next();
        final ProcedureInfo procInfo = entry.getValue();

        // TODO: Select TTL based on Procedure type
        if ((procInfo.hasClientAckTime() && (now - procInfo.getClientAckTime()) >= evictAckTtl) ||
            (now - procInfo.getLastUpdate()) >= evictTtl) {
          // Failed Procedures aren't persisted in WAL.
          if (!(procInfo instanceof FailedProcedureInfo)) {
            store.delete(entry.getKey());
          }
          it.remove();

          NonceKey nonceKey = procInfo.getNonceKey();
          if (nonceKey != null) {
            nonceKeysToProcIdsMap.remove(nonceKey);
          }
          if (isDebugEnabled) {
            LOG.debug("Evict completed procedure: " + procInfo);
          }
        }
      }
    }
  }

  /**
   * Maps the procId returned by submitProcedure(), the Root-ProcID, to the ProcedureInfo.
   * Once a Root-Procedure completes (success or failure), the result will be added to this map.
   * The user of ProcedureExecutor should call getResult(procId) to get the result.
   */
  private final ConcurrentHashMap<Long, ProcedureInfo> completed =
    new ConcurrentHashMap<Long, ProcedureInfo>();

  /**
   * Maps the procId returned by submitProcedure(), the Root-ProcID, to the RootProcedureState.
   * The RootProcedureState contains the execution stack of the Root-Procedure,
   * It is added to the map by submitProcedure() and removed on procedure completion.
   */
  private final ConcurrentHashMap<Long, RootProcedureState> rollbackStack =
    new ConcurrentHashMap<Long, RootProcedureState>();

  /**
   * Helper map to lookup the live procedures by ID.
   * This map contains every procedure: root-procedures and subprocedures.
   */
  private final ConcurrentHashMap<Long, Procedure> procedures =
    new ConcurrentHashMap<Long, Procedure>();

  /**
   * Helper map to lookup whether the procedure was already issued from the same client.
   * It maps the nonce of an operation to the procId reserved for it.
   */
  private ConcurrentHashMap<NonceKey, Long> nonceKeysToProcIdsMap =
      new ConcurrentHashMap<NonceKey, Long>();

  /**
   * Timeout Queue that contains Procedures in a WAITING_TIMEOUT state
   * or periodic procedures.
   */
  private final TimeoutBlockingQueue<Procedure> waitingTimeout =
    new TimeoutBlockingQueue<Procedure>(new ProcedureTimeoutRetriever());

  /**
   * Queue that contains runnable procedures.
   */
  private final ProcedureRunnableSet runnables;

  // TODO
  private final ReentrantLock submitLock = new ReentrantLock();
  // last assigned procId; negative until the store load has reported the max procId
  private final AtomicLong lastProcId = new AtomicLong(-1);

  private final CopyOnWriteArrayList<ProcedureExecutorListener> listeners =
    new CopyOnWriteArrayList<ProcedureExecutorListener>();

  private final AtomicInteger activeExecutorCount = new AtomicInteger(0);
  private final AtomicBoolean running = new AtomicBoolean(false);
  private final TEnvironment environment;
  private final ProcedureStore store;
  private final Configuration conf;

  // numThreads executor threads followed by one timeout thread; null until start()
  private Thread[] threads;

  /**
   * Creates an executor that uses a new ProcedureSimpleRunQueue as the
   * queue of runnable procedures.
   * @param conf the configuration
   * @param environment the environment handed to the procedures
   * @param store the store associated with this executor
   */
  public ProcedureExecutor(final Configuration conf, final TEnvironment environment,
      final ProcedureStore store) {
    this(conf, environment, store, new ProcedureSimpleRunQueue());
  }

  /**
   * Creates an executor on top of the given store and run queue.
   * Nothing is loaded or started here; see start().
   * @param conf the configuration
   * @param environment the environment handed to the procedures
   * @param store the store associated with this executor
   * @param runqueue the queue holding the runnable procedures
   */
  public ProcedureExecutor(final Configuration conf, final TEnvironment environment,
      final ProcedureStore store, final ProcedureRunnableSet runqueue) {
    this.conf = conf;
    this.store = store;
    this.environment = environment;
    this.runnables = runqueue;
  }

  /**
   * Asks the store to replay its content into this executor.
   * The executor must be empty when this is called.
   * @param abortOnCorruption true to fail with an IOException when at least
   *          one corrupted procedure is found on replay
   * @throws IOException if the store load fails, or on corruption when
   *          abortOnCorruption is set
   */
  private void load(final boolean abortOnCorruption) throws IOException {
    // nothing must be in memory before the replay
    Preconditions.checkArgument(completed.isEmpty());
    Preconditions.checkArgument(rollbackStack.isEmpty());
    Preconditions.checkArgument(procedures.isEmpty());
    Preconditions.checkArgument(waitingTimeout.isEmpty());
    Preconditions.checkArgument(runnables.size() == 0);

    final ProcedureStore.ProcedureLoader loader = new ProcedureStore.ProcedureLoader() {
      @Override
      public void setMaxProcId(long maxProcId) {
        assert lastProcId.get() < 0 : "expected only one call to setMaxProcId()";
        LOG.debug("load procedures maxProcId=" + maxProcId);
        lastProcId.set(maxProcId);
      }

      @Override
      public void load(ProcedureIterator procIter) throws IOException {
        loadProcedures(procIter, abortOnCorruption);
      }

      @Override
      public void handleCorrupted(ProcedureIterator procIter) throws IOException {
        // log every corrupted entry, counting them as we go
        int corrupted = 0;
        for (; procIter.hasNext(); corrupted++) {
          LOG.error("corrupted procedure: " + procIter.nextAsProcedureInfo());
        }
        if (corrupted > 0 && abortOnCorruption) {
          throw new IOException("found " + corrupted + " corrupted procedure(s) on replay");
        }
      }
    };
    store.load(loader);
  }

  /**
   * Rebuilds the in-memory state of the executor from the replayed procedures.
   * The iterator is walked twice:
   * <ol>
   *   <li>completed procedures go to the completed map, live ones to the
   *       procedures map (roots also get a rollback stack), nonces are restored;</li>
   *   <li>each live procedure is loaded into the stack of its root and sorted
   *       by state into the runnable list / waiting set / run queue;</li>
   *   <li>root stacks that are not valid are dropped together with their procedures;</li>
   *   <li>the runnable list is pushed to the run queue.</li>
   * </ol>
   * @param procIter iterator over the procedures read from the store
   * @param abortOnCorruption true to fail when a corrupted stack is found
   * @throws IOException if abortOnCorruption is set and corrupted procedures were found
   */
  private void loadProcedures(final ProcedureIterator procIter,
      final boolean abortOnCorruption) throws IOException {
    final boolean isDebugEnabled = LOG.isDebugEnabled();

    // 1. Build the rollback stack
    int runnablesCount = 0;
    while (procIter.hasNext()) {
      final NonceKey nonceKey;
      final long procId;

      if (procIter.isNextCompleted()) {
        ProcedureInfo proc = procIter.nextAsProcedureInfo();
        nonceKey = proc.getNonceKey();
        procId = proc.getProcId();
        completed.put(proc.getProcId(), proc);
        if (isDebugEnabled) {
          LOG.debug("The procedure is completed: " + proc);
        }
      } else {
        Procedure proc = procIter.nextAsProcedure();
        nonceKey = proc.getNonceKey();
        procId = proc.getProcId();

        if (!proc.hasParent()) {
          assert !proc.isFinished() : "unexpected finished procedure";
          rollbackStack.put(proc.getProcId(), new RootProcedureState());
        }

        // add the procedure to the map
        proc.beforeReplay(getEnvironment());
        procedures.put(proc.getProcId(), proc);

        if (proc.getState() == ProcedureState.RUNNABLE) {
          runnablesCount++;
        }
      }

      // add the nonce to the map
      if (nonceKey != null) {
        nonceKeysToProcIdsMap.put(nonceKey, procId);
      }
    }

    // 2. Initialize the stacks
    ArrayList<Procedure> runnableList = new ArrayList<Procedure>(runnablesCount);
    HashSet<Procedure> waitingSet = null;
    procIter.reset();
    while (procIter.hasNext()) {
      if (procIter.isNextCompleted()) {
        procIter.skipNext();
        continue;
      }

      Procedure proc = procIter.nextAsProcedure();
      assert !(proc.isFinished() && !proc.hasParent()) : "unexpected completed proc=" + proc;

      if (isDebugEnabled) {
        LOG.debug(String.format("Loading procedure state=%s isFailed=%s: %s",
                    proc.getState(), proc.hasException(), proc));
      }

      Long rootProcId = getRootProcedureId(proc);
      if (rootProcId == null) {
        // The 'proc' was ready to run but the root procedure was rolledback?
        runnables.addBack(proc);
        continue;
      }

      if (proc.hasParent()) {
        Procedure parent = procedures.get(proc.getParentProcId());
        // corrupted procedures are handled later at step 3
        if (parent != null && !proc.isFinished()) {
          parent.incChildrenLatch();
        }
      }

      RootProcedureState procStack = rollbackStack.get(rootProcId);
      procStack.loadStack(proc);

      switch (proc.getState()) {
        case RUNNABLE:
          runnableList.add(proc);
          break;
        case WAITING:
          if (!proc.hasChildren()) {
            runnableList.add(proc);
          }
          break;
        case WAITING_TIMEOUT:
          // NOTE(review): waitingSet is filled here and pruned at step 3, but this
          // method never hands it to waitingTimeout - confirm whether procedures in
          // WAITING_TIMEOUT are re-queued somewhere else after a restart.
          if (waitingSet == null) {
            waitingSet = new HashSet<Procedure>();
          }
          waitingSet.add(proc);
          break;
        case FINISHED:
          if (proc.hasException()) {
            // add the proc to the runnables to perform the rollback
            runnables.addBack(proc);
          }
          break;
        case ROLLEDBACK:
        case INITIALIZING:
          String msg = "Unexpected " + proc.getState() + " state for " + proc;
          LOG.error(msg);
          throw new UnsupportedOperationException(msg);
        default:
          break;
      }
    }

    // 3. Validate the stacks
    int corruptedCount = 0;
    Iterator<Map.Entry<Long, RootProcedureState>> itStack = rollbackStack.entrySet().iterator();
    while (itStack.hasNext()) {
      Map.Entry<Long, RootProcedureState> entry = itStack.next();
      RootProcedureState procStack = entry.getValue();
      if (procStack.isValid()) continue;

      // the whole stack is unusable: forget every procedure that belongs to it
      for (Procedure proc: procStack.getSubproceduresStack()) {
        LOG.error("corrupted procedure: " + proc);
        procedures.remove(proc.getProcId());
        runnableList.remove(proc);
        if (waitingSet != null) waitingSet.remove(proc);
        corruptedCount++;
      }
      itStack.remove();
    }

    if (abortOnCorruption && corruptedCount > 0) {
      // same wording as the message used by the store loader for corrupted entries
      throw new IOException("found " + corruptedCount + " corrupted procedure(s) on replay");
    }

    // 4. Push the runnables
    if (!runnableList.isEmpty()) {
      // TODO: See ProcedureWALFormatReader#hasFastStartSupport
      // some procedure may be started way before this stuff.
      for (int i = runnableList.size() - 1; i >= 0; --i) {
        Procedure proc = runnableList.get(i);
        if (!proc.hasParent()) {
          sendProcedureLoadedNotification(proc.getProcId());
        }
        if (proc.wasExecuted()) {
          runnables.addFront(proc);
        } else {
          // if it was not in execution, it can wait.
          runnables.addBack(proc);
        }
      }
    }
  }

  /**
   * Start the procedure executor.
   * It calls ProcedureStore.recoverLease() and ProcedureStore.load() to
   * recover the lease, and ensure a single executor, and start the procedure
   * replay to resume and recover the previous pending and in-progress procedures.
   * Calling it on an already running executor only logs a warning.
   *
   * @param numThreads number of threads available for procedure execution.
   * @param abortOnCorruption true if you want to abort your service in case
   *          a corrupted procedure is found on replay. otherwise false.
   * @throws IOException if the lease recovery or the load of the procedures fails
   */
  public void start(int numThreads, boolean abortOnCorruption) throws IOException {
    final boolean wasRunning = running.getAndSet(true);
    if (wasRunning) {
      LOG.warn("Already running");
      return;
    }

    // numThreads executors, plus one timer thread in the last slot that
    // times out procedures and triggers the periodic ones.
    threads = new Thread[numThreads + 1];
    LOG.info("Starting procedure executor threads=" + threads.length);

    // The executor threads
    int slot = 0;
    while (slot < numThreads) {
      threads[slot] = new Thread("ProcedureExecutor-" + slot) {
        @Override
        public void run() {
          execLoop();
        }
      };
      slot++;
    }

    // The timeout handler (the +1 thread)
    threads[numThreads] = new Thread("ProcedureExecutorTimeout") {
      @Override
      public void run() {
        timeoutLoop();
      }
    };

    // Acquire the store lease.
    store.recoverLease();

    // TODO: Split in two steps.
    // TODO: Handle corrupted procedures (currently just a warn)
    // The first one will make sure that we have the latest id,
    // so we can start the threads and accept new procedures.
    // The second step will do the actual load of old procedures.
    load(abortOnCorruption);

    // Start the executors. Here we must have the lastProcId set.
    for (Thread thread : threads) {
      thread.start();
    }

    // Add completed cleaner chore
    addChore(new CompletedProcedureCleaner(conf, store, completed, nonceKeysToProcIdsMap));
  }

  /**
   * Flags the executor as not running and wakes up whoever is waiting on the
   * run queue or on the timeout queue. Does nothing if the executor was not
   * running. This does not wait for the threads; see join().
   */
  public void stop() {
    final boolean wasRunning = running.getAndSet(false);
    if (wasRunning) {
      LOG.info("Stopping the procedure executor");
      runnables.signalAll();
      waitingTimeout.signalAll();
    }
  }

  /**
   * Waits for every executor thread (and the timeout thread) to terminate,
   * then clears the in-memory state and resets the last procId to -1.
   * If the calling thread is interrupted while waiting, the remaining threads
   * are still joined and the interrupt flag is restored before the cleanup.
   * When start() was never called there are no threads to wait for and only
   * the cleanup is performed.
   */
  public void join() {
    boolean interrupted = false;

    // threads is null until start() creates it (getNumThreads() handles it the same way)
    final Thread[] workers = threads;
    if (workers != null) {
      for (Thread worker : workers) {
        try {
          worker.join();
        } catch (InterruptedException ex) {
          // keep joining the others, the interrupt is re-raised below
          interrupted = true;
        }
      }
    }

    if (interrupted) {
      Thread.currentThread().interrupt();
    }

    completed.clear();
    rollbackStack.clear();
    procedures.clear();
    nonceKeysToProcIdsMap.clear();
    waitingTimeout.clear();
    runnables.clear();
    lastProcId.set(-1);
  }

  /**
   * @return true between a call to start() and the following call to stop()
   */
  public boolean isRunning() {
    final boolean state = running.get();
    return state;
  }

  /**
   * @return the number of execution threads (the timeout thread is not
   *   counted), or 0 if the threads were not created yet.
   */
  public int getNumThreads() {
    if (threads == null) {
      return 0;
    }
    return threads.length - 1;
  }

  /**
   * @return the current value of the counter that the execution loop
   *   increments while it is working on a procedure
   */
  public int getActiveExecutorCount() {
    final int active = activeExecutorCount.get();
    return active;
  }

  /**
   * @return the environment given to the constructor
   */
  public TEnvironment getEnvironment() {
    return environment;
  }

  /**
   * @return the store given to the constructor
   */
  public ProcedureStore getStore() {
    return store;
  }

  /**
   * Adds a listener to the list of listeners of this executor.
   * @param listener the listener to add
   */
  public void registerListener(ProcedureExecutorListener listener) {
    listeners.add(listener);
  }

  /**
   * Removes a listener from the list of listeners of this executor.
   * @param listener the listener to remove
   * @return true if the listener was in the list
   */
  public boolean unregisterListener(ProcedureExecutorListener listener) {
    final boolean removed = listeners.remove(listener);
    return removed;
  }

  /**
   * List procedures.
   * The list contains one ProcedureInfo created from each live procedure,
   * followed by the ProcedureInfo of each completed procedure.
   * @return the procedures in a list
   */
  public List<ProcedureInfo> listProcedures() {
    List<ProcedureInfo> procedureLists =
        new ArrayList<ProcedureInfo>(procedures.size() + completed.size());
    for (Procedure proc: procedures.values()) {
      procedureLists.add(Procedure.createProcedureInfo(proc, null));
    }
    for (ProcedureInfo procInfo: completed.values()) {
      // Note: The procedure could show up twice in the list with different state, as
      // it could complete after we walk through procedures list and insert into
      // procedureList - it is ok, as we will use the information in the ProcedureInfo
      // to figure it out; to prevent this would increase the complexity of the logic.
      procedureLists.add(procInfo);
    }
    return procedureLists;
  }

  /**
   * Add a chore procedure to the executor.
   * The chore is marked as RUNNABLE and added to the timeout queue.
   * @param chore the chore to add
   */
  public void addChore(final ProcedureInMemoryChore chore) {
    chore.setState(ProcedureState.RUNNABLE);
    waitingTimeout.add(chore);
  }

  /**
   * Remove a chore procedure from the executor.
   * The chore is marked as FINISHED before it is removed from the timeout queue.
   * @param chore the chore to remove
   * @return whether the chore is removed, or it will be removed later
   */
  public boolean removeChore(final ProcedureInMemoryChore chore) {
    chore.setState(ProcedureState.FINISHED);
    final boolean removedNow = waitingTimeout.remove(chore);
    return removedNow;
  }

  // ==========================================================================
  //  Nonce Procedure helpers
  // ==========================================================================
  /**
   * Create a NonceKey from the specified nonceGroup and nonce.
   * @param nonceGroup the group of the nonce
   * @param nonce the nonce, or HConstants.NO_NONCE if there is none
   * @return the generated NonceKey, or null if nonce is HConstants.NO_NONCE
   */
  public NonceKey createNonceKey(final long nonceGroup, final long nonce) {
    if (nonce == HConstants.NO_NONCE) {
      return null;
    }
    return new NonceKey(nonceGroup, nonce);
  }

  /**
   * Register a nonce for a procedure that is going to be submitted.
   * A procId will be reserved and on submitProcedure(),
   * the procedure with the specified nonce will take the reserved ProcId.
   * If someone already reserved the nonce, this method will return the procId reserved,
   * otherwise an invalid procId will be returned and the caller should proceed
   * and submit the procedure.
   *
   * @param nonceKey A unique identifier for this operation from the client or process.
   * @return the procId associated with the nonce, if any otherwise an invalid procId (-1).
   */
  public long registerNonce(final NonceKey nonceKey) {
    if (nonceKey == null) {
      return -1;
    }

    // is there already an ID reserved for this nonce?
    Long reservedProcId = nonceKeysToProcIdsMap.get(nonceKey);
    if (reservedProcId == null) {
      // no: reserve a new Procedure ID. The procedure submitted with this
      // nonce will use it, unless another thread wins the putIfAbsent race.
      final long freshProcId = nextProcId();
      reservedProcId = nonceKeysToProcIdsMap.putIfAbsent(nonceKey, freshProcId);
      if (reservedProcId == null) {
        return -1;
      }
    }

    // the nonce is registered, but the procedure may not have been submitted yet.
    // the client expects it to be submitted, so poll until it shows up, the
    // nonce gets unregistered, or the executor is stopped.
    final boolean traceOn = LOG.isTraceEnabled();
    while (true) {
      if (!isRunning()) {
        break;
      }
      if (procedures.containsKey(reservedProcId) || completed.containsKey(reservedProcId)) {
        break;
      }
      if (!nonceKeysToProcIdsMap.containsKey(nonceKey)) {
        break;
      }
      if (traceOn) {
        LOG.trace("waiting for procId=" + reservedProcId.longValue() + " to be submitted");
      }
      Threads.sleep(100);
    }
    return reservedProcId.longValue();
  }

  /**
   * Remove the NonceKey if the procedure was not submitted to the executor,
   * i.e. the procId reserved for the nonce is neither live nor completed.
   * @param nonceKey A unique identifier for this operation from the client or process.
   */
  public void unregisterNonceIfProcedureWasNotSubmitted(final NonceKey nonceKey) {
    if (nonceKey == null) {
      return;
    }

    final Long procId = nonceKeysToProcIdsMap.get(nonceKey);
    if (procId == null) {
      return;
    }

    final boolean submitted = procedures.containsKey(procId) || completed.containsKey(procId);
    if (!submitted) {
      nonceKeysToProcIdsMap.remove(nonceKey);
    }
  }

  /**
   * If the procedure failed before submitting it, we may want to give back the
   * same error to the requests with the same nonceKey.
   * A failed (ROLLEDBACK) result is stored in the completed map under the
   * procId reserved for the nonce, unless a result is already there.
   *
   * @param nonceKey A unique identifier for this operation from the client or process
   * @param procName name of the procedure, used to inform the user
   * @param procOwner name of the owner of the procedure, used to inform the user
   * @param exception the failure to report to the user
   */
  public void setFailureResultForNonce(final NonceKey nonceKey, final String procName,
      final User procOwner, final IOException exception) {
    if (nonceKey == null) {
      return;
    }

    final Long procId = nonceKeysToProcIdsMap.get(nonceKey);
    if (procId == null) {
      return;
    }
    if (completed.containsKey(procId)) {
      return;
    }

    // the same timestamp is used as last-update and as start time
    final long currentTime = EnvironmentEdgeManager.currentTime();
    final String ownerName = (procOwner == null) ? null : procOwner.getShortName();
    final ErrorHandlingProtos.ForeignExceptionMessage failure =
      ForeignExceptionUtil.toProtoForeignException("ProcedureExecutor", exception);
    final ProcedureInfo result = new FailedProcedureInfo(
      procId.longValue(), procName, ownerName, ProcedureState.ROLLEDBACK, -1, nonceKey,
      failure, currentTime, currentTime, null);
    completed.putIfAbsent(procId, result);
  }

  /**
   * ProcedureInfo used for the failures recorded by setFailureResultForNonce().
   * The completed-procedure cleaner checks for this type to skip the delete
   * from the store.
   */
  public static class FailedProcedureInfo extends ProcedureInfo {

    /** Passes every argument, unchanged, to the ProcedureInfo constructor. */
    public FailedProcedureInfo(long procId, String procName, String procOwner,
        ProcedureState procState, long parentId, NonceKey nonceKey,
        ErrorHandlingProtos.ForeignExceptionMessage exception, long lastUpdate, long startTime,
        byte[] result) {
      super(
          procId, procName, procOwner, procState, parentId, nonceKey,
          exception, lastUpdate, startTime, result);
    }
  }

  // ==========================================================================
  //  Submit/Abort Procedure
  // ==========================================================================
  /**
   * Add a new root-procedure to the executor, without a nonce.
   * @param proc the new procedure to execute.
   * @return the procedure id, that can be used to monitor the operation
   */
  public long submitProcedure(final Procedure proc) {
    final NonceKey noNonce = null;
    return submitProcedure(proc, noNonce);
  }

  /**
   * Add a new root-procedure to the executor.
   * The procedure must be in the INITIALIZING state and without a parent, and
   * the executor must be running. When a nonceKey is given, it must have been
   * reserved with registerNonce() and the procedure takes the reserved procId;
   * otherwise a new procId is assigned.
   * @param proc the new procedure to execute.
   * @param nonceKey the registered unique identifier for this operation from the client or process.
   * @return the procedure id, that can be used to monitor the operation
   */
  @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="NP_NULL_ON_SOME_PATH",
      justification = "FindBugs is blind to the check-for-null")
  public long submitProcedure(final Procedure proc, final NonceKey nonceKey) {
    Preconditions.checkArgument(proc.getState() == ProcedureState.INITIALIZING);
    Preconditions.checkArgument(isRunning(), "executor not running");
    Preconditions.checkArgument(lastProcId.get() >= 0);
    Preconditions.checkArgument(!proc.hasParent(), "unexpected parent", proc);

    // Pick the procId: a fresh one, or the one reserved for the nonce
    final Long currentProcId;
    if (nonceKey == null) {
      currentProcId = nextProcId();
    } else {
      currentProcId = nonceKeysToProcIdsMap.get(nonceKey);
      Preconditions.checkArgument(currentProcId != null,
        "expected nonceKey=" + nonceKey + " to be reserved, use registerNonce()");
    }

    // Initialize the procedure
    proc.setNonceKey(nonceKey);
    proc.setProcId(currentProcId.longValue());

    // Commit the transaction: the store insert comes before any in-memory registration
    store.insert(proc, null);
    if (LOG.isDebugEnabled()) {
      LOG.debug("Procedure " + proc + " added to the store.");
    }

    // Create the rollback stack for the procedure
    rollbackStack.put(currentProcId, new RootProcedureState());

    // Register the procedure, notify, and make it runnable
    assert !procedures.containsKey(currentProcId);
    procedures.put(currentProcId, proc);
    sendProcedureAddedNotification(currentProcId);
    runnables.addBack(proc);
    return currentProcId;
  }

  /**
   * @param procId the ID of the procedure
   * @return the entry of the completed map for procId, or null if there is none
   */
  public ProcedureInfo getResult(final long procId) {
    final ProcedureInfo result = completed.get(procId);
    return result;
  }

  /**
   * Return true if the procedure is finished, i.e. it is in the completed map.
   * The state may be "completed successfully" or "failed and rolledback".
   * Use getResult() to check the state or get the result data.
   * @param procId the ID of the procedure to check
   * @return true if the procedure execution is finished, otherwise false.
   */
  public boolean isFinished(final long procId) {
    final boolean finished = completed.containsKey(procId);
    return finished;
  }

  /**
   * Return true if the procedure is started.
   * A live procedure is started once it was executed; a procedure that is no
   * longer live is considered started if it has a completed result.
   * @param procId the ID of the procedure to check
   * @return true if the procedure execution is started, otherwise false.
   */
  public boolean isStarted(final long procId) {
    final Procedure live = procedures.get(procId);
    if (live != null) {
      return live.wasExecuted();
    }
    return completed.get(procId) != null;
  }

  /**
   * Mark the specified completed procedure, as ready to remove.
   * Only the client-ack time is recorded here; the actual removal is left
   * to the CompletedProcedureCleaner, once the TTL is expired.
   * @param procId the ID of the procedure to remove
   */
  public void removeResult(final long procId) {
    final ProcedureInfo result = completed.get(procId);
    if (result != null) {
      result.setClientAckTime(EnvironmentEdgeManager.currentTime());
      return;
    }

    // no result: it must not be a procedure that is still live
    assert !procedures.containsKey(procId) : "procId=" + procId + " is still running";
    if (LOG.isDebugEnabled()) {
      LOG.debug("Procedure procId=" + procId + " already removed by the cleaner.");
    }
  }

  /**
   * Send an abort notification to the specified procedure, even if it was
   * already executed.
   * Depending on the procedure implementation the abort can be considered or ignored.
   * @param procId the procedure to abort
   * @return true if the procedure exists and has received the abort, otherwise false.
   */
  public boolean abort(final long procId) {
    final boolean mayInterrupt = true;
    return abort(procId, mayInterrupt);
  }

  /**
   * Send an abort notification to the specified procedure.
   * Depending on the procedure implementation the abort can be considered or ignored.
   * @param procId the procedure to abort
   * @param mayInterruptIfRunning if the proc completed at least one step, should it be aborted?
   * @return true if the procedure exists and has received the abort, otherwise false.
   */
  public boolean abort(final long procId, final boolean mayInterruptIfRunning) {
    final Procedure target = procedures.get(procId);
    if (target == null) {
      return false;
    }
    // an already executed procedure is left alone unless the caller allows the interruption
    final boolean keepRunning = !mayInterruptIfRunning && target.wasExecuted();
    if (keepRunning) {
      return false;
    }
    return target.abort(getEnvironment());
  }

  /**
   * Check if the user is this procedure's owner.
   * Live procedures are checked first, then the completed ones.
   * @param procId the target procedure
   * @param user the user
   * @return true if the user is the owner of the procedure,
   *   false otherwise or the owner is unknown.
   */
  public boolean isProcedureOwner(final long procId, final User user) {
    if (user == null) {
      return false;
    }

    Procedure proc = procedures.get(procId);
    if (proc != null) {
      // An unknown (null) owner must give "false", as documented, not a
      // NullPointerException.
      // NOTE(review): assumes getOwner() returns a String that can be null - confirm.
      final String owner = proc.getOwner();
      return owner != null && owner.equals(user.getShortName());
    }
    ProcedureInfo procInfo = completed.get(procId);
    if (procInfo == null) {
      // Procedure either does not exist or has already completed and got cleaned up.
      // At this time, we cannot check the owner of the procedure
      return false;
    }
    return ProcedureInfo.isProcedureOwner(procInfo, user);
  }

  /**
   * @return a read-only view of the completed map (procId to result); the
   *   view is backed by the live map, it is not a snapshot
   */
  public Map<Long, ProcedureInfo> getResults() {
    return Collections.unmodifiableMap(completed);
  }

  /**
   * @param procId the ID of the procedure
   * @return the live procedure with the given ID, or null if there is none
   */
  public Procedure getProcedure(final long procId) {
    final Procedure live = procedures.get(procId);
    return live;
  }

  /**
   * @return the runnable set the execution loop polls procedures from
   */
  protected ProcedureRunnableSet getRunnableSet() {
    final ProcedureRunnableSet runnableSet = runnables;
    return runnableSet;
  }

  /**
   * Worker loop (N threads).
   * As long as the executor is in a running state, take the next procedure
   * from the runnables queue and start its execution.
   */
  private void execLoop() {
    while (isRunning()) {
      final Procedure next = runnables.poll();
      if (next != null) {
        // the active counter is always decremented, even if the execution throws
        try {
          activeExecutorCount.incrementAndGet();
          execLoop(next);
        } finally {
          activeExecutorCount.decrementAndGet();
        }
      }
    }
  }

  /**
   * Handle one procedure taken from the runnables queue.
   * Depending on the state of the root procedure stack, the procedure is either
   * executed (one call to execProcedure()) or rolled back. When the work cannot
   * proceed (lock not available, rollback not completed) the procedure is
   * yielded back to the runnables queue.
   * @param proc the procedure polled from the runnables queue
   */
  private void execLoop(Procedure proc) {
    if (LOG.isTraceEnabled()) {
      LOG.trace("Trying to start the execution of " + proc);
    }

    // Resolve the root of the procedure tree 'proc' belongs to
    Long rootProcId = getRootProcedureId(proc);
    if (rootProcId == null) {
      // The 'proc' was ready to run but the root procedure was rolledback
      executeRollback(proc);
      return;
    }

    // No state registered for the root procedure: nothing to execute
    RootProcedureState procStack = rollbackStack.get(rootProcId);
    if (procStack == null) return;

    do {
      // Try to acquire the execution
      if (!procStack.acquire(proc)) {
        // The execution was refused: the only option left is the rollback path
        if (procStack.setRollback()) {
          // we have the 'rollback-lock' we can start rollingback
          if (!executeRollback(rootProcId, procStack)) {
            // the rollback did not complete: give up the 'rollback-lock'
            // and put the procedure back in the queue to retry later
            procStack.unsetRollback();
            runnables.yield(proc);
          }
        } else {
          // if we can't rollback means that some child is still running.
          // the rollback will be executed after all the children are done.
          // If the procedure was never executed, remove and mark it as rolledback.
          if (!proc.wasExecuted()) {
            if (!executeRollback(proc)) {
              runnables.yield(proc);
            }
          }
        }
        break;
      }

      // Execute the procedure
      assert proc.getState() == ProcedureState.RUNNABLE;
      if (proc.acquireLock(getEnvironment())) {
        execProcedure(procStack, proc);
        proc.releaseLock(getEnvironment());
      } else {
        // the procedure lock is not available: retry later
        runnables.yield(proc);
      }
      // pairs with the procStack.acquire() above
      procStack.release(proc);

      // allows to kill the executor before something is stored to the wal.
      // useful to test the procedure recovery.
      if (testing != null && !isRunning()) {
        break;
      }

      if (proc.isSuccess()) {
        if (LOG.isDebugEnabled()) {
          LOG.debug("Procedure completed in " +
              StringUtils.humanTimeDiff(proc.elapsedTime()) + ": " + proc);
        }
        // Finalize the procedure state
        // (only the root procedure is finalized here)
        if (proc.getProcId() == rootProcId) {
          procedureFinished(proc);
        }
        break;
      }
      // Loop again only when the stack is marked as failed
      // NOTE(review): presumably so the next acquire() fails and the rollback
      // path above is taken right away -- confirm against RootProcedureState.
    } while (procStack.isFailed());
  }

  /**
   * Timeout loop: while the executor is in a running state, poll the
   * waitingTimeout queue and handle the procedures it returns.
   *  - a procedure returned with more than 100 of remaining time is re-enqueued
   *  - a ProcedureInMemoryChore gets its periodicExecute() called and is
   *    re-enqueued as long as it is runnable
   *  - any other procedure is marked as failed for timeout, its root procedure
   *    state (if still available) is aborted and the procedure is added back
   *    to the runnables queue for rollback
   */
  private void timeoutLoop() {
    while (isRunning()) {
      Procedure proc = waitingTimeout.poll();
      if (proc == null) continue;

      if (proc.getTimeRemaining() > 100) {
        // got an early wake, maybe a stop?
        // re-enqueue the task in case was not a stop or just a signal
        waitingTimeout.add(proc);
        continue;
      }

      // ----------------------------------------------------------------------------
      // TODO-MAYBE: Should we provide a notification to the store with the
      // full set of procedures pending and completed to write a compacted
      // version of the log (in case is a log)?
      // In theory no, procedures have a short life, so at some point the store
      // will have the tracker saying everything is in the last log.
      // ----------------------------------------------------------------------------

      // The ProcedureInMemoryChore is a special case, and it acts as a chore.
      // instead of bringing the Chore class in, we reuse this timeout thread for
      // this special case.
      if (proc instanceof ProcedureInMemoryChore) {
        if (proc.isRunnable()) {
          try {
            ((ProcedureInMemoryChore)proc).periodicExecute(getEnvironment());
          } catch (Throwable e) {
            // Any in-memory chore may end up here: report the one that actually failed.
            LOG.error("Ignoring " + proc + " exception: " + e.getMessage(), e);
          }
          proc.setStartTime(EnvironmentEdgeManager.currentTime());
          if (proc.isRunnable()) waitingTimeout.add(proc);
        }
        continue;
      }

      // The procedure received an "abort-timeout", call abort() and
      // add the procedure back in the queue for rollback.
      if (proc.setTimeoutFailure()) {
        // The root id (and its state) may be gone if the root procedure was
        // already rolledback/finished. Do not let a NullPointerException kill
        // the timeout thread: the execution loop handles a missing root.
        final Long rootProcId = getRootProcedureId(proc);
        final RootProcedureState procStack =
            (rootProcId != null) ? rollbackStack.get(rootProcId) : null;
        if (procStack != null) {
          procStack.abort();
        } else {
          LOG.warn("Unable to find the root procedure state for timed out " + proc);
        }
        store.update(proc);
        runnables.addFront(proc);
      }
    }
  }

  /**
   * Execute the rollback of the full procedure stack.
   * Once the procedure is rolledback, the root-procedure will be visible as
   * finished to user, and the result will be the fatal exception.
   * The steps recorded in the stack are rolled back from the last one to the first.
   * @param rootProcId the id of the root procedure
   * @param procStack the state of the root procedure, holding the steps to rollback
   * @return true if the whole stack was rolled back and the root procedure was
   *   finalized, false if the rollback was interrupted (lock not available,
   *   step failed, executor or store not running, procedure asked to yield)
   *   and must be retried later.
   */
  private boolean executeRollback(final long rootProcId, final RootProcedureState procStack) {
    Procedure rootProc = procedures.get(rootProcId);
    RemoteProcedureException exception = rootProc.getException();
    if (exception == null) {
      // the root has no failure set yet: use the one recorded on the stack
      // and persist the new state of the root procedure
      exception = procStack.getException();
      rootProc.setFailure(exception);
      store.update(rootProc);
    }

    List subprocStack = procStack.getSubproceduresStack();
    assert subprocStack != null : "Called rollback with no steps executed rootProc=" + rootProc;

    int stackTail = subprocStack.size();
    boolean reuseLock = false;
    // walk the stack backwards: 'stackTail' is decremented before the body runs,
    // so inside the loop it is the index of the step being rolled back
    while (stackTail --> 0) {
      final Procedure proc = subprocStack.get(stackTail);

      if (!reuseLock && !proc.acquireLock(getEnvironment())) {
        // can't take a lock on the procedure, add the root-proc back on the
        // queue waiting for the lock availability
        return false;
      }

      // the rollback is aborted if the step failed or if we are shutting down
      boolean abortRollback = !executeRollback(proc);
      abortRollback |= !isRunning() || !store.isRunning();

      // If the next procedure is the same to this one
      // (e.g. StateMachineProcedure reuse the same instance)
      // we can avoid to lock/unlock each step
      reuseLock = stackTail > 0 && (subprocStack.get(stackTail - 1) == proc) && !abortRollback;
      if (!reuseLock) {
        proc.releaseLock(getEnvironment());
      }

      // allows to kill the executor before something is stored to the wal.
      // useful to test the procedure recovery.
      if (abortRollback) {
        return false;
      }

      // the step was rolled back: drop it from the stack
      subprocStack.remove(stackTail);

      // if the procedure is kind enough to pass the slot to someone else, yield
      if (proc.isYieldAfterExecutionStep(getEnvironment())) {
        return false;
      }
    }

    // Finalize the procedure state
    LOG.info("Rolledback procedure " + rootProc +
             " exec-time=" + StringUtils.humanTimeDiff(rootProc.elapsedTime()) +
             " exception=" + exception.getMessage());
    procedureFinished(rootProc);
    return true;
  }

  /**
   * Execute the rollback of a single procedure step.
   * On success the store is updated with the new state (stack index),
   * or the procedure is removed completely in case it is a child.
   * @param proc the procedure to rollback
   * @return false if the rollback must be retried (I/O failure, interrupt,
   *   testing kill), true otherwise
   */
  private boolean executeRollback(final Procedure proc) {
    try {
      proc.doRollback(getEnvironment());
    } catch (IOException ioe) {
      if (LOG.isDebugEnabled()) {
        LOG.debug("rollback attempt failed for " + proc, ioe);
      }
      return false;
    } catch (InterruptedException ie) {
      handleInterruptedException(proc, ie);
      return false;
    } catch (Throwable t) {
      // Catch NullPointerExceptions or similar errors...
      // (logged only: the step is still treated as rolled back below)
      LOG.fatal("CODE-BUG: Uncatched runtime exception for procedure: " + proc, t);
    }

    // allows to kill the executor before something is stored to the wal.
    // useful to test the procedure recovery.
    final boolean killBeforeStoreUpdate = testing != null && testing.shouldKillBeforeStoreUpdate();
    if (killBeforeStoreUpdate) {
      LOG.debug("TESTING: Kill before store update");
      stop();
      return false;
    }

    if (!proc.removeStackIndex()) {
      // not fully rolled back yet: just persist the new state
      store.update(proc);
      return true;
    }

    proc.setState(ProcedureState.ROLLEDBACK);
    if (proc.hasParent()) {
      // a rolled back child is dropped from both the store and the executor
      store.delete(proc.getProcId());
      procedures.remove(proc.getProcId());
      return true;
    }

    // root procedure: remove its children from the store, if it has any
    final long[] childProcIds = rollbackStack.get(proc.getProcId()).getSubprocedureIds();
    if (childProcIds == null) {
      store.update(proc);
    } else {
      store.delete(proc, childProcIds);
    }
    return true;
  }

  /**
   * Executes the specified procedure
   *  - calls the doExecute() of the procedure
   *  - if the procedure execution didn't fail (e.g. invalid user input)
   *     - ...and returned subprocedures
   *        - the subprocedures are initialized.
   *        - the subprocedures are added to the store
   *        - the subprocedures are added to the runnable queue
   *        - the procedure is now in a WAITING state, waiting for the subprocedures to complete
   *     - ...if there are no subprocedure
   *        - the procedure completed successfully
   *        - if there is a parent (WAITING)
   *            - the parent state will be set to RUNNABLE
   *  - in case of failure
   *    - the store is updated with the new state
   *    - the executor (caller of this method) will start the rollback of the procedure
   *
   * The execution step is repeated in place when the procedure returns itself as
   * its only subprocedure. The method returns early when the procedure yields or
   * is interrupted, when the testing hook stops the executor, or when the store
   * is no longer running.
   *
   * @param procStack the state of the root procedure 'procedure' belongs to
   * @param procedure the procedure to execute, it must be in RUNNABLE state
   */
  private void execProcedure(final RootProcedureState procStack, final Procedure procedure) {
    Preconditions.checkArgument(procedure.getState() == ProcedureState.RUNNABLE);

    // Execute the procedure
    boolean reExecute = false;
    Procedure[] subprocs = null;
    do {
      reExecute = false;
      try {
        subprocs = procedure.doExecute(getEnvironment());
        // an empty array is handled like null: no subprocedures
        if (subprocs != null && subprocs.length == 0) {
          subprocs = null;
        }
      } catch (ProcedureYieldException e) {
        if (LOG.isTraceEnabled()) {
          LOG.trace("Yield procedure: " + procedure + ": " + e.getMessage());
        }
        runnables.yield(procedure);
        return;
      } catch (InterruptedException e) {
        handleInterruptedException(procedure, e);
        runnables.yield(procedure);
        return;
      } catch (Throwable e) {
        // Catch NullPointerExceptions or similar errors...
        // The procedure is marked as failed and the flow continues below,
        // so the failure is recorded on the stack and in the store.
        String msg = "CODE-BUG: Uncatched runtime exception for procedure: " + procedure;
        LOG.error(msg, e);
        procedure.setFailure(new RemoteProcedureException(msg, e));
      }

      if (!procedure.isFailed()) {
        if (subprocs != null) {
          if (subprocs.length == 1 && subprocs[0] == procedure) {
            // quick-shortcut for a state machine like procedure
            // (the procedure returned itself: run the next step in this same loop)
            subprocs = null;
            reExecute = true;
          } else {
            // yield the current procedure, and make the subprocedure runnable
            for (int i = 0; i < subprocs.length; ++i) {
              Procedure subproc = subprocs[i];
              if (subproc == null) {
                // a null child is a bug of the procedure: fail the parent
                String msg = "subproc[" + i + "] is null, aborting the procedure";
                procedure.setFailure(new RemoteProcedureException(msg,
                  new IllegalArgumentIOException(msg)));
                subprocs = null;
                break;
              }

              // link the child to its parent, assign it a new id
              // and register it on the root procedure state
              assert subproc.getState() == ProcedureState.INITIALIZING;
              subproc.setParentProcId(procedure.getProcId());
              subproc.setProcId(nextProcId());
              procStack.addSubProcedure(subproc);
            }

            if (!procedure.isFailed()) {
              // one latch count per child, see the childrenCountDown() call below
              procedure.setChildrenLatch(subprocs.length);
              switch (procedure.getState()) {
                case RUNNABLE:
                  procedure.setState(ProcedureState.WAITING);
                  break;
                case WAITING_TIMEOUT:
                  waitingTimeout.add(procedure);
                  break;
                default:
                  break;
              }
            }
          }
        } else if (procedure.getState() == ProcedureState.WAITING_TIMEOUT) {
          // no children, but the procedure asked to be handled by the timeout loop
          waitingTimeout.add(procedure);
        } else {
          // No subtask, so we are done
          procedure.setState(ProcedureState.FINISHED);
        }
      }

      // Add the procedure to the stack
      procStack.addRollbackStep(procedure);

      // allows to kill the executor before something is stored to the wal.
      // useful to test the procedure recovery.
      if (testing != null && testing.shouldKillBeforeStoreUpdate()) {
        LOG.debug("TESTING: Kill before store update");
        stop();
        return;
      }

      // Commit the transaction
      updateStoreOnExec(procStack, procedure, subprocs);

      // if the store is not running we are aborting
      if (!store.isRunning()) {
        return;
      }

      // if the procedure is kind enough to pass the slot to someone else, yield
      if (procedure.getState() == ProcedureState.RUNNABLE &&
          procedure.isYieldAfterExecutionStep(getEnvironment())) {
        runnables.yield(procedure);
        return;
      }

      // a re-execution is requested only when there are no subprocedures to submit
      assert (reExecute && subprocs == null) || !reExecute;
    } while (reExecute);

    // Submit the new subprocedures
    // (done after the store commit above: the children become visible to the
    // executor and are queued for execution)
    if (subprocs != null && !procedure.isFailed()) {
      for (int i = 0; i < subprocs.length; ++i) {
        Procedure subproc = subprocs[i];
        assert !procedures.containsKey(subproc.getProcId());
        procedures.put(subproc.getProcId(), subproc);
        runnables.addFront(subproc);
      }
    }

    if (procedure.isFinished() && procedure.hasParent()) {
      Procedure parent = procedures.get(procedure.getParentProcId());
      if (parent == null) {
        // the parent is no longer in the procedures map,
        // this is expected only while the stack is rolling back
        assert procStack.isRollingback();
        return;
      }

      // If this procedure is the last child awake the parent procedure
      if (LOG.isTraceEnabled()) {
        LOG.trace(parent + " child is done: " + procedure);
      }
      if (parent.childrenCountDown() && parent.getState() == ProcedureState.WAITING) {
        parent.setState(ProcedureState.RUNNABLE);
        store.update(parent);
        runnables.addFront(parent);
        if (LOG.isTraceEnabled()) {
          LOG.trace(parent + " all the children finished their work, resume.");
        }
        return;
      }
    }
  }

  /**
   * Persist in the store the result of an execution step.
   *  - new subprocedures of a non-failed procedure are inserted with their parent
   *  - a finished root procedure with children is stored while its children are
   *    deleted from the store and removed from the procedures map
   *  - in any other case the procedure is just updated
   * @param procStack the state of the root procedure 'procedure' belongs to
   * @param procedure the procedure that was executed
   * @param subprocs the subprocedures returned by the execution, or null
   */
  private void updateStoreOnExec(final RootProcedureState procStack,
      final Procedure procedure, final Procedure[] subprocs) {
    final boolean hasNewChildren = subprocs != null && !procedure.isFailed();
    if (hasNewChildren) {
      if (LOG.isTraceEnabled()) {
        LOG.trace("Store add " + procedure + " children " + Arrays.toString(subprocs));
      }
      store.insert(procedure, subprocs);
      return;
    }

    if (LOG.isTraceEnabled()) {
      LOG.trace("Store update " + procedure);
    }

    // only a finished root procedure triggers the removal of the child procedures
    final boolean rootFinished = procedure.isFinished() && !procedure.hasParent();
    final long[] childProcIds = rootFinished ? procStack.getSubprocedureIds() : null;
    if (childProcIds == null) {
      store.update(procedure);
      return;
    }

    store.delete(procedure, childProcIds);
    for (final long childProcId : childProcIds) {
      procedures.remove(childProcId);
    }
  }

  /**
   * Trace the interruption of a procedure. Nothing else is done here.
   * @param proc the procedure that was running when the interrupt was received
   * @param e the interrupt received
   */
  private void handleInterruptedException(final Procedure proc, final InterruptedException e) {
    // NOTE: We don't call Thread.currentThread().interrupt()
    // because otherwise all the subsequent calls e.g. Thread.sleep() will throw
    // the InterruptedException. If the master is going down, we will be notified
    // and the executor/store will be stopped.
    // (The interrupted procedure will be retried on the next run)
    if (!LOG.isTraceEnabled()) {
      return;
    }
    LOG.trace("got an interrupt during " + proc + ". suspend and retry it later.", e);
  }

  /**
   * Tell every registered listener that the procedure was loaded.
   * An error raised by a listener is logged and does not prevent the
   * remaining listeners from being notified.
   * @param procId the id of the loaded procedure
   */
  private void sendProcedureLoadedNotification(final long procId) {
    if (this.listeners.isEmpty()) {
      return;
    }
    for (ProcedureExecutorListener observer: this.listeners) {
      try {
        observer.procedureLoaded(procId);
      } catch (Throwable t) {
        LOG.error("The listener " + observer + " had an error: " + t.getMessage(), t);
      }
    }
  }

  /**
   * Tell every registered listener that the procedure was added.
   * An error raised by a listener is logged and does not prevent the
   * remaining listeners from being notified.
   * @param procId the id of the added procedure
   */
  private void sendProcedureAddedNotification(final long procId) {
    if (this.listeners.isEmpty()) {
      return;
    }
    for (ProcedureExecutorListener observer: this.listeners) {
      try {
        observer.procedureAdded(procId);
      } catch (Throwable t) {
        LOG.error("The listener " + observer + " had an error: " + t.getMessage(), t);
      }
    }
  }

  /**
   * Tell every registered listener that the procedure is finished.
   * An error raised by a listener is logged and does not prevent the
   * remaining listeners from being notified.
   * @param procId the id of the finished procedure
   */
  private void sendProcedureFinishedNotification(final long procId) {
    if (this.listeners.isEmpty()) {
      return;
    }
    for (ProcedureExecutorListener observer: this.listeners) {
      try {
        observer.procedureFinished(procId);
      } catch (Throwable t) {
        LOG.error("The listener " + observer + " had an error: " + t.getMessage(), t);
      }
    }
  }

  /**
   * Generate the id for a new procedure.
   * The id comes from the lastProcId counter. When the counter overflows
   * (negative value) it is restarted, and the ids still present in the
   * procedures map are skipped.
   * @return a non-negative procedure id
   */
  private long nextProcId() {
    long procId = lastProcId.incrementAndGet();
    if (procId < 0) {
      // The counter wrapped around. Claim the next id with a compare-and-set,
      // restarting from 1 while the counter is still negative (or about to
      // overflow again). The claimed value is the one returned, so the
      // negative value is never handed out and two threads racing on the
      // reset can't get the same id.
      long current;
      long next;
      do {
        current = lastProcId.get();
        next = (current < 0 || current == Long.MAX_VALUE) ? 1 : current + 1;
      } while (!lastProcId.compareAndSet(current, next));
      procId = next;

      // After a restart the low ids may still belong to live procedures: skip them.
      while (procedures.containsKey(procId)) {
        procId = lastProcId.incrementAndGet();
      }
    }
    return procId;
  }

  /**
   * @return the current value of the procedure id counter
   */
  @VisibleForTesting
  protected long getLastProcId() {
    final long current = lastProcId.get();
    return current;
  }

  /**
   * Resolve the id of the root procedure of 'proc' using the executor's
   * procedures map.
   * @param proc the procedure to resolve the root of
   * @return the id of the root procedure; callers must be ready for a null result
   */
  private Long getRootProcedureId(Procedure proc) {
    final Long rootId = Procedure.getRootProcedureId(procedures, proc);
    return rootId;
  }

  /**
   * Finalize a procedure:
   *  - run the completion cleanup of the procedure
   *  - publish its result in the completed map and drop it from the
   *    rollbackStack and procedures maps
   *  - run the completion cleanup of the runnable set
   *  - notify the listeners
   * Errors raised by the cleanup handlers are logged and ignored.
   * @param proc the procedure that reached its final state
   */
  private void procedureFinished(final Procedure proc) {
    // call the procedure completion cleanup handler
    try {
      proc.completionCleanup(getEnvironment());
    } catch (Throwable t) {
      // Catch NullPointerExceptions or similar errors...
      LOG.error("CODE-BUG: uncatched runtime exception for procedure: " + proc, t);
    }

    // update the executor internal state maps
    final ProcedureInfo finishedInfo = Procedure.createProcedureInfo(proc, proc.getNonceKey());
    final boolean waitClientAck = proc.shouldWaitClientAck(getEnvironment());
    if (!waitClientAck) {
      finishedInfo.setClientAckTime(0);
    }

    // the result is published before the procedure is removed from the active maps
    completed.put(finishedInfo.getProcId(), finishedInfo);
    rollbackStack.remove(proc.getProcId());
    procedures.remove(proc.getProcId());

    // call the runnableSet completion cleanup handler
    try {
      runnables.completionCleanup(proc);
    } catch (Throwable t) {
      // Catch NullPointerExceptions or similar errors...
      LOG.error("CODE-BUG: uncatched runtime exception for runnableSet: " + runnables, t);
    }

    // Notify the listeners
    sendProcedureFinishedNotification(proc.getProcId());
  }

  /**
   * Look up a procedure by id among the completed and the active ones.
   * @param procId the id of the procedure to look up
   * @return a pair (result, procedure): the first element is the entry of the
   *   completed map or null, the second is the entry of the procedures map,
   *   which is looked up only when there is no completed entry
   */
  public Pair getResultOrProcedure(final long procId) {
    ProcedureInfo result = completed.get(procId);
    final Procedure proc = (result == null) ? procedures.get(procId) : null;
    if (result == null && proc == null) {
      // Found in neither map: check the completed map once more.
      // NOTE(review): presumably the procedure may have finished between the
      // two lookups above -- confirm.
      result = completed.get(procId);
    }
    return new Pair(result, proc);
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy