org.apache.hadoop.hbase.procedure2.ProcedureExecutor Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of hbase-procedure Show documentation
Procedure Framework
There is a newer version: 3.0.0-beta-1
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.procedure2;

import edu.umd.cs.findbugs.annotations.Nullable;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Comparator;
import java.util.Deque;
import java.util.HashSet;
import java.util.List;
import java.util.PriorityQueue;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.concurrent.Executor;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.exceptions.IllegalArgumentIOException;
import org.apache.hadoop.hbase.log.HBaseMarkers;
import org.apache.hadoop.hbase.procedure2.Procedure.LockState;
import org.apache.hadoop.hbase.procedure2.store.ProcedureStore;
import org.apache.hadoop.hbase.procedure2.store.ProcedureStore.ProcedureIterator;
import org.apache.hadoop.hbase.procedure2.store.ProcedureStore.ProcedureStoreListener;
import org.apache.hadoop.hbase.procedure2.trace.ProcedureSpanBuilder;
import org.apache.hadoop.hbase.procedure2.util.StringUtils;
import org.apache.hadoop.hbase.security.User;
import org.apache.hadoop.hbase.trace.TraceUtil;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.IdLock;
import org.apache.hadoop.hbase.util.NonceKey;
import org.apache.hadoop.hbase.util.Threads;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.common.base.Preconditions;
import org.apache.hbase.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder;

import org.apache.hadoop.hbase.shaded.protobuf.generated.ProcedureProtos.ProcedureState;

/**
 * Thread pool that executes the submitted procedures. The executor has an associated
 * ProcedureStore. Each operation is logged, and on restart the pending procedures are resumed.
 * Unless the Procedure code throws an error (e.g. invalid user input), the procedure will complete
 * (at some point in time). On restart the pending procedures are resumed and the ones that failed
 * will be rolled back. The user can add procedures to the executor via submitProcedure(proc),
 * check for the finished state via isFinished(procId), and get the result via getResult(procId).
 */
@InterfaceAudience.Private
public class ProcedureExecutor {
  // Logger used by the executor and by the nested Testing helper.
  private static final Logger LOG = LoggerFactory.getLogger(ProcedureExecutor.class);

  // Boolean configuration key; read once in the constructor into the checkOwnerSet field.
  public static final String CHECK_OWNER_SET_CONF_KEY = "hbase.procedure.check.owner.set";
  private static final boolean DEFAULT_CHECK_OWNER_SET = false;

  // Worker keep-alive time in milliseconds; read in refreshConfiguration(). Default is one minute.
  public static final String WORKER_KEEP_ALIVE_TIME_CONF_KEY =
    "hbase.procedure.worker.keep.alive.time.msec";
  private static final long DEFAULT_WORKER_KEEP_ALIVE_TIME = TimeUnit.MINUTES.toMillis(1);

  // Eviction TTL handed to CompletedProcedureRetainer.isExpired() in forceUpdateProcedure().
  public static final String EVICT_TTL_CONF_KEY = "hbase.procedure.cleaner.evict.ttl";
  static final int DEFAULT_EVICT_TTL = 15 * 60000; // 15min

  // Second ("acked") eviction TTL handed to CompletedProcedureRetainer.isExpired().
  public static final String EVICT_ACKED_TTL_CONF_KEY = "hbase.procedure.cleaner.acked.evict.ttl";
  static final int DEFAULT_ACKED_EVICT_TTL = 5 * 60000; // 5min

  /**
   * Non-null only while the ProcedureExecutor is being tested. Tests try to break the PE by making
   * it fail at various junctures; when set, this refers to an instance of the nested
   * {@link Testing} class whose flags have been set for the particular test.
   */
  volatile Testing testing = null;

  /**
   * Flags describing how and when to fail/die in a testing context. Each {@code toggleXxx} flag,
   * when set, makes the matching {@code shouldXxx()} call flip its kill flag after reading it.
   */
  public static class Testing {
    protected volatile boolean killIfHasParent = true;
    protected volatile boolean killIfSuspended = false;

    /**
     * Kill the PE BEFORE state is stored to the WAL. Useful for checking that a Procedure persists
     * all the state it needs to recover after a crash.
     */
    protected volatile boolean killBeforeStoreUpdate = false;
    protected volatile boolean toggleKillBeforeStoreUpdate = false;

    /**
     * Fail AFTER state has been stored into the WAL. Rarely used. HBASE-20978 is about a case where
     * memory-state was set after the store to WAL, so a crash could leave us stuck; this flag allows
     * killing at that formerly vulnerable point.
     */
    protected volatile boolean killAfterStoreUpdate = false;
    protected volatile boolean toggleKillAfterStoreUpdate = false;

    protected volatile boolean killBeforeStoreUpdateInRollback = false;
    protected volatile boolean toggleKillBeforeStoreUpdateInRollback = false;

    /** Returns the current kill-before-store flag, flipping it afterwards when toggling is on. */
    protected boolean shouldKillBeforeStoreUpdate() {
      final boolean current = this.killBeforeStoreUpdate;
      if (!this.toggleKillBeforeStoreUpdate) {
        return current;
      }
      this.killBeforeStoreUpdate = !current;
      LOG.warn("Toggle KILL before store update to: " + this.killBeforeStoreUpdate);
      return current;
    }

    /**
     * Same as {@link #shouldKillBeforeStoreUpdate()}, but never kills (and never toggles) for a
     * suspended procedure unless killIfSuspended is set, nor for a procedure with a parent unless
     * killIfHasParent is set.
     */
    protected boolean shouldKillBeforeStoreUpdate(boolean isSuspended, boolean hasParent) {
      if ((isSuspended && !killIfSuspended) || (hasParent && !killIfHasParent)) {
        return false;
      }
      return shouldKillBeforeStoreUpdate();
    }

    /** Returns the current kill-after-store flag, flipping it afterwards when toggling is on. */
    protected boolean shouldKillAfterStoreUpdate() {
      final boolean current = this.killAfterStoreUpdate;
      if (!this.toggleKillAfterStoreUpdate) {
        return current;
      }
      this.killAfterStoreUpdate = !current;
      LOG.warn("Toggle KILL after store update to: " + this.killAfterStoreUpdate);
      return current;
    }

    /**
     * Same as {@link #shouldKillAfterStoreUpdate()}, but never kills (and never toggles) for a
     * suspended procedure unless killIfSuspended is set.
     */
    protected boolean shouldKillAfterStoreUpdate(final boolean isSuspended) {
      if (isSuspended && !killIfSuspended) {
        return false;
      }
      return shouldKillAfterStoreUpdate();
    }

    /** Returns the current kill-in-rollback flag, flipping it afterwards when toggling is on. */
    protected boolean shouldKillBeforeStoreUpdateInRollback() {
      final boolean current = this.killBeforeStoreUpdateInRollback;
      if (!this.toggleKillBeforeStoreUpdateInRollback) {
        return current;
      }
      this.killBeforeStoreUpdateInRollback = !current;
      LOG.warn("Toggle KILL before store update in rollback to: "
        + this.killBeforeStoreUpdateInRollback);
      return current;
    }
  }

  /**
   * Callback interface for procedure lifecycle notifications. Every callback receives the id of
   * the procedure concerned.
   */
  public interface ProcedureExecutorListener {
    // NOTE(review): presumably what sendProcedureLoadedNotification() delivers for root procedures
    // re-queued after a store load -- confirm against the notification helpers.
    void procedureLoaded(long procId);

    // NOTE(review): presumably fired when a new procedure is submitted -- confirm with the caller.
    void procedureAdded(long procId);

    // NOTE(review): presumably fired when a procedure completes -- confirm with the caller.
    void procedureFinished(long procId);
  }

  /**
   * Map the the procId returned by submitProcedure(), the Root-ProcID, to the Procedure. Once a
   * Root-Procedure completes (success or failure), the result will be added to this map. The user
   * of ProcedureExecutor should call getResult(procId) to get the result.
   */
  private final ConcurrentHashMap> completed =
    new ConcurrentHashMap<>();

  /**
   * Map the the procId returned by submitProcedure(), the Root-ProcID, to the RootProcedureState.
   * The RootProcedureState contains the execution stack of the Root-Procedure, It is added to the
   * map by submitProcedure() and removed on procedure completion.
   */
  private final ConcurrentHashMap> rollbackStack =
    new ConcurrentHashMap<>();

  /**
   * Helper map to lookup the live procedures by ID. This map contains every procedure.
   * root-procedures and subprocedures.
   */
  private final ConcurrentHashMap> procedures =
    new ConcurrentHashMap<>();

  /**
   * Helper map to lookup whether the procedure already issued from the same client. This map
   * contains every root procedure.
   */
  private final ConcurrentHashMap nonceKeysToProcIdsMap = new ConcurrentHashMap<>();

  private final CopyOnWriteArrayList listeners =
    new CopyOnWriteArrayList<>();

  /** Current configuration; replaced by {@link #refreshConfiguration(Configuration)}. */
  private Configuration conf;

  /**
   * Created in the {@link #init(int, boolean)} method. Destroyed in {@link #join()} (FIX! Doing
   * resource handling rather than observing in a #join is unexpected). Overridden when we do the
   * ProcedureTestingUtility.testRecoveryAndDoubleExecution trickery (Should be ok).
   */
  private ThreadGroup threadGroup;

  /**
   * Created in the {@link #init(int, boolean)} method. Terminated in {@link #join()} (FIX! Doing
   * resource handling rather than observing in a #join is unexpected). Overridden when we do the
   * ProcedureTestingUtility.testRecoveryAndDoubleExecution trickery (Should be ok).
   */
  private CopyOnWriteArrayList<WorkerThread> workerThreads;

  /**
   * Created in the {@link #init(int, boolean)} method. Terminated in {@link #join()} (FIX! Doing
   * resource handling rather than observing in a #join is unexpected). Overridden when we do the
   * ProcedureTestingUtility.testRecoveryAndDoubleExecution trickery (Should be ok).
   */
  private TimeoutExecutorThread<TEnvironment> timeoutExecutor;

  /**
   * The WorkerMonitor checks for stuck workers and creates new worker threads when necessary; for
   * example, if there is no worker available to assign meta, it creates a new worker thread for
   * it, so it is very important. The TimeoutExecutor runs many tasks such as
   * DeadServerMetricRegionChore, RegionInTransitionChore and so on; some of those may run for a
   * long time and would block other tasks like the WorkerMonitor, so a dedicated thread is used
   * for executing the WorkerMonitor.
   */
  private TimeoutExecutorThread<TEnvironment> workerMonitorExecutor;

  /** Number of worker threads created by {@link #init(int, boolean)}. */
  private int corePoolSize;
  /** Burst worker limit; set to ten times the core pool size in {@link #init(int, boolean)}. */
  private int maxPoolSize;

  /** Worker keep-alive time, stored in milliseconds. */
  private volatile long keepAliveTime;

  /**
   * Scheduler/Queue that contains runnable procedures.
   */
  private final ProcedureScheduler scheduler;

  // Single daemon thread on which the store listener registered in the constructor runs
  // forceUpdateProcedure() for each requested procedure id.
  private final Executor forceUpdateExecutor = Executors.newSingleThreadExecutor(
    new ThreadFactoryBuilder().setDaemon(true).setNameFormat("Force-Update-PEWorker-%d").build());

  // Highest procedure id seen; -1 until the store loader calls setMaxProcId(), reset in join().
  private final AtomicLong lastProcId = new AtomicLong(-1);
  // Reset to 0 by init() before the worker threads are created.
  private final AtomicLong workerId = new AtomicLong(0);
  private final AtomicInteger activeExecutorCount = new AtomicInteger(0);
  // Flipped to true by startWorkers() and back to false by stop().
  private final AtomicBoolean running = new AtomicBoolean(false);
  private final TEnvironment environment;
  private final ProcedureStore store;

  // Value of CHECK_OWNER_SET_CONF_KEY, captured once at construction time.
  private final boolean checkOwnerSet;

  // To prevent concurrent execution of the same procedure.
  // For some rare cases, especially if the procedure uses ProcedureEvent, it is possible that the
  // procedure is woken up before we finish the suspend which causes the same procedures to be
  // executed in parallel. This does lead to some problems, see HBASE-20939&HBASE-20949, and is also
  // a bit confusing to the developers. So here we introduce this lock to prevent the concurrent
  // execution of the same procedure.
  private final IdLock procExecutionLock = new IdLock();

  /**
   * Creates an executor backed by a new {@code SimpleProcedureScheduler}; delegates to the
   * four-argument constructor.
   * @param conf        configuration to read the executor settings from
   * @param environment environment object handed to the procedures
   * @param store       store the executor loads from and updates
   */
  public ProcedureExecutor(final Configuration conf, final TEnvironment environment,
    final ProcedureStore store) {
    this(conf, environment, store, new SimpleProcedureScheduler());
  }

  /**
   * Tells whether the root of the given procedure is done: either it is no longer in the live
   * procedures map, or it reports itself finished.
   */
  private boolean isRootFinished(Procedure proc) {
    final Procedure root = procedures.get(proc.getRootProcId());
    if (root == null) {
      return true;
    }
    return root.isFinished();
  }

  /**
   * Writes the procedure with the given id to the store again, while holding its execution lock.
   * Nothing is written when the procedure is a finished sub-procedure whose root is finished too,
   * when it is neither live nor retained as completed, when the retained entry is a
   * FailedProcedure, or when the retained entry has already expired.
   * @param procId id of the procedure to re-persist
   * @throws IOException declared for store update failures
   */
  private void forceUpdateProcedure(long procId) throws IOException {
    final IdLock.Entry held = procExecutionLock.getLockEntry(procId);
    try {
      Procedure target = procedures.get(procId);
      if (target == null) {
        // Not a live procedure; fall back to the retained completed entry, if any.
        CompletedProcedureRetainer finishedEntry = completed.get(procId);
        if (finishedEntry == null || finishedEntry.getProcedure() instanceof FailedProcedure) {
          LOG.debug("No pending procedure with id = {}, skip force updating.", procId);
          return;
        }
        long evictTtl = conf.getInt(EVICT_TTL_CONF_KEY, DEFAULT_EVICT_TTL);
        long evictAckTtl = conf.getInt(EVICT_ACKED_TTL_CONF_KEY, DEFAULT_ACKED_EVICT_TTL);
        long now = EnvironmentEdgeManager.currentTime();
        if (finishedEntry.isExpired(now, evictTtl, evictAckTtl)) {
          LOG.debug("Procedure {} has already been finished and expired, skip force updating",
            procId);
          return;
        }
        target = finishedEntry.getProcedure();
      } else if (target.isFinished() && target.hasParent() && isRootFinished(target)) {
        LOG.debug("Procedure {} has already been finished and parent is succeeded,"
          + " skip force updating", target);
        return;
      }
      LOG.debug("Force update procedure {}", target);
      store.update(target);
    } finally {
      procExecutionLock.releaseLockEntry(held);
    }
  }

  /**
   * Creates an executor with an explicit scheduler. Besides capturing its arguments, this reads
   * the owner-check flag and keep-alive time from {@code conf} and registers a store listener
   * that force-updates procedures, one task per procedure id, on the forceUpdateExecutor thread.
   * @param conf        configuration to read the executor settings from
   * @param environment environment object handed to the procedures
   * @param store       store the executor loads from and updates
   * @param scheduler   scheduler holding the runnable procedures
   */
  public ProcedureExecutor(final Configuration conf, final TEnvironment environment,
    final ProcedureStore store, final ProcedureScheduler scheduler) {
    this.environment = environment;
    this.scheduler = scheduler;
    this.store = store;
    this.conf = conf;
    this.checkOwnerSet = conf.getBoolean(CHECK_OWNER_SET_CONF_KEY, DEFAULT_CHECK_OWNER_SET);
    refreshConfiguration(conf);
    store.registerListener(new ProcedureStoreListener() {

      @Override
      public void forceUpdate(long[] procIds) {
        Arrays.stream(procIds).forEach(procId -> forceUpdateExecutor.execute(() -> {
          try {
            forceUpdateProcedure(procId);
          } catch (IOException e) {
            // Pass the exception as the trailing argument so the cause and stack trace are logged
            // instead of being silently dropped.
            LOG.warn("Failed to force update procedure with pid={}", procId, e);
          }
        }));
      }
    });
  }

  /**
   * Replays the store into this executor. All in-memory maps and the scheduler must be empty.
   * Valid procedures are passed to loadProcedures(); corrupted ones are logged and counted.
   * @param abortOnCorruption when true, fail if at least one corrupted procedure was reported
   * @throws IOException on load failure, or on corruption when abortOnCorruption is set
   */
  private void load(final boolean abortOnCorruption) throws IOException {
    Preconditions.checkArgument(completed.isEmpty(), "completed not empty");
    Preconditions.checkArgument(rollbackStack.isEmpty(), "rollback state not empty");
    Preconditions.checkArgument(procedures.isEmpty(), "procedure map not empty");
    Preconditions.checkArgument(scheduler.size() == 0, "run queue not empty");

    final ProcedureStore.ProcedureLoader loader = new ProcedureStore.ProcedureLoader() {
      @Override
      public void setMaxProcId(long maxProcId) {
        assert lastProcId.get() < 0 : "expected only one call to setMaxProcId()";
        lastProcId.set(maxProcId);
      }

      @Override
      public void load(ProcedureIterator procIter) throws IOException {
        loadProcedures(procIter);
      }

      @Override
      public void handleCorrupted(ProcedureIterator procIter) throws IOException {
        int badProcs = 0;
        for (; procIter.hasNext(); badProcs++) {
          Procedure corrupted = procIter.next();
          LOG.error("Corrupt " + corrupted);
        }
        if (badProcs > 0 && abortOnCorruption) {
          throw new IOException("found " + badProcs + " corrupted procedure(s) on replay");
        }
      }
    };
    store.load(loader);
  }

  /**
   * Restores the lock of a single procedure and records its id as handled.
   * @param proc     procedure whose lock should be restored
   * @param restored ids of the procedures whose locks have already been restored
   */
  private void restoreLock(Procedure<TEnvironment> proc, Set<Long> restored) {
    proc.restoreLock(getEnvironment());
    restored.add(proc.getProcId());
  }

  private void restoreLocks(Deque> stack, Set restored) {
    while (!stack.isEmpty()) {
      restoreLock(stack.pop(), restored);
    }
  }

  // Restore the locks for all the procedures.
  // Notice that we need to restore the locks starting from the root proc, otherwise there will be
  // problem that a sub procedure may hold the exclusive lock first and then we are stuck when
  // calling the acquireLock method for the parent procedure.
  // The algorithm is straight-forward:
  // 1. Use a set to record the procedures whose locks have already been restored.
  // 2. Use a stack to store the hierarchy of the procedures.
  // 3. For each procedure, walk up towards the root, pushing every procedure visited onto the
  // stack, until we reach a procedure that
  // a. has no parent, i.e, it is the root procedure, or
  // b. has already had its lock restored (by checking the set introduced in #1),
  // then pop the stack and restore the lock for each procedure on it.
  // Notice that this should be done for all procedures, not only the ones in runnableList.
  private void restoreLocks() {
    Set<Long> restored = new HashSet<>();
    Deque<Procedure<TEnvironment>> stack = new ArrayDeque<>();
    for (Procedure<TEnvironment> start : procedures.values()) {
      // Separate cursor for the upward walk instead of reassigning the iteration variable.
      Procedure<TEnvironment> cursor = start;
      for (;;) {
        if (restored.contains(cursor.getProcId())) {
          restoreLocks(stack, restored);
          break;
        }
        if (!cursor.hasParent()) {
          restoreLock(cursor, restored);
          restoreLocks(stack, restored);
          break;
        }
        stack.push(cursor);
        cursor = procedures.get(cursor.getParentProcId());
      }
    }
  }

  private void initializeStacks(ProcedureIterator procIter,
    List> runnableList, List> failedList,
    List> waitingList, List> waitingTimeoutList)
    throws IOException {
    procIter.reset();
    while (procIter.hasNext()) {
      if (procIter.isNextFinished()) {
        procIter.skipNext();
        continue;
      }

      @SuppressWarnings("unchecked")
      Procedure proc = procIter.next();
      assert !(proc.isFinished() && !proc.hasParent()) : "unexpected completed proc=" + proc;
      LOG.debug("Loading {}", proc);
      Long rootProcId = getRootProcedureId(proc);
      // The orphan procedures will be passed to handleCorrupted, so add an assert here
      assert rootProcId != null;

      if (proc.hasParent()) {
        Procedure parent = procedures.get(proc.getParentProcId());
        if (parent != null && !proc.isFinished()) {
          parent.incChildrenLatch();
        }
      }

      RootProcedureState procStack = rollbackStack.get(rootProcId);
      procStack.loadStack(proc);

      proc.setRootProcId(rootProcId);
      switch (proc.getState()) {
        case RUNNABLE:
          runnableList.add(proc);
          break;
        case WAITING:
          waitingList.add(proc);
          break;
        case WAITING_TIMEOUT:
          waitingTimeoutList.add(proc);
          break;
        case FAILED:
          failedList.add(proc);
          break;
        case ROLLEDBACK:
        case INITIALIZING:
          String msg = "Unexpected " + proc.getState() + " state for " + proc;
          LOG.error(msg);
          throw new UnsupportedOperationException(msg);
        default:
          break;
      }
    }
    rollbackStack.forEach((rootProcId, procStack) -> {
      if (procStack.getSubproceduresStack() != null) {
        // if we have already record some stack ids, it means we support rollback
        procStack.setRollbackSupported(true);
      } else {
        // otherwise, test the root procedure to see if we support rollback
        procStack.setRollbackSupported(procedures.get(rootProcId).isRollbackSupported());
      }
    });
  }

  private void processWaitingProcedures(List> waitingList,
    List> runnableList) {
    waitingList.forEach(proc -> {
      if (!proc.hasChildren()) {
        // Normally, WAITING procedures should be waken by its children. But, there is a case that,
        // all the children are successful and before they can wake up their parent procedure, the
        // master was killed. So, during recovering the procedures from ProcedureWal, its children
        // are not loaded because of their SUCCESS state. So we need to continue to run this WAITING
        // procedure. But before executing, we need to set its state to RUNNABLE, otherwise, a
        // exception will throw:
        // Preconditions.checkArgument(procedure.getState() == ProcedureState.RUNNABLE,
        // "NOT RUNNABLE! " + procedure.toString());
        proc.setState(ProcedureState.RUNNABLE);
        runnableList.add(proc);
      } else {
        proc.afterReplay(getEnvironment());
      }
    });
  }

  private void processWaitingTimeoutProcedures(List> waitingTimeoutList) {
    waitingTimeoutList.forEach(proc -> {
      proc.afterReplay(getEnvironment());
      timeoutExecutor.add(proc);
    });
  }

  private void pushProceduresAfterLoad(List> runnableList,
    List> failedList) {
    failedList.forEach(scheduler::addBack);
    runnableList.forEach(p -> {
      p.afterReplay(getEnvironment());
      if (!p.hasParent()) {
        sendProcedureLoadedNotification(p.getProcId());
      }
      scheduler.addBack(p);
    });
  }

  /**
   * Rebuilds the in-memory state from the procedures handed over by the store. Finished
   * procedures go to the completed map; the others are registered as live procedures (roots also
   * get a rollback stack). Nonce keys are re-registered. The procedures are then sorted by state,
   * have their locks restored, and are pushed to the timeout executor or the scheduler.
   * @param procIter iterator over the procedures read from the store
   * @throws IOException declared for iterator failures
   */
  private void loadProcedures(ProcedureIterator procIter) throws IOException {
    // 1. Build the rollback stack
    int runnableCount = 0;
    int failedCount = 0;
    int waitingCount = 0;
    int waitingTimeoutCount = 0;
    while (procIter.hasNext()) {
      boolean finished = procIter.isNextFinished();
      @SuppressWarnings("unchecked")
      Procedure<TEnvironment> proc = procIter.next();
      NonceKey nonceKey = proc.getNonceKey();
      long procId = proc.getProcId();

      if (finished) {
        completed.put(proc.getProcId(), new CompletedProcedureRetainer<>(proc));
        LOG.debug("Completed {}", proc);
      } else {
        if (!proc.hasParent()) {
          assert !proc.isFinished() : "unexpected finished procedure";
          rollbackStack.put(proc.getProcId(), new RootProcedureState<>());
        }

        // add the procedure to the map
        proc.beforeReplay(getEnvironment());
        procedures.put(proc.getProcId(), proc);
        // The counts are only used to pre-size the per-state lists below.
        switch (proc.getState()) {
          case RUNNABLE:
            runnableCount++;
            break;
          case FAILED:
            failedCount++;
            break;
          case WAITING:
            waitingCount++;
            break;
          case WAITING_TIMEOUT:
            waitingTimeoutCount++;
            break;
          default:
            break;
        }
      }

      if (nonceKey != null) {
        nonceKeysToProcIdsMap.put(nonceKey, procId); // add the nonce to the map
      }
    }

    // 2. Initialize the stacks: In the old implementation, for procedures in FAILED state, we will
    // push it into the ProcedureScheduler directly to execute the rollback. But this does not work
    // after we introduce the restore lock stage. For now, when we acquire a xlock, we will remove
    // the queue from runQueue in scheduler, and then when a procedure which has lock access, for
    // example, a sub procedure of the procedure which has the xlock, is pushed into the scheduler,
    // we will add the queue back to let the workers poll from it. The assumption here is that, the
    // procedure which has the xlock should have been polled out already, so when loading we can not
    // add the procedure to scheduler first and then call acquireLock, since the procedure is still
    // in the queue, and since we will remove the queue from runQueue, then no one can poll it out,
    // then there is a dead lock
    List<Procedure<TEnvironment>> runnableList = new ArrayList<>(runnableCount);
    List<Procedure<TEnvironment>> failedList = new ArrayList<>(failedCount);
    List<Procedure<TEnvironment>> waitingList = new ArrayList<>(waitingCount);
    List<Procedure<TEnvironment>> waitingTimeoutList = new ArrayList<>(waitingTimeoutCount);

    initializeStacks(procIter, runnableList, failedList, waitingList, waitingTimeoutList);

    // 3. Check the waiting procedures to see if some of them can be added to runnable.
    processWaitingProcedures(waitingList, runnableList);

    // 4. restore locks
    restoreLocks();

    // 5. Push the procedures to the timeout executor
    processWaitingTimeoutProcedures(waitingTimeoutList);

    // 6. Push the procedure to the scheduler
    pushProceduresAfterLoad(runnableList, failedList);
    // After all procedures put into the queue, signal the worker threads.
    // Otherwise, there is a race condition. See HBASE-21364.
    scheduler.signalAll();
  }

  /**
   * Prepares the procedure executor without starting any worker; the workers are started later.
   * <p>
   * Creates the thread group, the two timeout executor threads and the core worker threads, then
   * calls ProcedureStore.recoverLease() to recover the lease and ensure a single executor, starts
   * the scheduler, and finally replays the store to resume and recover the previously pending and
   * in-progress procedures.
   * @param numThreads        number of threads available for procedure execution.
   * @param abortOnCorruption true if you want to abort your service in case a corrupted procedure
   *                          is found on replay. otherwise false.
   * @throws IOException if recovering the lease or loading the store fails
   */
  public void init(int numThreads, boolean abortOnCorruption) throws IOException {
    // numThreads executors, plus dedicated timer threads for timing out procedures and
    // triggering periodic procedures.
    this.corePoolSize = numThreads;
    this.maxPoolSize = 10 * numThreads;
    LOG.info("Starting {} core workers (bigger of cpus/4 or 16) with max (burst) worker count={}",
      corePoolSize, maxPoolSize);

    this.threadGroup = new ThreadGroup("PEWorkerGroup");
    this.timeoutExecutor = new TimeoutExecutorThread<>(this, threadGroup, "ProcExecTimeout");
    this.workerMonitorExecutor = new TimeoutExecutorThread<>(this, threadGroup, "WorkerMonitor");

    // Build (but do not start) the core workers.
    workerId.set(0);
    workerThreads = new CopyOnWriteArrayList<>();
    int created = 0;
    while (created < corePoolSize) {
      workerThreads.add(new WorkerThread(threadGroup));
      ++created;
    }

    // Take over the store lease.
    final long leaseStartNs = System.nanoTime();
    store.recoverLease();
    final long leaseElapsedNs = System.nanoTime() - leaseStartNs;
    LOG.info("Recovered {} lease in {}", store.getClass().getSimpleName(),
      StringUtils.humanTimeDiff(TimeUnit.NANOSECONDS.toMillis(leaseElapsedNs)));

    // The scheduler must be up before the replay pushes procedures into it.
    scheduler.start();

    // TODO: Split in two steps.
    // TODO: Handle corrupted procedures (currently just a warn)
    // The first one will make sure that we have the latest id,
    // so we can start the threads and accept new procedures.
    // The second step will do the actual load of old procedures.
    final long loadStartNs = System.nanoTime();
    load(abortOnCorruption);
    final long loadElapsedNs = System.nanoTime() - loadStartNs;
    LOG.info("Loaded {} in {}", store.getClass().getSimpleName(),
      StringUtils.humanTimeDiff(TimeUnit.NANOSECONDS.toMillis(loadElapsedNs)));
  }

  /**
   * Starts the timeout executor, the worker monitor executor and every worker thread, then
   * registers the internal chores. Logs a warning and returns when already running.
   */
  public void startWorkers() throws IOException {
    final boolean wonStart = running.compareAndSet(false, true);
    if (!wonStart) {
      LOG.warn("Already running");
      return;
    }

    // lastProcId must already be set by the time the executors start.
    LOG.trace("Start workers {}", workerThreads.size());
    timeoutExecutor.start();
    workerMonitorExecutor.start();
    for (WorkerThread peWorker : workerThreads) {
      peWorker.start();
    }

    // Internal chore: runs on the dedicated monitor executor.
    workerMonitorExecutor.add(new WorkerMonitor());

    // Chore that cleans up completed procedures.
    addChore(new CompletedProcedureCleaner<>(conf, store, procExecutionLock, completed,
      nonceKeysToProcIdsMap));
  }

  /**
   * Clears the running flag and, if the executor was running, stops the scheduler and sends the
   * stop signal to both timeout executor threads. Does nothing when not running.
   */
  public void stop() {
    final boolean wasRunning = running.getAndSet(false);
    if (wasRunning) {
      LOG.info("Stopping");
      scheduler.stop();
      timeoutExecutor.sendStopSignal();
      workerMonitorExecutor.sendStopSignal();
    }
  }

  /**
   * Waits for the executor threads and all workers to terminate, destroys the thread group and
   * wipes the in-memory state. Expects the executor to have been stopped already.
   */
  public void join() {
    assert !isRunning() : "expected not running";

    // Wait for the two timer threads first: timeout executor, then worker monitor.
    timeoutExecutor.awaitTermination();
    workerMonitorExecutor.awaitTermination();

    // Then wait for every worker.
    for (WorkerThread pending : workerThreads) {
      pending.awaitTermination();
    }

    // Tear down the executors' thread group.
    // TODO: Fix. #join is not place to destroy resources.
    try {
      threadGroup.destroy();
    } catch (IllegalThreadStateException e) {
      LOG.error("ThreadGroup {} contains running threads; {}: See STDOUT", this.threadGroup,
        e.getMessage());
      // list() prints the group's threads on STDOUT.
      this.threadGroup.list();
    }

    // Reset the in-memory state (used by tests).
    completed.clear();
    rollbackStack.clear();
    procedures.clear();
    nonceKeysToProcIdsMap.clear();
    scheduler.clear();
    lastProcId.set(-1);
  }

  /**
   * Adopts the given configuration and re-applies the worker keep-alive time read from it.
   * @param conf the configuration to use from now on
   */
  public void refreshConfiguration(final Configuration conf) {
    this.conf = conf;
    final long keepAliveMs =
      conf.getLong(WORKER_KEEP_ALIVE_TIME_CONF_KEY, DEFAULT_WORKER_KEEP_ALIVE_TIME);
    setKeepAliveTime(keepAliveMs, TimeUnit.MILLISECONDS);
  }

  // ==========================================================================
  // Accessors
  // ==========================================================================
  /** Returns true between a successful startWorkers() and the next stop(). */
  public boolean isRunning() {
    return running.get();
  }

  /** Returns the current number of worker threads. */
  public int getWorkerThreadCount() {
    return workerThreads.size();
  }

  /** Returns the core pool size settings. */
  public int getCorePoolSize() {
    return corePoolSize;
  }

  /** Returns the current value of the active executor counter. */
  public int getActiveExecutorCount() {
    return activeExecutorCount.get();
  }

  /** Returns the environment object this executor was constructed with. */
  public TEnvironment getEnvironment() {
    return this.environment;
  }

  /** Returns the procedure store this executor was constructed with. */
  public ProcedureStore getStore() {
    return this.store;
  }

  /** Returns the scheduler holding the runnable procedures (package-private). */
  ProcedureScheduler getScheduler() {
    return scheduler;
  }

  /**
   * Sets the worker keep-alive time and signals everything waiting on the scheduler.
   * @param keepAliveTime the new keep-alive time, expressed in {@code timeUnit}
   * @param timeUnit      unit of {@code keepAliveTime}; the value is stored in milliseconds
   */
  public void setKeepAliveTime(final long keepAliveTime, final TimeUnit timeUnit) {
    final long keepAliveMs = timeUnit.toMillis(keepAliveTime);
    this.keepAliveTime = keepAliveMs;
    scheduler.signalAll();
  }

  /**
   * Returns the worker keep-alive time, converted from the stored milliseconds to the given unit.
   * @param timeUnit unit to express the result in
   */
  public long getKeepAliveTime(final TimeUnit timeUnit) {
    return timeUnit.convert(keepAliveTime, TimeUnit.MILLISECONDS);
  }

  // ==========================================================================
  // Submit/Remove Chores
  // ==========================================================================

  /**
   * Marks the chore as WAITING_TIMEOUT and hands it to the timeout executor. A null chore is
   * ignored.
   * @param chore the chore to add
   */
  public void addChore(@Nullable ProcedureInMemoryChore chore) {
    if (chore != null) {
      chore.setState(ProcedureState.WAITING_TIMEOUT);
      timeoutExecutor.add(chore);
    }
  }

  /**
   * Marks the chore as SUCCESS and asks the timeout executor to drop it. A null chore counts as
   * removed.
   * @param chore the chore to remove
   * @return whether the chore is removed, or it will be removed later
   */
  public boolean removeChore(@Nullable ProcedureInMemoryChore chore) {
    if (chore != null) {
      chore.setState(ProcedureState.SUCCESS);
      return timeoutExecutor.remove(chore);
    }
    return true;
  }

  // ==========================================================================
  // Nonce Procedure helpers
  // ==========================================================================
  /**
   * Builds a NonceKey for the given nonceGroup and nonce.
   * @param nonceGroup the group to use for the {@link NonceKey}
   * @param nonce      the nonce to use in the {@link NonceKey}
   * @return the generated NonceKey, or null when nonce is {@link HConstants#NO_NONCE}
   */
  public NonceKey createNonceKey(final long nonceGroup, final long nonce) {
    if (nonce == HConstants.NO_NONCE) {
      return null;
    }
    return new NonceKey(nonceGroup, nonce);
  }

  /**
   * Registers a nonce for a procedure that is about to be submitted. A procId is reserved, and on
   * submitProcedure() the procedure with the specified nonce takes the reserved procId. If the
   * nonce was already reserved by someone else, this method waits until that procedure shows up
   * (or the executor stops, or the nonce is unregistered) and returns the procId reserved for it;
   * otherwise an invalid procId is returned and the caller should proceed to submit the procedure.
   * @param nonceKey A unique identifier for this operation from the client or process.
   * @return the procId associated with the nonce, if any otherwise an invalid procId.
   */
  public long registerNonce(final NonceKey nonceKey) {
    if (nonceKey == null) {
      return -1;
    }

    // Look for an ID already reserved for this nonce.
    Long owner = nonceKeysToProcIdsMap.get(nonceKey);
    if (owner == null) {
      // None yet: reserve a fresh procedure ID and try to bind it to the nonce. The procedure
      // submitted with this nonce will use the reserved ID.
      final long reserved = nextProcId();
      owner = nonceKeysToProcIdsMap.putIfAbsent(nonceKey, reserved);
      if (owner == null) {
        // Our reservation won; the caller goes on to submit.
        return -1;
      }
    }

    // Somebody else holds the nonce, but the procedure may not have been submitted yet. The
    // client expects it to be submitted, so poll until it is.
    final boolean traceEnabled = LOG.isTraceEnabled();
    while (
      isRunning() && !procedures.containsKey(owner) && !completed.containsKey(owner)
        && nonceKeysToProcIdsMap.containsKey(nonceKey)
    ) {
      if (traceEnabled) {
        LOG.trace("Waiting for pid=" + owner.longValue() + " to be submitted");
      }
      Threads.sleep(100);
    }
    return owner.longValue();
  }

  /**
   * Drop the reservation held for a nonce when the id reserved for it is found neither among the
   * active procedures nor among the completed ones, i.e. the procedure was never submitted.
   * @param nonceKey A unique identifier for this operation from the client or process.
   */
  public void unregisterNonceIfProcedureWasNotSubmitted(final NonceKey nonceKey) {
    if (nonceKey == null) {
      return;
    }

    final Long reservedProcId = nonceKeysToProcIdsMap.get(nonceKey);
    final boolean neverSubmitted = reservedProcId != null
      && !procedures.containsKey(reservedProcId) && !completed.containsKey(reservedProcId);
    if (neverSubmitted) {
      nonceKeysToProcIdsMap.remove(nonceKey);
    }
  }

  /**
   * If the procedure failed before submitting it, we may want to give back the same error to the
   * requests with the same nonceKey. This records a {@link FailedProcedure} carrying the exception
   * as the completed result for the id reserved for the nonce, unless a completed result is
   * already present or no id is reserved.
   * @param nonceKey  A unique identifier for this operation from the client or process
   * @param procName  name of the procedure, used to inform the user
   * @param procOwner name of the owner of the procedure, used to inform the user
   * @param exception the failure to report to the user
   */
  public void setFailureResultForNonce(NonceKey nonceKey, String procName, User procOwner,
    IOException exception) {
    if (nonceKey == null) {
      return;
    }

    Long procId = nonceKeysToProcIdsMap.get(nonceKey);
    final boolean needsFailureResult = procId != null && !completed.containsKey(procId);
    if (!needsFailureResult) {
      return;
    }

    // computeIfAbsent keeps any result that was stored in the meantime.
    completed.computeIfAbsent(procId, (key) -> {
      Procedure failedProc =
        new FailedProcedure<>(procId.longValue(), procName, procOwner, nonceKey, exception);

      return new CompletedProcedureRetainer<>(failedProc);
    });
  }

  // ==========================================================================
  // Submit/Abort Procedure
  // ==========================================================================
  /**
   * Add a new root-procedure to the executor, without a nonce.
   * @param proc the new procedure to execute.
   * @return the procedure id, that can be used to monitor the operation
   */
  public long submitProcedure(Procedure proc) {
    // Delegate to the nonce-aware variant with no nonce key.
    final NonceKey noNonceKey = null;
    return submitProcedure(proc, noNonceKey);
  }

  /**
   * Bypass a list of procedures. If a procedure is set to bypass, all the logic in
   * execute/rollback will be ignored and it will return success, whatever. It is used to recover
   * buggy stuck procedures, releasing the lock resources and letting other procedures run.
   * Bypassing one procedure (and its ancestors will be bypassed automatically) may leave the
   * cluster in a middle state, e.g. region not assigned, or some hdfs files left behind. After
   * getting rid of those stuck procedures, the operators may have to do some clean up on hdfs or
   * schedule some assign procedures to let region online. DO AT YOUR OWN RISK.
   * <p>
   * A procedure can be bypassed only if
   * <ol>
   * <li>The procedure is in state of RUNNABLE, WAITING, WAITING_TIMEOUT or it is a root procedure
   * without any child.</li>
   * <li>No other worker thread is executing it</li>
   * <li>No child procedure has been submitted</li>
   * </ol>
   * <p>
   * If all the requirements are met, the procedure and its ancestors will be bypassed and
   * persisted to WAL.
   * <p>
   * If the procedure is in WAITING state, will set it to RUNNABLE add it to run queue. TODO: What
   * about WAITING_TIMEOUT?
   * @param pids      the ids of the procedures to bypass
   * @param lockWait  time to wait lock
   * @param force     if force set to true, we will bypass the procedure even if it is executing.
   *                  This is for procedures which can't break out during executing(due to bug,
   *                  mostly) In this case, bypassing the procedure is not enough, since it is
   *                  already stuck there. We need to restart the master after bypassing, and
   *                  letting the problematic procedure to execute with bypass=true, so in that
   *                  condition, the procedure can be successfully bypassed.
   * @param recursive We will do an expensive search for children of each pid. EXPENSIVE!
   * @return one entry per requested pid, in the iteration order of {@code pids}; each entry is
   *         true if that procedure was bypassed
   * @throws IOException IOException
   */
  public List<Boolean> bypassProcedure(List<Long> pids, long lockWait, boolean force,
    boolean recursive) throws IOException {
    // Typed lists: with raw List the elements are Object and cannot be iterated as long.
    List<Boolean> result = new ArrayList<>(pids.size());
    for (long pid : pids) {
      result.add(bypassProcedure(pid, lockWait, force, recursive));
    }
    return result;
  }

  /**
   * Bypass a single procedure and, following the parent links, each of its ancestors.
   * <p>
   * Returns false without bypassing anything when: the procedure is not found; its execution lock
   * could not be taken and {@code override} is false; it is already finished; it has children and
   * {@code recursive} is false; or it has a parent and is not in RUNNABLE, WAITING or
   * WAITING_TIMEOUT state.
   * @param pid       id of the procedure to bypass
   * @param lockWait  time to wait for the procedure execution lock, must be positive
   * @param override  continue with the bypass even if the execution lock could not be taken
   * @param recursive first bypass the live procedures whose parent is this procedure
   * @return true if the procedure and its ancestors were marked as bypassed
   * @throws IOException declared by the lock and store operations used here
   */
  boolean bypassProcedure(long pid, long lockWait, boolean override, boolean recursive)
    throws IOException {
    Preconditions.checkArgument(lockWait > 0, "lockWait should be positive");
    final Procedure procedure = getProcedure(pid);
    if (procedure == null) {
      LOG.debug("Procedure pid={} does not exist, skipping bypass", pid);
      return false;
    }

    LOG.debug("Begin bypass {} with lockWait={}, override={}, recursive={}", procedure, lockWait,
      override, recursive);
    // A null lockEntry is handled below as "could not take the execution lock".
    IdLock.Entry lockEntry = procExecutionLock.tryLockEntry(procedure.getProcId(), lockWait);
    if (lockEntry == null && !override) {
      LOG.debug("Waited {} ms, but {} is still running, skipping bypass with force={}", lockWait,
        procedure, override);
      return false;
    } else if (lockEntry == null) {
      // override is set: carry on without holding the lock
      LOG.debug("Waited {} ms, but {} is still running, begin bypass with force={}", lockWait,
        procedure, override);
    }
    try {
      // check whether the procedure is already finished
      if (procedure.isFinished()) {
        LOG.debug("{} is already finished, skipping bypass", procedure);
        return false;
      }

      if (procedure.hasChildren()) {
        if (recursive) {
          // EXPENSIVE. Checks each live procedure of which there could be many!!!
          // Is there another way to get children of a procedure?
          LOG.info("Recursive bypass on children of pid={}", procedure.getProcId());
          this.procedures.forEachValue(1 /* Single-threaded */,
            // Transformer: keep only the direct children of this procedure
            v -> v.getParentProcId() == procedure.getProcId() ? v : null,
            // Consumer: bypass each child; a failure on one child is logged and does not stop
            // the others
            v -> {
              try {
                bypassProcedure(v.getProcId(), lockWait, override, recursive);
              } catch (IOException e) {
                LOG.warn("Recursive bypass of pid={}", v.getProcId(), e);
              }
            });
        } else {
          LOG.debug("{} has children, skipping bypass", procedure);
          return false;
        }
      }

      // A procedure without a parent can be bypassed in whatever state. A procedure with a parent
      // is only bypassed in RUNNABLE, WAITING or WAITING_TIMEOUT state; otherwise give up here.
      if (
        procedure.hasParent() && procedure.getState() != ProcedureState.RUNNABLE
          && procedure.getState() != ProcedureState.WAITING
          && procedure.getState() != ProcedureState.WAITING_TIMEOUT
      ) {
        // NOTE(review): this branch refuses the bypass, yet the message below reads as if one is
        // being performed; consider rewording the log message.
        LOG.debug("Bypassing procedures in RUNNABLE, WAITING and WAITING_TIMEOUT states "
          + "(with no parent), {}", procedure);
        // Question: how is the bypass done here?
        return false;
      }

      // Now, the procedure is not finished, and no one can execute it since we take the lock now
      // And we can be sure that its ancestor is not running too, since their child has not
      // finished yet
      Procedure current = procedure;
      // Mark the procedure and every ancestor as bypassed, persisting each one; the walk stops
      // when getProcedure() finds no procedure for the parent id.
      while (current != null) {
        LOG.debug("Bypassing {}", current);
        current.bypass(getEnvironment());
        store.update(current);
        long parentID = current.getParentProcId();
        current = getProcedure(parentID);
      }

      // wake up waiting procedure, already checked there is no child
      if (procedure.getState() == ProcedureState.WAITING) {
        procedure.setState(ProcedureState.RUNNABLE);
        store.update(procedure);
      }

      // If state of procedure is WAITING_TIMEOUT, we cannot directly submit it to the scheduler.
      // Instead we should remove it from the timeout executor queue and transfer its state to
      // RUNNABLE
      if (procedure.getState() == ProcedureState.WAITING_TIMEOUT) {
        LOG.debug("transform procedure {} from WAITING_TIMEOUT to RUNNABLE", procedure);
        if (timeoutExecutor.remove(procedure)) {
          LOG.debug("removed procedure {} from timeoutExecutor", procedure);
          timeoutExecutor.executeTimedoutProcedure(procedure);
        }
      } else if (lockEntry != null) {
        // We hold the execution lock, so the procedure can be queued at the front.
        scheduler.addFront(procedure);
        LOG.debug("Bypassing {} and its ancestors successfully, adding to queue", procedure);
      } else {
        // If we don't have the lock, we can't re-submit the queue,
        // since it is already executing. To get rid of the stuck situation, we
        // need to restart the master. With the procedure set to bypass, the procedureExecutor
        // will bypass it and won't get stuck again.
        LOG.debug("Bypassing {} and its ancestors successfully, but since it is already running, "
          + "skipping add to queue", procedure);
      }
      return true;

    } finally {
      // Release the execution lock only if we actually took it.
      if (lockEntry != null) {
        procExecutionLock.releaseLockEntry(lockEntry);
      }
    }
  }

  /**
   * Add a new root-procedure to the executor. With a nonce key, the procedure takes the id that
   * was reserved for that key (the key must have been registered beforehand); without one, a new
   * id is drawn. The procedure is inserted into the store before being pushed to the executor.
   * @param proc     the new procedure to execute.
   * @param nonceKey the registered unique identifier for this operation from the client or process.
   * @return the procedure id, that can be used to monitor the operation
   */
  @edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "NP_NULL_ON_SOME_PATH",
      justification = "FindBugs is blind to the check-for-null")
  public long submitProcedure(Procedure proc, NonceKey nonceKey) {
    Preconditions.checkArgument(lastProcId.get() >= 0);

    prepareProcedure(proc);

    final Long assignedProcId;
    if (nonceKey == null) {
      assignedProcId = nextProcId();
    } else {
      // The id must already have been reserved for this nonce.
      assignedProcId = nonceKeysToProcIdsMap.get(nonceKey);
      Preconditions.checkArgument(assignedProcId != null,
        "Expected nonceKey=" + nonceKey + " to be reserved, use registerNonce(); proc=" + proc);
    }

    // Stamp the procedure with its nonce and id
    proc.setNonceKey(nonceKey);
    proc.setProcId(assignedProcId.longValue());

    // Persist it
    store.insert(proc, null);
    LOG.debug("Stored {}", proc);

    // Hand it over to the executor
    return pushProcedure(proc);
  }

  /**
   * Add a set of new root-procedure to the executor. Every procedure is validated and given a new
   * id, the whole array is inserted into the store in one call, and then each procedure is pushed
   * to the executor. A null or empty array is a no-op.
   * @param procs the new procedures to execute.
   */
  // TODO: Do we need to take nonces here?
  public void submitProcedures(Procedure[] procs) {
    Preconditions.checkArgument(lastProcId.get() >= 0);
    if (procs == null || procs.length == 0) {
      return;
    }

    // Validate each procedure and assign its id
    for (Procedure candidate : procs) {
      prepareProcedure(candidate).setProcId(nextProcId());
    }

    // Persist all of them together
    store.insert(procs);
    if (LOG.isDebugEnabled()) {
      LOG.debug("Stored " + Arrays.toString(procs));
    }

    // Hand them over to the executor, in array order
    for (Procedure stored : procs) {
      pushProcedure(stored);
    }
  }

  /**
   * Check that a procedure can be submitted as a root procedure: it must be in the INITIALIZING
   * state, must not have a parent and, when owner checking is enabled, must have an owner.
   * @param proc the procedure about to be submitted
   * @return {@code proc} itself, to allow chaining
   */
  private Procedure prepareProcedure(Procedure proc) {
    final boolean initializing = proc.getState() == ProcedureState.INITIALIZING;
    Preconditions.checkArgument(initializing);
    final boolean parentless = !proc.hasParent();
    Preconditions.checkArgument(parentless, "unexpected parent", proc);
    if (checkOwnerSet) {
      Preconditions.checkArgument(proc.hasOwner(), "missing owner");
    }
    return proc;
  }

  /**
   * Register an already-stored root procedure with the executor: update the submit metrics, create
   * its rollback stack, record it as active, notify the listeners and queue it at the back of the
   * scheduler.
   * @param proc the procedure to register; its id must already be set
   * @return the id of the procedure
   */
  private long pushProcedure(Procedure proc) {
    final long procId = proc.getProcId();

    // Metrics: a procedure is being submitted
    proc.updateMetricsOnSubmit(getEnvironment());

    // Every root procedure gets its own rollback stack
    RootProcedureState rootState = new RootProcedureState<>();
    rootState.setRollbackSupported(proc.isRollbackSupported());
    rollbackStack.put(procId, rootState);

    // Record the procedure as active, tell the listeners, then schedule it
    assert !procedures.containsKey(procId);
    procedures.put(procId, proc);
    sendProcedureAddedNotification(procId);
    scheduler.addBack(proc);
    return proc.getProcId();
  }

  /**
   * Send an abort notification to the specified procedure, even if it has already been executed.
   * Depending on the procedure implementation the abort can be considered or ignored.
   * @param procId the procedure to abort
   * @return true if the procedure exists and has received the abort, otherwise false.
   */
  public boolean abort(long procId) {
    final boolean mayInterruptIfRunning = true;
    return abort(procId, mayInterruptIfRunning);
  }

  /**
   * Send an abort notification to the specified procedure. Depending on the procedure
   * implementation, the abort can be considered or ignored.
   * @param procId                the procedure to abort
   * @param mayInterruptIfRunning if the proc completed at least one step, should it be aborted?
   * @return true if the procedure exists and has received the abort, otherwise false.
   */
  public boolean abort(long procId, boolean mayInterruptIfRunning) {
    final Procedure target = procedures.get(procId);
    if (target == null) {
      // Not an active procedure
      return false;
    }
    // An already-executed procedure is left alone unless interruption is allowed.
    final boolean mustNotInterrupt = !mayInterruptIfRunning && target.wasExecuted();
    if (mustNotInterrupt) {
      return false;
    }
    return target.abort(getEnvironment());
  }

  // ==========================================================================
  // Executor query helpers
  // ==========================================================================
  /**
   * Look up an active procedure by its id.
   * @param procId the id of the procedure to look up
   * @return the procedure held under {@code procId} in the active-procedure map, or null if there
   *         is none
   */
  public Procedure getProcedure(final long procId) {
    return this.procedures.get(procId);
  }

  /**
   * Look up an active procedure by its id and return it only if it is an instance of the given
   * class.
   * @param clazz  the class the procedure is expected to be an instance of
   * @param procId the id of the procedure to look up
   * @return the procedure cast to {@code clazz}, or null if no procedure is found for
   *         {@code procId} or it is not an instance of {@code clazz}
   */
  // NOTE(review): the declaration below reads "public > T"; the generic type-parameter list
  // appears to have been lost -- confirm against the upstream source before compiling.
  public > T getProcedure(Class clazz, long procId) {
    Procedure proc = getProcedure(procId);
    // isInstance() is false for null, so a missing procedure falls through to the null return.
    if (clazz.isInstance(proc)) {
      return clazz.cast(proc);
    }
    return null;
  }

  /**
   * Get the procedure retained as the completed result for the given id.
   * @param procId the id of the procedure
   * @return the completed procedure, or null if no completed result is retained for
   *         {@code procId}
   */
  public Procedure getResult(long procId) {
    final CompletedProcedureRetainer retained = completed.get(procId);
    return retained == null ? null : retained.getProcedure();
  }

  /**
   * Return true if the procedure is finished, which here means it is no longer among the active
   * procedures. The state may be "completed successfully" or "failed and rolledback". Use
   * getResult() to check the state or get the result data.
   * @param procId the ID of the procedure to check
   * @return true if the procedure execution is finished, otherwise false.
   */
  public boolean isFinished(final long procId) {
    final boolean stillActive = procedures.containsKey(procId);
    return !stillActive;
  }

  /**
   * Return true if the procedure is started. An active procedure is started once it was executed;
   * a procedure that is not active counts as started only if a completed result is retained for
   * it.
   * @param procId the ID of the procedure to check
   * @return true if the procedure execution is started, otherwise false.
   */
  public boolean isStarted(long procId) {
    final Procedure active = procedures.get(procId);
    if (active != null) {
      return active.wasExecuted();
    }
    return completed.get(procId) != null;
  }

  /**
   * Mark the specified completed procedure, as ready to remove. Only the client acknowledgement
   * time is recorded here; the actual deletion is left to the CompletedProcedureCleaner.
   * @param procId the ID of the procedure to remove
   */
  public void removeResult(long procId) {
    final CompletedProcedureRetainer retained = completed.get(procId);
    if (retained != null) {
      // The CompletedProcedureCleaner will take care of deletion, once the TTL is expired.
      retained.setClientAckTime(EnvironmentEdgeManager.currentTime());
      return;
    }

    // No retained result: it must not be a still-active procedure.
    assert !procedures.containsKey(procId) : "pid=" + procId + " is still running";
    LOG.debug("pid={} already removed by the cleaner.", procId);
  }

  /**
   * Get the procedure for the given id, preferring the completed result over the active
   * procedure.
   * @param procId the id of the procedure
   * @return the completed procedure if one is retained, otherwise the active procedure, or null
   *         if neither exists
   */
  public Procedure getResultOrProcedure(long procId) {
    final CompletedProcedureRetainer retained = completed.get(procId);
    if (retained != null) {
      return retained.getProcedure();
    }
    return procedures.get(procId);
  }

  /**
   * Check if the user is this procedure's owner. The active procedures are consulted first, then
   * the retained completed results.
   * @param procId the target procedure
   * @param user   the user
   * @return true if the user is the owner of the procedure, false otherwise or the owner is
   *         unknown (null user, procedure without an owner, or procedure not found).
   */
  public boolean isProcedureOwner(long procId, User user) {
    if (user == null) {
      return false;
    }
    final Procedure runningProc = procedures.get(procId);
    if (runningProc != null) {
      // A procedure may have no owner (an owner is only enforced when checkOwnerSet is on), so
      // guard with hasOwner() before comparing.
      return runningProc.hasOwner() && runningProc.getOwner().equals(user.getShortName());
    }

    final CompletedProcedureRetainer retainer = completed.get(procId);
    if (retainer != null) {
      final Procedure completedProc = retainer.getProcedure();
      return completedProc.hasOwner() && completedProc.getOwner().equals(user.getShortName());
    }

    // Procedure either does not exist or has already completed and got cleaned up.
    // At this time, we cannot check the owner of the procedure
    return false;
  }

  /**
   * Should only be used when starting up, where the procedure workers have not been started.
   * <p>
   * If the procedure workers have been started, the returned values may change while you are
   * processing them, so usually this is not safe. Use {@link #getProcedures()} below for most
   * cases as it will do a copy, and also include the finished procedures.
   * @return the values collection of the active-procedure map itself, not a copy
   */
  // NOTE(review): the return type below reads "Collection>"; its generic type argument appears to
  // have been lost -- confirm against the upstream source before compiling.
  public Collection> getActiveProceduresNoCopy() {
    return procedures.values();
  }

  /**
   * Get procedures: a new list holding the active procedures followed by the retained completed
   * ones.
   * @return the procedures in a list
   */
  // NOTE(review): "List>" below (twice) looks like a generic type whose argument was lost --
  // confirm against the upstream source before compiling.
  public List> getProcedures() {
    // Sized for both maps up front.
    List> procedureList =
      new ArrayList<>(procedures.size() + completed.size());
    procedureList.addAll(procedures.values());
    // Note: The procedure could show up twice in the list with different state, as
    // it could complete after we walk through procedures list and insert into
    // procedureList - it is ok, as we will use the information in the Procedure
    // to figure it out; to prevent this would increase the complexity of the logic.
    completed.values().stream().map(CompletedProcedureRetainer::getProcedure)
      .forEach(procedureList::add);
    return procedureList;
  }

  // ==========================================================================
  // Listeners helpers
  // ==========================================================================
  /**
   * Add a listener to the set of listeners notified about procedure events.
   * @param listener the listener to add
   */
  public void registerListener(ProcedureExecutorListener listener) {
    listeners.add(listener);
  }

  /**
   * Remove a previously registered listener.
   * @param listener the listener to remove
   * @return the result of removing {@code listener} from the listener collection
   */
  public boolean unregisterListener(ProcedureExecutorListener listener) {
    final boolean removed = listeners.remove(listener);
    return removed;
  }

  /**
   * Tell every registered listener that the given procedure has been loaded. A listener that
   * throws is logged and does not prevent the remaining listeners from being notified.
   * @param procId id of the loaded procedure
   */
  private void sendProcedureLoadedNotification(final long procId) {
    if (this.listeners.isEmpty()) {
      return;
    }
    for (ProcedureExecutorListener registered : this.listeners) {
      try {
        registered.procedureLoaded(procId);
      } catch (Throwable failure) {
        LOG.error("Listener " + registered + " had an error: " + failure.getMessage(), failure);
      }
    }
  }

  /**
   * Tell every registered listener that the given procedure has been added. A listener that
   * throws is logged and does not prevent the remaining listeners from being notified.
   * @param procId id of the added procedure
   */
  private void sendProcedureAddedNotification(final long procId) {
    if (this.listeners.isEmpty()) {
      return;
    }
    for (ProcedureExecutorListener registered : this.listeners) {
      try {
        registered.procedureAdded(procId);
      } catch (Throwable failure) {
        LOG.error("Listener " + registered + " had an error: " + failure.getMessage(), failure);
      }
    }
  }

  /**
   * Tell every registered listener that the given procedure has finished. A listener that throws
   * is logged and does not prevent the remaining listeners from being notified.
   * @param procId id of the finished procedure
   */
  private void sendProcedureFinishedNotification(final long procId) {
    if (this.listeners.isEmpty()) {
      return;
    }
    for (ProcedureExecutorListener registered : this.listeners) {
      try {
        registered.procedureFinished(procId);
      } catch (Throwable failure) {
        LOG.error("Listener " + registered + " had an error: " + failure.getMessage(), failure);
      }
    }
  }

  // ==========================================================================
  // Procedure IDs helpers
  // ==========================================================================
  /**
   * Draw the next procedure id from the shared counter.
   * <p>
   * Normally this is just the incremented counter. If the counter has overflowed to a negative
   * value, it is reset to 0 and new ids are drawn from it until one is found that is non-negative
   * and not used by an active procedure. Every id handed out comes from an atomic increment, so
   * concurrent callers going through the overflow path never receive the same id, and a negative
   * id is never returned.
   * @return a non-negative procedure id
   */
  private long nextProcId() {
    long procId = lastProcId.incrementAndGet();
    if (procId < 0) {
      do {
        // While the counter is still negative, try to reset it; if the compare-and-set loses a
        // race the counter has moved, and the next iteration re-checks it.
        final long observed = lastProcId.get();
        if (observed < 0) {
          lastProcId.compareAndSet(observed, 0);
        }
        // Always take the id from an atomic increment so that no two callers share one.
        procId = lastProcId.incrementAndGet();
      } while (procId < 0 || procedures.containsKey(procId));
    }
    assert procId >= 0 : "Invalid procId " + procId;
    return procId;
  }

  /**
   * Read the current value of the procedure id counter.
   * @return the current value of {@code lastProcId}
   */
  protected long getLastProcId() {
    final long current = lastProcId.get();
    return current;
  }

  /**
   * Get the ids of the active procedures.
   * @return the key set of the active-procedure map itself, not a copy
   */
  public Set getActiveProcIds() {
    return this.procedures.keySet();
  }

  /**
   * Resolve the id of the root procedure of {@code proc} against the active procedures, by
   * delegating to {@code Procedure.getRootProcedureId}.
   * @param proc the procedure whose root is wanted
   * @return the root procedure id; callers treat null as "root no longer available"
   */
  Long getRootProcedureId(Procedure proc) {
    final Long rootProcId = Procedure.getRootProcedureId(procedures, proc);
    return rootProcId;
  }

  // ==========================================================================
  // Executions
  // ==========================================================================
  /**
   * Run one scheduling round for a procedure.
   * <p>
   * A finished procedure is skipped. If the root procedure can no longer be resolved, the
   * procedure itself is rolled back. Otherwise the procedure tries to acquire execution on its
   * root's {@code RootProcedureState}: on failure a rollback is attempted (of the whole stack if
   * the rollback flag can be set, or of just this procedure if it was never executed); on success
   * the procedure lock is acquired and the procedure is executed. The round is repeated while the
   * root state reports a failure.
   * @param proc the procedure to execute
   */
  private void executeProcedure(Procedure proc) {
    if (proc.isFinished()) {
      LOG.debug("{} is already finished, skipping execution", proc);
      return;
    }
    final Long rootProcId = getRootProcedureId(proc);
    if (rootProcId == null) {
      // The 'proc' was ready to run but the root procedure was rolledback
      LOG.warn("Rollback because parent is done/rolledback proc=" + proc);
      executeRollback(proc);
      return;
    }

    RootProcedureState procStack = rollbackStack.get(rootProcId);
    if (procStack == null) {
      // No rollback stack is registered for the root; nothing can be executed.
      LOG.warn("RootProcedureState is null for " + proc.getProcId());
      return;
    }
    do {
      // Try to acquire the execution
      if (!procStack.acquire(proc)) {
        if (procStack.setRollback()) {
          // we have the 'rollback-lock' we can start rollingback
          switch (executeRollback(rootProcId, procStack)) {
            case LOCK_ACQUIRED:
              // rollback completed
              break;
            case LOCK_YIELD_WAIT:
              // give up the rollback flag and put the procedure back through the scheduler
              procStack.unsetRollback();
              scheduler.yield(proc);
              break;
            case LOCK_EVENT_WAIT:
              // give up the rollback flag; the procedure is not re-queued here
              LOG.info("LOCK_EVENT_WAIT rollback..." + proc);
              procStack.unsetRollback();
              break;
            default:
              throw new UnsupportedOperationException();
          }
        } else {
          // if we can't rollback means that some child is still running.
          // the rollback will be executed after all the children are done.
          // If the procedure was never executed, remove and mark it as rolledback.
          if (!proc.wasExecuted()) {
            switch (executeRollback(proc)) {
              case LOCK_ACQUIRED:
                break;
              case LOCK_YIELD_WAIT:
                scheduler.yield(proc);
                break;
              case LOCK_EVENT_WAIT:
                LOG.info("LOCK_EVENT_WAIT can't rollback child running?..." + proc);
                break;
              default:
                throw new UnsupportedOperationException();
            }
          }
        }
        // Execution was not acquired: this round is over whatever the rollback outcome.
        break;
      }

      // Execute the procedure
      assert proc.getState() == ProcedureState.RUNNABLE : proc;
      // Note that lock is NOT about concurrency but rather about ensuring
      // ownership of a procedure of an entity such as a region or table
      LockState lockState = acquireLock(proc);
      switch (lockState) {
        case LOCK_ACQUIRED:
          execProcedure(procStack, proc);
          break;
        case LOCK_YIELD_WAIT:
          LOG.info(lockState + " " + proc);
          scheduler.yield(proc);
          break;
        case LOCK_EVENT_WAIT:
          // Someone will wake us up when the lock is available
          LOG.debug(lockState + " " + proc);
          break;
        default:
          throw new UnsupportedOperationException();
      }
      // Pair of the acquire() above; done for every lock outcome.
      procStack.release(proc);

      if (proc.isSuccess()) {
        // update metrics on finishing the procedure
        proc.updateMetricsOnFinish(getEnvironment(), proc.elapsedTime(), true);
        LOG.info("Finished " + proc + " in " + StringUtils.humanTimeDiff(proc.elapsedTime()));
        // Finalize the procedure state: a root procedure is finished as a whole, a
        // sub-procedure only goes through completion cleanup
        if (proc.getProcId() == rootProcId) {
          procedureFinished(proc);
        } else {
          execCompletionCleanup(proc);
        }
        break;
      }
      // Loop again only if the stack is now marked failed; the next round will then take the
      // rollback path above.
    } while (procStack.isFailed());
  }

  /**
   * Acquire the procedure's lock unless the procedure reports that it already has it.
   * @param proc the procedure that needs its lock
   * @return {@link LockState#LOCK_ACQUIRED} if the lock is already held, otherwise the outcome of
   *         the procedure's own lock acquisition
   */
  private LockState acquireLock(Procedure proc) {
    TEnvironment env = getEnvironment();
    // With holdLock the procedure may still own the lock from an earlier step; in that case there
    // is nothing to acquire.
    return proc.hasLock() ? LockState.LOCK_ACQUIRED : proc.doAcquireLock(env, store);
  }

  /**
   * Release the procedure's lock, unless the procedure asks to hold on to it. The lock is kept
   * only when {@code force} is false, the procedure holds its lock across steps and it is not
   * finished yet.
   * @param proc  the procedure whose lock is to be released
   * @param force release the lock regardless of the procedure's hold-lock setting
   */
  private void releaseLock(Procedure proc, boolean force) {
    TEnvironment env = getEnvironment();
    // For how the framework works, we know that we will always have the lock
    // when we call releaseLock(), so we can avoid calling proc.hasLock()
    final boolean keepLock = !force && proc.holdLock(env) && !proc.isFinished();
    if (!keepLock) {
      proc.doReleaseLock(env, store);
    }
  }

  // Returning null means we have already held the execution lock, so you do not need to get the
  // lock entry for releasing
  private IdLock.Entry getLockEntryForRollback(long procId) {
    // The IdLock is not reentrant and a worker already holds the execution lock of the procedure
    // it is executing, hence the held-by-us check. This is the only place where two procedure
    // execution locks may be held at once; the fence in RootProcedureState guarantees that a
    // single worker executes the rollback of a RootProcedureState, so there is no dead lock
    // problem. The lock itself is necessary to prevent a race between us and the force update
    // thread.
    if (procExecutionLock.isHeldByCurrentThread(procId)) {
      return null;
    }
    try {
      return procExecutionLock.getLockEntry(procId);
    } catch (IOException interrupted) {
      // can only happen if interrupted, so not a big deal to propagate it
      throw new UncheckedIOException(interrupted);
    }
  }

  /**
   * Clean up a root procedure that failed but does not support rollback. No rollback logic is
   * run: every sub-procedure that is still active goes through the rollback cleanup and the
   * completion cleanup, highest procedure id first, and finally the root procedure goes through
   * the rollback cleanup. Each cleanup runs under the procedure's execution lock.
   * @param rootProc  the root procedure
   * @param procStack the rollback state of the root procedure
   */
  private void executeUnexpectedRollback(Procedure rootProc,
    RootProcedureState procStack) {
    if (procStack.getSubprocs() != null) {
      // comparing proc id in reverse order, so we will delete later procedures first, otherwise we
      // may delete parent procedure first and if we fail in the middle of this operation, when
      // loading we will find some orphan procedures
      // NOTE(review): "PriorityQueue>" and "Comparator.>" below look like generic types whose
      // arguments were lost -- confirm against the upstream source before compiling.
      PriorityQueue> pq =
        new PriorityQueue<>(procStack.getSubprocs().size(),
          Comparator.> comparingLong(Procedure::getProcId).reversed());
      pq.addAll(procStack.getSubprocs());
      for (;;) {
        Procedure subproc = pq.poll();
        if (subproc == null) {
          // queue drained
          break;
        }
        if (!procedures.containsKey(subproc.getProcId())) {
          // this means it has already been rolledback
          continue;
        }
        // null means the current thread already holds this procedure's execution lock
        IdLock.Entry lockEntry = getLockEntryForRollback(subproc.getProcId());
        try {
          cleanupAfterRollbackOneStep(subproc);
          execCompletionCleanup(subproc);
        } finally {
          if (lockEntry != null) {
            procExecutionLock.releaseLockEntry(lockEntry);
          }
        }
      }
    }
    // Sub-procedures are done; now clean up the root itself under its execution lock.
    IdLock.Entry lockEntry = getLockEntryForRollback(rootProc.getProcId());
    try {
      cleanupAfterRollbackOneStep(rootProc);
    } finally {
      if (lockEntry != null) {
        procExecutionLock.releaseLockEntry(lockEntry);
      }
    }
  }

  /**
   * Roll back the sub-procedure stack of a root procedure, from the last entry to the first.
   * Entries that finished successfully are only cleaned up; every other entry has its rollback
   * executed under its procedure lock. The walk stops early, leaving the current entry on the
   * stack, when a lock cannot be acquired, when a rollback step does not complete, or when the
   * executor or the store is no longer running. It also stops, after removing the current entry,
   * when the procedure asks to yield.
   * @param rootProc  the root procedure
   * @param procStack the rollback state of the root procedure
   * @return {@link LockState#LOCK_ACQUIRED} when the whole stack was processed, otherwise the
   *         state that interrupted the walk
   */
  private LockState executeNormalRollback(Procedure rootProc,
    RootProcedureState procStack) {
    // NOTE(review): "List>" below looks like a generic type whose argument was lost -- confirm
    // against the upstream source before compiling.
    List> subprocStack = procStack.getSubproceduresStack();
    assert subprocStack != null : "Called rollback with no steps executed rootProc=" + rootProc;

    int stackTail = subprocStack.size();
    while (stackTail-- > 0) {
      Procedure proc = subprocStack.get(stackTail);
      // null means the current thread already holds this procedure's execution lock
      IdLock.Entry lockEntry = getLockEntryForRollback(proc.getProcId());
      try {
        // For the sub procedures which are successfully finished, we do not rollback them.
        // Typically, if we want to rollback a procedure, we first need to rollback it, and then
        // recursively rollback its ancestors. The state changes which are done by sub procedures
        // should be handled by parent procedures when rolling back. For example, when rolling back
        // a MergeTableProcedure, we will schedule new procedures to bring the offline regions
        // online, instead of rolling back the original procedures which offlined the regions(in
        // fact these procedures can not be rolled back...).
        if (proc.isSuccess()) {
          // Just do the cleanup work, without actually executing the rollback
          subprocStack.remove(stackTail);
          cleanupAfterRollbackOneStep(proc);
          continue;
        }
        LockState lockState = acquireLock(proc);
        if (lockState != LockState.LOCK_ACQUIRED) {
          // can't take a lock on the procedure, add the root-proc back on the
          // queue waiting for the lock availability
          return lockState;
        }

        lockState = executeRollback(proc);
        releaseLock(proc, false);
        boolean abortRollback = lockState != LockState.LOCK_ACQUIRED;
        abortRollback |= !isRunning() || !store.isRunning();

        // allows to kill the executor before something is stored to the wal.
        // useful to test the procedure recovery.
        if (abortRollback) {
          return lockState;
        }

        // This step is rolled back: drop it from the stack.
        subprocStack.remove(stackTail);

        // if the procedure is kind enough to pass the slot to someone else, yield
        // if the proc is already finished, do not yield
        if (!proc.isFinished() && proc.isYieldAfterExecutionStep(getEnvironment())) {
          return LockState.LOCK_YIELD_WAIT;
        }

        // The root procedure is not cleaned up here.
        if (proc != rootProc) {
          execCompletionCleanup(proc);
        }
      } finally {
        // Runs on every exit from the try (continue, return or fall-through).
        if (lockEntry != null) {
          procExecutionLock.releaseLockEntry(lockEntry);
        }
      }
    }
    return LockState.LOCK_ACQUIRED;
  }

  /**
   * Execute the rollback of the full procedure stack. Once the procedure is rolledback, the
   * root-procedure will be visible as finished to user, and the result will be the fatal exception.
   * @param rootProcId id of the root procedure
   * @param procStack  the rollback state of the root procedure
   * @return {@link LockState#LOCK_ACQUIRED} once the root procedure has been finished, otherwise
   *         the state returned by the interrupted rollback
   */
  private LockState executeRollback(long rootProcId, RootProcedureState procStack) {
    final Procedure rootProc = procedures.get(rootProcId);
    RemoteProcedureException failure = rootProc.getException();
    // TODO: This needs doc. The root proc doesn't have an exception. Maybe we are
    // rolling back because the subprocedure does. Clarify.
    if (failure == null) {
      // Take the exception recorded on the stack, set it on the root and persist the root.
      failure = procStack.getException();
      rootProc.setFailure(failure);
      store.update(rootProc);
    }

    if (!procStack.isRollbackSupported()) {
      // the procedure does not support rollback, so typically we should not reach here, this
      // usually means there are code bugs, let's just wait all the subprocedures to finish and then
      // mark the root procedure as failure.
      LOG.error(HBaseMarkers.FATAL,
        "Root Procedure {} does not support rollback but the execution failed"
          + " and try to rollback, code bug?",
        rootProc, failure);
      executeUnexpectedRollback(rootProc, procStack);
    } else {
      final LockState rollbackState = executeNormalRollback(rootProc, procStack);
      if (rollbackState != LockState.LOCK_ACQUIRED) {
        // The rollback could not run to completion now; report why.
        return rollbackState;
      }
    }

    // Finish the root procedure under its execution lock (null entry: already held by us).
    final IdLock.Entry rootLockEntry = getLockEntryForRollback(rootProc.getProcId());
    try {
      LOG.info("Rolled back {} exec-time={}", rootProc,
        StringUtils.humanTimeDiff(rootProc.elapsedTime()));
      procedureFinished(rootProc);
    } finally {
      if (rootLockEntry != null) {
        procExecutionLock.releaseLockEntry(rootLockEntry);
      }
    }

    return LockState.LOCK_ACQUIRED;
  }

  /**
   * Persist the outcome of one rollback step of a procedure.
   * <p>
   * When {@code removeStackIndex()} reports false, only the updated procedure is written to the
   * store. Otherwise a procedure that did not succeed is marked ROLLEDBACK and the failure
   * metrics are updated. A procedure with a parent is then deleted from the store and from the
   * active procedures. A procedure without a parent is stored together with the deletion of its
   * sub-procedure ids, or simply updated when it has none.
   * @param proc the procedure whose rollback step has just been performed
   */
  private void cleanupAfterRollbackOneStep(Procedure proc) {
    final boolean killForTesting =
      testing != null && testing.shouldKillBeforeStoreUpdateInRollback();
    if (killForTesting) {
      kill("TESTING: Kill BEFORE store update in rollback: " + proc);
    }

    if (!proc.removeStackIndex()) {
      store.update(proc);
      return;
    }

    if (!proc.isSuccess()) {
      proc.setState(ProcedureState.ROLLEDBACK);
    }

    // update metrics on finishing the procedure (fail)
    proc.updateMetricsOnFinish(getEnvironment(), proc.elapsedTime(), false);

    if (!proc.hasParent()) {
      final long[] subprocIds = rollbackStack.get(proc.getProcId()).getSubprocedureIds();
      if (subprocIds == null) {
        store.update(proc);
      } else {
        store.delete(proc, subprocIds);
      }
      return;
    }

    store.delete(proc.getProcId());
    procedures.remove(proc.getProcId());
  }

  /**
   * Execute the rollback of the procedure step. It updates the store with the new state (stack
   * index) or will remove the procedure completely in case it is a child.
   * @return {@link LockState#LOCK_YIELD_WAIT} if the rollback attempt threw an
   *         {@link IOException} or was interrupted (the step is expected to be retried),
   *         {@link LockState#LOCK_ACQUIRED} otherwise
   */
  private LockState executeRollback(Procedure proc) {
    boolean retryLater = false;
    try {
      proc.doRollback(getEnvironment());
    } catch (IOException e) {
      LOG.debug("Roll back attempt failed for {}", proc, e);
      retryLater = true;
    } catch (InterruptedException e) {
      handleInterruptedException(proc, e);
      retryLater = true;
    } catch (Throwable e) {
      // Catch NullPointerExceptions or similar errors... Note that the step is still cleaned up
      // below, exactly as if the rollback had succeeded.
      LOG.error(HBaseMarkers.FATAL, "CODE-BUG: Uncaught runtime exception for " + proc, e);
    }

    if (retryLater) {
      return LockState.LOCK_YIELD_WAIT;
    }

    cleanupAfterRollbackOneStep(proc);
    return LockState.LOCK_ACQUIRED;
  }

  /**
   * Gives up the current execution slot for {@code proc}: releases its lock via
   * {@code releaseLock(proc, false)} and then hands the procedure to the scheduler's
   * {@code yield}.
   */
  private void yieldProcedure(Procedure proc) {
    releaseLock(proc, false);
    scheduler.yield(proc);
  }

  /**
   * Executes <code>procedure</code>
   * <ul>
   * <li>Calls the doExecute() of the procedure
   * <li>If the procedure execution didn't fail (i.e. valid user input)
   * <ul>
   * <li>...and returned subprocedures
   * <ul>
   * <li>The subprocedures are initialized.
   * <li>The subprocedures are added to the store
   * <li>The subprocedures are added to the runnable queue
   * <li>The procedure is now in a WAITING state, waiting for the subprocedures to complete
   * </ul>
   * </li>
   * <li>...if there are no subprocedure
   * <ul>
   * <li>the procedure completed successfully
   * <li>if there is a parent (WAITING)
   * <li>the parent state will be set to RUNNABLE
   * </ul>
   * </li>
   * </ul>
   * </li>
   * <li>In case of failure
   * <ul>
   * <li>The store is updated with the new state</li>
   * <li>The executor (caller of this method) will start the rollback of the procedure</li>
   * </ul>
   * </li>
   * </ul>
   * @param procStack the state shared by all procedures under the same root procedure
   * @param procedure the procedure to execute; must be in the RUNNABLE state
   */
  private void execProcedure(RootProcedureState procStack,
    Procedure procedure) {
    Preconditions.checkArgument(procedure.getState() == ProcedureState.RUNNABLE,
      "NOT RUNNABLE! " + procedure.toString());

    // Procedures can suspend themselves. They skip out by throwing a ProcedureSuspendedException.
    // The exception is caught below and then we hurry to the exit without disturbing state. The
    // idea is that the processing of this procedure will be unsuspended later by an external event
    // such the report of a region open.
    boolean suspended = false;

    // Whether to 're-' -execute; run through the loop again.
    boolean reExecute = false;

    Procedure[] subprocs = null;
    do {
      reExecute = false;
      procedure.resetPersistence();
      try {
        subprocs = procedure.doExecute(getEnvironment());
        // Treat an empty array the same as "no subprocedures".
        if (subprocs != null && subprocs.length == 0) {
          subprocs = null;
        }
      } catch (ProcedureSuspendedException e) {
        LOG.trace("Suspend {}", procedure);
        suspended = true;
      } catch (ProcedureYieldException e) {
        LOG.trace("Yield {}", procedure, e);
        yieldProcedure(procedure);
        return;
      } catch (InterruptedException e) {
        LOG.trace("Yield interrupt {}", procedure, e);
        handleInterruptedException(procedure, e);
        yieldProcedure(procedure);
        return;
      } catch (Throwable e) {
        // Catch NullPointerExceptions or similar errors...
        String msg = "CODE-BUG: Uncaught runtime exception: " + procedure;
        LOG.error(msg, e);
        procedure.setFailure(new RemoteProcedureException(msg, e));
      }

      if (!procedure.isFailed()) {
        if (subprocs != null) {
          if (subprocs.length == 1 && subprocs[0] == procedure) {
            // Procedure returned itself. Quick-shortcut for a state machine-like procedure;
            // i.e. we go around this loop again rather than go back out on the scheduler queue.
            subprocs = null;
            reExecute = true;
            LOG.trace("Short-circuit to next step on pid={}", procedure.getProcId());
          } else {
            // Yield the current procedure, and make the subprocedure runnable
            // subprocs may come back 'null'.
            subprocs = initializeChildren(procStack, procedure, subprocs);
            LOG.info("Initialized subprocedures=" + (subprocs == null
              ? null
              : Stream.of(subprocs).map(e -> "{" + e.toString() + "}").collect(Collectors.toList())
                .toString()));
          }
        } else if (procedure.getState() == ProcedureState.WAITING_TIMEOUT) {
          LOG.trace("Added to timeoutExecutor {}", procedure);
          timeoutExecutor.add(procedure);
        } else if (!suspended) {
          // No subtask, so we are done
          procedure.setState(ProcedureState.SUCCESS);
        }
      }

      // allows to kill the executor before something is stored to the wal.
      // useful to test the procedure recovery.
      if (
        testing != null && testing.shouldKillBeforeStoreUpdate(suspended, procedure.hasParent())
      ) {
        kill("TESTING: Kill BEFORE store update: " + procedure);
      }

      // TODO: The code here doesn't check if store is running before persisting to the store as
      // it relies on the method call below to throw RuntimeException to wind up the stack and
      // executor thread to stop. The statement following the method call below seems to check if
      // store is not running, to prevent scheduling children procedures, re-execution or yield
      // of this procedure. This may need more scrutiny and subsequent cleanup in future
      //
      // Commit the transaction even if a suspend (state may have changed). Note this append
      // can take a bunch of time to complete.
      if (procedure.needPersistence()) {
        // Add the procedure to the stack
        // See HBASE-28210 on why we need synchronized here
        boolean needUpdateStoreOutsideLock = false;
        synchronized (procStack) {
          if (procStack.addRollbackStep(procedure)) {
            updateStoreOnExec(procStack, procedure, subprocs);
          } else {
            needUpdateStoreOutsideLock = true;
          }
        }
        // this is an optimization if we do not need to maintain rollback step, as all subprocedures
        // of the same root procedure share the same root procedure state, if we can only update
        // store under the above lock, the sub procedures of the same root procedure can only be
        // persistent sequentially, which will have a bad performance. See HBASE-28212 for more
        // details.
        if (needUpdateStoreOutsideLock) {
          updateStoreOnExec(procStack, procedure, subprocs);
        }
      }

      // if the store is not running we are aborting
      if (!store.isRunning()) {
        return;
      }
      // if the procedure is kind enough to pass the slot to someone else, yield
      if (
        procedure.isRunnable() && !suspended
          && procedure.isYieldAfterExecutionStep(getEnvironment())
      ) {
        yieldProcedure(procedure);
        return;
      }

      // A re-execution is only requested on the "returned itself" path, which clears subprocs.
      assert (reExecute && subprocs == null) || !reExecute;
    } while (reExecute);

    // Allows to kill the executor after something is stored to the WAL but before the below
    // state settings are done -- in particular the one on the end where we make parent
    // RUNNABLE again when its children are done; see countDownChildren.
    if (testing != null && testing.shouldKillAfterStoreUpdate(suspended)) {
      kill("TESTING: Kill AFTER store update: " + procedure);
    }

    // Submit the new subprocedures
    if (subprocs != null && !procedure.isFailed()) {
      submitChildrenProcedures(subprocs);
    }

    // we need to log the release lock operation before waking up the parent procedure, as there
    // could be race that the parent procedure may call updateStoreOnExec ahead of us and remove all
    // the sub procedures from store and cause problems...
    releaseLock(procedure, false);

    // if the procedure is complete and has a parent, count down the children latch.
    // If 'suspended', do nothing to change state -- let other threads handle unsuspend event.
    if (!suspended && procedure.isFinished() && procedure.hasParent()) {
      countDownChildren(procStack, procedure);
    }
  }

  /**
   * Logs {@code msg} at debug level, calls {@code stop()} and then throws a
   * {@link RuntimeException} carrying the same message so that the calling stack unwinds. In the
   * code visible here it is only invoked from the {@code testing} kill hooks.
   * @param msg the reason, used both for the log line and the exception message
   */
  private void kill(String msg) {
    LOG.debug(msg);
    stop();
    throw new RuntimeException(msg);
  }

  /**
   * Prepares the subprocedures returned by {@code procedure}: each child gets its parent id, root
   * id and a freshly allocated procedure id, and is registered on {@code procStack}. Afterwards
   * the parent's children latch is set and, if the parent is RUNNABLE, it is moved to WAITING (a
   * parent in WAITING_TIMEOUT is added to the timeout executor instead).
   * @return {@code subprocs}, or {@code null} if one of the entries was {@code null}, in which
   *         case {@code procedure} has been marked as failed
   */
  private Procedure[] initializeChildren(RootProcedureState procStack,
    Procedure procedure, Procedure[] subprocs) {
    assert subprocs != null : "expected subprocedures";
    final long rootProcId = getRootProcedureId(procedure);

    int index = 0;
    for (Procedure child : subprocs) {
      if (child == null) {
        // A null entry is a caller bug: fail the parent and give up on all the children.
        String msg = "subproc[" + index + "] is null, aborting the procedure";
        procedure
          .setFailure(new RemoteProcedureException(msg, new IllegalArgumentIOException(msg)));
        return null;
      }

      assert child.getState() == ProcedureState.INITIALIZING : child;
      child.setParentProcId(procedure.getProcId());
      child.setRootProcId(rootProcId);
      child.setProcId(nextProcId());
      procStack.addSubProcedure(child);
      index++;
    }

    if (procedure.isFailed()) {
      return subprocs;
    }

    procedure.setChildrenLatch(subprocs.length);
    switch (procedure.getState()) {
      case RUNNABLE:
        procedure.setState(ProcedureState.WAITING);
        break;
      case WAITING_TIMEOUT:
        timeoutExecutor.add(procedure);
        break;
      default:
        break;
    }
    return subprocs;
  }

  /**
   * Registers every subprocedure in the {@code procedures} map and puts it at the front of the
   * scheduler queue, updating its submit metrics first.
   */
  private void submitChildrenProcedures(Procedure[] subprocs) {
    for (Procedure child : subprocs) {
      child.updateMetricsOnSubmit(getEnvironment());
      // A child must not be known to the executor before it is submitted here.
      assert !procedures.containsKey(child.getProcId());
      procedures.put(child.getProcId(), child);
      scheduler.addFront(child);
    }
  }

  /**
   * Called when a child procedure has finished: asks the parent whether it can become runnable
   * again and, if so, persists the parent and puts it at the front of the scheduler queue.
   */
  private void countDownChildren(RootProcedureState procStack,
    Procedure procedure) {
    Procedure parent = procedures.get(procedure.getParentProcId());
    if (parent == null) {
      // The parent is no longer tracked; this is only expected while rolling back.
      assert procStack.isRollingback();
      return;
    }

    // If this procedure is the last child awake the parent procedure
    if (!parent.tryRunnable()) {
      return;
    }

    // We succeeded in making the parent runnable -- i.e. all of its
    // children have completed, move parent to front of the queue.
    store.update(parent);
    scheduler.addFront(parent);
    LOG.info("Finished subprocedure pid={}, resume processing ppid={}", procedure.getProcId(),
      parent.getProcId());
  }

  /**
   * Persists the result of one execution step. A non-failed procedure that produced subprocedures
   * is inserted together with them; a finished root procedure is stored while its recorded
   * subprocedures are deleted from the store and from the {@code procedures} map; everything else
   * is a plain store update.
   */
  private void updateStoreOnExec(RootProcedureState procStack,
    Procedure procedure, Procedure[] subprocs) {
    final boolean hasNewChildren = subprocs != null && !procedure.isFailed();
    if (hasNewChildren) {
      if (LOG.isTraceEnabled()) {
        LOG.trace("Stored " + procedure + ", children " + Arrays.toString(subprocs));
      }
      store.insert(procedure, subprocs);
      return;
    }

    LOG.trace("Store update {}", procedure);
    if (!procedure.isFinished() || procedure.hasParent()) {
      // Either still in progress or a child procedure: nothing to clean up.
      store.update(procedure);
      return;
    }

    // Finished root procedure: remove child procedures
    final long[] subProcIds = procStack.getSubprocedureIds();
    if (subProcIds == null) {
      store.update(procedure);
      return;
    }
    store.delete(procedure, subProcIds);
    for (long subProcId : subProcIds) {
      procedures.remove(subProcId);
    }
  }

  /**
   * Handles an {@link InterruptedException} raised while working on {@code proc}. The exception
   * is only logged at trace level; the thread's interrupt flag is deliberately not restored (see
   * the note in the body).
   */
  private void handleInterruptedException(Procedure proc, InterruptedException e) {
    LOG.trace("Interrupt during {}. suspend and retry it later.", proc, e);
    // NOTE: We don't call Thread.currentThread().interrupt()
    // because otherwise all the subsequent calls e.g. Thread.sleep() will throw
    // the InterruptedException. If the master is going down, we will be notified
    // and the executor/store will be stopped.
    // (The interrupted procedure will be retried on the next run)
  }

  /**
   * Runs the procedure's own {@code completionCleanup} hook. If the procedure unexpectedly still
   * holds its lock, a warning is logged and the lock is released first. Any {@link Throwable}
   * thrown by the hook is logged and swallowed so that it cannot break the caller.
   */
  private void execCompletionCleanup(Procedure proc) {
    final TEnvironment env = getEnvironment();
    if (proc.hasLock()) {
      LOG.warn("Usually this should not happen, we will release the lock before if the procedure"
        + " is finished, even if the holdLock is true, arrive here means we have some holes where"
        + " we do not release the lock. And the releaseLock below may fail since the procedure may"
        + " have already been deleted from the procedure store.");
      releaseLock(proc, true);
    }
    try {
      proc.completionCleanup(env);
    } catch (Throwable e) {
      // Catch NullPointerExceptions or similar errors...
      LOG.error("CODE-BUG: uncatched runtime exception for procedure: " + proc, e);
    }
  }

  /**
   * Finalizes a procedure inside the executor: runs its completion cleanup, records it in the
   * {@code completed} map, removes it from {@code rollbackStack} and {@code procedures}, lets the
   * scheduler do its own cleanup and finally notifies the listeners.
   */
  private void procedureFinished(Procedure proc) {
    // call the procedure completion cleanup handler
    execCompletionCleanup(proc);

    CompletedProcedureRetainer retainer = new CompletedProcedureRetainer<>(proc);

    // update the executor internal state maps
    if (!proc.shouldWaitClientAck(getEnvironment())) {
      // No client ack is expected for this procedure, so its ack time is set to 0 up front.
      retainer.setClientAckTime(0);
    }

    // Publish the completed entry before dropping the procedure from the active maps.
    completed.put(proc.getProcId(), retainer);
    rollbackStack.remove(proc.getProcId());
    procedures.remove(proc.getProcId());

    // call the runnableSet completion cleanup handler
    try {
      scheduler.completionCleanup(proc);
    } catch (Throwable e) {
      // Catch NullPointerExceptions or similar errors...
      LOG.error("CODE-BUG: uncatched runtime exception for completion cleanup: {}", proc, e);
    }

    // Notify the listeners
    sendProcedureFinishedNotification(proc.getProcId());
  }

  /**
   * Returns the entry of {@code rollbackStack} registered for {@code rootProcId}, or whatever the
   * map returns for an unknown key (presumably {@code null} - confirm against the map type).
   */
  RootProcedureState getProcStack(long rootProcId) {
    return rollbackStack.get(rootProcId);
  }

  /** Returns the scheduler used by this executor. */
  ProcedureScheduler getProcedureScheduler() {
    return scheduler;
  }

  /** Returns the number of entries currently held in the {@code completed} map. */
  int getCompletedSize() {
    return completed.size();
  }

  /**
   * Returns the {@link IdLock} whose per-procedure-id entries are taken by the worker threads
   * around each procedure execution.
   */
  public IdLock getProcExecutionLock() {
    return procExecutionLock;
  }

  // ==========================================================================
  // Worker Thread
  // ==========================================================================
  /**
   * A daemon thread that repeatedly polls the scheduler for a procedure and executes it. This
   * base class never times out (see {@link #keepAlive(long)}).
   */
  private class WorkerThread extends StoppableThread {
    /**
     * Start time of the procedure currently being executed, or {@link Long#MAX_VALUE} while the
     * worker is not executing anything.
     */
    private final AtomicLong executionStartTime = new AtomicLong(Long.MAX_VALUE);
    /** The procedure handed to {@link #runProcedure()}; reset to null once the run is over. */
    private volatile Procedure activeProcedure;

    public WorkerThread(ThreadGroup group) {
      this(group, "PEWorker-");
    }

    protected WorkerThread(ThreadGroup group, String prefix) {
      super(group, prefix + workerId.incrementAndGet());
      setDaemon(true);
    }

    @Override
    public void sendStopSignal() {
      scheduler.signalAll();
    }

    /**
     * Encapsulates execution of the current {@link #activeProcedure} for easy tracing.
     * @return the time at which the execution ended
     */
    private long runProcedure() throws IOException {
      final Procedure proc = this.activeProcedure;
      int activeCount = activeExecutorCount.incrementAndGet();
      int runningCount = store.setRunningProcedureCount(activeCount);
      LOG.trace("Execute pid={} runningCount={}, activeCount={}", proc.getProcId(), runningCount,
        activeCount);
      executionStartTime.set(EnvironmentEdgeManager.currentTime());
      IdLock.Entry lockEntry = procExecutionLock.getLockEntry(proc.getProcId());
      try {
        executeProcedure(proc);
      } catch (AssertionError e) {
        LOG.info("ASSERT pid=" + proc.getProcId(), e);
        throw e;
      } finally {
        procExecutionLock.releaseLockEntry(lockEntry);
        activeCount = activeExecutorCount.decrementAndGet();
        runningCount = store.setRunningProcedureCount(activeCount);
        LOG.trace("Halt pid={} runningCount={}, activeCount={}", proc.getProcId(), runningCount,
          activeCount);
        this.activeProcedure = null;
        executionStartTime.set(Long.MAX_VALUE);
      }
      return EnvironmentEdgeManager.currentTime();
    }

    @Override
    public void run() {
      long lastUpdate = EnvironmentEdgeManager.currentTime();
      // runProcedure() clears activeProcedure in its finally block, so by the time an exception
      // reaches the catch below that field is already null. Remember the polled procedure locally
      // so that the termination log can name the procedure that was being run.
      Procedure polled = null;
      try {
        while (isRunning() && keepAlive(lastUpdate)) {
          polled = null;
          @SuppressWarnings("unchecked")
          Procedure proc = scheduler.poll(keepAliveTime, TimeUnit.MILLISECONDS);
          if (proc == null) {
            continue;
          }
          polled = proc;
          this.activeProcedure = proc;
          lastUpdate = TraceUtil.trace(this::runProcedure, new ProcedureSpanBuilder(proc));
        }
      } catch (Throwable t) {
        LOG.warn("Worker terminating UNNATURALLY {}", polled, t);
      } finally {
        LOG.trace("Worker terminated.");
      }
      workerThreads.remove(this);
    }

    @Override
    public String toString() {
      Procedure p = this.activeProcedure;
      // The conditional must be parenthesized on its own: otherwise the closing ")" is only
      // appended in the non-null branch and an idle worker prints an unbalanced "(pid=-1".
      return getName() + "(pid=" + (p == null ? Procedure.NO_PROC_ID : p.getProcId()) + ")";
    }

    /**
     * Returns the time since the current procedure is running. While nothing is being executed
     * the start time is {@link Long#MAX_VALUE}, so the result is negative.
     */
    public long getCurrentRunTime() {
      return EnvironmentEdgeManager.currentTime() - executionStartTime.get();
    }

    // core worker never timeout
    protected boolean keepAlive(long lastUpdate) {
      return true;
    }
  }

  // A worker thread which can be added when core workers are stuck. Will time out after
  // keepAliveTime if there is no procedure to run.
  private final class KeepAliveWorkerThread extends WorkerThread {
    public KeepAliveWorkerThread(ThreadGroup group) {
      super(group, "KeepAlivePEWorker-");
    }

    // Stay alive only while less than keepAliveTime has elapsed since lastUpdate.
    @Override
    protected boolean keepAlive(long lastUpdate) {
      return EnvironmentEdgeManager.currentTime() - lastUpdate < keepAliveTime;
    }
  }

  // ----------------------------------------------------------------------------
  // TODO-MAYBE: Should we provide an InlineChore to notify the store with the
  // full set of procedures pending and completed to write a compacted
  // version of the log (in case it is a log)?
  // In theory no, procedures have a short life, so at some point the store
  // will have the tracker saying everything is in the last log.
  // ----------------------------------------------------------------------------

  /**
   * Periodic chore that looks for worker threads which have been running the same procedure for
   * longer than a configurable threshold and, when enough of them are stuck while runnable work
   * is pending, starts an additional {@link KeepAliveWorkerThread}. The configuration is re-read
   * at construction time and on every run.
   */
  private final class WorkerMonitor extends InlineChore {
    public static final String WORKER_MONITOR_INTERVAL_CONF_KEY =
      "hbase.procedure.worker.monitor.interval.msec";
    private static final int DEFAULT_WORKER_MONITOR_INTERVAL = 5000; // 5sec

    public static final String WORKER_STUCK_THRESHOLD_CONF_KEY =
      "hbase.procedure.worker.stuck.threshold.msec";
    private static final int DEFAULT_WORKER_STUCK_THRESHOLD = 10000; // 10sec

    public static final String WORKER_ADD_STUCK_PERCENTAGE_CONF_KEY =
      "hbase.procedure.worker.add.stuck.percentage";
    private static final float DEFAULT_WORKER_ADD_STUCK_PERCENTAGE = 0.5f; // 50% stuck

    private float addWorkerStuckPercentage = DEFAULT_WORKER_ADD_STUCK_PERCENTAGE;
    private int timeoutInterval = DEFAULT_WORKER_MONITOR_INTERVAL;
    private int stuckThreshold = DEFAULT_WORKER_STUCK_THRESHOLD;

    public WorkerMonitor() {
      refreshConfig();
    }

    @Override
    public void run() {
      final int stuckCount = checkForStuckWorkers();
      checkThreadCount(stuckCount);

      // refresh interval (poor man dynamic conf update)
      refreshConfig();
    }

    /**
     * Counts the workers whose current run time has reached the stuck threshold, logging a
     * warning for each of them.
     * @return the number of stuck workers
     */
    private int checkForStuckWorkers() {
      // check if any of the worker is stuck
      int stuckCount = 0;
      for (WorkerThread worker : workerThreads) {
        // Read the run time exactly once. The worker resets its start time when it finishes a
        // procedure, so a second read could race with that and report a bogus negative duration
        // in the warning below.
        final long runTime = worker.getCurrentRunTime();
        if (runTime < stuckThreshold) {
          continue;
        }

        // WARN the worker is stuck
        stuckCount++;
        LOG.warn("Worker stuck {}, run time {}", worker, StringUtils.humanTimeDiff(runTime));
      }
      return stuckCount;
    }

    /**
     * Starts one extra keep-alive worker if there is runnable work, the share of stuck workers
     * has reached the configured percentage and the pool is still below {@code maxPoolSize}.
     */
    private void checkThreadCount(final int stuckCount) {
      // nothing to do if there are no runnable tasks
      if (stuckCount < 1 || !scheduler.hasRunnables()) {
        return;
      }

      // add a new thread if the worker stuck percentage exceed the threshold limit
      final float stuckPerc = ((float) stuckCount) / workerThreads.size();
      // let's add new worker thread more aggressively, as they will timeout finally if there is no
      // work to do.
      if (stuckPerc >= addWorkerStuckPercentage && workerThreads.size() < maxPoolSize) {
        final KeepAliveWorkerThread worker = new KeepAliveWorkerThread(threadGroup);
        workerThreads.add(worker);
        worker.start();
        LOG.debug("Added new worker thread {}", worker);
      }
    }

    /** Re-reads the three tunables from the configuration, falling back to the defaults. */
    private void refreshConfig() {
      addWorkerStuckPercentage =
        conf.getFloat(WORKER_ADD_STUCK_PERCENTAGE_CONF_KEY, DEFAULT_WORKER_ADD_STUCK_PERCENTAGE);
      timeoutInterval =
        conf.getInt(WORKER_MONITOR_INTERVAL_CONF_KEY, DEFAULT_WORKER_MONITOR_INTERVAL);
      stuckThreshold = conf.getInt(WORKER_STUCK_THRESHOLD_CONF_KEY, DEFAULT_WORKER_STUCK_THRESHOLD);
    }

    @Override
    public int getTimeoutInterval() {
      return timeoutInterval;
    }
  }
}