All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.hadoop.hbase.procedure.Procedure Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.procedure;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import org.apache.hadoop.hbase.errorhandling.ForeignException;
import org.apache.hadoop.hbase.errorhandling.ForeignExceptionDispatcher;
import org.apache.hadoop.hbase.errorhandling.ForeignExceptionListener;
import org.apache.hadoop.hbase.errorhandling.ForeignExceptionSnare;
import org.apache.hadoop.hbase.errorhandling.TimeoutExceptionInjector;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.common.collect.Lists;

/**
 * A globally-barriered distributed procedure. This class encapsulates state and methods for
 * tracking and managing a distributed procedure, as well as aborting if any member encounters a
 * problem or if a cancellation is requested.
 * 

* All procedures first attempt to reach a barrier point with the {@link #sendGlobalBarrierStart()} * method. The procedure contacts all members and waits for all subprocedures to execute * {@link Subprocedure#acquireBarrier} to acquire its local piece of the global barrier and then * send acquisition info back to the coordinator. If all acquisitions at subprocedures succeed, the * coordinator then will call {@link #sendGlobalBarrierReached()}. This notifies members to execute * the {@link Subprocedure#insideBarrier()} method. The procedure is blocked until all * {@link Subprocedure#insideBarrier} executions complete at the members. When * {@link Subprocedure#insideBarrier} completes at each member, the member sends notification to the * coordinator. Once all members complete, the coordinator calls * {@link #sendGlobalBarrierComplete()}. *

* If errors are encountered remotely, they are forwarded to the coordinator, and * {@link Subprocedure#cleanup(Exception)} is called. *

* Each Procedure and each Subprocedure enforces a time limit on the execution time. If the time * limit expires before the procedure completes the {@link TimeoutExceptionInjector} will trigger an * {@link ForeignException} to abort the procedure. This is particularly useful for situations when * running a distributed {@link Subprocedure} so participants can avoid blocking for extreme amounts * of time if one of the participants fails or takes a really long time (e.g. GC pause). *

* Users should generally not directly create or subclass instances of this. They are created for * them implicitly via * {@link ProcedureCoordinator#startProcedure(ForeignExceptionDispatcher, String, byte[], List)}} */ @InterfaceAudience.Private public class Procedure implements Callable, ForeignExceptionListener { private static final Logger LOG = LoggerFactory.getLogger(Procedure.class); // // Arguments and naming // // Name of the procedure final private String procName; // Arguments for this procedure execution final private byte[] args; // // Execution State // /** latch for waiting until all members have acquire in barrier state */ final CountDownLatch acquiredBarrierLatch; /** latch for waiting until all members have executed and released their in barrier state */ final CountDownLatch releasedBarrierLatch; /** latch for waiting until a procedure has completed */ final CountDownLatch completedLatch; /** monitor to check for errors */ private final ForeignExceptionDispatcher monitor; // // Execution Timeout Handling. // /** frequency to check for errors (ms) */ protected final long wakeFrequency; protected final TimeoutExceptionInjector timeoutInjector; // // Members' and Coordinator's state // /** lock to prevent nodes from acquiring and then releasing before we can track them */ private final Object joinBarrierLock = new Object(); private final List acquiringMembers; private final List inBarrierMembers; private final HashMap dataFromFinishedMembers; private ProcedureCoordinator coord; /** * Creates a procedure. (FOR TESTING) {@link Procedure} state to be run by a * {@link ProcedureCoordinator}. * @param coord coordinator to call back to for general errors (e.g. * {@link ProcedureCoordinator#rpcConnectionFailure(String, IOException)}). * @param monitor error monitor to check for external errors * @param wakeFreq frequency to check for errors while waiting * @param timeout amount of time to allow the procedure to run before cancelling * @param procName name of the procedure instance * @param args argument data associated with the procedure instance * @param expectedMembers names of the expected members */ public Procedure(ProcedureCoordinator coord, ForeignExceptionDispatcher monitor, long wakeFreq, long timeout, String procName, byte[] args, List expectedMembers) { this.coord = coord; this.acquiringMembers = new ArrayList<>(expectedMembers); this.inBarrierMembers = new ArrayList<>(acquiringMembers.size()); this.dataFromFinishedMembers = new HashMap<>(); this.procName = procName; this.args = args; this.monitor = monitor; this.wakeFrequency = wakeFreq; int count = expectedMembers.size(); this.acquiredBarrierLatch = new CountDownLatch(count); this.releasedBarrierLatch = new CountDownLatch(count); this.completedLatch = new CountDownLatch(1); this.timeoutInjector = new TimeoutExceptionInjector(monitor, timeout); } /** * Create a procedure. Users should generally not directly create instances of this. They are * created them implicitly via * {@link ProcedureCoordinator#createProcedure(ForeignExceptionDispatcher, String, byte[], List)}} * @param coord coordinator to call back to for general errors (e.g. * {@link ProcedureCoordinator#rpcConnectionFailure(String, IOException)}). * @param wakeFreq frequency to check for errors while waiting * @param timeout amount of time to allow the procedure to run before cancelling * @param procName name of the procedure instance * @param args argument data associated with the procedure instance * @param expectedMembers names of the expected members */ public Procedure(ProcedureCoordinator coord, long wakeFreq, long timeout, String procName, byte[] args, List expectedMembers) { this(coord, new ForeignExceptionDispatcher(), wakeFreq, timeout, procName, args, expectedMembers); } public String getName() { return procName; } /** * @return String of the procedure members both trying to enter the barrier and already in barrier */ public String getStatus() { String waiting, done; synchronized (joinBarrierLock) { waiting = acquiringMembers.toString(); done = inBarrierMembers.toString(); } return "Procedure " + procName + " { waiting=" + waiting + " done=" + done + " }"; } /** * Get the ForeignExceptionDispatcher * @return the Procedure's monitor. */ public ForeignExceptionDispatcher getErrorMonitor() { return monitor; } /** * This call is the main execution thread of the barriered procedure. It sends messages and * essentially blocks until all procedure members acquire or later complete but periodically * checks for foreign exceptions. */ @Override @SuppressWarnings("finally") final public Void call() { LOG.info("Starting procedure '" + procName + "'"); // start the timer timeoutInjector.start(); // run the procedure try { // start by checking for error first monitor.rethrowException(); LOG.debug("Procedure '" + procName + "' starting 'acquire'"); sendGlobalBarrierStart(); // wait for all the members to report acquisition LOG.debug("Waiting for all members to 'acquire'"); waitForLatch(acquiredBarrierLatch, monitor, wakeFrequency, "acquired"); monitor.rethrowException(); LOG.debug("Procedure '" + procName + "' starting 'in-barrier' execution."); sendGlobalBarrierReached(); // wait for all members to report barrier release LOG.debug("Waiting for all members to 'release'"); waitForLatch(releasedBarrierLatch, monitor, wakeFrequency, "released"); // make sure we didn't get an error during in barrier execution and release monitor.rethrowException(); LOG.info("Procedure '" + procName + "' execution completed"); } catch (Exception e) { if (e instanceof InterruptedException) { Thread.currentThread().interrupt(); } String msg = "Procedure '" + procName + "' execution failed!"; LOG.error(msg, e); receive(new ForeignException(getName(), e)); } finally { LOG.debug("Running finish phase."); sendGlobalBarrierComplete(); completedLatch.countDown(); // tell the timer we are done, if we get here successfully timeoutInjector.complete(); return null; } } /** * Sends a message to Members to create a new {@link Subprocedure} for this Procedure and execute * the {@link Subprocedure#acquireBarrier} step. n */ public void sendGlobalBarrierStart() throws ForeignException { // start the procedure LOG.debug("Starting procedure '" + procName + "', kicking off acquire phase on members."); try { // send procedure barrier start to specified list of members. cloning the list to avoid // concurrent modification from the controller setting the prepared nodes coord.getRpcs().sendGlobalBarrierAcquire(this, args, Lists.newArrayList(this.acquiringMembers)); } catch (IOException e) { coord.rpcConnectionFailure("Can't reach controller.", e); } catch (IllegalArgumentException e) { throw new ForeignException(getName(), e); } } /** * Sends a message to all members that the global barrier condition has been satisfied. This * should only be executed after all members have completed its * {@link Subprocedure#acquireBarrier()} call successfully. This triggers the member * {@link Subprocedure#insideBarrier} method. n */ public void sendGlobalBarrierReached() throws ForeignException { try { // trigger to have member run {@link Subprocedure#insideBarrier} coord.getRpcs().sendGlobalBarrierReached(this, Lists.newArrayList(inBarrierMembers)); } catch (IOException e) { coord.rpcConnectionFailure("Can't reach controller.", e); } } /** * Sends a message to members that all {@link Subprocedure#insideBarrier} calls have completed. * After this executes, the coordinator can assume that any state resources about this barrier * procedure state has been released. */ public void sendGlobalBarrierComplete() { LOG.debug("Finished coordinator procedure - removing self from list of running procedures"); try { coord.getRpcs().resetMembers(this); } catch (IOException e) { coord.rpcConnectionFailure("Failed to reset procedure:" + procName, e); } } // // Call backs from other external processes. // /** * Call back triggered by an individual member upon successful local barrier acquisition n */ public void barrierAcquiredByMember(String member) { LOG.debug("member: '" + member + "' joining acquired barrier for procedure '" + procName + "' on coordinator"); if (this.acquiringMembers.contains(member)) { synchronized (joinBarrierLock) { if (this.acquiringMembers.remove(member)) { this.inBarrierMembers.add(member); acquiredBarrierLatch.countDown(); } } LOG.debug( "Waiting on: " + acquiredBarrierLatch + " remaining members to acquire global barrier"); } else { LOG.warn("Member " + member + " joined barrier, but we weren't waiting on it to join." + " Continuing on."); } } /** * Call back triggered by a individual member upon successful local in-barrier execution and * release nn */ public void barrierReleasedByMember(String member, byte[] dataFromMember) { boolean removed = false; synchronized (joinBarrierLock) { removed = this.inBarrierMembers.remove(member); if (removed) { releasedBarrierLatch.countDown(); } } if (removed) { LOG.debug("Member: '" + member + "' released barrier for procedure'" + procName + "', counting down latch. Waiting for " + releasedBarrierLatch.getCount() + " more"); } else { LOG.warn("Member: '" + member + "' released barrier for procedure'" + procName + "', but we weren't waiting on it to release!"); } dataFromFinishedMembers.put(member, dataFromMember); } /** * Waits until the entire procedure has globally completed, or has been aborted. If an exception * is thrown the procedure may or not have run cleanup to trigger the completion latch yet. nn */ public void waitForCompleted() throws ForeignException, InterruptedException { waitForLatch(completedLatch, monitor, wakeFrequency, procName + " completed"); } /** * Waits until the entire procedure has globally completed, or has been aborted. If an exception * is thrown the procedure may or not have run cleanup to trigger the completion latch yet. * @return data returned from procedure members upon successfully completing subprocedure. nn */ public HashMap waitForCompletedWithRet() throws ForeignException, InterruptedException { waitForCompleted(); return dataFromFinishedMembers; } /** * Check if the entire procedure has globally completed, or has been aborted. n */ public boolean isCompleted() throws ForeignException { // Rethrow exception if any monitor.rethrowException(); return (completedLatch.getCount() == 0); } /** * A callback that handles incoming ForeignExceptions. */ @Override public void receive(ForeignException e) { monitor.receive(e); } /** * Wait for latch to count to zero, ignoring any spurious wake-ups, but waking periodically to * check for errors * @param latch latch to wait on * @param monitor monitor to check for errors while waiting * @param wakeFrequency frequency to wake up and check for errors (in * {@link TimeUnit#MILLISECONDS}) * @param latchDescription description of the latch, for logging * @throws ForeignException type of error the monitor can throw, if the task fails * @throws InterruptedException if we are interrupted while waiting on latch */ public static void waitForLatch(CountDownLatch latch, ForeignExceptionSnare monitor, long wakeFrequency, String latchDescription) throws ForeignException, InterruptedException { boolean released = false; while (!released) { if (monitor != null) { monitor.rethrowException(); } /* * ForeignExceptionDispatcher.LOG.debug("Waiting for '" + latchDescription + * "' latch. (sleep:" + wakeFrequency + " ms)"); */ released = latch.await(wakeFrequency, TimeUnit.MILLISECONDS); } // check error again in case an error raised during last wait if (monitor != null) { monitor.rethrowException(); } } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy