All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.hadoop.hbase.client.RpcRetryingCaller Maven / Gradle / Ivy

The newest version!
/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.client;

import java.io.IOException;
import java.io.InterruptedIOException;
import java.lang.reflect.UndeclaredThrowableException;
import java.net.SocketTimeoutException;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.atomic.AtomicBoolean;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.CallQueueTooBigException;
import org.apache.hadoop.hbase.DoNotRetryIOException;
import org.apache.hadoop.hbase.exceptions.PreemptiveFastFailException;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.ExceptionUtil;
import org.apache.hadoop.ipc.RemoteException;
import org.apache.hadoop.util.StringUtils;

import com.google.protobuf.ServiceException;

/**
 * Runs an rpc'ing {@link RetryingCallable}, retrying failed invocations with a pause between
 * attempts. Sets into rpc client threadlocal outstanding timeouts as so we don't persist too
 * much. Dynamic rather than static so can set the generic appropriately.
 *
 * This object has a state. It should not be used in parallel by different threads.
 * Reusing it is possible however, even between multiple threads. However, the user will
 * have to manage the synchronization on its side: there is no synchronization inside the class
 * other than the monitor used to wake up a sleeping retry loop on {@link #cancel()}.
 *
 * @param <T> the type of the result returned by the wrapped {@link RetryingCallable}
 */
@InterfaceAudience.Private
public class RpcRetryingCaller<T> {
  public static final Log LOG = LogFactory.getLog(RpcRetryingCaller.class);
  /**
   * When we started making calls.
   */
  private long globalStartTime;
  /**
   * Smallest timeout (in ms) handed to a call once a positive call timeout has been used up.
   * 0 means "no timeout" to the callee, so the remaining time is never allowed to drop to 0.
   */
  private final static int MIN_RPC_TIMEOUT = 1;
  /** How many retries are allowed before we start to log */
  private final int startLogErrorsCnt;

  /** Base pause between retries. */
  private final long pause;
  /** Base pause between retries after a {@link CallQueueTooBigException}. */
  private final long pauseForCQTBE;
  /** Maximum number of attempts made by {@link #callWithRetries(RetryingCallable, int)}. */
  private final int retries;
  private final int rpcTimeout;// timeout for each rpc request
  /** Monitor the retry loop sleeps on; notified by {@link #cancel()}. */
  private final Object lock = new Object();
  private final AtomicBoolean cancelled = new AtomicBoolean(false);
  private final RetryingCallerInterceptor interceptor;
  private final RetryingCallerInterceptorContext context;

  /**
   * Creates a caller with the no-op interceptor and no per-rpc timeout (rpcTimeout = 0).
   * @param pause base pause between retries
   * @param pauseForCQTBE base pause between retries after a {@link CallQueueTooBigException}
   * @param retries maximum number of attempts
   * @param startLogErrorsCnt number of tries that must be exceeded before failures are logged
   */
  public RpcRetryingCaller(long pause, long pauseForCQTBE, int retries, int startLogErrorsCnt) {
    this(pause, pauseForCQTBE, retries, RetryingCallerInterceptorFactory.NO_OP_INTERCEPTOR,
        startLogErrorsCnt, 0);
  }

  /**
   * @param pause base pause between retries
   * @param pauseForCQTBE base pause between retries after a {@link CallQueueTooBigException}
   * @param retries maximum number of attempts
   * @param interceptor interceptor invoked around every attempt; also supplies the context
   * @param startLogErrorsCnt number of tries that must be exceeded before failures are logged
   * @param rpcTimeout timeout for each rpc request; values &lt;= 0 do not cap the call timeout
   */
  public RpcRetryingCaller(long pause, long pauseForCQTBE, int retries,
      RetryingCallerInterceptor interceptor, int startLogErrorsCnt, int rpcTimeout) {
    this.pause = pause;
    this.pauseForCQTBE = pauseForCQTBE;
    this.retries = retries;
    this.interceptor = interceptor;
    context = interceptor.createEmptyContext();
    this.startLogErrorsCnt = startLogErrorsCnt;
    this.rpcTimeout = rpcTimeout;
  }

  /**
   * Computes how much of {@code callTimeout} is left since {@link #globalStartTime}.
   * @param callTimeout overall timeout for the operation
   * @return 0 if {@code callTimeout <= 0}; {@link Integer#MAX_VALUE} if {@code callTimeout} is
   *         {@link Integer#MAX_VALUE}; otherwise the remaining time, never less than
   *         {@link #MIN_RPC_TIMEOUT}
   */
  private int getRemainingTime(int callTimeout) {
    if (callTimeout <= 0) {
      return 0;
    }
    if (callTimeout == Integer.MAX_VALUE) {
      return Integer.MAX_VALUE;
    }
    long elapsed = EnvironmentEdgeManager.currentTime() - this.globalStartTime;
    // Clamp while still in long arithmetic so the narrowing cast below can never wrap around.
    long remainingTime = callTimeout - elapsed;
    if (remainingTime < MIN_RPC_TIMEOUT) {
      // If there is no time left, we're trying anyway. It's too late.
      // 0 means no timeout, and it's not the intent here. So we secure both cases by
      // resetting to the minimum.
      remainingTime = MIN_RPC_TIMEOUT;
    } else if (remainingTime > Integer.MAX_VALUE) {
      remainingTime = Integer.MAX_VALUE;
    }
    return (int) remainingTime;
  }

  /**
   * Picks the timeout for the next attempt: the remaining call time, replaced by
   * {@link #rpcTimeout} when there is no remaining-time limit or when a positive
   * {@link #rpcTimeout} is smaller than the remaining time.
   */
  private int getTimeout(int callTimeout){
    int timeout = getRemainingTime(callTimeout);
    if (timeout <= 0 || (rpcTimeout > 0 && rpcTimeout < timeout)){
      timeout = rpcTimeout;
    }
    return timeout;
  }

  /**
   * Marks this caller as cancelled and wakes up a retry loop sleeping between attempts.
   */
  public void cancel(){
    synchronized (lock){
      cancelled.set(true);
      lock.notifyAll();
    }
  }

  /**
   * Retries if invocation fails.
   * @param callable The {@link RetryingCallable} to run.
   * @param callTimeout Timeout for this call
   * @return an object of type T, or null if this caller was cancelled while waiting to retry
   * @throws IOException if a remote or network exception occurs; in particular
   *         {@link RetriesExhaustedException} once all attempts failed and
   *         {@link SocketTimeoutException} when the next pause would exceed {@code callTimeout}
   * @throws RuntimeException other unspecified error
   */
  public T callWithRetries(RetryingCallable<T> callable, int callTimeout)
  throws IOException, RuntimeException {
    List<RetriesExhaustedException.ThrowableWithExtraContext> exceptions =
      new ArrayList<RetriesExhaustedException.ThrowableWithExtraContext>();
    this.globalStartTime = EnvironmentEdgeManager.currentTime();
    context.clear();
    for (int tries = 0;; tries++) {
      long expectedSleep;
      try {
        // bad cache entries are cleared in the call to RetryingCallable#throwable() in catch block
        callable.prepare(tries != 0); // if called with false, check table status on ZK
        interceptor.intercept(context.prepare(callable, tries));
        return callable.call(getTimeout(callTimeout));
      } catch (PreemptiveFastFailException e) {
        throw e;
      } catch (Throwable t) {
        ExceptionUtil.rethrowIfInterrupt(t);

        // translateException throws exception when should not retry: i.e. when request is bad.
        interceptor.handleFailure(context, t);
        t = translateException(t);
        if (tries > startLogErrorsCnt) {
          if (LOG.isInfoEnabled()) {
            StringBuilder builder = new StringBuilder("Call exception, tries=").append(tries)
                .append(", retries=").append(retries).append(", started=")
                .append(EnvironmentEdgeManager.currentTime() - this.globalStartTime)
                .append(" ms ago, ").append("cancelled=").append(cancelled.get())
                .append(", msg=").append(t.getMessage())
                .append(", details=").append(callable.getExceptionMessageAdditionalDetail())
                .append(", see https://s.apache.org/timeout");
            if (LOG.isDebugEnabled()) {
              builder.append(", exception=").append(StringUtils.stringifyException(t));
              LOG.debug(builder.toString());
            } else {
              LOG.info(builder.toString());
            }
          }
        }

        callable.throwable(t, retries != 1);
        RetriesExhaustedException.ThrowableWithExtraContext qt =
            new RetriesExhaustedException.ThrowableWithExtraContext(t,
                EnvironmentEdgeManager.currentTime(), toString());
        exceptions.add(qt);
        if (tries >= retries - 1) {
          throw new RetriesExhaustedException(tries, exceptions);
        }
        // If the server is dead, we need to wait a little before retrying, to give
        // a chance to the regions to be moved
        // get right pause time, start by RETRY_BACKOFF[0] * pauseBase, where pauseBase might be
        // special when encountering CallQueueTooBigException, see #HBASE-17114
        long pauseBase = (t instanceof CallQueueTooBigException) ? pauseForCQTBE : pause;
        expectedSleep = callable.sleep(pauseBase, tries);

        // If, after the planned sleep, there won't be enough time left, we stop now.
        long duration = singleCallDuration(expectedSleep);
        if (duration > callTimeout) {
          String msg = "callTimeout=" + callTimeout + ", callDuration=" + duration +
              ": " + t.getMessage() + " " + callable.getExceptionMessageAdditionalDetail();
          throw (SocketTimeoutException)(new SocketTimeoutException(msg).initCause(t));
        }
      } finally {
        interceptor.updateFailureInfo(context);
      }
      try {
        if (expectedSleep > 0) {
          synchronized (lock) {
            // Checked under the lock so a cancel() issued before we wait is not missed.
            if (cancelled.get()) return null;
            lock.wait(expectedSleep);
          }
        }
        if (cancelled.get()) return null;
      } catch (InterruptedException e) {
        // InterruptedIOException has no cause-taking constructor; chain the cause explicitly.
        throw (InterruptedIOException) new InterruptedIOException(
            "Interrupted after " + tries + " tries  on " + retries).initCause(e);
      }
    }
  }

  /**
   * @param expectedSleep the pause planned before the next attempt
   * @return the time elapsed since {@link #globalStartTime} plus {@code expectedSleep}, i.e. the
   *         total duration of the operation if the planned pause is taken
   */
  private long singleCallDuration(final long expectedSleep) {
    return (EnvironmentEdgeManager.currentTime() - this.globalStartTime) + expectedSleep;
  }

  /**
   * Call the server once only.
   * {@link RetryingCallable} has a strange shape so we can do retrys.  Use this invocation if you
   * want to do a single call only (A call to {@link RetryingCallable#call(int)} will not likely
   * succeed).
   * @param callable The {@link RetryingCallable} to run.
   * @param callTimeout Timeout handed as-is to {@link RetryingCallable#call(int)}
   * @return an object of type T
   * @throws IOException if a remote or network exception occurs
   * @throws RuntimeException other unspecified error; wraps any non-IOException failure
   */
  public T callWithoutRetries(RetryingCallable<T> callable, int callTimeout)
  throws IOException, RuntimeException {
    // The code of this method should be shared with withRetries.
    this.globalStartTime = EnvironmentEdgeManager.currentTime();
    try {
      callable.prepare(false);
      return callable.call(callTimeout);
    } catch (Throwable t) {
      Throwable t2 = translateException(t);
      ExceptionUtil.rethrowIfInterrupt(t2);
      // It would be nice to clear the location cache here.
      if (t2 instanceof IOException) {
        throw (IOException)t2;
      } else {
        throw new RuntimeException(t2);
      }
    }
  }

  /**
   * Get the good or the remote exception if any, throws the DoNotRetryIOException.
   * Unwraps {@link UndeclaredThrowableException}, {@link RemoteException} and
   * {@link ServiceException} layers. A {@link ServiceException} without a cause is returned
   * unchanged, so this method only returns null when given null.
   * @param t the throwable to analyze
   * @return the translated exception, if it's not a DoNotRetryIOException
   * @throws DoNotRetryIOException - if we find it, we throw it instead of translating. Also
   *         thrown, wrapping the original, for {@link LinkageError} and
   *         {@link NeedUnmanagedConnectionException}.
   */
  static Throwable translateException(Throwable t) throws DoNotRetryIOException {
    if (t instanceof UndeclaredThrowableException) {
      if (t.getCause() != null) {
        t = t.getCause();
      }
    }
    if (t instanceof RemoteException) {
      t = ((RemoteException)t).unwrapRemoteException();
    }
    if (t instanceof LinkageError) {
      throw new DoNotRetryIOException(t);
    }
    if (t instanceof ServiceException) {
      ServiceException se = (ServiceException)t;
      Throwable cause = se.getCause();
      if (cause != null) {
        if (cause instanceof DoNotRetryIOException) {
          throw (DoNotRetryIOException)cause;
        } else if (cause instanceof NeedUnmanagedConnectionException) {
          throw new DoNotRetryIOException(cause);
        }
        // Don't let ServiceException out; its rpc specific.
        // The cause could be a RemoteException so go around again, and keep the result of
        // that second pass instead of discarding it.
        t = translateException(cause);
      }
      // With no cause there is nothing to unwrap: keep the ServiceException rather than
      // handing a null throwable back to the caller.
    } else if (t instanceof DoNotRetryIOException) {
      throw (DoNotRetryIOException)t;
    } else if (t instanceof NeedUnmanagedConnectionException) {
      throw new DoNotRetryIOException(t);
    }
    return t;
  }

  @Override
  public String toString() {
    return "RpcRetryingCaller{" + "globalStartTime=" + globalStartTime +
        ", pause=" + pause + ", retries=" + retries + '}';
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy