
org.apache.hadoop.hbase.client.AsyncProcess Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.client;

import java.io.IOException;
import java.io.InterruptedIOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.concurrent.atomic.AtomicLong;
import java.util.function.Consumer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionLocation;
import org.apache.hadoop.hbase.RegionLocations;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.AsyncProcessTask.SubmittedRows;
import org.apache.hadoop.hbase.client.RequestController.ReturnCode;
import org.apache.hadoop.hbase.ipc.RpcControllerFactory;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.yetus.audience.InterfaceStability;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * This class allows a continuous flow of requests. It's written to be compatible with a synchronous
 * caller such as HTable.
 * <p>
 * The caller sends a buffer of operations by calling submit. This class extracts from this list
 * the operations it can send, i.e. the operations that are on regions that are not considered
 * busy. The process is asynchronous, i.e. it returns immediately once it has finished iterating
 * over the list. If, and only if, the maximum number of concurrent tasks is reached, the call to
 * submit will block. Alternatively, the caller can call submitAll, in which case all the
 * operations will be sent. Each call to submit returns a future-like object that can be used to
 * track operation progress.
 * </p>
 * <p>
 * The class manages the retries internally.
 * </p>
 * <p>
 * The errors are tracked inside the Future object that is returned. The results are always
 * tracked inside the Future object and can be retrieved when the call has finished. Partial
 * results can also be retrieved if some part of a multi-request failed.
 * </p>
 * <p>
 * This class is thread safe. Internally, the class is thread safe enough to manage
 * simultaneously new submissions and results arising from older operations.
 * </p>
 * <p>
 * Internally, this class works with {@link Row}, which means it could theoretically be used for
 * gets as well.
 * </p>
 */
@InterfaceAudience.Private
@InterfaceStability.Evolving
class AsyncProcess {
  private static final Logger LOG = LoggerFactory.getLogger(AsyncProcess.class);
  private static final AtomicLong COUNTER = new AtomicLong();

  public static final String PRIMARY_CALL_TIMEOUT_KEY = "hbase.client.primaryCallTimeout.multiget";

  /**
   * Configure the number of failures after which the client will start logging. A few failures
   * is fine: region moved, then is not opened, then is overloaded. We try to have an acceptable
   * heuristic for the number of errors we don't log. 5 was chosen because we wait for 1s at this
   * stage.
   */
  public static final String START_LOG_ERRORS_AFTER_COUNT_KEY =
    "hbase.client.start.log.errors.counter";
  public static final int DEFAULT_START_LOG_ERRORS_AFTER_COUNT = 5;

  /**
   * Configuration to decide whether to log details for batch error
   */
  public static final String LOG_DETAILS_FOR_BATCH_ERROR = "hbase.client.log.batcherrors.details";

  /**
   * Return value from a submit that didn't contain any requests.
   */
  private static final AsyncRequestFuture NO_REQS_RESULT = new AsyncRequestFuture() {
    final Object[] result = new Object[0];

    @Override
    public boolean hasError() {
      return false;
    }

    @Override
    public RetriesExhaustedWithDetailsException getErrors() {
      return null;
    }

    @Override
    public List<? extends Row> getFailedOperations() {
      return null;
    }

    @Override
    public Object[] getResults() {
      return result;
    }

    @Override
    public void waitUntilDone() throws InterruptedIOException {
    }
  };

  // TODO: many of the fields should be made private
  final long id;

  final ClusterConnection connection;
  final ConnectionConfiguration connectionConfiguration;
  private final RpcRetryingCallerFactory rpcCallerFactory;
  final RpcControllerFactory rpcFactory;

  // Start configuration settings.
  final int startLogErrorsCnt;

  final int numTries;
  long serverTrackerTimeout;
  final long primaryCallTimeoutMicroseconds;
  /** Whether to log details for batch errors */
  final boolean logBatchErrorDetails;

  // End configuration settings.

  /**
   * The traffic control for requests.
   */
  final RequestController requestController;
  public static final String LOG_DETAILS_PERIOD = "hbase.client.log.detail.period.ms";
  private static final int DEFAULT_LOG_DETAILS_PERIOD = 10000;
  private final int periodToLog;

  AsyncProcess(ClusterConnection hc, Configuration conf, RpcRetryingCallerFactory rpcCaller,
    RpcControllerFactory rpcFactory) {
    this(hc, conf, rpcCaller, rpcFactory, hc.getConnectionConfiguration().getRetriesNumber());
  }

  AsyncProcess(ClusterConnection hc, Configuration conf, RpcRetryingCallerFactory rpcCaller,
    RpcControllerFactory rpcFactory, int retriesNumber) {
    if (hc == null) {
      throw new IllegalArgumentException("ClusterConnection cannot be null.");
    }

    this.connection = hc;
    this.connectionConfiguration = connection.getConnectionConfiguration();

    this.id = COUNTER.incrementAndGet();

    // how many times we could try in total, one more than retry number
    this.numTries = retriesNumber + 1;
    this.primaryCallTimeoutMicroseconds = conf.getInt(PRIMARY_CALL_TIMEOUT_KEY, 10000);
    this.startLogErrorsCnt =
      conf.getInt(START_LOG_ERRORS_AFTER_COUNT_KEY, DEFAULT_START_LOG_ERRORS_AFTER_COUNT);
    this.periodToLog = conf.getInt(LOG_DETAILS_PERIOD, DEFAULT_LOG_DETAILS_PERIOD);
    // Server tracker allows us to do faster, and yet useful (hopefully), retries.
    // However, if we are too useful, we might fail very quickly due to retry count limit.
    // To avoid this, we are going to cheat for now (see HBASE-7659), and calculate maximum
    // retry time if normal retries were used. Then we will retry until this time runs out.
    // If we keep hitting one server, the net effect will be the incremental backoff, and
    // essentially the same number of retries as planned. If we have to do faster retries,
    // we will do more retries in aggregate, but the user will be none the wiser.
    this.serverTrackerTimeout = 0L;
    for (int i = 0; i < this.numTries; ++i) {
      serverTrackerTimeout = serverTrackerTimeout
        + ConnectionUtils.getPauseTime(connectionConfiguration.getPauseMillis(), i);
    }

    this.rpcCallerFactory = rpcCaller;
    this.rpcFactory = rpcFactory;
    this.logBatchErrorDetails = conf.getBoolean(LOG_DETAILS_FOR_BATCH_ERROR, false);

    this.requestController = RequestControllerFactory.create(conf);
  }

  /**
   * The submitted task may not be accomplished at all if there are too many running tasks or
   * other limits.
   * @param <CResult> The class to cast the result
   * @param task The setting and data
   */
  public <CResult> AsyncRequestFuture submit(AsyncProcessTask<CResult> task)
    throws InterruptedIOException {
    AsyncRequestFuture reqFuture = checkTask(task);
    if (reqFuture != null) {
      return reqFuture;
    }
    SubmittedRows submittedRows =
      task.getSubmittedRows() == null ? SubmittedRows.ALL : task.getSubmittedRows();
    switch (submittedRows) {
      case ALL:
        return submitAll(task);
      case AT_LEAST_ONE:
        return submit(task, true);
      default:
        return submit(task, false);
    }
  }

  /**
   * Extract from the rows list what we can submit. The rows we can not submit are kept in the
   * list. Does not send requests to replicas (not currently used for anything other than
   * streaming puts anyway).
   * @param task The setting and data
   * @param atLeastOne true if we should submit at least a subset.
   */
  private <CResult> AsyncRequestFuture submit(AsyncProcessTask<CResult> task, boolean atLeastOne)
    throws InterruptedIOException {
    TableName tableName = task.getTableName();
    RowAccess<? extends Row> rows = task.getRowAccess();
    Map<ServerName, MultiAction> actionsByServer = new HashMap<>();
    List<Action> retainedActions = new ArrayList<>(rows.size());

    NonceGenerator ng = this.connection.getNonceGenerator();
    long nonceGroup = ng.getNonceGroup(); // Currently, nonce group is per entire client.

    // Location errors that happen before we decide what requests to take.
    List<Exception> locationErrors = null;
    List<Integer> locationErrorRows = null;
    RequestController.Checker checker = requestController.newChecker();
    boolean firstIter = true;
    do {
      // Wait until there is at least one slot for a new task.
      requestController.waitForFreeSlot(id, periodToLog, getLogger(tableName, -1));
      int posInList = -1;
      if (!firstIter) {
        checker.reset();
      }
      Iterator<? extends Row> it = rows.iterator();
      while (it.hasNext()) {
        Row r = it.next();
        HRegionLocation loc;
        try {
          if (r == null) {
            throw new IllegalArgumentException("#" + id + ", row cannot be null");
          }
          // Make sure we get 0-s replica.
          RegionLocations locs = connection.locateRegion(tableName, r.getRow(), true, true,
            RegionReplicaUtil.DEFAULT_REPLICA_ID);
          if (locs == null || locs.isEmpty() || locs.getDefaultRegionLocation() == null) {
            throw new IOException("#" + id + ", no location found, aborting submit for"
              + " tableName=" + tableName + " rowkey=" + Bytes.toStringBinary(r.getRow()));
          }
          loc = locs.getDefaultRegionLocation();
        } catch (IOException ex) {
          locationErrors = new ArrayList<>(1);
          locationErrorRows = new ArrayList<>(1);
          LOG.error("Failed to get region location ", ex);
          // This action failed before creating ars. Retain it, but do not add to submit list.
          // We will then add it to ars in an already-failed state.
          int priority = HConstants.NORMAL_QOS;
          if (r instanceof Mutation) {
            priority = ((Mutation) r).getPriority();
          }
          retainedActions.add(new Action(r, ++posInList, priority));
          locationErrors.add(ex);
          locationErrorRows.add(posInList);
          it.remove();
          break; // Backward compat: we stop considering actions on location error.
        }
        ReturnCode code = checker.canTakeRow(loc, r);
        if (code == ReturnCode.END) {
          break;
        }
        if (code == ReturnCode.INCLUDE) {
          int priority = HConstants.NORMAL_QOS;
          if (r instanceof Mutation) {
            priority = ((Mutation) r).getPriority();
          }
          Action action = new Action(r, ++posInList, priority);
          setNonce(ng, r, action);
          retainedActions.add(action);
          // TODO: replica-get is not supported on this path
          byte[] regionName = loc.getRegionInfo().getRegionName();
          addAction(loc.getServerName(), regionName, action, actionsByServer, nonceGroup);
          it.remove();
        }
      }
      firstIter = false;
    } while (retainedActions.isEmpty() && atLeastOne && (locationErrors == null));

    if (retainedActions.isEmpty()) return NO_REQS_RESULT;

    return submitMultiActions(task, retainedActions, nonceGroup, locationErrors,
      locationErrorRows, actionsByServer);
  }

  <CResult> AsyncRequestFuture submitMultiActions(AsyncProcessTask task,
    List<Action> retainedActions, long nonceGroup, List<Exception> locationErrors,
    List<Integer> locationErrorRows, Map<ServerName, MultiAction> actionsByServer) {
    AsyncRequestFutureImpl<CResult> ars =
      createAsyncRequestFuture(task, retainedActions, nonceGroup);
    // Add location errors if any
    if (locationErrors != null) {
      for (int i = 0; i < locationErrors.size(); ++i) {
        int originalIndex = locationErrorRows.get(i);
        Row row = retainedActions.get(originalIndex).getAction();
        ars.manageError(originalIndex, row, AsyncRequestFutureImpl.Retry.NO_LOCATION_PROBLEM,
          locationErrors.get(i), null);
      }
    }
    ars.sendMultiAction(actionsByServer, 1, null, false);
    return ars;
  }

  /**
   * Helper that is used when grouping the actions per region server.
   * @param server - server
   * @param regionName - regionName
   * @param action - the action to add to the multiaction
   * @param actionsByServer the multiaction per server
   * @param nonceGroup Nonce group.
   */
  static void addAction(ServerName server, byte[] regionName, Action action,
    Map<ServerName, MultiAction> actionsByServer, long nonceGroup) {
    MultiAction multiAction = actionsByServer.get(server);
    if (multiAction == null) {
      multiAction = new MultiAction();
      actionsByServer.put(server, multiAction);
    }
    if (action.hasNonce() && !multiAction.hasNonceGroup()) {
      multiAction.setNonceGroup(nonceGroup);
    }

    multiAction.add(regionName, action);
  }

  /**
   * Submit immediately the list of rows, whatever the server status. Kept for backward
   * compatibility: it allows being used with the batch interface that returns an array of
   * objects.
   * @param task The setting and data
   */
  private <CResult> AsyncRequestFuture submitAll(AsyncProcessTask task) {
    RowAccess<? extends Row> rows = task.getRowAccess();
    List<Action> actions = new ArrayList<>(rows.size());
    // The position will be used by the processBatch to match the object array returned.
    int posInList = -1;
    NonceGenerator ng = this.connection.getNonceGenerator();
    int highestPriority = HConstants.PRIORITY_UNSET;
    for (Row r : rows) {
      posInList++;
      if (r instanceof Put) {
        Put put = (Put) r;
        if (put.isEmpty()) {
          throw new IllegalArgumentException(
            "No columns to insert for #" + (posInList + 1) + " item");
        }
        highestPriority = Math.max(put.getPriority(), highestPriority);
      }
      Action action = new Action(r, posInList, highestPriority);
      setNonce(ng, r, action);
      actions.add(action);
    }
    AsyncRequestFutureImpl<CResult> ars =
      createAsyncRequestFuture(task, actions, ng.getNonceGroup());
    ars.groupAndSendMultiAction(actions, 1);
    return ars;
  }

  private <CResult> AsyncRequestFuture checkTask(AsyncProcessTask<CResult> task) {
    if (task.getRowAccess() == null || task.getRowAccess().isEmpty()) {
      return NO_REQS_RESULT;
    }
    Objects.requireNonNull(task.getPool(), "The pool can't be NULL");
    checkOperationTimeout(task.getOperationTimeout());
    checkRpcTimeout(task.getRpcTimeout());
    return null;
  }

  private void setNonce(NonceGenerator ng, Row r, Action action) {
    if (hasIncrementOrAppend(r)) {
      action.setNonce(ng.newNonce()); // Action handles NO_NONCE, so it's ok if ng is disabled.
    }
  }

  private static boolean hasIncrementOrAppend(Row action) {
    if (action instanceof Append || action instanceof Increment) {
      return true;
    } else if (action instanceof RowMutations) {
      return hasIncrementOrAppend((RowMutations) action);
    } else if (action instanceof CheckAndMutate) {
      return hasIncrementOrAppend(((CheckAndMutate) action).getAction());
    }
    return false;
  }

  private static boolean hasIncrementOrAppend(RowMutations mutations) {
    for (Mutation mutation : mutations.getMutations()) {
      if (mutation instanceof Append || mutation instanceof Increment) {
        return true;
      }
    }
    return false;
  }

  private int checkTimeout(String name, int timeout) {
    if (timeout < 0) {
      throw new RuntimeException(
        "The " + name + " must be bigger than zero, current value is " + timeout);
    }
    return timeout;
  }

  private int checkOperationTimeout(int operationTimeout) {
    return checkTimeout("operation timeout", operationTimeout);
  }

  private int checkRpcTimeout(int rpcTimeout) {
    return checkTimeout("rpc timeout", rpcTimeout);
  }

  <CResult> AsyncRequestFutureImpl<CResult> createAsyncRequestFuture(AsyncProcessTask task,
    List<Action> actions, long nonceGroup) {
    return new AsyncRequestFutureImpl<>(task, actions, nonceGroup, this);
  }

  /** Wait until the async does not have more than max tasks in progress. */
  protected void waitForMaximumCurrentTasks(int max, TableName tableName)
    throws InterruptedIOException {
    requestController.waitForMaximumCurrentTasks(max, id, periodToLog, getLogger(tableName, max));
  }

  private Consumer<Long> getLogger(TableName tableName, long max) {
    return (currentInProgress) -> {
      LOG.info("#" + id
        + (max < 0
          ? ", waiting for any free slot"
          : ", waiting for some tasks to finish. Expected max=" + max)
        + ", tasksInProgress=" + currentInProgress
        + (tableName == null ? "" : ", tableName=" + tableName));
    };
  }

  void incTaskCounters(Collection<byte[]> regions, ServerName sn) {
    requestController.incTaskCounters(regions, sn);
  }

  void decTaskCounters(Collection<byte[]> regions, ServerName sn) {
    requestController.decTaskCounters(regions, sn);
  }

  /**
   * Create a caller. Isolated to be easily overridden in the tests.
   */
  protected RpcRetryingCaller<AbstractResponse> createCaller(
    CancellableRegionServerCallable callable, int rpcTimeout) {
    return rpcCallerFactory.<AbstractResponse> newCaller(checkRpcTimeout(rpcTimeout));
  }

  /**
   * Creates the server error tracker to use inside process. Currently, to preserve the main
   * assumption about current retries, and to work well with the retry-limit-based calculation,
   * the calculation is local per Process object. We may benefit from connection-wide tracking
   * of server errors.
   * @return ServerErrorTracker to use, null if there is no ServerErrorTracker on this connection
   */
  ConnectionImplementation.ServerErrorTracker createServerErrorTracker() {
    return new ConnectionImplementation.ServerErrorTracker(this.serverTrackerTimeout,
      this.numTries);
  }

  static boolean isReplicaGet(Row row) {
    return (row instanceof Get) && (((Get) row).getConsistency() == Consistency.TIMELINE);
  }
}
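
For context, the class Javadoc above describes the submit flow but the listing itself contains no caller. The fragment below is a minimal sketch of that flow, modeled on how HTable.batch() drives AsyncProcess elsewhere in the same package. It is illustrative only: the sketch class name and the pool, tableName, rows, results, and timeout parameters are assumptions, not part of this file, and AsyncProcess/AsyncProcessTask are private, package-level HBase client internals rather than public API.

package org.apache.hadoop.hbase.client; // AsyncProcess is package-private

import java.io.InterruptedIOException;
import java.util.List;
import java.util.concurrent.ExecutorService;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.AsyncProcessTask.SubmittedRows;

final class AsyncProcessUsageSketch {
  /**
   * Submits a batch of rows and blocks until every action has either succeeded or exhausted its
   * retries, mirroring the flow used by HTable.batch(). All parameters are supplied by the
   * (hypothetical) caller.
   */
  static void submitAndWait(AsyncProcess asyncProcess, ExecutorService pool, TableName tableName,
    List<? extends Row> rows, Object[] results, int operationTimeout, int rpcTimeout)
    throws InterruptedIOException, RetriesExhaustedWithDetailsException {
    AsyncProcessTask task = AsyncProcessTask.newBuilder()
      .setPool(pool)                       // executor that runs the per-server callables
      .setTableName(tableName)
      .setRowAccess(rows)                  // e.g. a List<Put>
      .setResults(results)                 // same length as rows; filled in as results arrive
      .setOperationTimeout(operationTimeout)
      .setRpcTimeout(rpcTimeout)
      .setSubmittedRows(SubmittedRows.ALL) // ALL routes to submitAll(): send everything now
      .build();

    AsyncRequestFuture future = asyncProcess.submit(task);
    future.waitUntilDone();                // block until all actions and their retries finish
    if (future.hasError()) {
      throw future.getErrors();            // partial results remain in the results array
    }
  }
}

Using SubmittedRows.AT_LEAST_ONE or NORMAL instead selects the flow-controlled submit(task, atLeastOne) path above, which only takes the rows the RequestController currently allows.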
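
The constructor comment explains that serverTrackerTimeout is the total backoff time a normal retry schedule would have consumed, used as a budget so that fast per-server retries cannot exceed the planned retry window. A minimal sketch of that arithmetic is below; the pause of 100 ms and 8 total tries are hypothetical values, and because ConnectionUtils.getPauseTime applies HBase's backoff table plus jitter, the exact sum varies slightly between runs.

package org.apache.hadoop.hbase.client; // reuse the same package as the client internals

public final class ServerTrackerTimeoutSketch {
  public static void main(String[] args) {
    long pauseMillis = 100L; // hypothetical hbase.client.pause
    int numTries = 8;        // hypothetical retriesNumber + 1
    long serverTrackerTimeout = 0L;
    for (int i = 0; i < numTries; ++i) {
      // Same accumulation as the AsyncProcess constructor: sum the planned backoff per attempt.
      serverTrackerTimeout += ConnectionUtils.getPauseTime(pauseMillis, i);
    }
    // The ServerErrorTracker stops retrying against a server once this cumulative budget is spent,
    // so faster individual retries do not extend the overall retry window.
    System.out.println("retry budget ~ " + serverTrackerTimeout + " ms");
  }
}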



