
/*
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.client;
import com.google.common.annotations.VisibleForTesting;
import java.io.IOException;
import java.io.InterruptedIOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.ConcurrentSkipListMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.RejectedExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.client.AsyncProcess.RowChecker.ReturnCode;
import org.apache.hadoop.hbase.CallQueueTooBigException;
import org.apache.hadoop.hbase.DoNotRetryIOException;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HRegionLocation;
import org.apache.hadoop.hbase.RegionLocations;
import org.apache.hadoop.hbase.RetryImmediatelyException;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.client.backoff.ServerStatistics;
import org.apache.hadoop.hbase.client.coprocessor.Batch;
import org.apache.hadoop.hbase.exceptions.ClientExceptionsUtil;
import org.apache.hadoop.hbase.ipc.RpcControllerFactory;
import org.apache.hadoop.hbase.protobuf.generated.ClientProtos;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.EnvironmentEdge;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.htrace.Trace;
/**
* This class allows a continuous flow of requests. It's written to be compatible with a
* synchronous caller such as HTable.
*
* The caller sends a buffer of operations by calling submit. This class extracts from this list
* the operations it can send, i.e. the operations that are on regions that are not considered
* busy. The process is asynchronous, i.e. it returns immediately once it has finished iterating
* over the list. If, and only if, the maximum number of concurrent tasks is reached, the call
* to submit will block. Alternatively, the caller can call submitAll, in which case all the
* operations will be sent. Each call to submit returns a future-like object that can be used
* to track operation progress.
*
*
* The class manages retries internally.
*
*
* The class can be constructed in regular mode, or "global error" mode. In global error mode,
* AP tracks errors across all calls (each "future" also has global view of all errors). That
* mode is necessary for backward compat with HTable behavior, where multiple submissions are
* made and errors from previous calls can propagate through any subsequent put/flush call.
* In "regular" mode, the errors are tracked inside the Future object that is returned.
* The results are always tracked inside the Future object and can be retrieved when the call
* has finished. Partial results can also be retrieved if some part of multi-request failed.
*
*
* This class is thread safe in regular mode; in global error mode, submitting operations and
* retrieving errors from different threads may not be thread safe.
* Internally, the class is thread safe enough to manage simultaneously new submission and results
* arising from older operations.
*
*
* Internally, this class works with {@link Row}, which means it could theoretically be used for
* gets as well.
*
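* <p>A minimal usage sketch (illustrative only; it assumes an existing ClusterConnection
* {@code conn}, a thread pool, RPC caller/controller factories, and a list of puts
* {@code puts}):
* <pre>{@code
* AsyncProcess ap = new AsyncProcess(conn, conf, pool, rpcCallerFactory,
*     false, rpcControllerFactory, rpcTimeout);
* AsyncRequestFuture future =
*     ap.submit(TableName.valueOf("myTable"), puts, true, null, false);
* future.waitUntilDone();
* if (future.hasError()) {
*   throw future.getErrors();
* }
* }</pre>
*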
*/
@InterfaceAudience.Private
@edu.umd.cs.findbugs.annotations.SuppressWarnings(value="JLM_JSR166_UTILCONCURRENT_MONITORENTER",
justification="Synchronization on tasks in progress counter is intended")
class AsyncProcess {
private static final Log LOG = LogFactory.getLog(AsyncProcess.class);
protected static final AtomicLong COUNTER = new AtomicLong();
public static final String PRIMARY_CALL_TIMEOUT_KEY = "hbase.client.primaryCallTimeout.multiget";
/**
* Configure the number of failures after which the client will start logging. A few failures
* are fine: region moved, then is not opened, then is overloaded. We try to have an acceptable
* heuristic for the number of errors we don't log. 9 was chosen because we wait for 1s at
* this stage.
*/
public static final String START_LOG_ERRORS_AFTER_COUNT_KEY =
"hbase.client.start.log.errors.counter";
public static final int DEFAULT_START_LOG_ERRORS_AFTER_COUNT = 9;
/**
* Configuration to decide whether to log details for batch errors
*/
public static final String LOG_DETAILS_FOR_BATCH_ERROR = "hbase.client.log.batcherrors.details";
private final int thresholdToLogUndoneTaskDetails;
private static final String THRESHOLD_TO_LOG_UNDONE_TASK_DETAILS =
"hbase.client.threshold.log.details";
private static final int DEFAULT_THRESHOLD_TO_LOG_UNDONE_TASK_DETAILS = 10;
private final int THRESHOLD_TO_LOG_REGION_DETAILS = 2;
/**
* The maximum heap size of the data sent in one request to a single RegionServer.
*/
public static final String HBASE_CLIENT_MAX_PERREQUEST_HEAPSIZE = "hbase.client.max.perrequest.heapsize";
/**
* Default value of {@link #HBASE_CLIENT_MAX_PERREQUEST_HEAPSIZE}.
*/
public static final long DEFAULT_HBASE_CLIENT_MAX_PERREQUEST_HEAPSIZE = 4194304;
/**
* The maximum heap size of the data taken by a single submit call.
*/
public static final String HBASE_CLIENT_MAX_SUBMIT_HEAPSIZE = "hbase.client.max.submit.heapsize";
/**
* Default value of {@link #HBASE_CLIENT_MAX_SUBMIT_HEAPSIZE}.
*/
public static final long DEFAULT_HBASE_CLIENT_MAX_SUBMIT_HEAPSIZE = DEFAULT_HBASE_CLIENT_MAX_PERREQUEST_HEAPSIZE;
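// A configuration sketch (the values below are illustrative only, not recommendations):
// clients tune these limits on the Configuration used to create the connection, e.g.
//   conf.setInt(START_LOG_ERRORS_AFTER_COUNT_KEY, 5);
//   conf.setLong(HBASE_CLIENT_MAX_PERREQUEST_HEAPSIZE, 2L * 1024 * 1024);
//   conf.setLong(HBASE_CLIENT_MAX_SUBMIT_HEAPSIZE, 2L * 1024 * 1024);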
/**
* The context used to wait for results from one submit call.
* 1) If AsyncProcess is set to track errors globally, and not per call (for HTable puts),
* then errors and failed operations in this object will reflect global errors.
* 2) If submit call is made with needResults false, results will not be saved.
* */
public static interface AsyncRequestFuture {
public boolean hasError();
public RetriesExhaustedWithDetailsException getErrors();
public List<? extends Row> getFailedOperations();
public Object[] getResults() throws InterruptedIOException;
/** Wait until all tasks are executed, successfully or not. */
public void waitUntilDone() throws InterruptedIOException;
}
/**
* Return value from a submit that didn't contain any requests.
*/
private static final AsyncRequestFuture NO_REQS_RESULT = new AsyncRequestFuture() {
final Object[] result = new Object[0];
@Override
public boolean hasError() {
return false;
}
@Override
public RetriesExhaustedWithDetailsException getErrors() {
return null;
}
@Override
public List<? extends Row> getFailedOperations() {
return null;
}
@Override
public Object[] getResults() {
return result;
}
@Override
public void waitUntilDone() throws InterruptedIOException {
}
};
/** Sync point for calls to multiple replicas for the same user request (Get).
* Created and put in the results array (we assume replica calls require results) when
* the replica calls are launched. See results for details of this process.
* POJO, all fields are public. To modify them, the object itself is locked. */
private static class ReplicaResultState {
public ReplicaResultState(int callCount) {
this.callCount = callCount;
}
/** Number of calls outstanding, or 0 if a call succeeded (even with others outstanding). */
int callCount;
/** Errors for which it is not decided whether we will report them to user. If one of the
* calls succeeds, we will discard the errors that may have happened in the other calls. */
BatchErrors replicaErrors = null;
@Override
public String toString() {
return "[call count " + callCount + "; errors " + replicaErrors + "]";
}
}
// TODO: many of the fields should be made private
protected final long id;
protected final ClusterConnection connection;
protected final RpcRetryingCallerFactory rpcCallerFactory;
protected final RpcControllerFactory rpcFactory;
protected final BatchErrors globalErrors;
protected final ExecutorService pool;
protected final AtomicLong tasksInProgress = new AtomicLong(0);
protected final ConcurrentMap<byte[], AtomicInteger> taskCounterPerRegion =
new ConcurrentSkipListMap<byte[], AtomicInteger>(Bytes.BYTES_COMPARATOR);
protected final ConcurrentMap<ServerName, AtomicInteger> taskCounterPerServer =
new ConcurrentHashMap<ServerName, AtomicInteger>();
// Start configuration settings.
private final int startLogErrorsCnt;
/**
* The number of tasks simultaneously executed on the cluster.
*/
protected final int maxTotalConcurrentTasks;
/**
* The max heap size of all tasks simultaneously executed on a server.
*/
protected final long maxHeapSizePerRequest;
protected final long maxHeapSizeSubmit;
/**
* The number of tasks we run in parallel on a single region.
* With 1 (the default) , we ensure that the ordering of the queries is respected: we don't start
* a set of operations on a region before the previous one is done. As well, this limits
* the pressure we put on the region server.
*/
protected final int maxConcurrentTasksPerRegion;
/**
* The number of task simultaneously executed on a single region server.
*/
protected final int maxConcurrentTasksPerServer;
protected final long pause;
protected final long pauseForCQTBE;// pause for CallQueueTooBigException, if specified
protected int numTries;
protected int serverTrackerTimeout;
protected int rpcTimeout;
protected int operationTimeout;
protected long primaryCallTimeoutMicroseconds;
/** Whether to log details for batch errors */
private final boolean logBatchErrorDetails;
// End configuration settings.
protected static class BatchErrors {
private final List<Throwable> throwables = new ArrayList<Throwable>();
private final List<Row> actions = new ArrayList<Row>();
private final List<String> addresses = new ArrayList<String>();
public synchronized void add(Throwable ex, Row row, ServerName serverName) {
if (row == null){
throw new IllegalArgumentException("row cannot be null. location=" + serverName);
}
throwables.add(ex);
actions.add(row);
addresses.add(serverName != null ? serverName.toString() : "null");
}
public boolean hasErrors() {
return !throwables.isEmpty();
}
private synchronized RetriesExhaustedWithDetailsException makeException(boolean logDetails) {
if (logDetails) {
LOG.error("Exception occurred! Exception details: " + throwables + ";\nActions: "
+ actions);
}
return new RetriesExhaustedWithDetailsException(new ArrayList<Throwable>(throwables),
new ArrayList<Row>(actions), new ArrayList<String>(addresses));
}
public synchronized void clear() {
throwables.clear();
actions.clear();
addresses.clear();
}
public synchronized void merge(BatchErrors other) {
throwables.addAll(other.throwables);
actions.addAll(other.actions);
addresses.addAll(other.addresses);
}
}
public AsyncProcess(ClusterConnection hc, Configuration conf, ExecutorService pool,
RpcRetryingCallerFactory rpcCaller, boolean useGlobalErrors, RpcControllerFactory rpcFactory,
int rpcTimeout) {
if (hc == null) {
throw new IllegalArgumentException("HConnection cannot be null.");
}
this.connection = hc;
this.pool = pool;
this.globalErrors = useGlobalErrors ? new BatchErrors() : null;
this.id = COUNTER.incrementAndGet();
this.pause = conf.getLong(HConstants.HBASE_CLIENT_PAUSE,
HConstants.DEFAULT_HBASE_CLIENT_PAUSE);
long configuredPauseForCQTBE = conf.getLong(HConstants.HBASE_CLIENT_PAUSE_FOR_CQTBE, pause);
if (configuredPauseForCQTBE < pause) {
LOG.warn("The " + HConstants.HBASE_CLIENT_PAUSE_FOR_CQTBE + " setting: "
+ configuredPauseForCQTBE + " is smaller than " + HConstants.HBASE_CLIENT_PAUSE
+ ", will use " + pause + " instead.");
this.pauseForCQTBE = pause;
} else {
this.pauseForCQTBE = configuredPauseForCQTBE;
}
this.numTries = conf.getInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER,
HConstants.DEFAULT_HBASE_CLIENT_RETRIES_NUMBER);
this.rpcTimeout = rpcTimeout;
this.operationTimeout = conf.getInt(HConstants.HBASE_CLIENT_OPERATION_TIMEOUT,
HConstants.DEFAULT_HBASE_CLIENT_OPERATION_TIMEOUT);
this.primaryCallTimeoutMicroseconds = conf.getInt(PRIMARY_CALL_TIMEOUT_KEY, 10000);
this.maxTotalConcurrentTasks = conf.getInt(HConstants.HBASE_CLIENT_MAX_TOTAL_TASKS,
HConstants.DEFAULT_HBASE_CLIENT_MAX_TOTAL_TASKS);
this.maxConcurrentTasksPerServer = conf.getInt(HConstants.HBASE_CLIENT_MAX_PERSERVER_TASKS,
HConstants.DEFAULT_HBASE_CLIENT_MAX_PERSERVER_TASKS);
this.maxConcurrentTasksPerRegion = conf.getInt(HConstants.HBASE_CLIENT_MAX_PERREGION_TASKS,
HConstants.DEFAULT_HBASE_CLIENT_MAX_PERREGION_TASKS);
this.maxHeapSizePerRequest = conf.getLong(HBASE_CLIENT_MAX_PERREQUEST_HEAPSIZE,
DEFAULT_HBASE_CLIENT_MAX_PERREQUEST_HEAPSIZE);
this.maxHeapSizeSubmit = conf.getLong(HBASE_CLIENT_MAX_SUBMIT_HEAPSIZE, DEFAULT_HBASE_CLIENT_MAX_SUBMIT_HEAPSIZE);
this.startLogErrorsCnt =
conf.getInt(START_LOG_ERRORS_AFTER_COUNT_KEY, DEFAULT_START_LOG_ERRORS_AFTER_COUNT);
if (this.maxTotalConcurrentTasks <= 0) {
throw new IllegalArgumentException("maxTotalConcurrentTasks=" + maxTotalConcurrentTasks);
}
if (this.maxConcurrentTasksPerServer <= 0) {
throw new IllegalArgumentException("maxConcurrentTasksPerServer=" +
maxConcurrentTasksPerServer);
}
if (this.maxConcurrentTasksPerRegion <= 0) {
throw new IllegalArgumentException("maxConcurrentTasksPerRegion=" +
maxConcurrentTasksPerRegion);
}
if (this.maxHeapSizePerRequest <= 0) {
throw new IllegalArgumentException("maxHeapSizePerServer=" +
maxHeapSizePerRequest);
}
if (this.maxHeapSizeSubmit <= 0) {
throw new IllegalArgumentException("maxHeapSizeSubmit=" +
maxHeapSizeSubmit);
}
// Server tracker allows us to do faster, and yet useful (hopefully), retries.
// However, if we are too useful, we might fail very quickly due to retry count limit.
// To avoid this, we are going to cheat for now (see HBASE-7659), and calculate maximum
// retry time if normal retries were used. Then we will retry until this time runs out.
// If we keep hitting one server, the net effect will be the incremental backoff, and
// essentially the same number of retries as planned. If we have to do faster retries,
// we will do more retries in aggregate, but the user will be none the wiser.
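// For example (approximate, assuming default settings): with the default client pause of
// 100 ms and the default retry count, summing the standard backoff schedule
// (1, 2, 3, 5, 10, 20, 40, 100, ... times the pause) gives a tracker timeout on the order
// of several minutes; the exact value depends on the configured pause, retry count and jitter.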
this.serverTrackerTimeout = 0;
for (int i = 0; i < this.numTries; ++i) {
serverTrackerTimeout = (int) (serverTrackerTimeout + ConnectionUtils.getPauseTime(this.pause, i));
}
this.rpcCallerFactory = rpcCaller;
this.rpcFactory = rpcFactory;
this.logBatchErrorDetails = conf.getBoolean(LOG_DETAILS_FOR_BATCH_ERROR, false);
this.thresholdToLogUndoneTaskDetails =
conf.getInt(THRESHOLD_TO_LOG_UNDONE_TASK_DETAILS,
DEFAULT_THRESHOLD_TO_LOG_UNDONE_TASK_DETAILS);
}
public void setRpcTimeout(int rpcTimeout) {
this.rpcTimeout = rpcTimeout;
}
public void setOperationTimeout(int operationTimeout) {
this.operationTimeout = operationTimeout;
}
/**
* @return pool if non null, otherwise returns this.pool if non null, otherwise throws
* RuntimeException
*/
@VisibleForTesting
ExecutorService getPool(ExecutorService pool) {
if (pool != null) {
return pool;
}
if (this.pool != null) {
return this.pool;
}
throw new RuntimeException("Neither AsyncProcess nor request have ExecutorService");
}
/**
* See {@link #submit(ExecutorService, TableName, List, boolean, Batch.Callback, boolean)}.
* Uses default ExecutorService for this AP (must have been created with one).
*/
public <CResult> AsyncRequestFuture submit(TableName tableName, final List<? extends Row> rows,
boolean atLeastOne, Batch.Callback<CResult> callback, boolean needResults)
throws InterruptedIOException {
return submit(null, tableName, rows, atLeastOne, callback, needResults);
}
/**
* See {@link #submit(ExecutorService, TableName, RowAccess, boolean, Batch.Callback, boolean)}.
* Uses default ExecutorService for this AP (must have been created with one).
*/
public <CResult> AsyncRequestFuture submit(TableName tableName,
final RowAccess<? extends Row> rows, boolean atLeastOne, Batch.Callback<CResult> callback,
boolean needResults) throws InterruptedIOException {
return submit(null, tableName, rows, atLeastOne, callback, needResults);
}
/**
* See {@link #submit(ExecutorService, TableName, RowAccess, boolean, Batch.Callback, boolean)}.
* Uses the {@link ListRowAccess} to wrap the {@link List}.
*/
public <CResult> AsyncRequestFuture submit(ExecutorService pool, TableName tableName,
List<? extends Row> rows, boolean atLeastOne, Batch.Callback<CResult> callback,
boolean needResults) throws InterruptedIOException {
return submit(pool, tableName, new ListRowAccess(rows), atLeastOne,
callback, needResults);
}
/**
* Extract from the rows list what we can submit. The rows we can not submit are kept in the
* list. Does not send requests to replicas (not currently used for anything other
* than streaming puts anyway).
*
* @param pool ExecutorService to use.
* @param tableName The table for which this request is needed.
* @param callback Batch callback. Only called on success (94 behavior).
* @param needResults Whether results are needed, or can be discarded.
* @param rows - the submitted rows. Modified by the method: we remove the rows we took.
* @param atLeastOne true if we should submit at least a subset.
*/
public <CResult> AsyncRequestFuture submit(ExecutorService pool, TableName tableName,
RowAccess<? extends Row> rows, boolean atLeastOne, Batch.Callback<CResult> callback,
boolean needResults) throws InterruptedIOException {
if (rows.isEmpty()) {
return NO_REQS_RESULT;
}
Map<ServerName, MultiAction<Row>> actionsByServer =
new HashMap<ServerName, MultiAction<Row>>();
List<Action<Row>> retainedActions = new ArrayList<Action<Row>>(rows.size());
NonceGenerator ng = this.connection.getNonceGenerator();
long nonceGroup = ng.getNonceGroup(); // Currently, nonce group is per entire client.
// Location errors that happen before we decide what requests to take.
List<Exception> locationErrors = null;
List<Integer> locationErrorRows = null;
RowCheckerHost checker = createRowCheckerHost();
boolean firstIter = true;
do {
// Wait until there is at least one slot for a new task.
waitForMaximumCurrentTasks(maxTotalConcurrentTasks - 1, tableName.getNameAsString());
int posInList = -1;
if (!firstIter) {
checker.reset();
}
Iterator<? extends Row> it = rows.iterator();
while (it.hasNext()) {
Row r = it.next();
HRegionLocation loc;
try {
if (r == null) {
throw new IllegalArgumentException("#" + id + ", row cannot be null");
}
// Make sure we get 0-s replica.
RegionLocations locs = connection.locateRegion(
tableName, r.getRow(), true, true, RegionReplicaUtil.DEFAULT_REPLICA_ID);
if (locs == null || locs.isEmpty() || locs.getDefaultRegionLocation() == null) {
throw new IOException("#" + id + ", no location found, aborting submit for"
+ " tableName=" + tableName + " rowkey=" + Bytes.toStringBinary(r.getRow()));
}
loc = locs.getDefaultRegionLocation();
} catch (IOException ex) {
locationErrors = new ArrayList<Exception>();
locationErrorRows = new ArrayList<Integer>();
LOG.error("Failed to get region location ", ex);
// This action failed before creating ars. Retain it, but do not add to submit list.
// We will then add it to ars in an already-failed state.
int priority = HConstants.NORMAL_QOS;
if (r instanceof Mutation) {
priority = ((Mutation) r).getPriority();
}
retainedActions.add(new Action<Row>(r, ++posInList, priority));
locationErrors.add(ex);
locationErrorRows.add(posInList);
it.remove();
break; // Backward compat: we stop considering actions on location error.
}
long rowSize = (r instanceof Mutation) ? ((Mutation) r).heapSize() : 0;
ReturnCode code = checker.canTakeOperation(loc, rowSize);
if (code == ReturnCode.END) {
break;
}
if (code == ReturnCode.INCLUDE) {
int priority = HConstants.NORMAL_QOS;
if (r instanceof Mutation) {
priority = ((Mutation) r).getPriority();
}
Action<Row> action = new Action<Row>(r, ++posInList, priority);
setNonce(ng, r, action);
retainedActions.add(action);
// TODO: replica-get is not supported on this path
byte[] regionName = loc.getRegionInfo().getRegionName();
addAction(loc.getServerName(), regionName, action, actionsByServer, nonceGroup);
it.remove();
}
}
firstIter = false;
} while (retainedActions.isEmpty() && atLeastOne && (locationErrors == null));
if (retainedActions.isEmpty()) return NO_REQS_RESULT;
return submitMultiActions(tableName, retainedActions, nonceGroup, callback, null, needResults,
locationErrors, locationErrorRows, actionsByServer, pool);
}
private RowCheckerHost createRowCheckerHost() {
return new RowCheckerHost(Arrays.asList(
new TaskCountChecker(maxTotalConcurrentTasks,
maxConcurrentTasksPerServer,
maxConcurrentTasksPerRegion,
tasksInProgress,
taskCounterPerServer,
taskCounterPerRegion)
, new RequestSizeChecker(maxHeapSizePerRequest)
, new SubmittedSizeChecker(maxHeapSizeSubmit)
));
}
<CResult> AsyncRequestFuture submitMultiActions(TableName tableName,
List<Action<Row>> retainedActions, long nonceGroup, Batch.Callback<CResult> callback,
Object[] results, boolean needResults, List<Exception> locationErrors,
List<Integer> locationErrorRows, Map<ServerName, MultiAction<Row>> actionsByServer,
ExecutorService pool) {
AsyncRequestFutureImpl<CResult> ars = createAsyncRequestFuture(
tableName, retainedActions, nonceGroup, pool, callback, results, needResults, null,
operationTimeout, rpcTimeout);
// Add location errors if any
if (locationErrors != null) {
for (int i = 0; i < locationErrors.size(); ++i) {
int originalIndex = locationErrorRows.get(i);
Row row = retainedActions.get(originalIndex).getAction();
ars.manageError(originalIndex, row,
Retry.NO_LOCATION_PROBLEM, locationErrors.get(i), null);
}
}
ars.sendMultiAction(actionsByServer, 1, null, false);
return ars;
}
/**
* Helper that is used when grouping the actions per region server.
*
* @param server - the destination server. Must not be null.
* @param action - the action to add to the multiaction
* @param actionsByServer the multiaction per server
* @param nonceGroup Nonce group.
*/
private static void addAction(ServerName server, byte[] regionName, Action<Row> action,
Map<ServerName, MultiAction<Row>> actionsByServer, long nonceGroup) {
MultiAction<Row> multiAction = actionsByServer.get(server);
if (multiAction == null) {
multiAction = new MultiAction<Row>();
actionsByServer.put(server, multiAction);
}
if (action.hasNonce() && !multiAction.hasNonceGroup()) {
multiAction.setNonceGroup(nonceGroup);
}
multiAction.add(regionName, action);
}
/**
* See {@link #submitAll(ExecutorService, TableName, List, org.apache.hadoop.hbase.client.coprocessor.Batch.Callback, Object[])}.
* Uses default ExecutorService for this AP (must have been created with one).
*/
public <CResult> AsyncRequestFuture submitAll(TableName tableName,
List<? extends Row> rows, Batch.Callback<CResult> callback, Object[] results) {
return submitAll(null, tableName, rows, callback, results, null, operationTimeout, rpcTimeout);
}
public <CResult> AsyncRequestFuture submitAll(ExecutorService pool, TableName tableName,
List<? extends Row> rows, Batch.Callback<CResult> callback, Object[] results) {
return submitAll(pool, tableName, rows, callback, results, null, operationTimeout, rpcTimeout);
}
/**
* Submit immediately the list of rows, whatever the server status. Kept for backward
* compatibility: it allows this class to be used with the batch interface, which returns an array of objects.
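* <p>A brief usage sketch of the submitAll family (illustrative only; {@code ap}, {@code pool}
* and {@code rows} are assumed to exist):
* <pre>{@code
* Object[] results = new Object[rows.size()];
* AsyncRequestFuture future = ap.submitAll(pool, tableName, rows, null, results);
* future.waitUntilDone();
* // results[i] now holds either a Result or a Throwable for rows.get(i)
* }</pre>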
*
* @param pool ExecutorService to use.
* @param tableName name of the table for which the submission is made.
* @param rows the list of rows.
* @param callback the callback.
* @param results Optional array to return the results thru; backward compat.
*/
public <CResult> AsyncRequestFuture submitAll(ExecutorService pool, TableName tableName,
List<? extends Row> rows, Batch.Callback<CResult> callback, Object[] results,
PayloadCarryingServerCallable callable, int operationTimeout, int rpcTimeout) {
List<Action<Row>> actions = new ArrayList<Action<Row>>(rows.size());
// The position will be used by the processBatch to match the object array returned.
int posInList = -1;
NonceGenerator ng = this.connection.getNonceGenerator();
int highestPriority = HConstants.PRIORITY_UNSET;
for (Row r : rows) {
posInList++;
if (r instanceof Put) {
Put put = (Put) r;
if (put.isEmpty()) {
throw new IllegalArgumentException("No columns to insert for #" + (posInList+1)+ " item");
}
highestPriority = Math.max(put.getPriority(), highestPriority);
}
Action<Row> action = new Action<Row>(r, posInList, highestPriority);
setNonce(ng, r, action);
actions.add(action);
}
AsyncRequestFutureImpl<CResult> ars = createAsyncRequestFuture(
tableName, actions, ng.getNonceGroup(), getPool(pool), callback, results, results != null,
callable, operationTimeout, rpcTimeout);
ars.groupAndSendMultiAction(actions, 1);
return ars;
}
private static void setNonce(NonceGenerator ng, Row r, Action<Row> action) {
if (!(r instanceof Append) && !(r instanceof Increment)) return;
action.setNonce(ng.newNonce()); // Action handles NO_NONCE, so it's ok if ng is disabled.
}
/**
* The context, and return value, for a single submit/submitAll call.
* Note on how this class (one AP submit) works. Initially, all requests are split into groups
* by server; request is sent to each server in parallel; the RPC calls are not async so a
* thread per server is used. Every time some actions fail, regions/locations might have
* changed, so we re-group them by server and region again and send these groups in parallel
* too. The result, in case of retries, is a "tree" of threads, with parent exiting after
* scheduling children. This is why lots of code doesn't require any synchronization.
*/
protected class AsyncRequestFutureImpl<CResult> implements AsyncRequestFuture {
/**
* Runnable (that can be submitted to thread pool) that waits for when it's time
* to issue replica calls, finds region replicas, groups the requests by replica and
* issues the calls (on separate threads, via sendMultiAction).
* This is done on a separate thread because we don't want to wait on user thread for
* our asynchronous call, and usually we have to wait before making replica calls.
*/
private final class ReplicaCallIssuingRunnable implements Runnable {
private final long startTime;
private final List<Action<Row>> initialActions;
public ReplicaCallIssuingRunnable(List<Action<Row>> initialActions, long startTime) {
this.initialActions = initialActions;
this.startTime = startTime;
}
@Override
public void run() {
boolean done = false;
if (primaryCallTimeoutMicroseconds > 0) {
try {
done = waitUntilDone(startTime * 1000L + primaryCallTimeoutMicroseconds);
} catch (InterruptedException ex) {
LOG.error("Replica thread was interrupted - no replica calls: " + ex.getMessage());
return;
}
}
if (done) return; // Done within primary timeout
Map<ServerName, MultiAction<Row>> actionsByServer =
new HashMap<ServerName, MultiAction<Row>>();
List<Action<Row>> unknownLocActions = new ArrayList<Action<Row>>();
if (replicaGetIndices == null) {
for (int i = 0; i < results.length; ++i) {
addReplicaActions(i, actionsByServer, unknownLocActions);
}
} else {
for (int replicaGetIndice : replicaGetIndices) {
addReplicaActions(replicaGetIndice, actionsByServer, unknownLocActions);
}
}
if (!actionsByServer.isEmpty()) {
sendMultiAction(actionsByServer, 1, null, unknownLocActions.isEmpty());
}
if (!unknownLocActions.isEmpty()) {
actionsByServer = new HashMap<ServerName, MultiAction<Row>>();
for (Action<Row> action : unknownLocActions) {
addReplicaActionsAgain(action, actionsByServer);
}
// Some actions may have completely failed, they are handled inside addAgain.
if (!actionsByServer.isEmpty()) {
sendMultiAction(actionsByServer, 1, null, true);
}
}
}
/**
* Add replica actions to action map by server.
* @param index Index of the original action.
* @param actionsByServer The map by server to add it to.
*/
private void addReplicaActions(int index, Map<ServerName, MultiAction<Row>> actionsByServer,
List<Action<Row>> unknownReplicaActions) {
if (results[index] != null) return; // opportunistic. Never goes from non-null to null.
Action<Row> action = initialActions.get(index);
RegionLocations loc = findAllLocationsOrFail(action, true);
if (loc == null) return;
HRegionLocation[] locs = loc.getRegionLocations();
if (locs.length == 1) {
LOG.warn("No replicas found for " + action.getAction());
return;
}
synchronized (replicaResultLock) {
// Don't run replica calls if the original has finished. We could do it e.g. if
// original has already failed before first replica call (unlikely given retries),
// but that would require additional synchronization w.r.t. returning to caller.
if (results[index] != null) return;
// We set the number of calls here. After that any path must call setResult/setError.
// True even for replicas that are not found - if we refuse to send we MUST set error.
results[index] = new ReplicaResultState(locs.length);
}
for (int i = 1; i < locs.length; ++i) {
Action<Row> replicaAction = new Action<Row>(action, i);
if (locs[i] != null) {
addAction(locs[i].getServerName(), locs[i].getRegionInfo().getRegionName(),
replicaAction, actionsByServer, nonceGroup);
} else {
unknownReplicaActions.add(replicaAction);
}
}
}
private void addReplicaActionsAgain(
Action<Row> action, Map<ServerName, MultiAction<Row>> actionsByServer) {
if (action.getReplicaId() == RegionReplicaUtil.DEFAULT_REPLICA_ID) {
throw new AssertionError("Cannot have default replica here");
}
HRegionLocation loc = getReplicaLocationOrFail(action);
if (loc == null) return;
addAction(loc.getServerName(), loc.getRegionInfo().getRegionName(),
action, actionsByServer, nonceGroup);
}
}
/**
* Runnable (that can be submitted to thread pool) that submits MultiAction to a
* single server. The server call is synchronous, therefore we do it on a thread pool.
*/
@VisibleForTesting
class SingleServerRequestRunnable implements Runnable {
private final MultiAction<Row> multiAction;
private final int numAttempt;
private final ServerName server;
private final Set<PayloadCarryingServerCallable> callsInProgress;
@VisibleForTesting
SingleServerRequestRunnable(
MultiAction<Row> multiAction, int numAttempt, ServerName server,
Set<PayloadCarryingServerCallable> callsInProgress) {
this.multiAction = multiAction;
this.numAttempt = numAttempt;
this.server = server;
this.callsInProgress = callsInProgress;
}
@Override
public void run() {
MultiResponse res = null;
PayloadCarryingServerCallable callable = currentCallable;
try {
// setup the callable based on the actions, if we don't have one already from the request
if (callable == null) {
callable = createCallable(server, tableName, multiAction);
}
RpcRetryingCaller<MultiResponse> caller = createCaller(callable, rpcTimeout);
try {
if (callsInProgress != null) {
callsInProgress.add(callable);
}
res = caller.callWithoutRetries(callable, operationTimeout);
if (res == null) {
// Cancelled
return;
}
} catch (IOException e) {
// The service itself failed. It may be an error coming from the communication
// layer, but, as well, a functional error raised by the server.
receiveGlobalFailure(multiAction, server, numAttempt, e);
return;
} catch (Throwable t) {
// This should not happen. Let's log & retry anyway.
LOG.error("#" + id + ", Caught throwable while calling. This is unexpected." +
" Retrying. Server is " + server + ", tableName=" + tableName, t);
receiveGlobalFailure(multiAction, server, numAttempt, t);
return;
}
// Normal case: we received an answer from the server, and it's not an exception.
receiveMultiAction(multiAction, server, res, numAttempt);
} catch (Throwable t) {
// Something really bad happened. We are on the send thread that will now die.
LOG.error("Internal AsyncProcess #" + id + " error for "
+ tableName + " processing for " + server, t);
throw new RuntimeException(t);
} finally {
decTaskCounters(multiAction.getRegions(), server);
if (callsInProgress != null && callable != null && res != null) {
callsInProgress.remove(callable);
}
}
}
}
private final Batch.Callback<CResult> callback;
private final BatchErrors errors;
private final ConnectionManager.ServerErrorTracker errorsByServer;
private final ExecutorService pool;
private final Set<PayloadCarryingServerCallable> callsInProgress;
private final TableName tableName;
private final AtomicLong actionsInProgress = new AtomicLong(-1);
/**
* The lock controls access to results. It is only held when populating results where
* there might be several callers (eventual consistency gets). For other requests,
* there's one unique call going on per result index.
*/
private final Object replicaResultLock = new Object();
/**
* Result array. Null if results are not needed. Otherwise, each index corresponds to
* the action index in initial actions submitted. For most request types, has null-s for
* requests that are not done, and result/exception for those that are done.
* For eventual-consistency gets, initially the same applies; at some point, replica calls
* might be started, and ReplicaResultState is put at the corresponding indices. The
* returning calls check the type to detect when this is the case. After all calls are done,
* ReplicaResultState-s are replaced with results for the user.
*/
private final Object[] results;
/**
* Indices of replica gets in results. If null, all or no actions are replica-gets.
*/
private final int[] replicaGetIndices;
private final boolean hasAnyReplicaGets;
private final long nonceGroup;
private PayloadCarryingServerCallable currentCallable;
private int operationTimeout;
private int rpcTimeout;
private RetryingTimeTracker tracker;
public AsyncRequestFutureImpl(TableName tableName, List<Action<Row>> actions, long nonceGroup,
ExecutorService pool, boolean needResults, Object[] results,
Batch.Callback<CResult> callback, PayloadCarryingServerCallable callable,
int operationTimeout, int rpcTimeout) {
this.pool = pool;
this.callback = callback;
this.nonceGroup = nonceGroup;
this.tableName = tableName;
this.actionsInProgress.set(actions.size());
if (results != null) {
assert needResults;
if (results.length != actions.size()) {
throw new AssertionError("results.length");
}
this.results = results;
for (int i = 0; i != this.results.length; ++i) {
results[i] = null;
}
} else {
this.results = needResults ? new Object[actions.size()] : null;
}
List<Integer> replicaGetIndices = null;
boolean hasAnyReplicaGets = false;
if (needResults) {
// Check to see if any requests might require replica calls.
// We expect that many requests will consist of all or no multi-replica gets; in such
// cases we would just use a boolean (hasAnyReplicaGets). If there's a mix, we will
// store the list of action indexes for which replica gets are possible, and set
// hasAnyReplicaGets to true.
boolean hasAnyNonReplicaReqs = false;
int posInList = 0;
for (Action<?> action : actions) {
boolean isReplicaGet = isReplicaGet(action.getAction());
if (isReplicaGet) {
hasAnyReplicaGets = true;
if (hasAnyNonReplicaReqs) { // Mixed case
if (replicaGetIndices == null) {
replicaGetIndices = new ArrayList<Integer>(actions.size() - 1);
}
replicaGetIndices.add(posInList);
}
} else if (!hasAnyNonReplicaReqs) {
// The first non-multi-replica request in the action list.
hasAnyNonReplicaReqs = true;
if (posInList > 0) {
// Add all the previous requests to the index lists. We know they are all
// replica-gets because this is the first non-multi-replica request in the list.
replicaGetIndices = new ArrayList<Integer>(actions.size() - 1);
for (int i = 0; i < posInList; ++i) {
replicaGetIndices.add(i);
}
}
}
++posInList;
}
}
this.hasAnyReplicaGets = hasAnyReplicaGets;
if (replicaGetIndices != null) {
this.replicaGetIndices = new int[replicaGetIndices.size()];
int i = 0;
for (Integer el : replicaGetIndices) {
this.replicaGetIndices[i++] = el;
}
} else {
this.replicaGetIndices = null;
}
this.callsInProgress = !hasAnyReplicaGets ? null :
Collections.newSetFromMap(
new ConcurrentHashMap<PayloadCarryingServerCallable, Boolean>());
this.errorsByServer = createServerErrorTracker();
this.errors = (globalErrors != null) ? globalErrors : new BatchErrors();
this.currentCallable = callable;
this.operationTimeout = operationTimeout;
this.rpcTimeout = rpcTimeout;
if (callable == null) {
tracker = new RetryingTimeTracker();
tracker.start();
}
}
public Set<PayloadCarryingServerCallable> getCallsInProgress() {
return callsInProgress;
}
@VisibleForTesting
SingleServerRequestRunnable createSingleServerRequest(MultiAction<Row> multiAction, int numAttempt, ServerName server,
Set<PayloadCarryingServerCallable> callsInProgress) {
return new SingleServerRequestRunnable(multiAction, numAttempt, server, callsInProgress);
}
/**
* Group a list of actions per region servers, and send them.
*
* @param currentActions - the list of rows to submit
* @param numAttempt - the current numAttempt (first attempt is 1)
*/
private void groupAndSendMultiAction(List<Action<Row>> currentActions, int numAttempt) {
Map<ServerName, MultiAction<Row>> actionsByServer =
new HashMap<ServerName, MultiAction<Row>>();
boolean isReplica = false;
List<Action<Row>> unknownReplicaActions = null;
for (Action<Row> action : currentActions) {
RegionLocations locs = findAllLocationsOrFail(action, true);
if (locs == null) continue;
boolean isReplicaAction = !RegionReplicaUtil.isDefaultReplica(action.getReplicaId());
if (isReplica && !isReplicaAction) {
// This is the property of the current implementation, not a requirement.
throw new AssertionError("Replica and non-replica actions in the same retry");
}
isReplica = isReplicaAction;
HRegionLocation loc = locs.getRegionLocation(action.getReplicaId());
if (loc == null || loc.getServerName() == null) {
if (isReplica) {
if (unknownReplicaActions == null) {
unknownReplicaActions = new ArrayList<Action<Row>>();
}
unknownReplicaActions.add(action);
} else {
// TODO: relies on primary location always being fetched
manageLocationError(action, null);
}
} else {
byte[] regionName = loc.getRegionInfo().getRegionName();
addAction(loc.getServerName(), regionName, action, actionsByServer, nonceGroup);
}
}
boolean doStartReplica = (numAttempt == 1 && !isReplica && hasAnyReplicaGets);
boolean hasUnknown = unknownReplicaActions != null && !unknownReplicaActions.isEmpty();
if (!actionsByServer.isEmpty()) {
// If this is a first attempt to group and send, no replicas, we need replica thread.
sendMultiAction(actionsByServer, numAttempt, (doStartReplica && !hasUnknown)
? currentActions : null, numAttempt > 1 && !hasUnknown);
}
if (hasUnknown) {
actionsByServer = new HashMap<ServerName, MultiAction<Row>>();
for (Action<Row> action : unknownReplicaActions) {
HRegionLocation loc = getReplicaLocationOrFail(action);
if (loc == null) continue;
byte[] regionName = loc.getRegionInfo().getRegionName();
addAction(loc.getServerName(), regionName, action, actionsByServer, nonceGroup);
}
if (!actionsByServer.isEmpty()) {
sendMultiAction(
actionsByServer, numAttempt, doStartReplica ? currentActions : null, true);
}
}
}
private HRegionLocation getReplicaLocationOrFail(Action<Row> action) {
// We are going to try get location once again. For each action, we'll do it once
// from cache, because the previous calls in the loop might populate it.
int replicaId = action.getReplicaId();
RegionLocations locs = findAllLocationsOrFail(action, true);
if (locs == null) return null; // manageError already called
HRegionLocation loc = locs.getRegionLocation(replicaId);
if (loc == null || loc.getServerName() == null) {
locs = findAllLocationsOrFail(action, false);
if (locs == null) return null; // manageError already called
loc = locs.getRegionLocation(replicaId);
}
if (loc == null || loc.getServerName() == null) {
manageLocationError(action, null);
return null;
}
return loc;
}
private void manageLocationError(Action<Row> action, Exception ex) {
String msg = "Cannot get replica " + action.getReplicaId()
+ " location for " + action.getAction();
LOG.error(msg);
if (ex == null) {
ex = new IOException(msg);
}
manageError(action.getOriginalIndex(), action.getAction(),
Retry.NO_LOCATION_PROBLEM, ex, null);
}
private RegionLocations findAllLocationsOrFail(Action<Row> action, boolean useCache) {
if (action.getAction() == null) throw new IllegalArgumentException("#" + id +
", row cannot be null");
RegionLocations loc = null;
try {
loc = connection.locateRegion(
tableName, action.getAction().getRow(), useCache, true, action.getReplicaId());
} catch (IOException ex) {
manageLocationError(action, ex);
}
return loc;
}
/**
* Send a multi action structure to the servers, after a delay depending on the attempt
* number. Asynchronous.
*
* @param actionsByServer the actions structured by regions
* @param numAttempt the attempt number.
* @param actionsForReplicaThread original actions for replica thread; null on non-first call.
*/
private void sendMultiAction(Map<ServerName, MultiAction<Row>> actionsByServer,
int numAttempt, List<Action<Row>> actionsForReplicaThread, boolean reuseThread) {
// Run the last item on the same thread if we are already on a send thread.
// We hope most of the time it will be the only item, so we can cut down on threads.
int actionsRemaining = actionsByServer.size();
// This iteration is by server (the HRegionLocation comparator is by server portion only).
for (Map.Entry<ServerName, MultiAction<Row>> e : actionsByServer.entrySet()) {
ServerName server = e.getKey();
MultiAction<Row> multiAction = e.getValue();
Collection<? extends Runnable> runnables = getNewMultiActionRunnable(server, multiAction,
numAttempt);
// make sure we correctly count the number of runnables before we try to reuse the send
// thread, in case we had to split the request into different runnables because of backoff
if (runnables.size() > actionsRemaining) {
actionsRemaining = runnables.size();
}
// run all the runnables
// HBASE-17475: Do not reuse the thread after stack reach a certain depth to prevent stack overflow
// for now, we use HConstants.DEFAULT_HBASE_CLIENT_RETRIES_NUMBER to control the depth
for (Runnable runnable : runnables) {
if ((--actionsRemaining == 0) && reuseThread
&& numAttempt % HConstants.DEFAULT_HBASE_CLIENT_RETRIES_NUMBER != 0) {
runnable.run();
} else {
try {
pool.submit(runnable);
} catch (Throwable t) {
if (t instanceof RejectedExecutionException) {
// This should never happen. But as the pool is provided by the end user,
// let's secure this a little.
LOG.warn("#" + id + ", the task was rejected by the pool. This is unexpected." +
" Server is " + server.getServerName(), t);
} else {
// see #HBASE-14359 for more details
LOG.warn("Caught unexpected exception/error: ", t);
}
decTaskCounters(multiAction.getRegions(), server);
// We're likely to fail again, but this will increment the attempt counter,
// so it will finish.
receiveGlobalFailure(multiAction, server, numAttempt, t);
}
}
}
}
if (actionsForReplicaThread != null) {
startWaitingForReplicaCalls(actionsForReplicaThread);
}
}
private Collection<? extends Runnable> getNewMultiActionRunnable(ServerName server,
MultiAction<Row> multiAction,
int numAttempt) {
// no stats to manage, just do the standard action
if (AsyncProcess.this.connection.getStatisticsTracker() == null) {
if (connection.getConnectionMetrics() != null) {
connection.getConnectionMetrics().incrNormalRunners();
}
incTaskCounters(multiAction.getRegions(), server);
SingleServerRequestRunnable runnable = createSingleServerRequest(multiAction, numAttempt, server, callsInProgress);
return Collections.singletonList(Trace.wrap("AsyncProcess.sendMultiAction", runnable));
}
// group the actions by the amount of delay
Map<Long, DelayingRunner> actions = new HashMap<Long, DelayingRunner>(multiAction
.size());
// split up the actions
for (Map.Entry<byte[], List<Action<Row>>> e : multiAction.actions.entrySet()) {
Long backoff = getBackoff(server, e.getKey());
DelayingRunner runner = actions.get(backoff);
if (runner == null) {
actions.put(backoff, new DelayingRunner(backoff, e));
} else {
runner.add(e);
}
}
List<Runnable> toReturn = new ArrayList<Runnable>(actions.size());
for (DelayingRunner runner : actions.values()) {
incTaskCounters(runner.getActions().getRegions(), server);
String traceText = "AsyncProcess.sendMultiAction";
Runnable runnable = createSingleServerRequest(runner.getActions(), numAttempt, server, callsInProgress);
// use a delay runner only if we need to sleep for some time
if (runner.getSleepTime() > 0) {
runner.setRunner(runnable);
traceText = "AsyncProcess.clientBackoff.sendMultiAction";
runnable = runner;
if (connection.getConnectionMetrics() != null) {
connection.getConnectionMetrics().incrDelayRunners();
connection.getConnectionMetrics().updateDelayInterval(runner.getSleepTime());
}
} else {
if (connection.getConnectionMetrics() != null) {
connection.getConnectionMetrics().incrNormalRunners();
}
}
runnable = Trace.wrap(traceText, runnable);
toReturn.add(runnable);
}
return toReturn;
}
/**
* @param server server location where the target region is hosted
* @param regionName name of the region which we are going to write some data
* @return the amount of time the client should wait before it submits a request to the
* specified server and region
*/
private Long getBackoff(ServerName server, byte[] regionName) {
ServerStatisticTracker tracker = AsyncProcess.this.connection.getStatisticsTracker();
ServerStatistics stats = tracker.getStats(server);
return AsyncProcess.this.connection.getBackoffPolicy()
.getBackoffTime(server, regionName, stats);
}
/**
* Starts waiting to issue replica calls on a different thread; or issues them immediately.
*/
private void startWaitingForReplicaCalls(List<Action<Row>> actionsForReplicaThread) {
long startTime = EnvironmentEdgeManager.currentTime();
ReplicaCallIssuingRunnable replicaRunnable = new ReplicaCallIssuingRunnable(
actionsForReplicaThread, startTime);
if (primaryCallTimeoutMicroseconds == 0) {
// Start replica calls immediately.
replicaRunnable.run();
} else {
// Start the thread that may kick off replica gets.
// TODO: we could do it on the same thread, but it's a user thread, might be a bad idea.
try {
pool.submit(replicaRunnable);
} catch (RejectedExecutionException ree) {
LOG.warn("#" + id + ", replica task was rejected by the pool - no replica calls", ree);
}
}
}
/**
* Checks whether we can retry and acts accordingly: logs, sets the error status.
*
* @param originalIndex the position in the list sent
* @param row the row
* @param canRetry if false, we won't retry whatever the settings.
* @param throwable the throwable, if any (can be null)
* @param server the location, if any (can be null)
* @return the retry decision: whether the action can be retried and, if not, why.
*/
public Retry manageError(int originalIndex, Row row, Retry canRetry,
Throwable throwable, ServerName server) {
if (canRetry == Retry.YES
&& throwable != null && (throwable instanceof DoNotRetryIOException ||
throwable instanceof NeedUnmanagedConnectionException)) {
canRetry = Retry.NO_NOT_RETRIABLE;
}
if (canRetry != Retry.YES) {
// Batch.Callback was not called on failure in 0.94. We keep this.
setError(originalIndex, row, throwable, server);
} else if (isActionComplete(originalIndex, row)) {
canRetry = Retry.NO_OTHER_SUCCEEDED;
}
return canRetry;
}
/**
* Resubmit all the actions from this multiaction after a failure.
*
* @param rsActions the actions still to do from the initial list
* @param server the destination
* @param numAttempt the number of attempts so far
* @param t the throwable (if any) that caused the resubmit
*/
private void receiveGlobalFailure(
MultiAction<Row> rsActions, ServerName server, int numAttempt, Throwable t) {
errorsByServer.reportServerError(server);
Retry canRetry = errorsByServer.canRetryMore(numAttempt)
? Retry.YES : Retry.NO_RETRIES_EXHAUSTED;
if (tableName == null && ClientExceptionsUtil.isMetaClearingException(t)) {
// tableName is null when we made a cross-table RPC call.
connection.clearCaches(server);
}
int failed = 0, stopped = 0;
List<Action<Row>> toReplay = new ArrayList<Action<Row>>();
for (Map.Entry<byte[], List<Action<Row>>> e : rsActions.actions.entrySet()) {
byte[] regionName = e.getKey();
byte[] row = e.getValue().iterator().next().getAction().getRow();
// Do not use the exception for updating cache because it might be coming from
// any of the regions in the MultiAction.
try {
if (tableName != null) {
connection.updateCachedLocations(tableName, regionName, row,
ClientExceptionsUtil.isMetaClearingException(t) ? null : t, server);
}
} catch (Throwable ex) {
// That should never happen, but if it did, we want to make sure
// we still process errors
LOG.error("Couldn't update cached region locations: " + ex);
}
for (Action<Row> action : e.getValue()) {
Retry retry = manageError(
action.getOriginalIndex(), action.getAction(), canRetry, t, server);
if (retry == Retry.YES) {
toReplay.add(action);
} else if (retry == Retry.NO_OTHER_SUCCEEDED) {
++stopped;
} else {
++failed;
}
}
}
if (toReplay.isEmpty()) {
logNoResubmit(server, numAttempt, rsActions.size(), t, failed, stopped);
} else {
resubmit(server, toReplay, numAttempt, rsActions.size(), t);
}
}
/**
* Log as much info as possible, and, if there is something to replay,
* submit it again after a back off sleep.
*/
private void resubmit(ServerName oldServer, List<Action<Row>> toReplay,
int numAttempt, int failureCount, Throwable throwable) {
// We have something to replay. We're going to sleep a little before.
// We have two contradicting needs here:
// 1) We want to get the new location after having slept, as it may change.
// 2) We want to take into account the location when calculating the sleep time.
// 3) If all this is just because the response needed to be chunked try again FAST.
// It should be possible to have some heuristics to take the right decision. Short term,
// we go for one.
boolean retryImmediately = throwable instanceof RetryImmediatelyException;
int nextAttemptNumber = retryImmediately ? numAttempt : numAttempt + 1;
long backOffTime;
if (retryImmediately) {
backOffTime = 0;
} else if (throwable instanceof CallQueueTooBigException) {
// Give a special check on CQTBE, see #HBASE-17114
backOffTime = errorsByServer.calculateBackoffTime(oldServer, pauseForCQTBE);
} else {
backOffTime = errorsByServer.calculateBackoffTime(oldServer, pause);
}
if (numAttempt > startLogErrorsCnt) {
// We use this value to have some logs when we have multiple failures, but not too many
// logs, as errors are to be expected when a region moves, splits and so on
LOG.info(createLog(numAttempt, failureCount, toReplay.size(),
oldServer, throwable, backOffTime, true, null, -1, -1));
}
try {
if (backOffTime > 0) {
Thread.sleep(backOffTime);
}
} catch (InterruptedException e) {
LOG.warn("#" + id + ", not sent: " + toReplay.size() + " operations, " + oldServer, e);
Thread.currentThread().interrupt();
return;
}
groupAndSendMultiAction(toReplay, nextAttemptNumber);
}
private void logNoResubmit(ServerName oldServer, int numAttempt,
int failureCount, Throwable throwable, int failed, int stopped) {
if (failureCount != 0 || numAttempt > startLogErrorsCnt + 1) {
String timeStr = new Date(errorsByServer.getStartTrackingTime()).toString();
String logMessage = createLog(numAttempt, failureCount, 0, oldServer,
throwable, -1, false, timeStr, failed, stopped);
if (failed != 0) {
// Only log final failures as warning
LOG.warn(logMessage);
} else {
LOG.info(logMessage);
}
}
}
@VisibleForTesting
long getActionsInProgress() {
return actionsInProgress.get();
}
/**
* Called when we receive the result of a server query.
*
* @param multiAction - the multiAction we sent
* @param server - the location. It's used as a server name.
* @param responses - the response, if any
* @param numAttempt - the attempt
*/
private void receiveMultiAction(MultiAction<Row> multiAction,
ServerName server, MultiResponse responses, int numAttempt) {
assert responses != null;
// Success or partial success
// Analyze detailed results. We can still have individual failures to be redo.
// two specific throwables are managed:
// - DoNotRetryIOException: we continue to retry for other actions
// - RegionMovedException: we update the cache with the new region location
List<Action<Row>> toReplay = new ArrayList<Action<Row>>();
Throwable throwable = null;
int failureCount = 0;
Retry retry = null;
Map<byte[], MultiResponse.RegionResult> results = responses.getResults();
updateStats(server, results);
int failed = 0;
int stopped = 0;
// Go by original action.
for (Map.Entry<byte[], List<Action<Row>>> regionEntry : multiAction.actions.entrySet()) {
byte[] regionName = regionEntry.getKey();
Throwable regionException = responses.getExceptions().get(regionName);
if (tableName == null && regionException != null &&
ClientExceptionsUtil.isMetaClearingException(regionException)) {
// For multi-actions, we don't have a table name, but we want to make sure to clear the
// cache in case there were location-related exceptions. We don't want to clear the cache
// for every possible exception that comes through, however.
connection.clearCaches(server);
}
Map<Integer, Object> regionResults;
if (results.containsKey(regionName)) {
regionResults = results.get(regionName).result;
} else {
regionResults = Collections.emptyMap();
}
boolean regionFailureRegistered = false;
for (Action<Row> sentAction : regionEntry.getValue()) {
Object result = regionResults.get(sentAction.getOriginalIndex());
if (result == null) {
if (regionException == null) {
LOG.error("Server sent us neither results nor exceptions for " + Bytes
.toStringBinary(regionName) + ", numAttempt:" + numAttempt);
regionException = new RuntimeException("Invalid response");
}
// If the row operation encounters a region-level error, the exception for the
// individual action may be null.
result = regionException;
}
// Failure: retry if it makes sense, else update the error lists
if (result instanceof Throwable) {
Row row = sentAction.getAction();
throwable = regionException != null ? regionException
: ClientExceptionsUtil.findException(result);
// Register corresponding failures once per server/once per region.
if (!regionFailureRegistered) {
regionFailureRegistered = true;
try {
connection.updateCachedLocations(
tableName, regionName, row.getRow(), result, server);
} catch (Throwable ex) {
// That should never happen, but if it did, we want to make sure
// we still process errors
LOG.error("Couldn't update cached region locations: " + ex);
}
}
if (retry == null) {
errorsByServer.reportServerError(server);
// We determine canRetry only once for all calls, after reporting server failure.
retry =
errorsByServer.canRetryMore(numAttempt) ? Retry.YES : Retry.NO_RETRIES_EXHAUSTED;
}
++failureCount;
switch (manageError(sentAction.getOriginalIndex(), row, retry, (Throwable) result,
server)) {
case YES:
toReplay.add(sentAction);
break;
case NO_OTHER_SUCCEEDED:
++stopped;
break;
default:
++failed;
break;
}
} else {
if (callback != null) {
try {
//noinspection unchecked
// TODO: would callback expect a replica region name if it gets one?
this.callback.update(regionName, sentAction.getAction().getRow(), (CResult) result);
} catch (Throwable t) {
LOG.error("User callback threw an exception for "
+ Bytes.toStringBinary(regionName) + ", ignoring", t);
}
}
setResult(sentAction, result);
}
}
}
if (toReplay.isEmpty()) {
logNoResubmit(server, numAttempt, failureCount, throwable, failed, stopped);
} else {
resubmit(server, toReplay, numAttempt, failureCount, throwable);
}
}
private String createLog(int numAttempt, int failureCount, int replaySize, ServerName sn,
Throwable error, long backOffTime, boolean willRetry, String startTime,
int failed, int stopped) {
StringBuilder sb = new StringBuilder();
sb.append("#").append(id).append(", table=").append(tableName).append(", ")
.append("attempt=").append(numAttempt)
.append("/").append(numTries).append(" ");
if (failureCount > 0 || error != null){
sb.append("failed=").append(failureCount).append("ops").append(", last exception: ").
append(error == null ? "null" : error);
} else {
sb.append("succeeded");
}
sb.append(" on ").append(sn).append(", tracking started ").append(startTime);
if (willRetry) {
sb.append(", retrying after=").append(backOffTime).append("ms").
append(", replay=").append(replaySize).append("ops");
} else if (failureCount > 0) {
if (stopped > 0) {
sb.append("; not retrying ").append(stopped).append(" due to success from other replica");
}
if (failed > 0) {
sb.append("; not retrying ").append(failed).append(" - final failure");
}
}
return sb.toString();
}
/**
* Sets the non-error result from a particular action.
* @param action Action (request) that the server responded to.
* @param result The result.
*/
private void setResult(Action<Row> action, Object result) {
if (result == null) {
throw new RuntimeException("Result cannot be null");
}
ReplicaResultState state = null;
boolean isStale = !RegionReplicaUtil.isDefaultReplica(action.getReplicaId());
int index = action.getOriginalIndex();
if (results == null) {
decActionCounter(index);
return; // Simple case, no replica requests.
} else if ((state = trySetResultSimple(
index, action.getAction(), false, result, null, isStale)) == null) {
return; // Simple case, no replica requests.
}
assert state != null;
// At this point we know that state is set to replica tracking class.
// It could be that someone else is also looking at it; however, we know there can
// only be one state object, and only one thread can set callCount to 0. Other threads
// will either see state with callCount 0 after locking it; or will not see state at all
// because we will have replaced it with the result.
synchronized (state) {
if (state.callCount == 0) {
return; // someone already set the result
}
state.callCount = 0;
}
synchronized (replicaResultLock) {
if (results[index] != state) {
throw new AssertionError("We set the callCount but someone else replaced the result");
}
results[index] = result;
}
decActionCounter(index);
}
/**
* Sets the error from a particular action.
* @param index Original action index.
* @param row Original request.
* @param throwable The resulting error.
* @param server The source server.
*/
private void setError(int index, Row row, Throwable throwable, ServerName server) {
ReplicaResultState state = null;
if (results == null) {
      // Note that we currently cannot have replica requests with null results. So it shouldn't
      // happen that multiple replica calls will call decActionCounter for the same action with
      // results == null. Only one call per action should be present in this case.
errors.add(throwable, row, server);
decActionCounter(index);
return; // Simple case, no replica requests.
} else if ((state = trySetResultSimple(
index, row, true, throwable, server, false)) == null) {
return; // Simple case, no replica requests.
}
assert state != null;
BatchErrors target = null; // Error will be added to final errors, or temp replica errors.
boolean isActionDone = false;
synchronized (state) {
switch (state.callCount) {
case 0: return; // someone already set the result
case 1: { // All calls failed, we are the last error.
target = errors;
isActionDone = true;
break;
}
default: {
assert state.callCount > 1;
if (state.replicaErrors == null) {
state.replicaErrors = new BatchErrors();
}
target = state.replicaErrors;
break;
}
}
--state.callCount;
}
target.add(throwable, row, server);
if (isActionDone) {
if (state.replicaErrors != null) { // last call, no need to lock
errors.merge(state.replicaErrors);
}
// See setResult for explanations.
synchronized (replicaResultLock) {
if (results[index] != state) {
throw new AssertionError("We set the callCount but someone else replaced the result");
}
results[index] = throwable;
}
decActionCounter(index);
}
}
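    // Recap of the replica bookkeeping shared by setResult and setError: for an action that has
    // replica calls, results[index] initially holds a ReplicaResultState whose callCount equals
    // the number of outstanding calls. setResult wins the race by dropping callCount to 0 and
    // swapping the real result into results[index]; setError decrements callCount once per failed
    // call, parks non-final errors in replicaErrors, and only installs the throwable (merging
    // replicaErrors into errors) after the last outstanding call has failed.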
/**
* Checks if the action is complete; used on error to prevent needless retries.
* Does not synchronize, assuming element index/field accesses are atomic.
* This is an opportunistic optimization check, doesn't have to be strict.
* @param index Original action index.
* @param row Original request.
*/
private boolean isActionComplete(int index, Row row) {
if (!isReplicaGet(row)) return false;
Object resObj = results[index];
return (resObj != null) && (!(resObj instanceof ReplicaResultState)
|| ((ReplicaResultState)resObj).callCount == 0);
}
/**
* Tries to set the result or error for a particular action as if there were no replica calls.
* @return null if successful; replica state if there were in fact replica calls.
*/
private ReplicaResultState trySetResultSimple(int index, Row row, boolean isError,
Object result, ServerName server, boolean isFromReplica) {
Object resObj = null;
if (!isReplicaGet(row)) {
if (isFromReplica) {
throw new AssertionError("Unexpected stale result for " + row);
}
results[index] = result;
} else {
synchronized (replicaResultLock) {
if ((resObj = results[index]) == null) {
if (isFromReplica) {
throw new AssertionError("Unexpected stale result for " + row);
}
results[index] = result;
}
}
}
ReplicaResultState rrs =
(resObj instanceof ReplicaResultState) ? (ReplicaResultState)resObj : null;
if (rrs == null && isError) {
// The resObj is not replica state (null or already set).
errors.add((Throwable)result, row, server);
}
if (resObj == null) {
// resObj is null - no replica calls were made.
decActionCounter(index);
return null;
}
return rrs;
}
private void decActionCounter(int index) {
long actionsRemaining = actionsInProgress.decrementAndGet();
if (actionsRemaining < 0) {
String error = buildDetailedErrorMsg("Incorrect actions in progress", index);
throw new AssertionError(error);
} else if (actionsRemaining == 0) {
synchronized (actionsInProgress) {
actionsInProgress.notifyAll();
}
}
}
private String buildDetailedErrorMsg(String string, int index) {
StringBuilder error = new StringBuilder(string);
error.append("; called for ").
append(index).
append(", actionsInProgress ").
append(actionsInProgress.get()).
append("; replica gets: ");
if (replicaGetIndices != null) {
for (int i = 0; i < replicaGetIndices.length; ++i) {
error.append(replicaGetIndices[i]).append(", ");
}
} else {
error.append(hasAnyReplicaGets ? "all" : "none");
}
error.append("; results ");
if (results != null) {
for (int i = 0; i < results.length; ++i) {
Object o = results[i];
error.append(((o == null) ? "null" : o.toString())).append(", ");
}
}
return error.toString();
}
@Override
public void waitUntilDone() throws InterruptedIOException {
try {
waitUntilDone(Long.MAX_VALUE);
} catch (InterruptedException iex) {
throw new InterruptedIOException(iex.getMessage());
} finally {
if (callsInProgress != null) {
for (PayloadCarryingServerCallable clb : callsInProgress) {
clb.cancel();
}
}
}
}
private boolean waitUntilDone(long cutoff) throws InterruptedException {
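      // Note: cutoff is an absolute deadline expressed in microseconds (hence the now * 1000L
      // conversions below); Long.MAX_VALUE means wait without a deadline.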
boolean hasWait = cutoff != Long.MAX_VALUE;
long lastLog = EnvironmentEdgeManager.currentTime();
long currentInProgress;
while (0 != (currentInProgress = actionsInProgress.get())) {
long now = EnvironmentEdgeManager.currentTime();
if (hasWait && (now * 1000L) > cutoff) {
return false;
}
if (!hasWait) { // Only log if wait is infinite.
if (now > lastLog + 10000) {
lastLog = now;
LOG.info("#" + id + ", waiting for " + currentInProgress
+ " actions to finish on table: " + tableName);
if (currentInProgress <= thresholdToLogUndoneTaskDetails) {
logDetailsOfUndoneTasks(currentInProgress);
}
}
}
synchronized (actionsInProgress) {
if (actionsInProgress.get() == 0) break;
if (!hasWait) {
actionsInProgress.wait(10);
} else {
long waitMicroSecond = Math.min(100000L, (cutoff - now * 1000L));
TimeUnit.MICROSECONDS.timedWait(actionsInProgress, waitMicroSecond);
}
}
}
return true;
}
@Override
public boolean hasError() {
return errors.hasErrors();
}
@Override
    public List<? extends Row> getFailedOperations() {
return errors.actions;
}
@Override
public RetriesExhaustedWithDetailsException getErrors() {
return errors.makeException(logBatchErrorDetails);
}
@Override
public Object[] getResults() throws InterruptedIOException {
waitUntilDone();
return results;
}
/**
* Create a callable. Isolated to be easily overridden in the tests.
*/
@VisibleForTesting
protected MultiServerCallable createCallable(final ServerName server,
TableName tableName, final MultiAction multi) {
return new MultiServerCallable(connection, tableName, server,
AsyncProcess.this.rpcFactory, multi, rpcTimeout, tracker, multi.getPriority());
}
}
@VisibleForTesting
  protected void updateStats(ServerName server, Map<byte[], MultiResponse.RegionResult> results) {
boolean metrics = AsyncProcess.this.connection.getConnectionMetrics() != null;
boolean stats = AsyncProcess.this.connection.getStatisticsTracker() != null;
if (!stats && !metrics) {
return;
}
    for (Map.Entry<byte[], MultiResponse.RegionResult> regionStats : results.entrySet()) {
byte[] regionName = regionStats.getKey();
ClientProtos.RegionLoadStats stat = regionStats.getValue().getStat();
ResultStatsUtil.updateStats(AsyncProcess.this.connection.getStatisticsTracker(), server,
regionName, stat);
ResultStatsUtil.updateStats(AsyncProcess.this.connection.getConnectionMetrics(),
server, regionName, stat);
}
}
@VisibleForTesting
  <CResult> AsyncRequestFutureImpl<CResult> createAsyncRequestFuture(
      TableName tableName, List<Action<Row>> actions, long nonceGroup, ExecutorService pool,
      Batch.Callback<CResult> callback, Object[] results, boolean needResults,
      PayloadCarryingServerCallable callable, int operationTimeout, int rpcTimeout) {
    return new AsyncRequestFutureImpl<CResult>(
tableName, actions, nonceGroup, getPool(pool), needResults,
results, callback, callable, operationTimeout, rpcTimeout);
}
/**
* Create a caller. Isolated to be easily overridden in the tests.
*/
@VisibleForTesting
  protected RpcRetryingCaller<MultiResponse> createCaller(PayloadCarryingServerCallable callable,
      int rpcTimeout) {
    return rpcCallerFactory.<MultiResponse> newCaller(rpcTimeout);
}
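  // Hedged test-override sketch (hypothetical subclass; the constructor arguments are elided and
  // not taken from this file). Because createCallable/createCaller are isolated factory methods,
  // tests can subclass AsyncProcess and swap in an instrumented caller, e.g.:
  //
  //   AsyncProcess ap = new AsyncProcess(/* connection, conf, pool, ... */) {
  //     @Override
  //     protected RpcRetryingCaller<MultiResponse> createCaller(
  //         PayloadCarryingServerCallable callable, int rpcTimeout) {
  //       // e.g. count invocations or inject failures before delegating
  //       return super.createCaller(callable, rpcTimeout);
  //     }
  //   };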
@VisibleForTesting
/** Waits until all outstanding tasks are done. Used in tests. */
void waitUntilDone() throws InterruptedIOException {
waitForMaximumCurrentTasks(0, null);
}
/** Wait until the async does not have more than max tasks in progress. */
private void waitForMaximumCurrentTasks(int max, String tableName)
throws InterruptedIOException {
waitForMaximumCurrentTasks(max, tasksInProgress, id, tableName);
}
  // Broken out into a separate method so it is testable.
@VisibleForTesting
void waitForMaximumCurrentTasks(int max, final AtomicLong tasksInProgress, final long id,
String tableName) throws InterruptedIOException {
long lastLog = EnvironmentEdgeManager.currentTime();
long currentInProgress, oldInProgress = Long.MAX_VALUE;
while ((currentInProgress = tasksInProgress.get()) > max) {
if (oldInProgress != currentInProgress) { // Wait for in progress to change.
long now = EnvironmentEdgeManager.currentTime();
if (now > lastLog + 10000) {
lastLog = now;
LOG.info("#" + id + ", waiting for some tasks to finish. Expected max="
+ max + ", tasksInProgress=" + currentInProgress +
" hasError=" + hasError() + (tableName == null ? "" : ", tableName=" + tableName));
if (currentInProgress <= thresholdToLogUndoneTaskDetails) {
logDetailsOfUndoneTasks(currentInProgress);
}
}
}
oldInProgress = currentInProgress;
try {
synchronized (tasksInProgress) {
if (tasksInProgress.get() == oldInProgress) {
tasksInProgress.wait(10);
}
}
} catch (InterruptedException e) {
throw new InterruptedIOException("#" + id + ", interrupted." +
" currentNumberOfTask=" + currentInProgress);
}
}
}
private void logDetailsOfUndoneTasks(long taskInProgress) {
    ArrayList<ServerName> servers = new ArrayList<ServerName>();
    for (Map.Entry<ServerName, AtomicInteger> entry : taskCounterPerServer.entrySet()) {
if (entry.getValue().get() > 0) {
servers.add(entry.getKey());
}
}
LOG.info("Left over " + taskInProgress + " task(s) are processed on server(s): " + servers);
if (taskInProgress <= THRESHOLD_TO_LOG_REGION_DETAILS) {
      ArrayList<String> regions = new ArrayList<String>();
      for (Map.Entry<byte[], AtomicInteger> entry : taskCounterPerRegion.entrySet()) {
if (entry.getValue().get() > 0) {
regions.add(Bytes.toString(entry.getKey()));
}
}
LOG.info("Regions against which left over task(s) are processed: " + regions);
}
}
/**
* Only used w/useGlobalErrors ctor argument, for HTable backward compat.
* @return Whether there were any errors in any request since the last time
* {@link #waitForAllPreviousOpsAndReset(List)} was called, or AP was created.
*/
public boolean hasError() {
return globalErrors.hasErrors();
}
/**
* Only used w/useGlobalErrors ctor argument, for HTable backward compat.
* Waits for all previous operations to finish, and returns errors and (optionally)
* failed operations themselves.
* @param failedRows an optional list into which the rows that failed since the last time
* {@link #waitForAllPreviousOpsAndReset(List)} was called, or AP was created, are saved.
* @param tableName name of the table
* @return all the errors since the last time {@link #waitForAllPreviousOpsAndReset(List)}
* was called, or AP was created.
*/
public RetriesExhaustedWithDetailsException waitForAllPreviousOpsAndReset(
      List<Row> failedRows, String tableName) throws InterruptedIOException {
waitForMaximumCurrentTasks(0, tableName);
if (!globalErrors.hasErrors()) {
return null;
}
if (failedRows != null) {
failedRows.addAll(globalErrors.actions);
}
RetriesExhaustedWithDetailsException result = globalErrors.makeException(logBatchErrorDetails);
globalErrors.clear();
return result;
}
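  // Hedged usage sketch (hypothetical caller code for the HTable backward-compat path above):
  // flush all previously buffered operations, then surface any accumulated errors.
  //
  //   List<Row> failedRows = new ArrayList<Row>();
  //   RetriesExhaustedWithDetailsException errors =
  //       ap.waitForAllPreviousOpsAndReset(failedRows, tableName);
  //   if (errors != null) {
  //     // failedRows now holds the operations that could not be applied
  //     throw errors;
  //   }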
/**
* increment the tasks counters for a given set of regions. MT safe.
*/
  protected void incTaskCounters(Collection<byte[]> regions, ServerName sn) {
tasksInProgress.incrementAndGet();
AtomicInteger serverCnt = taskCounterPerServer.get(sn);
if (serverCnt == null) {
taskCounterPerServer.putIfAbsent(sn, new AtomicInteger());
serverCnt = taskCounterPerServer.get(sn);
}
serverCnt.incrementAndGet();
for (byte[] regBytes : regions) {
AtomicInteger regionCnt = taskCounterPerRegion.get(regBytes);
if (regionCnt == null) {
regionCnt = new AtomicInteger();
AtomicInteger oldCnt = taskCounterPerRegion.putIfAbsent(regBytes, regionCnt);
if (oldCnt != null) {
regionCnt = oldCnt;
}
}
regionCnt.incrementAndGet();
}
}
/**
* Decrements the counters for a given region and the region server. MT Safe.
*/
  protected void decTaskCounters(Collection<byte[]> regions, ServerName sn) {
for (byte[] regBytes : regions) {
AtomicInteger regionCnt = taskCounterPerRegion.get(regBytes);
regionCnt.decrementAndGet();
}
taskCounterPerServer.get(sn).decrementAndGet();
tasksInProgress.decrementAndGet();
synchronized (tasksInProgress) {
tasksInProgress.notifyAll();
}
}
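  // Note: incTaskCounters/decTaskCounters bracket every in-flight multi request. decTaskCounters
  // notifies waiters on tasksInProgress, which is what wakes up waitForMaximumCurrentTasks above
  // and TaskCountChecker.waitForRegion below.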
  /**
   * Creates the server error tracker to use inside the process.
   * Currently, to preserve the main assumption about current retries, and to work well with
   * the retry-limit-based calculation, the calculation is local per process object.
   * We may benefit from connection-wide tracking of server errors.
   * @return the ServerErrorTracker to use for this request process
   */
protected ConnectionManager.ServerErrorTracker createServerErrorTracker() {
return new ConnectionManager.ServerErrorTracker(
this.serverTrackerTimeout, this.numTries);
}
private static boolean isReplicaGet(Row row) {
return (row instanceof Get) && (((Get)row).getConsistency() == Consistency.TIMELINE);
}
  /**
   * For manageError. Only used to make logging clearer; we don't actually care why we don't retry.
   */
private enum Retry {
YES,
NO_LOCATION_PROBLEM,
NO_NOT_RETRIABLE,
NO_RETRIES_EXHAUSTED,
NO_OTHER_SUCCEEDED
}
/**
* Collect all advices from checkers and make the final decision.
*/
@VisibleForTesting
static class RowCheckerHost {
    private final List<RowChecker> checkers;
private boolean isEnd = false;
    RowCheckerHost(final List<RowChecker> checkers) {
this.checkers = checkers;
}
void reset() throws InterruptedIOException {
isEnd = false;
InterruptedIOException e = null;
for (RowChecker checker : checkers) {
try {
checker.reset();
} catch (InterruptedIOException ex) {
e = ex;
}
}
if (e != null) {
throw e;
}
}
ReturnCode canTakeOperation(HRegionLocation loc, long rowSize) {
if (isEnd) {
return ReturnCode.END;
}
ReturnCode code = ReturnCode.INCLUDE;
for (RowChecker checker : checkers) {
switch (checker.canTakeOperation(loc, rowSize)) {
case END:
isEnd = true;
code = ReturnCode.END;
break;
case SKIP:
code = ReturnCode.SKIP;
break;
case INCLUDE:
default:
break;
}
if (code == ReturnCode.END) {
break;
}
}
for (RowChecker checker : checkers) {
checker.notifyFinal(code, loc, rowSize);
}
return code;
}
}
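  // Hedged composition sketch (hypothetical wiring; field names mirror the checker constructors
  // below). The host simply aggregates the independent checkers defined below:
  //
  //   List<RowChecker> checkers = new ArrayList<RowChecker>();
  //   checkers.add(new TaskCountChecker(maxTotalConcurrentTasks, maxConcurrentTasksPerServer,
  //       maxConcurrentTasksPerRegion, tasksInProgress, taskCounterPerServer,
  //       taskCounterPerRegion));
  //   checkers.add(new RequestSizeChecker(maxHeapSizePerRequest));
  //   checkers.add(new SubmittedSizeChecker(maxHeapSizeSubmit));
  //   RowCheckerHost checkerHost = new RowCheckerHost(checkers);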
  /**
   * Provides a way to control the flow of row iteration.
   */
@VisibleForTesting
interface RowChecker {
enum ReturnCode {
/**
* Accept current row.
*/
INCLUDE,
/**
* Skip current row.
*/
SKIP,
      /**
       * No more rows can be included.
       */
END
};
ReturnCode canTakeOperation(HRegionLocation loc, long rowSize);
    /**
     * Adds the final ReturnCode to the checker.
     * The ReturnCode may be reversed, so the checker needs the final decision to update
     * its inner state.
     */
    void notifyFinal(ReturnCode code, HRegionLocation loc, long rowSize);
    /**
     * Resets the inner state.
     */
    void reset() throws InterruptedIOException;
}
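  // Hedged sketch of a custom RowChecker (hypothetical example, not one of the built-in checkers
  // below): include at most a fixed number of rows per iteration pass.
  //
  //   class RowCountChecker implements RowChecker {
  //     private final long maxRows;
  //     private long included = 0;
  //     RowCountChecker(long maxRows) { this.maxRows = maxRows; }
  //     @Override
  //     public ReturnCode canTakeOperation(HRegionLocation loc, long rowSize) {
  //       return included >= maxRows ? ReturnCode.END : ReturnCode.INCLUDE;
  //     }
  //     @Override
  //     public void notifyFinal(ReturnCode code, HRegionLocation loc, long rowSize) {
  //       if (code == ReturnCode.INCLUDE) {
  //         ++included;
  //       }
  //     }
  //     @Override
  //     public void reset() { included = 0; }
  //   }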
  /**
   * Limits the total heap size of submitted data.
   * The heap-size limit is reduced so that data is submitted quickly
   * when there is no running task.
   */
@VisibleForTesting
static class SubmittedSizeChecker implements RowChecker {
private final long maxHeapSizeSubmit;
private long heapSize = 0;
SubmittedSizeChecker(final long maxHeapSizeSubmit) {
this.maxHeapSizeSubmit = maxHeapSizeSubmit;
}
@Override
public ReturnCode canTakeOperation(HRegionLocation loc, long rowSize) {
if (heapSize >= maxHeapSizeSubmit) {
return ReturnCode.END;
}
return ReturnCode.INCLUDE;
}
@Override
public void notifyFinal(ReturnCode code, HRegionLocation loc, long rowSize) {
if (code == ReturnCode.INCLUDE) {
heapSize += rowSize;
}
}
@Override
public void reset() {
heapSize = 0;
}
}
  /**
   * Limits the maximum number of concurrent tasks in an AsyncProcess.
   */
@VisibleForTesting
static class TaskCountChecker implements RowChecker {
private static final long MAX_WAITING_TIME = 1000; //ms
    private final Set<HRegionInfo> regionsIncluded = new HashSet<>();
    private final Set<ServerName> serversIncluded = new HashSet<>();
private final int maxConcurrentTasksPerRegion;
private final int maxTotalConcurrentTasks;
private final int maxConcurrentTasksPerServer;
    private final Map<byte[], AtomicInteger> taskCounterPerRegion;
    private final Map<ServerName, AtomicInteger> taskCounterPerServer;
    private final Set<byte[]> busyRegions = new TreeSet<>(Bytes.BYTES_COMPARATOR);
private final AtomicLong tasksInProgress;
TaskCountChecker(final int maxTotalConcurrentTasks,
final int maxConcurrentTasksPerServer,
final int maxConcurrentTasksPerRegion,
final AtomicLong tasksInProgress,
        final Map<ServerName, AtomicInteger> taskCounterPerServer,
        final Map<byte[], AtomicInteger> taskCounterPerRegion) {
this.maxTotalConcurrentTasks = maxTotalConcurrentTasks;
this.maxConcurrentTasksPerRegion = maxConcurrentTasksPerRegion;
this.maxConcurrentTasksPerServer = maxConcurrentTasksPerServer;
this.taskCounterPerRegion = taskCounterPerRegion;
this.taskCounterPerServer = taskCounterPerServer;
this.tasksInProgress = tasksInProgress;
}
@Override
public void reset() throws InterruptedIOException {
// prevent the busy-waiting
waitForRegion();
regionsIncluded.clear();
serversIncluded.clear();
busyRegions.clear();
}
private void waitForRegion() throws InterruptedIOException {
if (busyRegions.isEmpty()) {
return;
}
EnvironmentEdge ee = EnvironmentEdgeManager.getDelegate();
final long start = ee.currentTime();
while ((ee.currentTime() - start) <= MAX_WAITING_TIME) {
for (byte[] region : busyRegions) {
AtomicInteger count = taskCounterPerRegion.get(region);
if (count == null || count.get() < maxConcurrentTasksPerRegion) {
return;
}
}
try {
synchronized (tasksInProgress) {
tasksInProgress.wait(10);
}
} catch (InterruptedException e) {
throw new InterruptedIOException("Interrupted." +
" tasksInProgress=" + tasksInProgress);
}
}
}
    /**
     * 1) Check whether the region has already been included.
     * 2) Check the concurrent tasks for the region.
     * 3) Check the total concurrent tasks.
     * 4) Check the concurrent tasks for the server.
     * @param loc the location of the region the row belongs to
     * @param rowSize the heap size of the row
     * @return whether the row should be included, skipped, or end the iteration
     */
@Override
public ReturnCode canTakeOperation(HRegionLocation loc, long rowSize) {
HRegionInfo regionInfo = loc.getRegionInfo();
if (regionsIncluded.contains(regionInfo)) {
// We already know what to do with this region.
return ReturnCode.INCLUDE;
}
AtomicInteger regionCnt = taskCounterPerRegion.get(loc.getRegionInfo().getRegionName());
if (regionCnt != null && regionCnt.get() >= maxConcurrentTasksPerRegion) {
// Too many tasks on this region already.
return ReturnCode.SKIP;
}
int newServers = serversIncluded.size()
+ (serversIncluded.contains(loc.getServerName()) ? 0 : 1);
if ((newServers + tasksInProgress.get()) > maxTotalConcurrentTasks) {
// Too many tasks.
return ReturnCode.SKIP;
}
AtomicInteger serverCnt = taskCounterPerServer.get(loc.getServerName());
if (serverCnt != null && serverCnt.get() >= maxConcurrentTasksPerServer) {
// Too many tasks for this individual server
return ReturnCode.SKIP;
}
return ReturnCode.INCLUDE;
}
@Override
public void notifyFinal(ReturnCode code, HRegionLocation loc, long rowSize) {
if (code == ReturnCode.INCLUDE) {
regionsIncluded.add(loc.getRegionInfo());
serversIncluded.add(loc.getServerName());
}
busyRegions.add(loc.getRegionInfo().getRegionName());
}
}
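  // Note: TaskCountChecker only ever returns INCLUDE or SKIP, so a busy region or server merely
  // defers its rows to a later pass; reset() then parks briefly in waitForRegion() until one of
  // the recorded busyRegions drops below maxConcurrentTasksPerRegion, avoiding a busy-wait in
  // the caller's loop.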
  /**
   * Limits the request size for each regionserver.
   */
@VisibleForTesting
static class RequestSizeChecker implements RowChecker {
private final long maxHeapSizePerRequest;
    private final Map<ServerName, Long> serverRequestSizes = new HashMap<>();
RequestSizeChecker(final long maxHeapSizePerRequest) {
this.maxHeapSizePerRequest = maxHeapSizePerRequest;
}
@Override
public void reset() {
serverRequestSizes.clear();
}
@Override
public ReturnCode canTakeOperation(HRegionLocation loc, long rowSize) {
      // Is the request size still within the limit?
long currentRequestSize = serverRequestSizes.containsKey(loc.getServerName()) ?
serverRequestSizes.get(loc.getServerName()) : 0L;
// accept at least one request
if (currentRequestSize == 0 || currentRequestSize + rowSize <= maxHeapSizePerRequest) {
return ReturnCode.INCLUDE;
}
return ReturnCode.SKIP;
}
@Override
public void notifyFinal(ReturnCode code, HRegionLocation loc, long rowSize) {
if (code == ReturnCode.INCLUDE) {
long currentRequestSize = serverRequestSizes.containsKey(loc.getServerName()) ?
serverRequestSizes.get(loc.getServerName()) : 0L;
serverRequestSizes.put(loc.getServerName(), currentRequestSize + rowSize);
}
}
}
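  // Note: the currentRequestSize == 0 branch above guarantees progress: a single row larger than
  // maxHeapSizePerRequest is still sent on its own rather than being skipped forever.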
  public static class ListRowAccess<T> implements RowAccess<T> {
    private final List<T> data;
    ListRowAccess(final List<T> data) {
this.data = data;
}
@Override
public int size() {
return data.size();
}
@Override
public boolean isEmpty() {
return data.isEmpty();
}
@Override
    public Iterator<T> iterator() {
return data.iterator();
}
}
}