/*
* Copyright DataStax, Inc.
*
* This software can be used solely with DataStax Enterprise. Please consult the license at
* http://www.datastax.com/terms/datastax-dse-driver-license-terms
*/
package com.datastax.driver.core.policies;
import com.datastax.driver.core.Cluster;
import com.datastax.driver.core.ConsistencyLevel;
import com.datastax.driver.core.Statement;
import com.datastax.driver.core.WriteType;
import com.datastax.driver.core.exceptions.DriverException;
import com.datastax.driver.core.exceptions.ReadFailureException;
import com.datastax.driver.core.exceptions.WriteFailureException;
/**
* The default retry policy.
*
* <p>This policy retries queries in only the following cases:
*
* <ul>
*   <li>On a read timeout, retries once on the same host if enough replicas replied but data was
*       not retrieved.
*   <li>On a write timeout, retries once on the same host if the timeout occurred while writing
*       the distributed log used by batch statements.
*   <li>On an unavailable exception, retries once on the next host.
*   <li>On a request error, such as a client timeout, the query is retried on the next host; read
*       and write failures are never retried.
* </ul>
*
* <p>This retry policy is conservative in that it will never retry with a different consistency
* level than the one of the initial operation.
*
* <p>In some cases, it may be convenient to use a more aggressive retry policy like {@link
* DowngradingConsistencyRetryPolicy}.
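*
* <p>As a minimal usage sketch (the contact point below is illustrative only), the policy can be
* set explicitly when building a {@link Cluster}:
*
* <pre>{@code
* // DefaultRetryPolicy.INSTANCE is the shared singleton used when no other retry policy is set.
* Cluster cluster =
*     Cluster.builder()
*         .addContactPoint("127.0.0.1")
*         .withRetryPolicy(DefaultRetryPolicy.INSTANCE)
*         .build();
* }</pre>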
*/
public class DefaultRetryPolicy implements RetryPolicy {
public static final DefaultRetryPolicy INSTANCE = new DefaultRetryPolicy();
private DefaultRetryPolicy() {}
/**
* {@inheritDoc}
*
* <p>This implementation triggers a maximum of one retry, and only if enough replicas had
* responded to the read request but data was not retrieved amongst those. Indeed, that case
* usually means that enough replicas are alive to satisfy the consistency level but the
* coordinator picked a dead one for data retrieval, not having detected that replica as dead yet.
* The reasoning for retrying then is that by the time we get the timeout the dead replica will
* likely have been detected as dead, so the retry has a high chance of success.
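*
* <p>A hypothetical invocation illustrating the decision ({@code statement} is assumed to be any
* {@link Statement} already in scope):
*
* <pre>{@code
* // QUORUM read requiring 2 responses: 2 replicas replied but no data was retrieved, and this is
* // the first attempt (nbRetry == 0), so the policy retries once at the same consistency level.
* RetryPolicy.RetryDecision decision =
*     DefaultRetryPolicy.INSTANCE.onReadTimeout(
*         statement, ConsistencyLevel.QUORUM, 2, 2, false, 0);
* // decision.getType() == RetryPolicy.RetryDecision.Type.RETRY
* }</pre>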
*
* @return {@code RetryDecision.retry(cl)} if no retry has been attempted yet and {@code
*     receivedResponses >= requiredResponses && !dataRetrieved}; {@code RetryDecision.rethrow()}
*     otherwise.
*/
@Override
public RetryDecision onReadTimeout(
Statement statement,
ConsistencyLevel cl,
int requiredResponses,
int receivedResponses,
boolean dataRetrieved,
int nbRetry) {
if (nbRetry != 0) return RetryDecision.rethrow();
return receivedResponses >= requiredResponses && !dataRetrieved
? RetryDecision.retry(cl)
: RetryDecision.rethrow();
}
/**
* {@inheritDoc}
*
* <p>This implementation triggers a maximum of one retry, and only in the case of a {@code
* WriteType.BATCH_LOG} write. The reasoning for the retry in that case is that the write to the
* distributed batch log is attempted by the coordinator of the write against a small subset of
* all the nodes alive in the local datacenter. Hence, a timeout usually means that none of the
* nodes in that subset were alive but the coordinator had not yet detected them as dead. By the
* time we get the timeout, the dead nodes will likely have been detected as dead, and the retry
* thus has a high chance of success.
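*
* <p>A hypothetical invocation illustrating the decision (again, {@code statement} is assumed to
* be a batch statement already in scope):
*
* <pre>{@code
* // Timed out while writing the distributed batch log on the first attempt: retry once at the
* // same consistency level. Any other write type would be rethrown.
* RetryPolicy.RetryDecision decision =
*     DefaultRetryPolicy.INSTANCE.onWriteTimeout(
*         statement, ConsistencyLevel.QUORUM, WriteType.BATCH_LOG, 2, 0, 0);
* // decision.getType() == RetryPolicy.RetryDecision.Type.RETRY
* }</pre>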
*
* @return {@code RetryDecision.retry(cl)} if no retry has been attempted yet and {@code
*     writeType == WriteType.BATCH_LOG}; {@code RetryDecision.rethrow()} otherwise.
*/
@Override
public RetryDecision onWriteTimeout(
Statement statement,
ConsistencyLevel cl,
WriteType writeType,
int requiredAcks,
int receivedAcks,
int nbRetry) {
if (nbRetry != 0) return RetryDecision.rethrow();
// If the batch log write failed, retry the operation, as this might just mean we were unlucky
// at picking candidates.
// JAVA-764: testing the write type automatically filters out serial consistency levels, as
// these always have WriteType.CAS.
return writeType == WriteType.BATCH_LOG ? RetryDecision.retry(cl) : RetryDecision.rethrow();
}
/**
* {@inheritDoc}
*
* <p>This implementation does the following:
*
* <ul>
*   <li>if this is the first retry ({@code nbRetry == 0}), it triggers a retry on the next host
*       in the query plan with the same consistency level ({@link
*       RetryPolicy.RetryDecision#tryNextHost(ConsistencyLevel) RetryDecision#tryNextHost(null)}).
*       The rationale is that the first coordinator might have been network-isolated from all
*       other nodes (thinking they're down), but still able to communicate with the client; in
*       that case, retrying on the same host has almost no chance of success, but moving to the
*       next host might solve the issue.
*   <li>otherwise, the exception is rethrown.
* </ul>
*/
@Override
public RetryDecision onUnavailable(
Statement statement,
ConsistencyLevel cl,
int requiredReplica,
int aliveReplica,
int nbRetry) {
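// First attempt (nbRetry == 0): move on to the next host in the query plan; the null argument
// keeps the statement's own consistency level. Any subsequent attempt rethrows the exception.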
return (nbRetry == 0) ? RetryDecision.tryNextHost(null) : RetryDecision.rethrow();
}
/** {@inheritDoc} */
@Override
public RetryDecision onRequestError(
Statement statement, ConsistencyLevel cl, DriverException e, int nbRetry) {
// do not retry these by default as they generally indicate a data problem or
// other issue that is unlikely to be resolved by a retry.
if (e instanceof WriteFailureException || e instanceof ReadFailureException) {
return RetryDecision.rethrow();
}
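// Other request errors, such as a client timeout, are retried on the next host in the query plan.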
return RetryDecision.tryNextHost(cl);
}
@Override
public void init(Cluster cluster) {
// nothing to do
}
@Override
public void close() {
// nothing to do
}
}