// io.stargate.grpc.retries.DefaultRetryPolicy Maven / Gradle / Ivy
/*
* Copyright The Stargate Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.stargate.grpc.retries;
import com.datastax.oss.driver.shaded.guava.common.annotations.VisibleForTesting;
import edu.umd.cs.findbugs.annotations.NonNull;
import net.jcip.annotations.ThreadSafe;
import org.apache.cassandra.stargate.db.WriteType;
import org.apache.cassandra.stargate.exceptions.PreparedQueryNotFoundException;
import org.apache.cassandra.stargate.exceptions.ReadTimeoutException;
import org.apache.cassandra.stargate.exceptions.WriteTimeoutException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * The default retry policy.
 *
 * <p>This is a very conservative implementation: it triggers a maximum of one retry per request,
 * and only in cases that have a high chance of success (see the method javadocs for detailed
 * explanations of each case). The exception is the {@link
 * RetryPolicy#onUnprepared(PreparedQueryNotFoundException, int)}, which allows 2 retries.
 *
 * <p>The class declares no instance fields; every decision is computed from the method arguments
 * alone.
 */
@ThreadSafe
public class DefaultRetryPolicy implements RetryPolicy {

  private static final Logger LOG = LoggerFactory.getLogger(DefaultRetryPolicy.class);

  /** Maximum number of retries granted when a prepared query is reported as not found. */
  private static final int MAX_UNPREPARED_RETRIES = 2;

  /** Trace-level log format used when a read timeout is retried. */
  @VisibleForTesting
  public static final String RETRYING_ON_READ_TIMEOUT =
      "Retrying on read timeout (consistency: {}, required responses: {}, "
          + "received responses: {}, data retrieved: {}, retries: {})";

  /** Trace-level log format used when a write timeout is retried. */
  @VisibleForTesting
  public static final String RETRYING_ON_WRITE_TIMEOUT =
      "Retrying on write timeout (consistency: {}, write type: {}, "
          + "required acknowledgments: {}, received acknowledgments: {}, retries: {})";

  /** Trace-level log format used when an unprepared-statement error is retried. */
  @VisibleForTesting
  public static final String RETRYING_ON_UNPREPARED =
      "Retrying on unprepared (MD5 digest: {}, retries: {})";

  /**
   * {@inheritDoc}
   *
   * <p>This implementation triggers a maximum of one retry (to the same node), and only if enough
   * replicas had responded to the read request but data was not retrieved amongst those. That
   * usually means that enough replicas are alive to satisfy the consistency, but the coordinator
   * picked a dead one for data retrieval, not having detected that replica as dead yet. The
   * reasoning is that by the time we get the timeout, the dead replica will likely have been
   * detected as dead and the retry has a high chance of success.
   *
   * <p>Otherwise, the exception is rethrown.
   *
   * @param rte the read timeout reported for the request
   * @param retryCount the number of retries already attempted; a retry is only granted at 0
   * @return {@link RetryDecision#RETRY} in the case described above, {@link
   *     RetryDecision#RETHROW} otherwise
   */
  @Override
  public RetryDecision onReadTimeout(@NonNull ReadTimeoutException rte, int retryCount) {
    RetryDecision decision =
        (retryCount == 0 && rte.received >= rte.blockFor && !rte.dataPresent)
            ? RetryDecision.RETRY
            : RetryDecision.RETHROW;

    if (decision == RetryDecision.RETRY && LOG.isTraceEnabled()) {
      // "data retrieved" is logged as a literal false: a retry is only decided when
      // rte.dataPresent is false, so the value is known at this point.
      LOG.trace(
          RETRYING_ON_READ_TIMEOUT, rte.consistency, rte.blockFor, rte.received, false, retryCount);
    }

    return decision;
  }

  /**
   * {@inheritDoc}
   *
   * <p>This implementation triggers a maximum of one retry, and only for a {@code
   * WriteType.BATCH_LOG} write. The reasoning is that the coordinator tries to write the
   * distributed batch log against a small subset of nodes in the local datacenter; a timeout
   * usually means that none of these nodes were alive but the coordinator hadn't detected them as
   * dead yet. By the time we get the timeout, the dead nodes will likely have been detected as
   * dead, and the retry has thus a high chance of success.
   *
   * <p>Otherwise, the exception is rethrown.
   *
   * @param wte the write timeout reported for the request
   * @param retryCount the number of retries already attempted; a retry is only granted at 0
   * @return {@link RetryDecision#RETRY} in the case described above, {@link
   *     RetryDecision#RETHROW} otherwise
   */
  @Override
  public RetryDecision onWriteTimeout(@NonNull WriteTimeoutException wte, int retryCount) {
    RetryDecision decision =
        (retryCount == 0 && wte.writeType == WriteType.BATCH_LOG)
            ? RetryDecision.RETRY
            : RetryDecision.RETHROW;

    if (decision == RetryDecision.RETRY && LOG.isTraceEnabled()) {
      LOG.trace(
          RETRYING_ON_WRITE_TIMEOUT,
          wte.consistency,
          wte.writeType,
          wte.blockFor,
          wte.received,
          retryCount);
    }

    return decision;
  }

  /**
   * {@inheritDoc}
   *
   * <p>Two retries max when UNPREPARED occurs.
   *
   * @param pe the error reporting that the prepared query was not found
   * @param retryCount the number of retries already attempted; a retry is granted while this is
   *     below 2
   * @return {@link RetryDecision#RETRY} for the first two attempts, {@link RetryDecision#RETHROW}
   *     afterwards
   */
  @Override
  public RetryDecision onUnprepared(@NonNull PreparedQueryNotFoundException pe, int retryCount) {
    RetryDecision decision =
        retryCount < MAX_UNPREPARED_RETRIES ? RetryDecision.RETRY : RetryDecision.RETHROW;

    if (decision == RetryDecision.RETRY && LOG.isTraceEnabled()) {
      // Pass the id itself rather than id.toString(): the logger formats the argument when it
      // renders the message, and a null id is then printed instead of throwing from a log call.
      LOG.trace(RETRYING_ON_UNPREPARED, pe.id, retryCount);
    }

    return decision;
  }
}