com.datastax.oss.driver.internal.core.retry.ConsistencyDowngradingRetryPolicy Maven / Gradle / Ivy

Show more of this group Show more artifacts with this name
Show all versions of java-driver-core-shaded
The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.datastax.oss.driver.internal.core.retry;

import static com.datastax.oss.driver.api.core.servererrors.WriteType.BATCH;
import static com.datastax.oss.driver.api.core.servererrors.WriteType.BATCH_LOG;
import static com.datastax.oss.driver.api.core.servererrors.WriteType.SIMPLE;
import static com.datastax.oss.driver.api.core.servererrors.WriteType.UNLOGGED_BATCH;

import com.datastax.oss.driver.api.core.ConsistencyLevel;
import com.datastax.oss.driver.api.core.connection.ClosedConnectionException;
import com.datastax.oss.driver.api.core.connection.HeartbeatException;
import com.datastax.oss.driver.api.core.context.DriverContext;
import com.datastax.oss.driver.api.core.retry.RetryDecision;
import com.datastax.oss.driver.api.core.retry.RetryPolicy;
import com.datastax.oss.driver.api.core.retry.RetryVerdict;
import com.datastax.oss.driver.api.core.servererrors.CoordinatorException;
import com.datastax.oss.driver.api.core.servererrors.ReadFailureException;
import com.datastax.oss.driver.api.core.servererrors.WriteFailureException;
import com.datastax.oss.driver.api.core.servererrors.WriteType;
import com.datastax.oss.driver.api.core.session.Request;
import com.datastax.oss.driver.shaded.guava.common.annotations.VisibleForTesting;
import edu.umd.cs.findbugs.annotations.NonNull;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * A retry policy that sometimes retries with a lower consistency level than the one initially
 * requested.
 *
 * BEWARE: this policy may retry queries using a lower consistency level than the one
 * initially requested. By doing so, it may break consistency guarantees. In other words, if you use
 * this retry policy, there are cases (documented below) where a read at {@code QUORUM} may
 * not see a preceding write at {@code QUORUM}. Furthermore, this policy doesn't always respect
 * datacenter locality; for example, it may downgrade {@code LOCAL_QUORUM} to {@code ONE}, and thus
 * could accidentally send a write that was intended for the local datacenter to another
 * datacenter.Do not use this policy unless you have understood the cases where this can happen and
 * are ok with that.
 *
 * 
This policy implements the same retries than the {@link DefaultRetryPolicy} policy. But on top
 * of that, it also retries in the following cases:
 *
 * 

 *   On a read timeout: if the number of replicas that responded is greater than one, but lower
 *       than is required by the requested consistency level, the operation is retried at a lower
 *       consistency level.
 *   
On a write timeout: if the operation is a {@code WriteType.UNLOGGED_BATCH} and at least one
 *       replica acknowledged the write, the operation is retried at a lower consistency level.
 *       Furthermore, for other operations, if at least one replica acknowledged the write, the
 *       timeout is ignored.
 *   
On an unavailable exception: if at least one replica is alive, the operation is retried at
 *       a lower consistency level.
 * 
 *
 * The lower consistency level to use for retries is determined by the following rules:
 *
 * 
 *   if more than 3 replicas responded, use {@code THREE}.
 *   
if 1, 2 or 3 replicas responded, use the corresponding level {@code ONE}, {@code TWO} or
 *       {@code THREE}.
 * 
 *
 * Note that if the initial consistency level was {@code EACH_QUORUM}, Cassandra returns the number
 * of live replicas in the datacenter that failed to reach consistency, not the overall
 * number in the cluster. Therefore if this number is 0, we still retry at {@code ONE}, on the
 * assumption that a host may still be up in another datacenter.
 *
 * The reasoning behind this retry policy is the following one. If, based on the information the
 * Cassandra coordinator node returns, retrying the operation with the initially requested
 * consistency has a chance to succeed, do it. Otherwise, if based on this information, we know that
 * the initially requested consistency level cannot be achieved currently, then:
 *
 * 

 *   For writes, ignore the exception (thus silently failing the consistency requirement) if we
 *       know the write has been persisted on at least one replica.
 *   
For reads, try reading at a lower consistency level (thus silently failing the consistency
 *       requirement).
 * 
 *
 * In other words, this policy implements the idea that if the requested consistency level cannot be
 * achieved, the next best thing for writes is to make sure the data is persisted, and that reading
 * something is better than reading nothing, even if there is a risk of reading stale data.
 */
public class ConsistencyDowngradingRetryPolicy implements RetryPolicy {

  private static final Logger LOG =
      LoggerFactory.getLogger(ConsistencyDowngradingRetryPolicy.class);

  @VisibleForTesting
  public static final String VERDICT_ON_READ_TIMEOUT =
      "[{}] Verdict on read timeout (consistency: {}, required responses: {}, "
          + "received responses: {}, data retrieved: {}, retries: {}): {}";

  @VisibleForTesting
  public static final String VERDICT_ON_WRITE_TIMEOUT =
      "[{}] Verdict on write timeout (consistency: {}, write type: {}, "
          + "required acknowledgments: {}, received acknowledgments: {}, retries: {}): {}";

  @VisibleForTesting
  public static final String VERDICT_ON_UNAVAILABLE =
      "[{}] Verdict on unavailable exception (consistency: {}, "
          + "required replica: {}, alive replica: {}, retries: {}): {}";

  @VisibleForTesting
  public static final String VERDICT_ON_ABORTED =
      "[{}] Verdict on aborted request (type: {}, message: '{}', retries: {}): {}";

  @VisibleForTesting
  public static final String VERDICT_ON_ERROR =
      "[{}] Verdict on node error (type: {}, message: '{}', retries: {}): {}";

  private final String logPrefix;

  @SuppressWarnings("unused")
  public ConsistencyDowngradingRetryPolicy(
      @NonNull DriverContext context, @NonNull String profileName) {
    this(context.getSessionName() + "|" + profileName);
  }

  public ConsistencyDowngradingRetryPolicy(@NonNull String logPrefix) {
    this.logPrefix = logPrefix;
  }

  /**
   * {@inheritDoc}
   *
   * This implementation triggers a maximum of one retry. If less replicas responded than
   * required by the consistency level (but at least one replica did respond), the operation is
   * retried at a lower consistency level. If enough replicas responded but data was not retrieved,
   * the operation is retried with the initial consistency level. Otherwise, an exception is thrown.
   */
  @Override
  public RetryVerdict onReadTimeoutVerdict(
      @NonNull Request request,
      @NonNull ConsistencyLevel cl,
      int blockFor,
      int received,
      boolean dataPresent,
      int retryCount) {
    RetryVerdict verdict;
    if (retryCount != 0) {
      verdict = RetryVerdict.RETHROW;
    } else if (cl.isSerial()) {
      // CAS reads are not all that useful in terms of visibility of the writes since CAS write
      // supports the normal consistency levels on the committing phase. So the main use case for
      // CAS reads is probably for when you've timed out on a CAS write and want to make sure what
      // happened. Downgrading in that case would be always wrong so we just special-case to
      // rethrow.
      verdict = RetryVerdict.RETHROW;
    } else if (received < blockFor) {
      verdict = maybeDowngrade(received, cl);
    } else if (!dataPresent) {
      // Retry with same CL since this usually means that enough replica are alive to satisfy the
      // consistency but the coordinator picked a dead one for data retrieval, not having detected
      // that replica as dead yet.
      verdict = RetryVerdict.RETRY_SAME;
    } else {
      // This usually means a digest mismatch, in which case it's pointless to retry since
      // the inconsistency has to be repaired first.
      verdict = RetryVerdict.RETHROW;
    }
    if (LOG.isTraceEnabled()) {
      LOG.trace(
          VERDICT_ON_READ_TIMEOUT,
          logPrefix,
          cl,
          blockFor,
          received,
          dataPresent,
          retryCount,
          verdict);
    }
    return verdict;
  }

  /**
   * {@inheritDoc}
   *
   * 
This implementation triggers a maximum of one retry. If {@code writeType ==
   * WriteType.BATCH_LOG}, the write is retried with the initial consistency level. If {@code
   * writeType == WriteType.UNLOGGED_BATCH} and at least one replica acknowledged, the write is
   * retried with a lower consistency level (with unlogged batch, a write timeout can always
   * mean that part of the batch haven't been persisted at all, even if {@code receivedAcks > 0}).
   * For other write types ({@code WriteType.SIMPLE} and {@code WriteType.BATCH}), if we know the
   * write has been persisted on at least one replica, we ignore the exception. Otherwise, an
   * exception is thrown.
   */
  @Override
  public RetryVerdict onWriteTimeoutVerdict(
      @NonNull Request request,
      @NonNull ConsistencyLevel cl,
      @NonNull WriteType writeType,
      int blockFor,
      int received,
      int retryCount) {
    RetryVerdict verdict;
    if (retryCount != 0) {
      verdict = RetryVerdict.RETHROW;
    } else if (SIMPLE.equals(writeType) || BATCH.equals(writeType)) {
      // Since we provide atomicity, if at least one replica acknowledged the write,
      // there is no point in retrying
      verdict = received > 0 ? RetryVerdict.IGNORE : RetryVerdict.RETHROW;
    } else if (UNLOGGED_BATCH.equals(writeType)) {
      // Since only part of the batch could have been persisted,
      // retry with whatever consistency should allow to persist all
      verdict = maybeDowngrade(received, cl);
    } else if (BATCH_LOG.equals(writeType)) {
      verdict = RetryVerdict.RETRY_SAME;
    } else {
      verdict = RetryVerdict.RETHROW;
    }
    if (LOG.isTraceEnabled()) {
      LOG.trace(
          VERDICT_ON_WRITE_TIMEOUT,
          logPrefix,
          cl,
          writeType,
          blockFor,
          received,
          retryCount,
          verdict);
    }
    return verdict;
  }

  /**
   * {@inheritDoc}
   *
   * This implementation triggers a maximum of one retry. If at least one replica is known to be
   * alive, the operation is retried at a lower consistency level.
   */
  @Override
  public RetryVerdict onUnavailableVerdict(
      @NonNull Request request,
      @NonNull ConsistencyLevel cl,
      int required,
      int alive,
      int retryCount) {
    RetryVerdict verdict;
    if (retryCount != 0) {
      verdict = RetryVerdict.RETHROW;
    } else if (cl.isSerial()) {
      // JAVA-764: if the requested consistency level is serial, it means that the
      // operation failed at the paxos phase of a LWT.
      // Retry on the next host, on the assumption that the initial coordinator could be
      // network-isolated.
      verdict = RetryVerdict.RETRY_NEXT;
    } else {
      verdict = maybeDowngrade(alive, cl);
    }
    if (LOG.isTraceEnabled()) {
      LOG.trace(VERDICT_ON_UNAVAILABLE, logPrefix, cl, required, alive, retryCount, verdict);
    }
    return verdict;
  }

  @Override
  public RetryVerdict onRequestAbortedVerdict(
      @NonNull Request request, @NonNull Throwable error, int retryCount) {
    RetryVerdict verdict =
        error instanceof ClosedConnectionException || error instanceof HeartbeatException
            ? RetryVerdict.RETRY_NEXT
            : RetryVerdict.RETHROW;
    if (LOG.isTraceEnabled()) {
      LOG.trace(
          VERDICT_ON_ABORTED,
          logPrefix,
          error.getClass().getSimpleName(),
          error.getMessage(),
          retryCount,
          verdict);
    }
    return verdict;
  }

  @Override
  public RetryVerdict onErrorResponseVerdict(
      @NonNull Request request, @NonNull CoordinatorException error, int retryCount) {
    RetryVerdict verdict =
        error instanceof WriteFailureException || error instanceof ReadFailureException
            ? RetryVerdict.RETHROW
            : RetryVerdict.RETRY_NEXT;
    if (LOG.isTraceEnabled()) {
      LOG.trace(
          VERDICT_ON_ERROR,
          logPrefix,
          error.getClass().getSimpleName(),
          error.getMessage(),
          retryCount,
          verdict);
    }
    return verdict;
  }

  @Override
  @Deprecated
  public RetryDecision onReadTimeout(
      @NonNull Request request,
      @NonNull ConsistencyLevel cl,
      int blockFor,
      int received,
      boolean dataPresent,
      int retryCount) {
    throw new UnsupportedOperationException("onReadTimeout");
  }

  @Override
  @Deprecated
  public RetryDecision onWriteTimeout(
      @NonNull Request request,
      @NonNull ConsistencyLevel cl,
      @NonNull WriteType writeType,
      int blockFor,
      int received,
      int retryCount) {
    throw new UnsupportedOperationException("onWriteTimeout");
  }

  @Override
  @Deprecated
  public RetryDecision onUnavailable(
      @NonNull Request request,
      @NonNull ConsistencyLevel cl,
      int required,
      int alive,
      int retryCount) {
    throw new UnsupportedOperationException("onUnavailable");
  }

  @Override
  @Deprecated
  public RetryDecision onRequestAborted(
      @NonNull Request request, @NonNull Throwable error, int retryCount) {
    throw new UnsupportedOperationException("onRequestAborted");
  }

  @Override
  @Deprecated
  public RetryDecision onErrorResponse(
      @NonNull Request request, @NonNull CoordinatorException error, int retryCount) {
    throw new UnsupportedOperationException("onErrorResponse");
  }

  @Override
  public void close() {}

  private RetryVerdict maybeDowngrade(int alive, ConsistencyLevel current) {
    if (alive >= 3) {
      return new ConsistencyDowngradingRetryVerdict(ConsistencyLevel.THREE);
    }
    if (alive == 2) {
      return new ConsistencyDowngradingRetryVerdict(ConsistencyLevel.TWO);
    }
    // JAVA-1005: EACH_QUORUM does not report a global number of alive replicas
    // so even if we get 0 alive replicas, there might be a node up in some other datacenter
    if (alive == 1 || current.getProtocolCode() == ConsistencyLevel.EACH_QUORUM.getProtocolCode()) {
      return new ConsistencyDowngradingRetryVerdict(ConsistencyLevel.ONE);
    }
    return RetryVerdict.RETHROW;
  }
}