com.datastax.driver.core.RequestHandler Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of cassandra-driver Show documentation
Shaded version of DataStax Java Driver for Apache Cassandra
The newest version!
/*
 * Copyright DataStax, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.datastax.driver.core;

import com.codahale.metrics.Timer;
import com.datastax.driver.core.exceptions.BootstrappingException;
import com.datastax.driver.core.exceptions.BusyConnectionException;
import com.datastax.driver.core.exceptions.BusyPoolException;
import com.datastax.driver.core.exceptions.ConnectionException;
import com.datastax.driver.core.exceptions.DriverException;
import com.datastax.driver.core.exceptions.DriverInternalError;
import com.datastax.driver.core.exceptions.NoHostAvailableException;
import com.datastax.driver.core.exceptions.OperationTimedOutException;
import com.datastax.driver.core.exceptions.OverloadedException;
import com.datastax.driver.core.exceptions.ReadFailureException;
import com.datastax.driver.core.exceptions.ReadTimeoutException;
import com.datastax.driver.core.exceptions.ServerError;
import com.datastax.driver.core.exceptions.UnavailableException;
import com.datastax.driver.core.exceptions.WriteFailureException;
import com.datastax.driver.core.exceptions.WriteTimeoutException;
import com.datastax.driver.core.policies.RetryPolicy;
import com.datastax.driver.core.policies.RetryPolicy.RetryDecision.Type;
import com.datastax.driver.core.policies.SpeculativeExecutionPolicy.SpeculativeExecutionPlan;
import com.datastax.driver.$internal.com.google.common.collect.ImmutableList;
import com.datastax.driver.$internal.com.google.common.collect.Iterators;
import com.datastax.driver.$internal.com.google.common.collect.Sets;
import com.datastax.driver.$internal.com.google.common.util.concurrent.FutureCallback;
import com.datastax.driver.$internal.com.google.common.util.concurrent.ListenableFuture;
import com.datastax.driver.$internal.io.netty.util.Timeout;
import com.datastax.driver.$internal.io.netty.util.TimerTask;
import java.net.InetSocketAddress;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicReference;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Handles a request to Cassandra, dealing with host failover and retries on unavailable/timeout.
 */
class RequestHandler {
  private static final Logger logger = LoggerFactory.getLogger(RequestHandler.class);

  // Opt-in switch for the per-host counters, read once from the
  // "com.datastax.driver.HOST_METRICS_ENABLED" system property.
  private static final boolean HOST_METRICS_ENABLED =
      Boolean.getBoolean("com.datastax.driver.HOST_METRICS_ENABLED");

  // Identifier of this request, used as a prefix in log messages.
  final String id;

  private final SessionManager manager;
  // Receives the final outcome (result or exception) of the request.
  private final Callback callback;

  // Hosts to try; shared by all executions of this request.
  private final QueryPlan queryPlan;
  private final SpeculativeExecutionPlan speculativeExecutionPlan;
  // True only for an idempotent statement other than Statement.DEFAULT.
  private final boolean allowSpeculativeExecutions;
  // Executions that were started and not yet removed by reportNoMoreHosts().
  private final Set runningExecutions = Sets.newCopyOnWriteArraySet();
  // Timeouts of speculative executions that were scheduled and whose timer task has not run yet.
  private final Set scheduledExecutions = Sets.newCopyOnWriteArraySet();
  private final Statement statement;
  // Timer used to delay speculative executions.
  private final com.datastax.driver.$internal.io.netty.util.Timer scheduler;

  // Both are created lazily on first use and stay null until then.
  private volatile List triedHosts;
  private volatile ConcurrentMap errors;

  // Context of the global requests timer; null when metrics are disabled.
  private final Timer.Context timerContext;
  // System.nanoTime() at construction, used to compute the latency passed to the callback.
  private final long startTime;

  // Flipped once, by whichever of cancel / setFinalResult / setFinalException wins the CAS.
  private final AtomicBoolean isDone = new AtomicBoolean();
  // Position handed to the next execution; also the number of executions started so far.
  private final AtomicInteger executionIndex = new AtomicInteger();

  /**
   * Creates a handler for one statement: registers itself with the callback, computes the query
   * plan and the speculative execution plan, and starts the latency clock.
   *
   * @param manager the session the request is executed on
   * @param callback receives the final result or exception
   * @param statement the statement being executed
   */
  public RequestHandler(SessionManager manager, Callback callback, Statement statement) {
    this.id = Long.toString(System.identityHashCode(this));
    if (logger.isTraceEnabled()) {
      logger.trace("[{}] {}", id, statement);
    }
    this.manager = manager;
    this.callback = callback;
    this.scheduler = manager.cluster.manager.connectionFactory.timer;

    callback.register(this);

    // The load balancing policy is only consulted when the statement does not pin a host.
    if (statement.getHost() == null) {
      this.queryPlan =
          new QueryPlan(
              manager.loadBalancingPolicy().newQueryPlan(manager.poolsState.keyspace, statement));
    } else {
      this.queryPlan = new QueryPlan(Iterators.singletonIterator(statement.getHost()));
    }

    this.speculativeExecutionPlan =
        manager.speculativeExecutionPolicy().newPlan(manager.poolsState.keyspace, statement);
    // Speculative executions are never used for Statement.DEFAULT or non-idempotent statements.
    this.allowSpeculativeExecutions =
        statement != Statement.DEFAULT
            && statement.isIdempotentWithDefault(manager.configuration().getQueryOptions());
    this.statement = statement;

    if (metricsEnabled()) {
      this.timerContext = metrics().getRequestsTimer().time();
    } else {
      this.timerContext = null;
    }
    this.startTime = System.nanoTime();
  }

  /** Starts processing the request by launching its first execution. */
  void sendRequest() {
    startNewExecution();
  }

  /**
   * Called when the corresponding ResultSetFuture is cancelled by the client. Marks the request as
   * done and, if it was not done already, cancels every running and scheduled execution.
   */
  void cancel() {
    if (isDone.compareAndSet(false, true)) {
      cancelPendingExecutions(null);
    }
  }

  /**
   * Creates the next execution, records it as running and lets it pick a host from the query
   * plan. Does nothing once the request is done.
   */
  private void startNewExecution() {
    if (isDone.get()) {
      return;
    }

    // Arguments are evaluated left to right: the request is fetched before the index is bumped.
    SpeculativeExecution started =
        new SpeculativeExecution(callback.request(), executionIndex.getAndIncrement());
    runningExecutions.add(started);
    started.findNextHostAndQuery();
  }

  /**
   * Schedules the next speculative execution.
   *
   * @param delayMillis delay before the execution starts; a negative value means no execution is
   *     scheduled, zero starts it right away on the calling thread
   */
  private void scheduleExecution(long delayMillis) {
    if (delayMillis < 0 || isDone.get()) {
      return;
    }
    if (logger.isTraceEnabled()) {
      logger.trace("[{}] Schedule next speculative execution in {} ms", id, delayMillis);
    }
    if (delayMillis > 0) {
      // Remember the timeout so that it can be cancelled if the request completes first.
      scheduledExecutions.add(
          scheduler.newTimeout(newExecutionTask, delayMillis, TimeUnit.MILLISECONDS));
      return;
    }
    // No delay requested: kick off the request immediately.
    scheduleExecutionImmediately();
  }

  // Timer task that starts a speculative execution once its delay has elapsed.
  private final TimerTask newExecutionTask =
      new TimerTask() {
        @Override
        public void run(final Timeout timeout) throws Exception {
          // The timeout has fired, so it no longer needs to be tracked for cancellation.
          scheduledExecutions.remove(timeout);
          if (isDone.get()) {
            return;
          }
          // We're on the timer thread so reschedule to another executor
          Runnable startSpeculativeExecution =
              new Runnable() {
                @Override
                public void run() {
                  scheduleExecutionImmediately();
                }
              };
          manager.executor().execute(startSpeculativeExecution);
        }
      };

  /** Counts one speculative execution in the error metrics (if enabled) and starts it. */
  private void scheduleExecutionImmediately() {
    if (metricsEnabled()) {
      metrics().getErrorMetrics().getSpeculativeExecutions().inc();
    }
    startNewExecution();
  }

  /**
   * Cancels every running execution except {@code ignore}, then every scheduled one.
   *
   * @param ignore the execution to leave alone (may be null, in which case all are cancelled)
   */
  private void cancelPendingExecutions(SpeculativeExecution ignore) {
    for (SpeculativeExecution running : runningExecutions) {
      // Skipping the caller is not vital but this produces nicer logs.
      if (running == ignore) {
        continue;
      }
      running.cancel();
    }
    for (Timeout scheduled : scheduledExecutions) {
      scheduled.cancel();
    }
  }

  /**
   * Completes the request with a response, unless it has already been completed or cancelled.
   * Cancels the other executions, stops the request timer, builds the {@code ExecutionInfo} and
   * hands everything to the callback.
   *
   * @param execution the execution that obtained the response
   * @param connection the connection the response was received on
   * @param response the response to report
   */
  private void setFinalResult(
      SpeculativeExecution execution, Connection connection, Message.Response response) {
    boolean wonRace = isDone.compareAndSet(false, true);
    if (!wonRace) {
      if (logger.isTraceEnabled()) {
        logger.trace("[{}] Got beaten to setting the result", execution.id);
      }
      return;
    }

    if (logger.isTraceEnabled()) {
      logger.trace("[{}] Setting final result", execution.id);
    }

    cancelPendingExecutions(execution);

    try {
      if (timerContext != null) {
        timerContext.stop();
      }

      int speculativeExecutions = executionIndex.get() - 1;
      // Avoid creating a new instance if we can reuse the host's default one
      boolean defaultInfoSuffices =
          execution.position == 0
              && speculativeExecutions == 0
              && triedHosts == null
              && execution.retryConsistencyLevel == null
              && response.getCustomPayload() == null;

      ExecutionInfo info;
      if (defaultInfoSuffices) {
        info = execution.current.defaultExecutionInfo;
      } else {
        List hosts;
        if (triedHosts != null) {
          // The host that finally answered goes last in the list of tried hosts.
          hosts = triedHosts;
          hosts.add(execution.current);
        } else {
          hosts = ImmutableList.of(execution.current);
        }
        info =
            new ExecutionInfo(
                speculativeExecutions,
                execution.position,
                hosts,
                execution.retryConsistencyLevel,
                response.getCustomPayload());
      }
      callback.onSet(connection, response, info, statement, System.nanoTime() - startTime);
    } catch (Exception e) {
      // Anything thrown while building or delivering the result is reported as an internal error.
      callback.onException(
          connection,
          new DriverInternalError(
              "Unexpected exception while setting final result from " + response, e),
          System.nanoTime() - startTime, /*unused*/
          0);
    }
  }

  /**
   * Completes the request with an exception, unless it has already been completed or cancelled.
   * Cancels the other executions and stops the request timer before notifying the callback.
   *
   * @param execution the execution reporting the failure
   * @param connection the connection involved, or null when there is none
   * @param exception the exception to report
   */
  private void setFinalException(
      SpeculativeExecution execution, Connection connection, Exception exception) {
    boolean wonRace = isDone.compareAndSet(false, true);
    if (!wonRace) {
      if (logger.isTraceEnabled()) {
        logger.trace("[{}] Got beaten to setting final exception", execution.id);
      }
      return;
    }

    if (logger.isTraceEnabled()) {
      logger.trace("[{}] Setting final exception", execution.id);
    }

    cancelPendingExecutions(execution);

    try {
      if (timerContext != null) {
        timerContext.stop();
      }
    } finally {
      // The callback is notified even if stopping the timer throws.
      callback.onException(connection, exception, System.nanoTime() - startTime, /*unused*/ 0);
    }
  }

  /**
   * Triggered when an execution reaches the end of the query plan. The execution is removed from
   * the running set; the request only fails (with a NoHostAvailableException carrying the errors
   * collected so far) if no other execution is still running.
   */
  private void reportNoMoreHosts(SpeculativeExecution execution) {
    runningExecutions.remove(execution);
    if (!runningExecutions.isEmpty()) {
      return;
    }
    setFinalException(
        execution,
        null,
        new NoHostAvailableException(errors == null ? Collections.emptyMap() : errors));
  }

  /** Returns whether metrics are enabled in the session's configuration. */
  private boolean metricsEnabled() {
    return manager.configuration().getMetricsOptions().isEnabled();
  }

  /**
   * Returns whether per-host counters should be updated: the HOST_METRICS_ENABLED flag must be
   * set and metrics must be enabled.
   */
  private boolean hostMetricsEnabled() {
    if (!HOST_METRICS_ENABLED) {
      return false;
    }
    return metricsEnabled();
  }

  /** Returns the metrics object held by the cluster manager. */
  private Metrics metrics() {
    return manager.cluster.manager.metrics;
  }

  /**
   * Returns the retry policy to apply: the one set on the statement if there is one, otherwise
   * the one from the configured policies.
   */
  private RetryPolicy retryPolicy() {
    RetryPolicy statementPolicy = statement.getRetryPolicy();
    if (statementPolicy != null) {
      return statementPolicy;
    }
    return manager.configuration().getPolicies().getRetryPolicy();
  }

  /**
   * Receives the final outcome of a request handled by a {@link RequestHandler}, in addition to
   * the callbacks inherited from {@link Connection.ResponseCallback}.
   */
  interface Callback extends Connection.ResponseCallback {
    /**
     * Invoked by the handler when the request completes with a response.
     *
     * @param connection the connection the response was received on
     * @param response the final response
     * @param info execution details (tried hosts, speculative executions, ...)
     * @param statement the statement that was executed
     * @param latency nanoseconds elapsed since the handler was created
     */
    void onSet(
        Connection connection,
        Message.Response response,
        ExecutionInfo info,
        Statement statement,
        long latency);

    /**
     * Invoked from the {@link RequestHandler} constructor with the handler being created.
     *
     * @param handler the handler that will process this callback's request
     */
    void register(RequestHandler handler);
  }

  /**
   * An execution of the query against the cluster. There is at least one instance per
   * RequestHandler, and possibly more (depending on the SpeculativeExecutionPolicy). Each instance
   * may retry on the same host, or on other hosts as defined by the RetryPolicy. All instances run
   * concurrently and share the same query plan.
   *
   * <p>There are three ways a SpeculativeExecution can stop:
   *
   * <ul>
   *   <li>it completes the query (with either a success or a fatal error), and reports the result
   *       to the RequestHandler;
   *   <li>it gets cancelled, either because another execution completed the query, or because the
   *       RequestHandler was cancelled;
   *   <li>it reaches the end of the query plan and informs the RequestHandler, which will decide
   *       what to do.
   * </ul>
   */
  class SpeculativeExecution implements Connection.ResponseCallback {
    // Identifier used in log messages: the handler's id, a dash, then this execution's position.
    final String id;
    private final Message.Request request;
    // Index of this execution within the request (0 for the first one started).
    private final int position;
    // Host this execution is currently querying; null until a connection has been borrowed.
    private volatile Host current;
    // Consistency level requested by a retry decision, or null if none was requested.
    private volatile ConsistencyLevel retryConsistencyLevel;
    private final AtomicReference queryStateRef;
    // Ensures this execution schedules at most one follow-up speculative execution.
    private final AtomicBoolean nextExecutionScheduled = new AtomicBoolean();
    private final long startTime = System.nanoTime();

    // This represents the number of times a retry has been triggered by the RetryPolicy (this is
    // different from queryStateRef.get().retryCount, because some retries don't involve the
    // policy, for example after an UNPREPARED response).
    // This is incremented by one writer at a time, so volatile is good enough.
    private volatile int retriesByPolicy;

    // Handler returned by the latest connection.write(); reset to null at the start of write().
    private volatile Connection.ResponseHandler connectionHandler;

    /**
     * Creates an execution in its initial query state.
     *
     * @param request the request to send
     * @param position index of this execution within the enclosing handler
     */
    SpeculativeExecution(Message.Request request, int position) {
      this.request = request;
      this.position = position;
      this.id = RequestHandler.this.id + "-" + position;
      this.queryStateRef = new AtomicReference(QueryState.INITIAL);
      if (logger.isTraceEnabled()) {
        logger.trace("[{}] Starting", id);
      }
    }

    /**
     * Walks the shared query plan until a host accepts the query. If the plan runs out (or the
     * request is done, or this execution was cancelled), records the current host as tried and
     * tells the handler that this execution has no more hosts.
     */
    void findNextHostAndQuery() {
      try {
        while (!isDone.get()) {
          Host host = queryPlan.next();
          if (host == null || queryStateRef.get().isCancelled()) {
            break;
          }
          boolean accepted = query(host);
          if (hostMetricsEnabled()) {
            String prefix = accepted ? "writes." : "write-errors.";
            metrics().getRegistry().counter(MetricsUtil.hostMetricName(prefix, host)).inc();
          }
          if (accepted) {
            return;
          }
        }
        if (current != null) {
          if (triedHosts == null) {
            triedHosts = new CopyOnWriteArrayList();
          }
          triedHosts.add(current);
        }
        reportNoMoreHosts(this);
      } catch (Exception e) {
        // Shouldn't happen really, but if ever the loadbalancing policy returned iterator throws,
        // we don't want to block.
        setFinalException(
            null,
            new DriverInternalError("An unexpected error happened while sending requests", e));
      }
    }

    /**
     * Tries to send the request to {@code host}: borrows a connection from the host's pool and
     * writes the request once the connection is available. Failures that occur after the borrow
     * was requested move this execution on to the next host.
     *
     * @param host the host to query
     * @return false if the host has no usable pool (nothing was attempted), true once a connection
     *     has been requested from the pool
     */
    private boolean query(final Host host) {
      HostConnectionPool pool = manager.pools.get(host);
      if (pool == null || pool.isClosed()) {
        return false;
      }

      if (logger.isTraceEnabled()) {
        logger.trace("[{}] Querying node {}", id, host);
      }

      // Each execution schedules at most one follow-up speculative execution.
      if (allowSpeculativeExecutions && nextExecutionScheduled.compareAndSet(false, true)) {
        scheduleExecution(speculativeExecutionPlan.nextExecution(host));
      }

      PoolingOptions poolingOptions = manager.configuration().getPoolingOptions();
      ListenableFuture connectionFuture =
          pool.borrowConnection(
              poolingOptions.getPoolTimeoutMillis(),
              TimeUnit.MILLISECONDS,
              poolingOptions.getMaxQueueSize());
      GuavaCompatibility.INSTANCE.addCallback(
          connectionFuture,
          new FutureCallback() {
            @Override
            public void onSuccess(Connection connection) {
              if (isDone.get()) {
                // The request finished while we were waiting for a connection: give it back.
                connection.release();
                return;
              }
              // The previously queried host (if any) becomes a "tried" host.
              if (current != null) {
                if (triedHosts == null) {
                  triedHosts = new CopyOnWriteArrayList();
                }
                triedHosts.add(current);
              }
              current = host;
              try {
                write(connection, SpeculativeExecution.this);
              } catch (ConnectionException e) {
                // If we have any problem with the connection, move to the next node.
                if (metricsEnabled()) {
                  metrics().getErrorMetrics().getConnectionErrors().inc();
                }
                if (connection != null) {
                  connection.release();
                }
                logError(host.getSocketAddress(), e);
                findNextHostAndQuery();
              } catch (BusyConnectionException e) {
                // The pool shouldn't have give us a busy connection unless we've maxed up the pool,
                // so move on to the next host.
                connection.release();
                logError(host.getSocketAddress(), e);
                findNextHostAndQuery();
              } catch (RuntimeException e) {
                if (connection != null) {
                  connection.release();
                }
                logger.error("Unexpected error while querying " + host.getAddress(), e);
                logError(host.getSocketAddress(), e);
                findNextHostAndQuery();
              }
            }

            @Override
            public void onFailure(Throwable t) {
              // A busy pool is an expected condition; anything else is logged as an error too.
              if (!(t instanceof BusyPoolException)) {
                logger.error("Unexpected error while querying " + host.getAddress(), t);
              }
              logError(host.getSocketAddress(), t);
              findNextHostAndQuery();
            }
          });
      return true;
    }

    /**
     * Marks the query as in progress, writes the request on {@code connection} and starts the
     * response timeout. If this execution has already been cancelled the connection is released
     * and nothing is written.
     *
     * @param connection the connection to write on
     * @param responseCallback the callback that will receive the response
     * @throws ConnectionException if the write fails because of the connection
     * @throws BusyConnectionException if the connection cannot accept another request
     */
    private void write(Connection connection, Connection.ResponseCallback responseCallback)
        throws ConnectionException, BusyConnectionException {
      // Make sure cancel() does not see a stale connectionHandler if it sees the new query state
      // before connection.write has completed
      connectionHandler = null;

      // Ensure query state is "in progress" (can be already if connection.write failed on a
      // previous node and we're retrying)
      while (true) {
        QueryState previous = queryStateRef.get();
        if (previous.isCancelled()) {
          connection.release();
          return;
        }
        if (previous.inProgress || queryStateRef.compareAndSet(previous, previous.startNext()))
          break;
      }

      // Keep the handler in a local: the volatile field may be reset by another write() for this
      // execution (e.g. a response processed before we get to startTimeout()), and re-reading it
      // could yield null or a handler that belongs to a different connection.
      Connection.ResponseHandler handler =
          connection.write(responseCallback, statement.getReadTimeoutMillis(), false);
      connectionHandler = handler;
      // Only start the timeout when we're sure connectionHandler is set. This avoids an edge case
      // where onTimeout() was triggered *before* the call to connection.write had returned.
      handler.startTimeout();

      // Note that we could have already received the response here (so onSet() / onException()
      // would have been called). This is why we only test for CANCELLED_WHILE_IN_PROGRESS below.

      // If cancel() was called after we set the state to "in progress", but before
      // connection.write had completed, it might have missed the new value of connectionHandler.
      // So make sure that cancelHandler() gets called here (we might call it twice, but it knows
      // how to deal with it).
      if (queryStateRef.get() == QueryState.CANCELLED_WHILE_IN_PROGRESS
          && handler.cancelHandler()) {
        connection.release();
      }
    }

    /**
     * Decides what to do after a request error: asks the retry policy if the statement is
     * idempotent, rethrows otherwise. Also updates the error metrics that match the exception
     * type and the decision.
     *
     * @param exception the error that occurred
     * @return the retry decision
     */
    private RetryPolicy.RetryDecision computeRetryDecisionOnRequestError(
        DriverException exception) {
      boolean idempotent =
          statement.isIdempotentWithDefault(manager.cluster.getConfiguration().getQueryOptions());
      RetryPolicy.RetryDecision decision =
          idempotent
              ? retryPolicy()
                  .onRequestError(statement, request().consistency(), exception, retriesByPolicy)
              : RetryPolicy.RetryDecision.rethrow();
      if (!metricsEnabled()) {
        return decision;
      }

      boolean retried = decision.getType() == Type.RETRY;
      boolean ignored = decision.getType() == Type.IGNORE;
      if (exception instanceof OperationTimedOutException) {
        metrics().getErrorMetrics().getClientTimeouts().inc();
        if (retried) {
          metrics().getErrorMetrics().getRetriesOnClientTimeout().inc();
        }
        if (ignored) {
          metrics().getErrorMetrics().getIgnoresOnClientTimeout().inc();
        }
      } else if (exception instanceof ConnectionException) {
        metrics().getErrorMetrics().getConnectionErrors().inc();
        if (retried) {
          metrics().getErrorMetrics().getRetriesOnConnectionError().inc();
        }
        if (ignored) {
          metrics().getErrorMetrics().getIgnoresOnConnectionError().inc();
        }
      } else {
        metrics().getErrorMetrics().getOthers().inc();
        if (retried) {
          metrics().getErrorMetrics().getRetriesOnOtherErrors().inc();
        }
        if (ignored) {
          metrics().getErrorMetrics().getIgnoresOnOtherErrors().inc();
        }
      }
      return decision;
    }

    /**
     * Applies a retry decision: retries (on the same or the next host), completes the request
     * with the exception, or completes it with an empty result.
     *
     * @param retryDecision the decision to apply
     * @param connection the connection the error was received on
     * @param exceptionToReport the error that led to the decision
     */
    private void processRetryDecision(
        RetryPolicy.RetryDecision retryDecision,
        Connection connection,
        Exception exceptionToReport) {
      switch (retryDecision.getType()) {
        case RETRY:
          {
            retriesByPolicy++;
            if (logger.isDebugEnabled()) {
              logger.debug(
                  "[{}] Doing retry {} for query {} at consistency {}",
                  id,
                  retriesByPolicy,
                  statement,
                  retryDecision.getRetryConsistencyLevel());
            }
            if (metricsEnabled()) {
              metrics().getErrorMetrics().getRetries().inc();
            }
            boolean sameHost = retryDecision.isRetryCurrent();
            // log error for the current host if we are switching to another one
            if (!sameHost) {
              logError(connection.address, exceptionToReport);
            }
            retry(sameHost, retryDecision.getRetryConsistencyLevel());
            break;
          }
        case RETHROW:
          {
            setFinalException(connection, exceptionToReport);
            break;
          }
        case IGNORE:
          {
            if (metricsEnabled()) {
              metrics().getErrorMetrics().getIgnores().inc();
            }
            // Ignoring the error is reported to the caller as an empty result.
            setFinalResult(connection, new Responses.Result.Void());
            break;
          }
      }
    }

    /**
     * Retries the query, unless this execution has been cancelled.
     *
     * @param retryCurrent true to retry on the current host first, false to go straight to the
     *     next host of the query plan
     * @param newConsistencyLevel consistency level to use from now on, or null to keep the
     *     current one
     */
    private void retry(final boolean retryCurrent, ConsistencyLevel newConsistencyLevel) {
      final Host sameHost = current;
      if (newConsistencyLevel != null) {
        this.retryConsistencyLevel = newConsistencyLevel;
      }

      if (queryStateRef.get().isCancelled()) {
        return;
      }

      // Retrying on the current host succeeds only if that host still accepts the query.
      if (retryCurrent && query(sameHost)) {
        return;
      }
      findNextHostAndQuery();
    }

    /**
     * Logs an error at debug level and records it against the host address in the handler's
     * error map, creating the map on first use.
     *
     * @param address address of the host that failed
     * @param exception the error to record
     */
    private void logError(InetSocketAddress address, Throwable exception) {
      logger.debug("[{}] Error querying {} : {}", id, address, exception.toString());
      // Several executions may report errors concurrently: the map is created at most once,
      // under the handler's monitor.
      if (errors == null) {
        synchronized (RequestHandler.this) {
          if (errors == null) errors = new ConcurrentHashMap();
        }
      }
      errors.put(address, exception);
    }

    /**
     * Cancels this execution. Atomically moves the query state to one of the two cancelled
     * states (remembering whether a request was in progress), which causes any further operation
     * to abort. If a request was in flight, its response handler is cancelled and the connection
     * released. The cancellation is then reported for the queried host, if there is one and the
     * statement is not Statement.DEFAULT. Calling this on an already cancelled execution is a
     * no-op.
     */
    void cancel() {
      while (true) {
        QueryState previous = queryStateRef.get();
        if (previous.isCancelled()) {
          return;
        }

        // We want to remember whether a request was in progress, so there are two cancel states.
        boolean wasInProgress = previous.inProgress;
        QueryState cancelled =
            wasInProgress
                ? QueryState.CANCELLED_WHILE_IN_PROGRESS
                : QueryState.CANCELLED_WHILE_COMPLETE;
        if (!queryStateRef.compareAndSet(previous, cancelled)) {
          // Lost a race with another state change: re-read the state and try again.
          continue;
        }

        if (wasInProgress) {
          if (logger.isTraceEnabled()) logger.trace("[{}] Cancelled while in progress", id);
          // Read the volatile field once: write() resets it to null when it starts, so checking
          // and then dereferencing the field itself could fail between the two reads.
          // The handler should be non-null, but we might miss the update if we're racing with
          // write(). If it's still null, this will be handled by re-checking queryStateRef at the
          // end of write().
          Connection.ResponseHandler handler = connectionHandler;
          if (handler != null && handler.cancelHandler()) {
            handler.connection.release();
          }
        } else {
          if (logger.isTraceEnabled()) logger.trace("[{}] Cancelled while complete", id);
        }

        Host queriedHost = current;
        if (queriedHost != null && statement != Statement.DEFAULT) {
          manager.cluster.manager.reportQuery(
              queriedHost,
              statement,
              CancelledSpeculativeExecutionException.INSTANCE,
              System.nanoTime() - startTime);
        }
        return;
      }
    }

    /**
     * Returns the request to send: the original one, or a copy at the retry consistency level
     * when a retry asked for a level different from the original request's.
     */
    @Override
    public Message.Request request() {
      if (retryConsistencyLevel == null || retryConsistencyLevel == request.consistency()) {
        return request;
      }
      return request.copy(retryConsistencyLevel);
    }

    @Override
    public void onSet(
        Connection connection, Message.Response response, long latency, int retryCount) {
      QueryState queryState = queryStateRef.get();
      if (!queryState.isInProgressAt(retryCount)
          || !queryStateRef.compareAndSet(queryState, queryState.complete())) {
        logger.debug(
            "onSet triggered but the response was completed by another thread, cancelling (retryCount = {}, queryState = {}, queryStateRef = {})",
            retryCount,
            queryState,
            queryStateRef.get());
        return;
      }

      Host queriedHost = current;
      Exception exceptionToReport = null;
      try {
        switch (response.type) {
          case RESULT:
            connection.release();
            setFinalResult(connection, response);
            break;
          case ERROR:
            Responses.Error err = (Responses.Error) response;
            exceptionToReport = err.asException(connection.address);
            RetryPolicy.RetryDecision retry = null;
            RetryPolicy retryPolicy = retryPolicy();
            switch (err.code) {
              case READ_TIMEOUT:
                connection.release();
                assert err.infos instanceof ReadTimeoutException;
                ReadTimeoutException rte = (ReadTimeoutException) err.infos;
                retry =
                    retryPolicy.onReadTimeout(
                        statement,
                        rte.getConsistencyLevel(),
                        rte.getRequiredAcknowledgements(),
                        rte.getReceivedAcknowledgements(),
                        rte.wasDataRetrieved(),
                        retriesByPolicy);
                if (metricsEnabled()) {
                  metrics().getErrorMetrics().getReadTimeouts().inc();
                  if (retry.getType() == Type.RETRY)
                    metrics().getErrorMetrics().getRetriesOnReadTimeout().inc();
                  if (retry.getType() == Type.IGNORE)
                    metrics().getErrorMetrics().getIgnoresOnReadTimeout().inc();
                }
                break;
              case WRITE_TIMEOUT:
                connection.release();
                assert err.infos instanceof WriteTimeoutException;
                WriteTimeoutException wte = (WriteTimeoutException) err.infos;
                if (statement.isIdempotentWithDefault(
                    manager.cluster.getConfiguration().getQueryOptions()))
                  retry =
                      retryPolicy.onWriteTimeout(
                          statement,
                          wte.getConsistencyLevel(),
                          wte.getWriteType(),
                          wte.getRequiredAcknowledgements(),
                          wte.getReceivedAcknowledgements(),
                          retriesByPolicy);
                else {
                  retry = RetryPolicy.RetryDecision.rethrow();
                }
                if (metricsEnabled()) {
                  metrics().getErrorMetrics().getWriteTimeouts().inc();
                  if (retry.getType() == Type.RETRY)
                    metrics().getErrorMetrics().getRetriesOnWriteTimeout().inc();
                  if (retry.getType() == Type.IGNORE)
                    metrics().getErrorMetrics().getIgnoresOnWriteTimeout().inc();
                }
                break;
              case UNAVAILABLE:
                connection.release();
                assert err.infos instanceof UnavailableException;
                UnavailableException ue = (UnavailableException) err.infos;
                retry =
                    retryPolicy.onUnavailable(
                        statement,
                        ue.getConsistencyLevel(),
                        ue.getRequiredReplicas(),
                        ue.getAliveReplicas(),
                        retriesByPolicy);
                if (metricsEnabled()) {
                  metrics().getErrorMetrics().getUnavailables().inc();
                  if (retry.getType() == Type.RETRY)
                    metrics().getErrorMetrics().getRetriesOnUnavailable().inc();
                  if (retry.getType() == Type.IGNORE)
                    metrics().getErrorMetrics().getIgnoresOnUnavailable().inc();
                }
                break;
              case OVERLOADED:
                connection.release();
                assert exceptionToReport instanceof OverloadedException;
                logger.warn("Host {} is overloaded.", connection.address);
                retry = computeRetryDecisionOnRequestError((OverloadedException) exceptionToReport);
                break;
              case SERVER_ERROR:
                connection.release();
                assert exceptionToReport instanceof ServerError;
                logger.warn(
                    "{} replied with server error ({}), defuncting connection.",
                    connection.address,
                    err.message);
                // Defunct connection
                connection.defunct(exceptionToReport);
                retry = computeRetryDecisionOnRequestError((ServerError) exceptionToReport);
                break;
              case IS_BOOTSTRAPPING:
                connection.release();
                assert exceptionToReport instanceof BootstrappingException;
                logger.error(
                    "Query sent to {} but it is bootstrapping. This shouldn't happen but trying next host.",
                    connection.address);
                if (metricsEnabled()) {
                  metrics().getErrorMetrics().getOthers().inc();
                }
                logError(connection.address, exceptionToReport);
                retry(false, null);
                return;
              case UNPREPARED:
                // Do not release connection yet, because we might reuse it to send the PREPARE
                // message (see write() call below)
                assert err.infos instanceof MD5Digest;
                MD5Digest id = (MD5Digest) err.infos;
                PreparedStatement toPrepare = manager.cluster.manager.preparedQueries.get(id);
                if (toPrepare == null) {
                  // This shouldn't happen
                  connection.release();
                  String msg = String.format("Tried to execute unknown prepared query %s", id);
                  logger.error(msg);
                  setFinalException(connection, new DriverInternalError(msg));
                  return;
                }

                String currentKeyspace = connection.keyspace();
                String prepareKeyspace = toPrepare.getQueryKeyspace();
                if (prepareKeyspace != null
                    && (currentKeyspace == null || !currentKeyspace.equals(prepareKeyspace))) {
                  // This shouldn't happen in normal use, because a user shouldn't try to execute
                  // a prepared statement with the wrong keyspace set.
                  // Fail fast (we can't change the keyspace to reprepare, because we're using a
                  // pooled connection
                  // that's shared with other requests).
                  connection.release();
                  throw new IllegalStateException(
                      String.format(
                          "Statement was prepared on keyspace %s, can't execute it on %s (%s)",
                          toPrepare.getQueryKeyspace(),
                          connection.keyspace(),
                          toPrepare.getQueryString()));
                }

                logger.info(
                    "Query {} is not prepared on {}, preparing before retrying executing. "
                        + "Seeing this message a few times is fine, but seeing it a lot may be source of performance problems",
                    toPrepare.getQueryString(),
                    connection.address);

                write(connection, prepareAndRetry(toPrepare.getQueryString()));
                // we're done for now, the prepareAndRetry callback will handle the rest
                return;
              case READ_FAILURE:
                assert exceptionToReport instanceof ReadFailureException;
                connection.release();
                retry =
                    computeRetryDecisionOnRequestError((ReadFailureException) exceptionToReport);
                break;
              case WRITE_FAILURE:
                assert exceptionToReport instanceof WriteFailureException;
                connection.release();
                if (statement.isIdempotentWithDefault(
                    manager.cluster.getConfiguration().getQueryOptions())) {
                  retry =
                      computeRetryDecisionOnRequestError((WriteFailureException) exceptionToReport);
                } else {
                  retry = RetryPolicy.RetryDecision.rethrow();
                }
                break;
              default:
                connection.release();
                if (metricsEnabled()) metrics().getErrorMetrics().getOthers().inc();
                break;
            }

            if (retry == null) setFinalResult(connection, response);
            else {
              processRetryDecision(retry, connection, exceptionToReport);
            }
            break;
          default:
            connection.release();
            setFinalResult(connection, response);
            break;
        }
      } catch (Exception e) {
        exceptionToReport = e;
        setFinalException(connection, e);
      } finally {
        if (queriedHost != null && statement != Statement.DEFAULT) {
          manager.cluster.manager.reportQuery(queriedHost, statement, exceptionToReport, latency);
        }
      }
    }

    /**
     * Builds the callback used to send a PREPARE message for {@code toPrepare} and then retry the
     * original request.
     *
     * <p>The returned callback:
     *
     * <ul>
     *   <li>sends a {@code Requests.Prepare} carrying the original statement's outgoing payload;
     *   <li>reports this execution's own retry count;
     *   <li>on a {@code PREPARED} result calls {@code retry(true, null)}, and on any other result,
     *       an error response or a timeout logs the problem and calls {@code retry(false, null)};
     *   <li>hands any other response type to {@code setFinalResult};
     *   <li>forwards exceptions to this execution's own {@code onException}.
     * </ul>
     *
     * @param toPrepare the query string to prepare
     * @return the callback to pass to {@code write} together with the connection
     */
    private Connection.ResponseCallback prepareAndRetry(final String toPrepare) {
      // do not bother inspecting retry policy at this step, no other decision
      // makes sense than retry on the same host if the query was prepared,
      // or on another host, if an error/timeout occurred.
      // The original request hasn't been executed so far, so there is no risk
      // of re-executing non-idempotent statements.
      return new Connection.ResponseCallback() {

        @Override
        public Message.Request request() {
          Requests.Prepare request = new Requests.Prepare(toPrepare);
          // propagate the original custom payload in the prepare request
          request.setCustomPayload(statement.getOutgoingPayload());
          return request;
        }

        @Override
        public int retryCount() {
          // The PREPARE round-trip shares the retry counter of the enclosing execution.
          return SpeculativeExecution.this.retryCount();
        }

        @Override
        public void onSet(
            Connection connection, Message.Response response, long latency, int retryCount) {
          // Only proceed if this retry is still in progress and this thread wins the
          // compare-and-set that marks it complete; otherwise another callback got there first.
          QueryState queryState = queryStateRef.get();
          if (!queryState.isInProgressAt(retryCount)
              || !queryStateRef.compareAndSet(queryState, queryState.complete())) {
            logger.debug(
                "onSet triggered but the response was completed by another thread, cancelling (retryCount = {}, queryState = {}, queryStateRef = {})",
                retryCount,
                queryState,
                queryStateRef.get());
            return;
          }

          connection.release();

          switch (response.type) {
            case RESULT:
              if (((Responses.Result) response).kind == Responses.Result.Kind.PREPARED) {
                logger.debug("Scheduling retry now that query is prepared");
                // NOTE(review): per the comment at the top of this method, `true` presumably
                // means "retry on the same host" -- confirm against retry(boolean, ...).
                retry(true, null);
              } else {
                logError(
                    connection.address,
                    new DriverException("Got unexpected response to prepare message: " + response));
                retry(false, null);
              }
              break;
            case ERROR:
              logError(
                  connection.address,
                  new DriverException("Error preparing query, got " + response));
              if (metricsEnabled()) metrics().getErrorMetrics().getOthers().inc();
              retry(false, null);
              break;
            default:
              // Something's wrong, so we return but we let setFinalResult propagate the exception
              SpeculativeExecution.this.setFinalResult(connection, response);
              break;
          }
        }

        @Override
        public void onException(
            Connection connection, Exception exception, long latency, int retryCount) {
          // Same handling as for the original request: delegate to the enclosing execution.
          SpeculativeExecution.this.onException(connection, exception, latency, retryCount);
        }

        @Override
        public boolean onTimeout(Connection connection, long latency, int retryCount) {
          // Same completion guard as onSet; returns false when this timeout lost the race.
          QueryState queryState = queryStateRef.get();
          if (!queryState.isInProgressAt(retryCount)
              || !queryStateRef.compareAndSet(queryState, queryState.complete())) {
            logger.debug(
                "onTimeout triggered but the response was completed by another thread, cancelling (retryCount = {}, queryState = {}, queryStateRef = {})",
                retryCount,
                queryState,
                queryStateRef.get());
            return false;
          }
          connection.release();
          logError(
              connection.address,
              new OperationTimedOutException(
                  connection.address, "Timed out waiting for response to PREPARE message"));
          retry(false, null);
          return true;
        }
      };
    }

    /**
     * Handles an exception reported for the request written on {@code connection}.
     *
     * <p>Nothing happens unless this retry is still in progress and this thread wins the
     * compare-and-set marking it complete. After that the connection is released; a {@link
     * ConnectionException} goes through the retry decision, any other exception becomes the final
     * exception. The query is reported for the queried host in all cases once the guard has
     * passed.
     */
    @Override
    public void onException(
        Connection connection, Exception exception, long latency, int retryCount) {
      QueryState observed = queryStateRef.get();
      boolean claimed =
          observed.isInProgressAt(retryCount)
              && queryStateRef.compareAndSet(observed, observed.complete());
      if (!claimed) {
        logger.debug(
            "onException triggered but the response was completed by another thread, cancelling (retryCount = {}, queryState = {}, queryStateRef = {})",
            retryCount,
            observed,
            queryStateRef.get());
        return;
      }

      Host queriedHost = current;
      try {
        connection.release();

        if (exception instanceof ConnectionException) {
          ConnectionException connectionFailure = (ConnectionException) exception;
          processRetryDecision(
              computeRetryDecisionOnRequestError(connectionFailure), connection, exception);
        } else {
          setFinalException(connection, exception);
        }
      } catch (Exception e) {
        // Not expected, but the callback must still be signalled rather than left hanging
        // forever.
        DriverInternalError internalError =
            new DriverInternalError(
                "An unexpected error happened while handling exception " + exception, e);
        setFinalException(null, internalError);
      } finally {
        boolean shouldReport = queriedHost != null && statement != Statement.DEFAULT;
        if (shouldReport) {
          manager.cluster.manager.reportQuery(queriedHost, statement, exception, latency);
        }
      }
    }

    /**
     * Handles a timeout for the request written on {@code connection}.
     *
     * <p>The same completion guard as the other callbacks applies: if this retry is no longer in
     * progress, or another thread completed it first, the timeout is ignored.
     *
     * @return {@code false} if the timeout was ignored because the retry was already completed,
     *     {@code true} otherwise
     */
    @Override
    public boolean onTimeout(Connection connection, long latency, int retryCount) {
      QueryState observed = queryStateRef.get();
      boolean claimed =
          observed.isInProgressAt(retryCount)
              && queryStateRef.compareAndSet(observed, observed.complete());
      if (!claimed) {
        logger.debug(
            "onTimeout triggered but the response was completed by another thread, cancelling (retryCount = {}, queryState = {}, queryStateRef = {})",
            retryCount,
            observed,
            queryStateRef.get());
        return false;
      }

      Host queriedHost = current;

      String timeoutMessage = "Timed out waiting for server response";
      OperationTimedOutException timeoutException =
          new OperationTimedOutException(connection.address, timeoutMessage);

      try {
        connection.release();

        processRetryDecision(
            computeRetryDecisionOnRequestError(timeoutException), connection, timeoutException);
      } catch (Exception e) {
        // Not expected, but the callback must still be signalled rather than left hanging
        // forever.
        DriverInternalError internalError =
            new DriverInternalError("An unexpected error happened while handling timeout", e);
        setFinalException(null, internalError);
      } finally {
        boolean shouldReport = queriedHost != null && statement != Statement.DEFAULT;
        if (shouldReport) {
          manager.cluster.manager.reportQuery(queriedHost, statement, timeoutException, latency);
        }
      }
      return true;
    }

    /** Returns the retry count held by the state currently stored in {@code queryStateRef}. */
    @Override
    public int retryCount() {
      QueryState snapshot = queryStateRef.get();
      return snapshot.retryCount;
    }

    /**
     * Delegates to the enclosing {@code RequestHandler}'s {@code setFinalException}, passing this
     * execution as the one the exception comes from.
     */
    private void setFinalException(Connection connection, Exception exception) {
      final RequestHandler handler = RequestHandler.this;
      handler.setFinalException(this, connection, exception);
    }

    /**
     * Delegates to the enclosing {@code RequestHandler}'s {@code setFinalResult}, passing this
     * execution as the one the response comes from.
     */
    private void setFinalResult(Connection connection, Message.Response response) {
      final RequestHandler handler = RequestHandler.this;
      handler.setFinalResult(this, connection, response);
    }
  }

  /**
   * Immutable snapshot of a SpeculativeExecution's progress: a retry counter paired with a flag
   * saying whether that retry is currently in flight.
   *
   * <p>It exists to prevent races between request completion (success or error) and timeout. A
   * retry is in progress from the moment the request is written to the connection until a response
   * (see onSet or onException) or a timeout (see onTimeout) arrives. Each call to {@link
   * #startNext()} yields a state whose count is one higher.
   *
   * <p>The two {@code CANCELLED_*} constants are sentinels; {@link #isCancelled()} recognises them
   * by identity.
   */
  static class QueryState {
    static final QueryState INITIAL = new QueryState(-1, false);
    static final QueryState CANCELLED_WHILE_IN_PROGRESS = new QueryState(Integer.MIN_VALUE, false);
    static final QueryState CANCELLED_WHILE_COMPLETE = new QueryState(Integer.MIN_VALUE + 1, false);

    final int retryCount;
    final boolean inProgress;

    private QueryState(int retryCount, boolean inProgress) {
      this.inProgress = inProgress;
      this.retryCount = retryCount;
    }

    /** @return whether this state is in progress and its count equals {@code retryCount} */
    boolean isInProgressAt(int retryCount) {
      if (!inProgress) {
        return false;
      }
      return retryCount == this.retryCount;
    }

    /** @return a new state with the same count, no longer in progress */
    QueryState complete() {
      assert inProgress;
      final int sameCount = retryCount;
      return new QueryState(sameCount, false);
    }

    /** @return a new in-progress state whose count is this one's plus one */
    QueryState startNext() {
      assert !inProgress;
      final int nextCount = retryCount + 1;
      return new QueryState(nextCount, true);
    }

    /** @return whether this instance is one of the two cancellation sentinels */
    public boolean isCancelled() {
      if (this == CANCELLED_WHILE_IN_PROGRESS) {
        return true;
      }
      return this == CANCELLED_WHILE_COMPLETE;
    }

    @Override
    public String toString() {
      final String template = "QueryState(count=%d, inProgress=%s, cancelled=%s)";
      return String.format(template, retryCount, inProgress, isCancelled());
    }
  }

  /**
   * Wraps the iterator returned by {@link com.datastax.driver.core.policies.LoadBalancingPolicy}
   * to make it safe for concurrent access by multiple threads.
   *
   * <p>{@link #next()} is {@code synchronized}, so the {@code hasNext()} check and the following
   * {@code next()} call on the wrapped iterator are not interleaved between callers of the same
   * plan.
   */
  static class QueryPlan {
    // Parameterized with Host: with a raw Iterator the conditional in next() has static type
    // Object and cannot be returned as Host.
    private final Iterator<Host> iterator;

    /** @param iterator the iterator of hosts to wrap */
    QueryPlan(Iterator<Host> iterator) {
      this.iterator = iterator;
    }

    /** @return the next host, or {@code null} if there are no more hosts */
    synchronized Host next() {
      return iterator.hasNext() ? iterator.next() : null;
    }
  }
}