// com.datastax.oss.driver.internal.core.control.ControlConnection
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.datastax.oss.driver.internal.core.control;
import com.datastax.oss.driver.api.core.AllNodesFailedException;
import com.datastax.oss.driver.api.core.AsyncAutoCloseable;
import com.datastax.oss.driver.api.core.auth.AuthenticationException;
import com.datastax.oss.driver.api.core.config.DefaultDriverOption;
import com.datastax.oss.driver.api.core.config.DriverConfig;
import com.datastax.oss.driver.api.core.connection.ReconnectionPolicy;
import com.datastax.oss.driver.api.core.loadbalancing.NodeDistance;
import com.datastax.oss.driver.api.core.metadata.Node;
import com.datastax.oss.driver.api.core.metadata.NodeState;
import com.datastax.oss.driver.internal.core.channel.ChannelEvent;
import com.datastax.oss.driver.internal.core.channel.DriverChannel;
import com.datastax.oss.driver.internal.core.channel.DriverChannelOptions;
import com.datastax.oss.driver.internal.core.channel.EventCallback;
import com.datastax.oss.driver.internal.core.context.InternalDriverContext;
import com.datastax.oss.driver.internal.core.metadata.DefaultTopologyMonitor;
import com.datastax.oss.driver.internal.core.metadata.DistanceEvent;
import com.datastax.oss.driver.internal.core.metadata.MetadataManager;
import com.datastax.oss.driver.internal.core.metadata.NodeStateEvent;
import com.datastax.oss.driver.internal.core.metadata.TopologyEvent;
import com.datastax.oss.driver.internal.core.util.Loggers;
import com.datastax.oss.driver.internal.core.util.concurrent.CompletableFutures;
import com.datastax.oss.driver.internal.core.util.concurrent.Reconnection;
import com.datastax.oss.driver.internal.core.util.concurrent.RunOrSchedule;
import com.datastax.oss.driver.internal.core.util.concurrent.UncaughtExceptions;
import com.datastax.oss.driver.shaded.guava.common.collect.ImmutableList;
import com.datastax.oss.protocol.internal.Message;
import com.datastax.oss.protocol.internal.ProtocolConstants;
import com.datastax.oss.protocol.internal.response.Event;
import com.datastax.oss.protocol.internal.response.event.SchemaChangeEvent;
import com.datastax.oss.protocol.internal.response.event.StatusChangeEvent;
import com.datastax.oss.protocol.internal.response.event.TopologyChangeEvent;
import edu.umd.cs.findbugs.annotations.NonNull;
import com.datastax.oss.driver.shaded.netty.util.concurrent.EventExecutor;
import java.util.AbstractMap.SimpleEntry;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Queue;
import java.util.WeakHashMap;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.CompletionStage;
import java.util.function.Consumer;
import net.jcip.annotations.ThreadSafe;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Maintains a dedicated connection to a Cassandra node for administrative queries.
 *
 * <p>If the control node goes down, a reconnection is triggered. The control node is chosen
 * randomly among the contact points at startup, or according to the load balancing policy for later
 * reconnections.
 *
 * <p>The control connection is used by:
 *
 * <ul>
 *   <li>{@link DefaultTopologyMonitor} to determine cluster connectivity and retrieve node
 *       metadata;
 *   <li>{@link MetadataManager} to run schema metadata queries.
 * </ul>
 */
@ThreadSafe
public class ControlConnection implements EventCallback, AsyncAutoCloseable {
private static final Logger LOG = LoggerFactory.getLogger(ControlConnection.class);
private final InternalDriverContext context;
private final String logPrefix;
private final EventExecutor adminExecutor;
private final SingleThreaded singleThreaded;
// The single channel used by this connection. This field is accessed concurrently, but only
// mutated on adminExecutor (by SingleThreaded methods)
private volatile DriverChannel channel;
public ControlConnection(InternalDriverContext context) {
this.context = context;
this.logPrefix = context.getSessionName();
this.adminExecutor = context.getNettyOptions().adminEventExecutorGroup().next();
this.singleThreaded = new SingleThreaded(context);
}
/**
* Initializes the control connection. If it is already initialized, this is a no-op and all
* parameters are ignored.
*
* @param listenToClusterEvents whether to register for TOPOLOGY_CHANGE and STATUS_CHANGE events.
* If the control connection has already initialized with another value, this is ignored.
* SCHEMA_CHANGE events are always registered.
* @param reconnectOnFailure whether to schedule a reconnection if the initial attempt fails (if
* true, the returned future will only complete once the reconnection has succeeded).
* @param useInitialReconnectionSchedule if no node can be reached, the type of reconnection
* schedule to use. In other words, the value that will be passed to {@link
* ReconnectionPolicy#newControlConnectionSchedule(boolean)}. Note that this parameter is only
* relevant if {@code reconnectOnFailure} is true, otherwise it is not used.
*/
public CompletionStage init(
boolean listenToClusterEvents,
boolean reconnectOnFailure,
boolean useInitialReconnectionSchedule) {
RunOrSchedule.on(
adminExecutor,
() ->
singleThreaded.init(
listenToClusterEvents, reconnectOnFailure, useInitialReconnectionSchedule));
return singleThreaded.initFuture;
}
public CompletionStage initFuture() {
return singleThreaded.initFuture;
}
public boolean isInit() {
return singleThreaded.initFuture.isDone();
}
/**
* The channel currently used by this control connection. This is modified concurrently in the
* event of a reconnection, so it may occasionally return a closed channel (clients should be
* ready to deal with that).
*/
public DriverChannel channel() {
return channel;
}
/**
* Forces an immediate reconnect: if we were connected to a node, that connection will be closed;
* if we were already reconnecting, the next attempt is started immediately, without waiting for
* the next scheduled interval; in all cases, a new query plan is fetched from the load balancing
* policy, and each node in it will be tried in sequence.
*/
public void reconnectNow() {
RunOrSchedule.on(adminExecutor, singleThreaded::reconnectNow);
}
@NonNull
@Override
public CompletionStage closeFuture() {
return singleThreaded.closeFuture;
}
@NonNull
@Override
public CompletionStage closeAsync() {
// Control queries are never critical, so there is no graceful close.
return forceCloseAsync();
}
@NonNull
@Override
public CompletionStage forceCloseAsync() {
RunOrSchedule.on(adminExecutor, singleThreaded::forceClose);
return singleThreaded.closeFuture;
}
@Override
public void onEvent(Message eventMessage) {
if (!(eventMessage instanceof Event)) {
LOG.warn("[{}] Unsupported event class: {}", logPrefix, eventMessage.getClass().getName());
} else {
LOG.debug("[{}] Processing incoming event {}", logPrefix, eventMessage);
Event event = (Event) eventMessage;
switch (event.type) {
case ProtocolConstants.EventType.TOPOLOGY_CHANGE:
processTopologyChange(event);
break;
case ProtocolConstants.EventType.STATUS_CHANGE:
processStatusChange(event);
break;
case ProtocolConstants.EventType.SCHEMA_CHANGE:
processSchemaChange(event);
break;
default:
LOG.warn("[{}] Unsupported event type: {}", logPrefix, event.type);
}
}
}
private void processTopologyChange(Event event) {
TopologyChangeEvent tce = (TopologyChangeEvent) event;
switch (tce.changeType) {
case ProtocolConstants.TopologyChangeType.NEW_NODE:
context.getEventBus().fire(TopologyEvent.suggestAdded(tce.address));
break;
case ProtocolConstants.TopologyChangeType.REMOVED_NODE:
context.getEventBus().fire(TopologyEvent.suggestRemoved(tce.address));
break;
default:
LOG.warn("[{}] Unsupported topology change type: {}", logPrefix, tce.changeType);
}
}
private void processStatusChange(Event event) {
StatusChangeEvent sce = (StatusChangeEvent) event;
switch (sce.changeType) {
case ProtocolConstants.StatusChangeType.UP:
context.getEventBus().fire(TopologyEvent.suggestUp(sce.address));
break;
case ProtocolConstants.StatusChangeType.DOWN:
context.getEventBus().fire(TopologyEvent.suggestDown(sce.address));
break;
default:
LOG.warn("[{}] Unsupported status change type: {}", logPrefix, sce.changeType);
}
}
private void processSchemaChange(Event event) {
SchemaChangeEvent sce = (SchemaChangeEvent) event;
context
.getMetadataManager()
.refreshSchema(sce.keyspace, false, false)
.whenComplete(
(metadata, error) -> {
if (error != null) {
Loggers.warnWithException(
LOG,
"[{}] Unexpected error while refreshing schema for a SCHEMA_CHANGE event, "
+ "keeping previous version",
logPrefix,
error);
}
});
}
private class SingleThreaded {
private final InternalDriverContext context;
private final DriverConfig config;
private final CompletableFuture initFuture = new CompletableFuture<>();
private boolean initWasCalled;
private final CompletableFuture closeFuture = new CompletableFuture<>();
private boolean closeWasCalled;
private final ReconnectionPolicy reconnectionPolicy;
private final Reconnection reconnection;
private DriverChannelOptions channelOptions;
// The last events received for each node
private final Map lastDistanceEvents = new WeakHashMap<>();
private final Map lastStateEvents = new WeakHashMap<>();
private SingleThreaded(InternalDriverContext context) {
this.context = context;
this.config = context.getConfig();
this.reconnectionPolicy = context.getReconnectionPolicy();
this.reconnection =
new Reconnection(
logPrefix,
adminExecutor,
() -> reconnectionPolicy.newControlConnectionSchedule(false),
this::reconnect);
// In "reconnect-on-init" mode, handle cancellation of the initFuture by user code
CompletableFutures.whenCancelled(
this.initFuture,
() -> {
LOG.debug("[{}] Init future was cancelled, stopping reconnection", logPrefix);
reconnection.stop();
});
context
.getEventBus()
.register(DistanceEvent.class, RunOrSchedule.on(adminExecutor, this::onDistanceEvent));
context
.getEventBus()
.register(NodeStateEvent.class, RunOrSchedule.on(adminExecutor, this::onStateEvent));
}
private void init(
boolean listenToClusterEvents,
boolean reconnectOnFailure,
boolean useInitialReconnectionSchedule) {
assert adminExecutor.inEventLoop();
if (initWasCalled) {
return;
}
initWasCalled = true;
try {
ImmutableList eventTypes = buildEventTypes(listenToClusterEvents);
LOG.debug("[{}] Initializing with event types {}", logPrefix, eventTypes);
channelOptions =
DriverChannelOptions.builder()
.withEvents(eventTypes, ControlConnection.this)
.withOwnerLogPrefix(logPrefix + "|control")
.build();
Queue nodes = context.getLoadBalancingPolicyWrapper().newQueryPlan();
connect(
nodes,
null,
() -> initFuture.complete(null),
error -> {
if (isAuthFailure(error)) {
LOG.warn(
"[{}] Authentication errors encountered on all contact points. Please check your authentication configuration.",
logPrefix);
}
if (reconnectOnFailure && !closeWasCalled) {
reconnection.start(
reconnectionPolicy.newControlConnectionSchedule(
useInitialReconnectionSchedule));
} else {
// Special case for the initial connection: reword to a more user-friendly error
// message
if (error instanceof AllNodesFailedException) {
error =
((AllNodesFailedException) error)
.reword(
"Could not reach any contact point, "
+ "make sure you've provided valid addresses");
}
initFuture.completeExceptionally(error);
}
});
} catch (Throwable t) {
initFuture.completeExceptionally(t);
}
}
private CompletionStage reconnect() {
assert adminExecutor.inEventLoop();
Queue nodes = context.getLoadBalancingPolicyWrapper().newQueryPlan();
CompletableFuture result = new CompletableFuture<>();
connect(
nodes,
null,
() -> {
result.complete(true);
onSuccessfulReconnect();
},
error -> result.complete(false));
return result;
}
private void connect(
Queue nodes,
List> errors,
Runnable onSuccess,
Consumer onFailure) {
assert adminExecutor.inEventLoop();
Node node = nodes.poll();
if (node == null) {
onFailure.accept(AllNodesFailedException.fromErrors(errors));
} else {
LOG.debug("[{}] Trying to establish a connection to {}", logPrefix, node);
context
.getChannelFactory()
.connect(node, channelOptions)
.whenCompleteAsync(
(channel, error) -> {
try {
DistanceEvent lastDistanceEvent = lastDistanceEvents.get(node);
NodeStateEvent lastStateEvent = lastStateEvents.get(node);
if (error != null) {
if (closeWasCalled || initFuture.isCancelled()) {
onSuccess.run(); // abort, we don't really care about the result
} else {
if (error instanceof AuthenticationException) {
Loggers.warnWithException(
LOG, "[{}] Authentication error", logPrefix, error);
} else {
if (config
.getDefaultProfile()
.getBoolean(DefaultDriverOption.CONNECTION_WARN_INIT_ERROR)) {
Loggers.warnWithException(
LOG,
"[{}] Error connecting to {}, trying next node",
logPrefix,
node,
error);
} else {
LOG.debug(
"[{}] Error connecting to {}, trying next node",
logPrefix,
node,
error);
}
}
List> newErrors =
(errors == null) ? new ArrayList<>() : errors;
newErrors.add(new SimpleEntry<>(node, error));
context.getEventBus().fire(ChannelEvent.controlConnectionFailed(node));
connect(nodes, newErrors, onSuccess, onFailure);
}
} else if (closeWasCalled || initFuture.isCancelled()) {
LOG.debug(
"[{}] New channel opened ({}) but the control connection was closed, closing it",
logPrefix,
channel);
channel.forceClose();
onSuccess.run();
} else if (lastDistanceEvent != null
&& lastDistanceEvent.distance == NodeDistance.IGNORED) {
LOG.debug(
"[{}] New channel opened ({}) but node became ignored, "
+ "closing and trying next node",
logPrefix,
channel);
channel.forceClose();
connect(nodes, errors, onSuccess, onFailure);
} else if (lastStateEvent != null
&& (lastStateEvent.newState == null /*(removed)*/
|| lastStateEvent.newState == NodeState.FORCED_DOWN)) {
LOG.debug(
"[{}] New channel opened ({}) but node was removed or forced down, "
+ "closing and trying next node",
logPrefix,
channel);
channel.forceClose();
connect(nodes, errors, onSuccess, onFailure);
} else {
LOG.debug("[{}] New channel opened {}", logPrefix, channel);
DriverChannel previousChannel = ControlConnection.this.channel;
ControlConnection.this.channel = channel;
if (previousChannel != null) {
// We were reconnecting: make sure previous channel gets closed (it may
// still be open if reconnection was forced)
LOG.debug(
"[{}] Forcefully closing previous channel {}", logPrefix, channel);
previousChannel.forceClose();
}
context.getEventBus().fire(ChannelEvent.channelOpened(node));
channel
.closeFuture()
.addListener(
f ->
adminExecutor
.submit(() -> onChannelClosed(channel, node))
.addListener(UncaughtExceptions::log));
onSuccess.run();
}
} catch (Exception e) {
Loggers.warnWithException(
LOG,
"[{}] Unexpected exception while processing channel init result",
logPrefix,
e);
}
},
adminExecutor);
}
}
private void onSuccessfulReconnect() {
// If reconnectOnFailure was true and we've never connected before, complete the future now to
// signal that the initialization is complete.
boolean isFirstConnection = initFuture.complete(null);
// Otherwise, perform a full refresh (we don't know how long we were disconnected)
if (!isFirstConnection) {
context
.getMetadataManager()
.refreshNodes()
.whenComplete(
(result, error) -> {
if (error != null) {
LOG.debug("[{}] Error while refreshing node list", logPrefix, error);
} else {
try {
// A failed node list refresh at startup is not fatal, so this might be the
// first successful refresh; make sure the LBP gets initialized (this is a
// no-op if it was initialized already).
context.getLoadBalancingPolicyWrapper().init();
context
.getMetadataManager()
.refreshSchema(null, false, true)
.whenComplete(
(metadata, schemaError) -> {
if (schemaError != null) {
Loggers.warnWithException(
LOG,
"[{}] Unexpected error while refreshing schema after a "
+ "successful reconnection, keeping previous version",
logPrefix,
schemaError);
}
});
} catch (Throwable t) {
Loggers.warnWithException(
LOG,
"[{}] Unexpected error on control connection reconnect",
logPrefix,
t);
}
}
});
}
}
private void onChannelClosed(DriverChannel channel, Node node) {
assert adminExecutor.inEventLoop();
if (!closeWasCalled) {
context.getEventBus().fire(ChannelEvent.channelClosed(node));
// If this channel is the current control channel, we must start a
// reconnection attempt to get a new control channel.
if (channel == ControlConnection.this.channel) {
LOG.debug(
"[{}] The current control channel {} was closed, scheduling reconnection",
logPrefix,
channel);
reconnection.start();
} else {
LOG.trace(
"[{}] A previous control channel {} was closed, reconnection not required",
logPrefix,
channel);
}
}
}
private void reconnectNow() {
assert adminExecutor.inEventLoop();
if (initWasCalled && !closeWasCalled) {
reconnection.reconnectNow(true);
}
}
private void onDistanceEvent(DistanceEvent event) {
assert adminExecutor.inEventLoop();
this.lastDistanceEvents.put(event.node, event);
if (event.distance == NodeDistance.IGNORED
&& channel != null
&& !channel.closeFuture().isDone()
&& event.node.getEndPoint().equals(channel.getEndPoint())) {
LOG.debug(
"[{}] Control node {} became IGNORED, reconnecting to a different node",
logPrefix,
event.node);
reconnectNow();
}
}
private void onStateEvent(NodeStateEvent event) {
assert adminExecutor.inEventLoop();
this.lastStateEvents.put(event.node, event);
if ((event.newState == null /*(removed)*/ || event.newState == NodeState.FORCED_DOWN)
&& channel != null
&& !channel.closeFuture().isDone()
&& event.node.getEndPoint().equals(channel.getEndPoint())) {
LOG.debug(
"[{}] Control node {} was removed or forced down, reconnecting to a different node",
logPrefix,
event.node);
reconnectNow();
}
}
private void forceClose() {
assert adminExecutor.inEventLoop();
if (closeWasCalled) {
return;
}
closeWasCalled = true;
LOG.debug("[{}] Starting shutdown", logPrefix);
reconnection.stop();
if (channel == null) {
LOG.debug("[{}] Shutdown complete", logPrefix);
closeFuture.complete(null);
} else {
channel
.forceClose()
.addListener(
f -> {
if (f.isSuccess()) {
LOG.debug("[{}] Shutdown complete", logPrefix);
closeFuture.complete(null);
} else {
closeFuture.completeExceptionally(f.cause());
}
});
}
}
}
private boolean isAuthFailure(Throwable error) {
if (error instanceof AllNodesFailedException) {
Collection> errors =
((AllNodesFailedException) error).getAllErrors().values();
if (errors.size() == 0) {
return false;
}
for (List nodeErrors : errors) {
for (Throwable nodeError : nodeErrors) {
if (!(nodeError instanceof AuthenticationException)) {
return false;
}
}
}
}
return true;
}
private static ImmutableList buildEventTypes(boolean listenClusterEvents) {
ImmutableList.Builder builder = ImmutableList.builder();
builder.add(ProtocolConstants.EventType.SCHEMA_CHANGE);
if (listenClusterEvents) {
builder
.add(ProtocolConstants.EventType.STATUS_CHANGE)
.add(ProtocolConstants.EventType.TOPOLOGY_CHANGE);
}
return builder.build();
}
}