All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.datastax.oss.driver.internal.core.metadata.DefaultTopologyMonitor Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.datastax.oss.driver.internal.core.metadata;

import com.datastax.dse.driver.api.core.metadata.DseNodeProperties;
import com.datastax.oss.driver.api.core.Version;
import com.datastax.oss.driver.api.core.config.DefaultDriverOption;
import com.datastax.oss.driver.api.core.config.DriverExecutionProfile;
import com.datastax.oss.driver.api.core.metadata.EndPoint;
import com.datastax.oss.driver.api.core.metadata.Node;
import com.datastax.oss.driver.internal.core.adminrequest.AdminRequestHandler;
import com.datastax.oss.driver.internal.core.adminrequest.AdminResult;
import com.datastax.oss.driver.internal.core.adminrequest.AdminRow;
import com.datastax.oss.driver.internal.core.adminrequest.UnexpectedResponseException;
import com.datastax.oss.driver.internal.core.channel.DriverChannel;
import com.datastax.oss.driver.internal.core.context.InternalDriverContext;
import com.datastax.oss.driver.internal.core.control.ControlConnection;
import com.datastax.oss.driver.internal.core.util.concurrent.CompletableFutures;
import com.datastax.oss.driver.shaded.guava.common.annotations.VisibleForTesting;
import com.datastax.oss.driver.shaded.guava.common.collect.ImmutableMap;
import com.datastax.oss.driver.shaded.guava.common.collect.ImmutableSet;
import com.datastax.oss.driver.shaded.guava.common.collect.Iterators;
import com.datastax.oss.protocol.internal.ProtocolConstants;
import com.datastax.oss.protocol.internal.response.Error;
import edu.umd.cs.findbugs.annotations.NonNull;
import edu.umd.cs.findbugs.annotations.Nullable;
import java.net.InetAddress;
import java.net.InetSocketAddress;
import java.net.SocketAddress;
import java.time.Duration;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.UUID;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.CompletionStage;
import net.jcip.annotations.ThreadSafe;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * The default topology monitor, based on {@link ControlConnection}.
 *
 * 

Note that event processing is implemented directly in the control connection, not here. */ @ThreadSafe public class DefaultTopologyMonitor implements TopologyMonitor { private static final Logger LOG = LoggerFactory.getLogger(DefaultTopologyMonitor.class); // Assume topology queries never need paging private static final int INFINITE_PAGE_SIZE = -1; // A few system.peers columns which get special handling below private static final String NATIVE_PORT = "native_port"; private static final String NATIVE_TRANSPORT_PORT = "native_transport_port"; private final String logPrefix; private final InternalDriverContext context; private final ControlConnection controlConnection; private final Duration timeout; private final boolean reconnectOnInit; private final CompletableFuture closeFuture; @VisibleForTesting volatile boolean isSchemaV2; @VisibleForTesting volatile int port = -1; public DefaultTopologyMonitor(InternalDriverContext context) { this.logPrefix = context.getSessionName(); this.context = context; this.controlConnection = context.getControlConnection(); DriverExecutionProfile config = context.getConfig().getDefaultProfile(); this.timeout = config.getDuration(DefaultDriverOption.CONTROL_CONNECTION_TIMEOUT); this.reconnectOnInit = config.getBoolean(DefaultDriverOption.RECONNECT_ON_INIT); this.closeFuture = new CompletableFuture<>(); // Set this to true initially, after the first refreshNodes is called this will either stay true // or be set to false; this.isSchemaV2 = true; } @Override public CompletionStage init() { if (closeFuture.isDone()) { return CompletableFutures.failedFuture(new IllegalStateException("closed")); } return controlConnection.init(true, reconnectOnInit, true); } @Override public CompletionStage initFuture() { return controlConnection.initFuture(); } @Override public CompletionStage> refreshNode(Node node) { if (closeFuture.isDone()) { return CompletableFutures.failedFuture(new IllegalStateException("closed")); } LOG.debug("[{}] Refreshing info for {}", logPrefix, node); DriverChannel channel = controlConnection.channel(); EndPoint localEndPoint = channel.getEndPoint(); if (node.getEndPoint().equals(channel.getEndPoint())) { // refreshNode is called for nodes that just came up. If the control node just came up, it // means the control connection just reconnected, which means we did a full node refresh. So // we don't need to process this call. LOG.debug("[{}] Ignoring refresh of control node", logPrefix); return CompletableFuture.completedFuture(Optional.empty()); } else if (node.getBroadcastAddress().isPresent()) { CompletionStage query; if (isSchemaV2) { query = query( channel, "SELECT * FROM " + getPeerTableName() + " WHERE peer = :address and peer_port = :port", ImmutableMap.of( "address", node.getBroadcastAddress().get().getAddress(), "port", node.getBroadcastAddress().get().getPort())); } else { query = query( channel, "SELECT * FROM " + getPeerTableName() + " WHERE peer = :address", ImmutableMap.of("address", node.getBroadcastAddress().get().getAddress())); } return query.thenApply(result -> firstPeerRowAsNodeInfo(result, localEndPoint)); } else { return query(channel, "SELECT * FROM " + getPeerTableName()) .thenApply(result -> findInPeers(result, node.getHostId(), localEndPoint)); } } @Override public CompletionStage> getNewNodeInfo(InetSocketAddress broadcastRpcAddress) { if (closeFuture.isDone()) { return CompletableFutures.failedFuture(new IllegalStateException("closed")); } LOG.debug("[{}] Fetching info for new node {}", logPrefix, broadcastRpcAddress); DriverChannel channel = controlConnection.channel(); EndPoint localEndPoint = channel.getEndPoint(); return query(channel, "SELECT * FROM " + getPeerTableName()) .thenApply(result -> findInPeers(result, broadcastRpcAddress, localEndPoint)); } @Override public CompletionStage> refreshNodeList() { if (closeFuture.isDone()) { return CompletableFutures.failedFuture(new IllegalStateException("closed")); } LOG.debug("[{}] Refreshing node list", logPrefix); DriverChannel channel = controlConnection.channel(); EndPoint localEndPoint = channel.getEndPoint(); savePort(channel); CompletionStage localQuery = query(channel, "SELECT * FROM system.local"); CompletionStage peersV2Query = query(channel, "SELECT * FROM system.peers_v2"); CompletableFuture peersQuery = new CompletableFuture<>(); peersV2Query.whenComplete( (r, t) -> { if (t != null) { // If system.peers_v2 does not exist, downgrade to system.peers if (t instanceof UnexpectedResponseException && ((UnexpectedResponseException) t).message instanceof Error) { Error error = (Error) ((UnexpectedResponseException) t).message; if (error.code == ProtocolConstants.ErrorCode.INVALID // Also downgrade on server error with a specific error message (DSE 6.0.0 to // 6.0.2 with search enabled) || (error.code == ProtocolConstants.ErrorCode.SERVER_ERROR && error.message.contains("Unknown keyspace/cf pair (system.peers_v2)"))) { this.isSchemaV2 = false; // We should not attempt this query in the future. CompletableFutures.completeFrom( query(channel, "SELECT * FROM system.peers"), peersQuery); return; } } peersQuery.completeExceptionally(t); } else { peersQuery.complete(r); } }); return localQuery.thenCombine( peersQuery, (controlNodeResult, peersResult) -> { List nodeInfos = new ArrayList<>(); AdminRow localRow = controlNodeResult.iterator().next(); InetSocketAddress localBroadcastRpcAddress = getBroadcastRpcAddress(localRow, localEndPoint); nodeInfos.add(nodeInfoBuilder(localRow, localBroadcastRpcAddress, localEndPoint).build()); for (AdminRow peerRow : peersResult) { if (isPeerValid(peerRow)) { InetSocketAddress peerBroadcastRpcAddress = getBroadcastRpcAddress(peerRow, localEndPoint); if (peerBroadcastRpcAddress != null) { NodeInfo nodeInfo = nodeInfoBuilder(peerRow, peerBroadcastRpcAddress, localEndPoint).build(); nodeInfos.add(nodeInfo); } } } return nodeInfos; }); } @Override public CompletionStage checkSchemaAgreement() { if (closeFuture.isDone()) { return CompletableFuture.completedFuture(true); } DriverChannel channel = controlConnection.channel(); return new SchemaAgreementChecker(channel, context, logPrefix).run(); } @NonNull @Override public CompletionStage closeFuture() { return closeFuture; } @NonNull @Override public CompletionStage closeAsync() { closeFuture.complete(null); return closeFuture; } @NonNull @Override public CompletionStage forceCloseAsync() { return closeAsync(); } @VisibleForTesting protected CompletionStage query( DriverChannel channel, String queryString, Map parameters) { AdminRequestHandler handler; try { handler = AdminRequestHandler.query( channel, queryString, parameters, timeout, INFINITE_PAGE_SIZE, logPrefix); } catch (Exception e) { return CompletableFutures.failedFuture(e); } return handler.start(); } private CompletionStage query(DriverChannel channel, String queryString) { return query(channel, queryString, Collections.emptyMap()); } private String getPeerTableName() { return isSchemaV2 ? "system.peers_v2" : "system.peers"; } private Optional firstPeerRowAsNodeInfo(AdminResult result, EndPoint localEndPoint) { Iterator iterator = result.iterator(); if (iterator.hasNext()) { AdminRow row = iterator.next(); if (isPeerValid(row)) { return Optional.ofNullable(getBroadcastRpcAddress(row, localEndPoint)) .map( broadcastRpcAddress -> nodeInfoBuilder(row, broadcastRpcAddress, localEndPoint).build()); } } return Optional.empty(); } /** * Creates a {@link DefaultNodeInfo.Builder} instance from the given row. * * @param broadcastRpcAddress this is a parameter only because we already have it when we come * from {@link #findInPeers(AdminResult, InetSocketAddress, EndPoint)}. Callers that don't * already have it can use {@link #getBroadcastRpcAddress}. For the control host, this can be * null; if this node is a peer however, this cannot be null, since we use that address to * create the node's endpoint. Callers can use {@link #isPeerValid(AdminRow)} to check that * before calling this method. * @param localEndPoint the control node endpoint that was used to query the node's system tables. * This is a parameter because it would be racy to call {@code * controlConnection.channel().getEndPoint()} from within this method, as the control * connection may have changed its channel since. So this parameter must be provided by the * caller. */ @NonNull protected DefaultNodeInfo.Builder nodeInfoBuilder( @NonNull AdminRow row, @Nullable InetSocketAddress broadcastRpcAddress, @NonNull EndPoint localEndPoint) { EndPoint endPoint = buildNodeEndPoint(row, broadcastRpcAddress, localEndPoint); // in system.local InetAddress broadcastInetAddress = row.getInetAddress("broadcast_address"); if (broadcastInetAddress == null) { // in system.peers or system.peers_v2 broadcastInetAddress = row.getInetAddress("peer"); } Integer broadcastPort = 0; if (row.contains("broadcast_port")) { // system.local for Cassandra >= 4.0 broadcastPort = row.getInteger("broadcast_port"); } else if (row.contains("peer_port")) { // system.peers_v2 broadcastPort = row.getInteger("peer_port"); } InetSocketAddress broadcastAddress = null; if (broadcastInetAddress != null && broadcastPort != null) { broadcastAddress = new InetSocketAddress(broadcastInetAddress, broadcastPort); } // in system.local only, and only for Cassandra versions >= 2.0.17, 2.1.8, 2.2.0 rc2; // not present in system.peers nor system.peers_v2 InetAddress listenInetAddress = row.getInetAddress("listen_address"); // in system.local only, and only for Cassandra >= 4.0 Integer listenPort = 0; if (row.contains("listen_port")) { listenPort = row.getInteger("listen_port"); } InetSocketAddress listenAddress = null; if (listenInetAddress != null && listenPort != null) { listenAddress = new InetSocketAddress(listenInetAddress, listenPort); } DefaultNodeInfo.Builder builder = DefaultNodeInfo.builder() .withEndPoint(endPoint) .withBroadcastRpcAddress(broadcastRpcAddress) .withBroadcastAddress(broadcastAddress) .withListenAddress(listenAddress) .withDatacenter(row.getString("data_center")) .withRack(row.getString("rack")) .withCassandraVersion(row.getString("release_version")) .withTokens(row.getSetOfString("tokens")) .withPartitioner(row.getString("partitioner")) .withHostId(Objects.requireNonNull(row.getUuid("host_id"))) .withSchemaVersion(row.getUuid("schema_version")); // Handle DSE-specific columns, if present String rawVersion = row.getString("dse_version"); if (rawVersion != null) { builder.withExtra(DseNodeProperties.DSE_VERSION, Version.parse(rawVersion)); } ImmutableSet.Builder workloadsBuilder = ImmutableSet.builder(); Boolean legacyGraph = row.getBoolean("graph"); // DSE 5.0 if (legacyGraph != null && legacyGraph) { workloadsBuilder.add("Graph"); } String legacyWorkload = row.getString("workload"); // DSE 5.0 (other than graph) if (legacyWorkload != null) { workloadsBuilder.add(legacyWorkload); } Set modernWorkloads = row.getSetOfString("workloads"); // DSE 5.1+ if (modernWorkloads != null) { workloadsBuilder.addAll(modernWorkloads); } ImmutableSet workloads = workloadsBuilder.build(); if (!workloads.isEmpty()) { builder.withExtra(DseNodeProperties.DSE_WORKLOADS, workloads); } // Note: withExtra discards null values builder .withExtra(DseNodeProperties.SERVER_ID, row.getString("server_id")) .withExtra(DseNodeProperties.NATIVE_TRANSPORT_PORT, row.getInteger("native_transport_port")) .withExtra( DseNodeProperties.NATIVE_TRANSPORT_PORT_SSL, row.getInteger("native_transport_port_ssl")) .withExtra(DseNodeProperties.STORAGE_PORT, row.getInteger("storage_port")) .withExtra(DseNodeProperties.STORAGE_PORT_SSL, row.getInteger("storage_port_ssl")) .withExtra(DseNodeProperties.JMX_PORT, row.getInteger("jmx_port")); return builder; } /** * Builds the node's endpoint from the given row. * * @param broadcastRpcAddress this is a parameter only because we already have it when we come * from {@link #findInPeers(AdminResult, InetSocketAddress, EndPoint)}. Callers that don't * already have it can use {@link #getBroadcastRpcAddress}. For the control host, this can be * null; if this node is a peer however, this cannot be null, since we use that address to * create the node's endpoint. Callers can use {@link #isPeerValid(AdminRow)} to check that * before calling this method. * @param localEndPoint the control node endpoint that was used to query the node's system tables. * This is a parameter because it would be racy to call {@code * controlConnection.channel().getEndPoint()} from within this method, as the control * connection may have changed its channel since. So this parameter must be provided by the * caller. */ @NonNull protected EndPoint buildNodeEndPoint( @NonNull AdminRow row, @Nullable InetSocketAddress broadcastRpcAddress, @NonNull EndPoint localEndPoint) { boolean peer = row.contains("peer"); if (peer) { // If this node is a peer, its broadcast RPC address must be present. Objects.requireNonNull( broadcastRpcAddress, "broadcastRpcAddress cannot be null for a peer row"); // Deployments that use a custom EndPoint implementation will need their own TopologyMonitor. // One simple approach is to extend this class and override this method. return new DefaultEndPoint(context.getAddressTranslator().translate(broadcastRpcAddress)); } else { // Don't rely on system.local.rpc_address for the control node, because it mistakenly // reports the normal RPC address instead of the broadcast one (CASSANDRA-11181). We // already know the endpoint anyway since we've just used it to query. return localEndPoint; } } // Called when a new node is being added; the peers table is keyed by broadcast_address, // but the received event only contains broadcast_rpc_address, so // we have to traverse the whole table and check the rows one by one. private Optional findInPeers( AdminResult result, InetSocketAddress broadcastRpcAddressToFind, EndPoint localEndPoint) { for (AdminRow row : result) { InetSocketAddress broadcastRpcAddress = getBroadcastRpcAddress(row, localEndPoint); if (broadcastRpcAddress != null && broadcastRpcAddress.equals(broadcastRpcAddressToFind) && isPeerValid(row)) { return Optional.of(nodeInfoBuilder(row, broadcastRpcAddress, localEndPoint).build()); } } LOG.debug("[{}] Could not find any peer row matching {}", logPrefix, broadcastRpcAddressToFind); return Optional.empty(); } // Called when refreshing an existing node, and we don't know its broadcast address; in this // case we attempt a search by host id and have to traverse the whole table and check the rows one // by one. private Optional findInPeers( AdminResult result, UUID hostIdToFind, EndPoint localEndPoint) { for (AdminRow row : result) { UUID hostId = row.getUuid("host_id"); if (hostId != null && hostId.equals(hostIdToFind) && isPeerValid(row)) { return Optional.ofNullable(getBroadcastRpcAddress(row, localEndPoint)) .map( broadcastRpcAddress -> nodeInfoBuilder(row, broadcastRpcAddress, localEndPoint).build()); } } LOG.debug("[{}] Could not find any peer row matching {}", logPrefix, hostIdToFind); return Optional.empty(); } // Current versions of Cassandra (3.11 at the time of writing), require the same port for all // nodes. As a consequence, the port is not stored in system tables. // We save it the first time we get a control connection channel. private void savePort(DriverChannel channel) { if (port < 0) { SocketAddress address = channel.getEndPoint().resolve(); if (address instanceof InetSocketAddress) { port = ((InetSocketAddress) address).getPort(); } } } /** * Determines the broadcast RPC address of the node represented by the given row. * * @param row The row to inspect; can represent either a local (control) node or a peer node. * @param localEndPoint the control node endpoint that was used to query the node's system tables. * This is a parameter because it would be racy to call {@code * controlConnection.channel().getEndPoint()} from within this method, as the control * connection may have changed its channel since. So this parameter must be provided by the * caller. * @return the broadcast RPC address of the node, if it could be determined; or {@code null} * otherwise. */ @Nullable protected InetSocketAddress getBroadcastRpcAddress( @NonNull AdminRow row, @NonNull EndPoint localEndPoint) { InetAddress broadcastRpcInetAddress = null; Iterator addrCandidates = Iterators.forArray( // in system.peers_v2 (Cassandra >= 4.0) "native_address", // DSE 6.8 introduced native_transport_address and native_transport_port for the // listen address. "native_transport_address", // in system.peers or system.local "rpc_address"); while (broadcastRpcInetAddress == null && addrCandidates.hasNext()) broadcastRpcInetAddress = row.getInetAddress(addrCandidates.next()); // This could only happen if system tables are corrupted, but handle gracefully if (broadcastRpcInetAddress == null) { LOG.warn( "[{}] Unable to determine broadcast RPC IP address, returning null. " + "This is likely due to a misconfiguration or invalid system tables. " + "Please validate the contents of system.local and/or {}.", logPrefix, getPeerTableName()); return null; } Integer broadcastRpcPort = null; Iterator portCandidates = Iterators.forArray( // in system.peers_v2 (Cassandra >= 4.0) NATIVE_PORT, // DSE 6.8 introduced native_transport_address and native_transport_port for the // listen address. NATIVE_TRANSPORT_PORT, // system.local for Cassandra >= 4.0 "rpc_port"); while ((broadcastRpcPort == null || broadcastRpcPort == 0) && portCandidates.hasNext()) { String colName = portCandidates.next(); broadcastRpcPort = row.getInteger(colName); // Support override for SSL port (if enabled) in DSE if (NATIVE_TRANSPORT_PORT.equals(colName) && context.getSslEngineFactory().isPresent()) { String sslColName = colName + "_ssl"; broadcastRpcPort = row.getInteger(sslColName); } } // use the default port if no port information was found in the row; // note that in rare situations, the default port might not be known, in which case we // report zero, as advertised in the javadocs of Node and NodeInfo. if (broadcastRpcPort == null || broadcastRpcPort == 0) { LOG.warn( "[{}] Unable to determine broadcast RPC port. " + "Trying to fall back to port used by the control connection.", logPrefix); broadcastRpcPort = port == -1 ? 0 : port; } InetSocketAddress broadcastRpcAddress = new InetSocketAddress(broadcastRpcInetAddress, broadcastRpcPort); if (row.contains("peer") && broadcastRpcAddress.equals(localEndPoint.resolve())) { // JAVA-2303: if the peer is actually the control node, ignore that peer as it is likely // a misconfiguration problem. LOG.warn( "[{}] Control node {} has an entry for itself in {}: this entry will be ignored. " + "This is likely due to a misconfiguration; please verify your rpc_address " + "configuration in cassandra.yaml on all nodes in your cluster.", logPrefix, localEndPoint, getPeerTableName()); return null; } return broadcastRpcAddress; } /** * Returns {@code true} if the given peer row is valid, and {@code false} otherwise. * *

This method must at least ensure that the row contains enough information to extract the * node's broadcast RPC address and host ID; otherwise the driver may not work properly. */ protected boolean isPeerValid(AdminRow peerRow) { if (PeerRowValidator.isValid(peerRow)) { return true; } else { LOG.warn( "[{}] Found invalid row in {} for peer: {}. " + "This is likely a gossip or snitch issue, this node will be ignored.", logPrefix, getPeerTableName(), peerRow.getInetAddress("peer")); return false; } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy