All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.datastax.driver.core.ControlConnection Maven / Gradle / Ivy

/*
 *      Copyright (C) 2012-2015 DataStax Inc.
 *
 *   Licensed under the Apache License, Version 2.0 (the "License");
 *   you may not use this file except in compliance with the License.
 *   You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *   Unless required by applicable law or agreed to in writing, software
 *   distributed under the License is distributed on an "AS IS" BASIS,
 *   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *   See the License for the specific language governing permissions and
 *   limitations under the License.
 *
 *   The following only applies to changes made to this file as part of YugaByte development.
 *
 *      Portions Copyright (c) YugaByte, Inc.
 *
 *   Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *   Unless required by applicable law or agreed to in writing, software distributed under the
 *   License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
 *   either express or implied.  See the License for the specific language governing permissions
 *   and limitations under the License.
 */
package com.datastax.driver.core;

import com.datastax.driver.core.exceptions.*;
import com.datastax.driver.core.utils.MoreObjects;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.util.concurrent.ListenableFuture;
import com.yugabyte.driver.core.TableSplitMetadata;
import com.yugabyte.driver.core.PartitionMetadata;
import com.yugabyte.driver.core.QualifiedTableName;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.net.InetAddress;
import java.net.InetSocketAddress;
import java.net.UnknownHostException;
import java.nio.ByteBuffer;
import java.util.*;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicReference;

import static com.datastax.driver.core.SchemaElement.KEYSPACE;
import static com.datastax.driver.core.SchemaElement.TABLE;

class ControlConnection implements Connection.Owner {

    private static final Logger logger = LoggerFactory.getLogger(ControlConnection.class);

    private static final boolean EXTENDED_PEER_CHECK = SystemProperties.getBoolean("com.datastax.driver.EXTENDED_PEER_CHECK", true);

    private static final InetAddress bindAllAddress;

    static {
        try {
            bindAllAddress = InetAddress.getByAddress(new byte[4]);
        } catch (UnknownHostException e) {
            throw new RuntimeException(e);
        }
    }

    private static final String SELECT_PEERS = "SELECT * FROM system.peers";
    private static final String SELECT_LOCAL = "SELECT * FROM system.local WHERE key='local'";

    private static final String SELECT_SCHEMA_PEERS = "SELECT peer, rpc_address, schema_version FROM system.peers";
    private static final String SELECT_SCHEMA_LOCAL = "SELECT schema_version FROM system.local WHERE key='local'";

    // Query to load partition metadata for all tables.
    private static final String SELECT_PARTITIONS =
        "SELECT keyspace_name, table_name, start_key, end_key, replica_addresses FROM system.partitions";

    @VisibleForTesting
    final AtomicReference connectionRef = new AtomicReference();

    private final Cluster.Manager cluster;

    private final AtomicReference> reconnectionAttempt = new AtomicReference>();

    private volatile boolean isShutdown;

    public ControlConnection(Cluster.Manager manager) {
        this.cluster = manager;
    }

    // Only for the initial connection. Does not schedule retries if it fails
    void connect() throws UnsupportedProtocolVersionException {
        if (isShutdown)
            return;

        // NB: at this stage, allHosts() only contains the initial contact points
        List hosts = new ArrayList(cluster.metadata.allHosts());
        // shuffle so that multiple clients with the same contact points don't all pick the same control host
        Collections.shuffle(hosts);
        setNewConnection(reconnectInternal(hosts.iterator(), true));
    }

    CloseFuture closeAsync() {
        // We don't have to be fancy here. We just set a flag so that we stop trying to reconnect (and thus change the
        // connection used) and shutdown the current one.
        isShutdown = true;

        // Cancel any reconnection attempt in progress
        ListenableFuture r = reconnectionAttempt.get();
        if (r != null)
            r.cancel(false);

        Connection connection = connectionRef.get();
        return connection == null ? CloseFuture.immediateFuture() : connection.closeAsync().force();
    }

    Host connectedHost() {
        Connection current = connectionRef.get();
        return (current == null)
                ? null
                : cluster.metadata.getHost(current.address);
    }

    void triggerReconnect() {
        backgroundReconnect(0);
    }

    /**
     * @param initialDelayMs if >=0, bypass the schedule and use this for the first call
     */
    private void backgroundReconnect(long initialDelayMs) {
        if (isShutdown)
            return;

        // Abort if a reconnection is already in progress. This is not thread-safe: two threads might race through this and both
        // schedule a reconnection; in that case AbstractReconnectionHandler knows how to deal with it correctly.
        // But this cheap check can help us avoid creating the object unnecessarily.
        ListenableFuture reconnection = reconnectionAttempt.get();
        if (reconnection != null && !reconnection.isDone())
            return;

        new AbstractReconnectionHandler("Control connection", cluster.reconnectionExecutor, cluster.reconnectionPolicy().newSchedule(), reconnectionAttempt, initialDelayMs) {
            @Override
            protected Connection tryReconnect() throws ConnectionException {
                if (isShutdown)
                    throw new ConnectionException(null, "Control connection was shut down");

                try {
                    return reconnectInternal(queryPlan(), false);
                } catch (NoHostAvailableException e) {
                    throw new ConnectionException(null, e.getMessage());
                } catch (UnsupportedProtocolVersionException e) {
                    // reconnectInternal only propagate those if we've not decided on the protocol version yet,
                    // which should only happen on the initial connection and thus in connect() but never here.
                    throw new AssertionError();
                }
            }

            @Override
            protected void onReconnection(Connection connection) {
                if (isShutdown) {
                    connection.closeAsync().force();
                    return;
                }

                setNewConnection(connection);
            }

            @Override
            protected boolean onConnectionException(ConnectionException e, long nextDelayMs) {
                if (isShutdown)
                    return false;

                logger.error("[Control connection] Cannot connect to any host, scheduling retry in {} milliseconds", nextDelayMs);
                return true;
            }

            @Override
            protected boolean onUnknownException(Exception e, long nextDelayMs) {
                if (isShutdown)
                    return false;

                logger.error(String.format("[Control connection] Unknown error during reconnection, scheduling retry in %d milliseconds", nextDelayMs), e);
                return true;
            }
        }.start();
    }

    private Iterator queryPlan() {
        return cluster.loadBalancingPolicy().newQueryPlan(null, Statement.DEFAULT);
    }

    private void signalError() {
        Connection connection = connectionRef.get();
        if (connection != null)
            connection.closeAsync().force();

        // If the error caused the host to go down, onDown might have already triggered a reconnect.
        // But backgroundReconnect knows how to deal with that.
        backgroundReconnect(0);
    }

    private void setNewConnection(Connection newConnection) {
        Host.statesLogger.debug("[Control connection] established to {}", newConnection.address);
        newConnection.setOwner(this);
        Connection old = connectionRef.getAndSet(newConnection);
        if (old != null && !old.isClosed())
            old.closeAsync().force();
    }

    private Connection reconnectInternal(Iterator iter, boolean isInitialConnection) throws UnsupportedProtocolVersionException {

        Map errors = null;

        Host host = null;
        try {
            while (iter.hasNext()) {
                host = iter.next();
                if (!host.convictionPolicy.canReconnectNow())
                    continue;
                try {
                    return tryConnect(host, isInitialConnection);
                } catch (ConnectionException e) {
                    errors = logError(host, e, errors, iter);
                    if (isInitialConnection) {
                        // Mark the host down right away so that we don't try it again during the initialization process.
                        // We don't call cluster.triggerOnDown because it does a bunch of other things we don't want to do here (notify LBP, etc.)
                        host.setDown();
                    }
                } catch (ExecutionException e) {
                    errors = logError(host, e.getCause(), errors, iter);
                } catch (UnsupportedProtocolVersionException e) {
                    // If it's the very first node we've connected to, rethrow the exception and
                    // Cluster.init() will handle it. Otherwise, just mark this node in error.
                    if (isInitialConnection)
                        throw e;
                    logger.debug("Ignoring host {}: {}", host, e.getMessage());
                    errors = logError(host, e, errors, iter);
                } catch (ClusterNameMismatchException e) {
                    logger.debug("Ignoring host {}: {}", host, e.getMessage());
                    errors = logError(host, e, errors, iter);
                }
            }
        } catch (InterruptedException e) {
            // Sets interrupted status
            Thread.currentThread().interrupt();

            // Indicates that all remaining hosts are skipped due to the interruption
            errors = logError(host, new DriverException("Connection thread interrupted"), errors, iter);
            while (iter.hasNext())
                errors = logError(iter.next(), new DriverException("Connection thread interrupted"), errors, iter);
        }
        throw new NoHostAvailableException(errors == null ? Collections.emptyMap() : errors);
    }

    private static Map logError(Host host, Throwable exception, Map errors, Iterator iter) {
        if (errors == null)
            errors = new HashMap();

        errors.put(host.getSocketAddress(), exception);

        if (logger.isDebugEnabled()) {
            if (iter.hasNext()) {
                logger.debug(String.format("[Control connection] error on %s connection, trying next host", host), exception);
            } else {
                logger.debug(String.format("[Control connection] error on %s connection, no more host to try", host), exception);
            }
        }
        return errors;
    }

    private Connection tryConnect(Host host, boolean isInitialConnection) throws ConnectionException, ExecutionException, InterruptedException, UnsupportedProtocolVersionException, ClusterNameMismatchException {
        Connection connection = cluster.connectionFactory.open(host);

        // If no protocol version was specified, set the default as soon as a connection succeeds (it's needed to parse UDTs in refreshSchema)
        if (cluster.connectionFactory.protocolVersion == null)
            cluster.connectionFactory.protocolVersion = ProtocolVersion.NEWEST_SUPPORTED;

        try {
            logger.trace("[Control connection] Registering for events");
            List evs = Arrays.asList(
                    ProtocolEvent.Type.TOPOLOGY_CHANGE,
                    ProtocolEvent.Type.STATUS_CHANGE,
                    ProtocolEvent.Type.SCHEMA_CHANGE
            );
            connection.write(new Requests.Register(evs));

            // We need to refresh the node list first so we know about the cassandra version of
            // the node we're connecting to.
            // This will create the token map for the first time, but it will be incomplete
            // due to the lack of keyspace information
            refreshNodeListAndTokenMap(connection, cluster, isInitialConnection, true);

            // refresh schema will also update the token map again,
            // this time with information about keyspaces
            logger.debug("[Control connection] Refreshing schema");
            refreshSchema(connection, null, null, null, null, cluster);

            return connection;
        } catch (BusyConnectionException e) {
            connection.closeAsync().force();
            throw new DriverInternalError("Newly created connection should not be busy");
        } catch (InterruptedException e) {
            connection.closeAsync().force();
            throw e;
        } catch (ConnectionException e) {
            connection.closeAsync().force();
            throw e;
        } catch (ExecutionException e) {
            connection.closeAsync().force();
            throw e;
        } catch (RuntimeException e) {
            connection.closeAsync().force();
            throw e;
        }
    }

    public void refreshSchema(SchemaElement targetType, String targetKeyspace, String targetName, List signature) throws InterruptedException {
        logger.debug("[Control connection] Refreshing schema for {}{}",
                targetType == null ? "everything" : targetKeyspace,
                (targetType == KEYSPACE) ? "" : "." + targetName + " (" + targetType + ")");
        try {
            Connection c = connectionRef.get();
            // At startup, when we add the initial nodes, this will be null, which is ok
            if (c == null || c.isClosed())
                return;
            refreshSchema(c, targetType, targetKeyspace, targetName, signature, cluster);
        } catch (ConnectionException e) {
            logger.debug("[Control connection] Connection error while refreshing schema ({})", e.getMessage());
            signalError();
        } catch (ExecutionException e) {
            // If we're being shutdown during schema refresh, this can happen. That's fine so don't scare the user.
            if (!isShutdown)
                logger.error("[Control connection] Unexpected error while refreshing schema", e);
            signalError();
        } catch (BusyConnectionException e) {
            logger.debug("[Control connection] Connection is busy, reconnecting");
            signalError();
        }
    }

    static void refreshSchema(Connection connection, SchemaElement targetType, String targetKeyspace, String targetName, List targetSignature, Cluster.Manager cluster) throws ConnectionException, BusyConnectionException, ExecutionException, InterruptedException {
        Host host = cluster.metadata.getHost(connection.address);
        // Neither host, nor it's version should be null. But instead of dying if there is a race or something, we can kind of try to infer
        // a Cassandra version from the protocol version (this is not full proof, we can have the protocol 1 against C* 2.0+, but it's worth
        // a shot, and since we log in this case, it should be relatively easy to debug when if this ever fail).
        VersionNumber cassandraVersion;
        if (host == null || host.getCassandraVersion() == null) {
            cassandraVersion = cluster.protocolVersion().minCassandraVersion();
            logger.warn("Cannot find Cassandra version for host {} to parse the schema, using {} based on protocol version in use. "
                    + "If parsing the schema fails, this could be the cause", connection.address, cassandraVersion);
        } else {
            cassandraVersion = host.getCassandraVersion();
        }

        SchemaParser.forVersion(cassandraVersion)
                .refresh(cluster.getCluster(),
                        targetType, targetKeyspace, targetName, targetSignature,
                        connection, cassandraVersion);
        if (targetType == null || targetType == KEYSPACE || targetType == TABLE) {
            // Refresh the entire partition map because one keyspace or table change may trigger
            // re-balancing for the entire cluster.
            refreshPartitionMap(connection, cluster);
        } // Other change types do not affect the partition map so nothing to do.
     }

    void refreshNodeListAndTokenMap() {
        Connection c = connectionRef.get();
        // At startup, when we add the initial nodes, this will be null, which is ok
        if (c == null || c.isClosed())
            return;

        try {
            refreshNodeListAndTokenMap(c, cluster, false, true);
            // Refresh the partition map for all tables.
            refreshPartitionMap(c, cluster);
        } catch (ConnectionException e) {
            logger.debug("[Control connection] Connection error while refreshing node list and token map ({})", e.getMessage());
            signalError();
        } catch (ExecutionException e) {
            // If we're being shutdown during refresh, this can happen. That's fine so don't scare the user.
            if (!isShutdown)
                logger.error("[Control connection] Unexpected error while refreshing node list and token map", e);
            signalError();
        } catch (BusyConnectionException e) {
            logger.debug("[Control connection] Connection is busy, reconnecting");
            signalError();
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            logger.debug("[Control connection] Interrupted while refreshing node list and token map, skipping it.");
        }
    }

    private static InetSocketAddress rpcAddressForPeerHost(Row peersRow, InetSocketAddress connectedHost, Cluster.Manager cluster) {

        // after CASSANDRA-9436, system.peers contains the following inet columns:
        // - peer: this is actually broadcast_address
        // - rpc_address: the address we are looking for (this corresponds to broadcast_rpc_address in the peer's cassandra yaml file;
        //                if this setting if unset, it defaults to the value for rpc_address or rpc_interface)
        // - preferred_ip: used by Ec2MultiRegionSnitch and GossipingPropertyFileSnitch, possibly others; contents unclear

        InetAddress broadcastAddress = peersRow.getInet("peer");
        InetAddress rpcAddress = peersRow.getInet("rpc_address");

        if (broadcastAddress == null) {
            return null;
        } else if (broadcastAddress.equals(connectedHost.getAddress()) || (rpcAddress != null && rpcAddress.equals(connectedHost.getAddress()))) {
            // Some DSE versions were inserting a line for the local node in peers (with mostly null values). This has been fixed, but if we
            // detect that's the case, ignore it as it's not really a big deal.
            logger.debug("System.peers on node {} has a line for itself. This is not normal but is a known problem of some DSE version. Ignoring the entry.", connectedHost);
            return null;
        } else if (rpcAddress == null) {
            return null;
        } else if (rpcAddress.equals(bindAllAddress)) {
            logger.warn("Found host with 0.0.0.0 as rpc_address, using broadcast_address ({}) to contact it instead. If this is incorrect you should avoid the use of 0.0.0.0 server side.", broadcastAddress);
            rpcAddress = broadcastAddress;
        }
        return cluster.translateAddress(rpcAddress);
    }

    private Row fetchNodeInfo(Host host, Connection c) throws ConnectionException, BusyConnectionException, ExecutionException, InterruptedException {
        boolean isConnectedHost = c.address.equals(host.getSocketAddress());
        if (isConnectedHost || host.getBroadcastAddress() != null) {
            DefaultResultSetFuture future = isConnectedHost
                    ? new DefaultResultSetFuture(null, cluster.protocolVersion(), new Requests.Query(SELECT_LOCAL))
                    : new DefaultResultSetFuture(null, cluster.protocolVersion(), new Requests.Query(SELECT_PEERS + " WHERE peer='" + host.getBroadcastAddress().getHostAddress() + '\''));
            c.write(future);
            Row row = future.get().one();
            if (row != null) {
                return row;
            } else {
                logger.debug("Could not find peer with broadcast address {}, " +
                        "falling back to a full system.peers scan to fetch info for {} " +
                        "(this can happen if the broadcast address changed)", host.getBroadcastAddress(), host);
            }
        }

        // We have to fetch the whole peers table and find the host we're looking for
        DefaultResultSetFuture future = new DefaultResultSetFuture(null, cluster.protocolVersion(), new Requests.Query(SELECT_PEERS));
        c.write(future);
        for (Row row : future.get()) {
            InetSocketAddress addr = rpcAddressForPeerHost(row, c.address, cluster);
            if (addr != null && addr.equals(host.getSocketAddress()))
                return row;
        }
        return null;
    }

    /**
     * @return whether we have enough information to bring the node back up
     */
    boolean refreshNodeInfo(Host host) {

        Connection c = connectionRef.get();
        // At startup, when we add the initial nodes, this will be null, which is ok
        if (c == null || c.isClosed())
            return true;

        logger.debug("[Control connection] Refreshing node info on {}", host);
        try {
            Row row = fetchNodeInfo(host, c);
            if (row == null) {
                if (c.isDefunct()) {
                    logger.debug("Control connection is down, could not refresh node info");
                    // Keep going with what we currently know about the node, otherwise we will ignore all nodes
                    // until the control connection is back up (which leads to a catch-22 if there is only one)
                    return true;
                } else {
                    logger.warn("No row found for host {} in {}'s peers system table. {} will be ignored.", host.getAddress(), c.address, host.getAddress());
                    return false;
                }
                // Ignore hosts with a null rpc_address, as this is most likely a phantom row in system.peers (JAVA-428).
                // Don't test this for the control host since we're already connected to it anyway, and we read the info from system.local
                // which didn't have an rpc_address column (JAVA-546) until CASSANDRA-9436
            } else if (!c.address.equals(host.getSocketAddress()) && !isValidPeer(row, true)) {
                return false;
            }

            updateInfo(host, row, cluster, false);
            return true;

        } catch (ConnectionException e) {
            logger.debug("[Control connection] Connection error while refreshing node info ({})", e.getMessage());
            signalError();
        } catch (ExecutionException e) {
            // If we're being shutdown during refresh, this can happen. That's fine so don't scare the user.
            if (!isShutdown)
                logger.debug("[Control connection] Unexpected error while refreshing node info", e);
            signalError();
        } catch (BusyConnectionException e) {
            logger.debug("[Control connection] Connection is busy, reconnecting");
            signalError();
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            logger.debug("[Control connection] Interrupted while refreshing node info, skipping it.");
        } catch (Exception e) {
            logger.debug("[Control connection] Unexpected error while refreshing node info", e);
            signalError();
        }
        // If we got an exception, always return true. Otherwise a faulty control connection would cause
        // reconnected hosts to be ignored permanently.
        return true;
    }

    // row can come either from the 'local' table or the 'peers' one
    private static void updateInfo(Host host, Row row, Cluster.Manager cluster, boolean isInitialConnection) {
        if (!row.isNull("data_center") || !row.isNull("rack"))
            updateLocationInfo(host, row.getString("data_center"), row.getString("rack"), isInitialConnection, cluster);

        String version = row.getString("release_version");
        host.setVersion(version);

        // Before CASSANDRA-9436 local row did not contain any info about the host addresses.
        // After CASSANDRA-9436 (2.0.16, 2.1.6, 2.2.0 rc1) local row contains two new columns:
        // - broadcast_address
        // - rpc_address
        // After CASSANDRA-9603 (2.0.17, 2.1.8, 2.2.0 rc2) local row contains one more column:
        // - listen_address

        InetAddress broadcastAddress = null;
        if (row.getColumnDefinitions().contains("peer")) { // system.peers
            broadcastAddress = row.getInet("peer");
        } else if (row.getColumnDefinitions().contains("broadcast_address")) { // system.local
            broadcastAddress = row.getInet("broadcast_address");
        }
        host.setBroadcastAddress(broadcastAddress);

        // in system.local only for C* versions >= 2.0.17, 2.1.8, 2.2.0 rc2,
        // not yet in system.peers as of C* 3.2
        InetAddress listenAddress = row.getColumnDefinitions().contains("listen_address")
                ? row.getInet("listen_address")
                : null;
        host.setListenAddress(listenAddress);

        if (row.getColumnDefinitions().contains("workload")) {
            String dseWorkload = row.getString("workload");
            host.setDseWorkload(dseWorkload);
        }
        if (row.getColumnDefinitions().contains("graph")) {
            boolean isDseGraph = row.getBool("graph");
            host.setDseGraphEnabled(isDseGraph);
        }
        if (row.getColumnDefinitions().contains("dse_version")) {
            String dseVersion = row.getString("dse_version");
            host.setDseVersion(dseVersion);
        }
    }

    private static void updateLocationInfo(Host host, String datacenter, String rack, boolean isInitialConnection, Cluster.Manager cluster) {
        if (MoreObjects.equal(host.getDatacenter(), datacenter) && MoreObjects.equal(host.getRack(), rack))
            return;

        // If the dc/rack information changes for an existing node, we need to update the load balancing policy.
        // For that, we remove and re-add the node against the policy. Not the most elegant, and assumes
        // that the policy will update correctly, but in practice this should work.
        if (!isInitialConnection)
            cluster.loadBalancingPolicy().onDown(host);
        host.setLocationInfo(datacenter, rack);
        if (!isInitialConnection)
            cluster.loadBalancingPolicy().onAdd(host);
    }

    private static void refreshNodeListAndTokenMap(Connection connection, Cluster.Manager cluster, boolean isInitialConnection, boolean logInvalidPeers) throws ConnectionException, BusyConnectionException, ExecutionException, InterruptedException {
        logger.debug("[Control connection] Refreshing node list and token map");

        boolean metadataEnabled = cluster.configuration.getQueryOptions().isMetadataEnabled();

        // Make sure we're up to date on nodes and tokens

        DefaultResultSetFuture localFuture = new DefaultResultSetFuture(null, cluster.protocolVersion(), new Requests.Query(SELECT_LOCAL));
        DefaultResultSetFuture peersFuture = new DefaultResultSetFuture(null, cluster.protocolVersion(), new Requests.Query(SELECT_PEERS));
        connection.write(localFuture);
        connection.write(peersFuture);

        String partitioner = null;
        Token.Factory factory = null;
        Map> tokenMap = new HashMap>();

        // Update cluster name, DC and rack for the one node we are connected to
        Row localRow = localFuture.get().one();
        if (localRow != null) {
            String clusterName = localRow.getString("cluster_name");
            if (clusterName != null)
                cluster.metadata.clusterName = clusterName;

            partitioner = localRow.getString("partitioner");
            if (partitioner != null) {
                cluster.metadata.partitioner = partitioner;
                factory = Token.getFactory(partitioner);
            }

            Host host = cluster.metadata.getHost(connection.address);
            // In theory host can't be null. However there is no point in risking a NPE in case we
            // have a race between a node removal and this.
            if (host == null) {
                logger.debug("Host in local system table ({}) unknown to us (ok if said host just got removed)", connection.address);
            } else {
                updateInfo(host, localRow, cluster, isInitialConnection);
                if (metadataEnabled && factory != null) {
                    Set tokensStr = localRow.getSet("tokens", String.class);
                    if (!tokensStr.isEmpty()) {
                        Set tokens = toTokens(factory, tokensStr);
                        tokenMap.put(host, tokens);
                    }
                }
            }
        }

        List foundHosts = new ArrayList();
        List dcs = new ArrayList();
        List racks = new ArrayList();
        List cassandraVersions = new ArrayList();
        List broadcastAddresses = new ArrayList();
        List listenAddresses = new ArrayList();
        List> allTokens = new ArrayList>();
        List dseVersions = new ArrayList();
        List dseGraphEnabled = new ArrayList();
        List dseWorkloads = new ArrayList();

        for (Row row : peersFuture.get()) {
            if (!isValidPeer(row, logInvalidPeers))
                continue;

            InetSocketAddress rpcAddress = rpcAddressForPeerHost(row, connection.address, cluster);
            if (rpcAddress == null)
                continue;
            foundHosts.add(rpcAddress);
            dcs.add(row.getString("data_center"));
            racks.add(row.getString("rack"));
            cassandraVersions.add(row.getString("release_version"));
            broadcastAddresses.add(row.getInet("peer"));
            if (metadataEnabled && factory != null) {
                Set tokensStr = row.getSet("tokens", String.class);
                Set tokens = null;
                if (!tokensStr.isEmpty()) {
                    tokens = toTokens(factory, tokensStr);
                }
                allTokens.add(tokens);
            }
            InetAddress listenAddress = row.getColumnDefinitions().contains("listen_address") ? row.getInet("listen_address") : null;
            listenAddresses.add(listenAddress);
            String dseWorkload = row.getColumnDefinitions().contains("workload") ? row.getString("workload") : null;
            dseWorkloads.add(dseWorkload);
            Boolean isDseGraph = row.getColumnDefinitions().contains("graph") ? row.getBool("graph") : null;
            dseGraphEnabled.add(isDseGraph);
            String dseVersion = row.getColumnDefinitions().contains("dse_version") ? row.getString("dse_version") : null;
            dseVersions.add(dseVersion);
        }

        for (int i = 0; i < foundHosts.size(); i++) {
            Host host = cluster.metadata.getHost(foundHosts.get(i));
            boolean isNew = false;
            if (host == null) {
                // We don't know that node, create the Host object but wait until we've set the known
                // info before signaling the addition.
                Host newHost = cluster.metadata.newHost(foundHosts.get(i));
                Host previous = cluster.metadata.addIfAbsent(newHost);
                if (previous == null) {
                    host = newHost;
                    isNew = true;
                } else {
                    host = previous;
                    isNew = false;
                }
            }
            if (dcs.get(i) != null || racks.get(i) != null)
                updateLocationInfo(host, dcs.get(i), racks.get(i), isInitialConnection, cluster);
            if (cassandraVersions.get(i) != null)
                host.setVersion(cassandraVersions.get(i));
            if (broadcastAddresses.get(i) != null)
                host.setBroadcastAddress(broadcastAddresses.get(i));
            if (listenAddresses.get(i) != null)
                host.setListenAddress(listenAddresses.get(i));

            if (dseVersions.get(i) != null)
                host.setDseVersion(dseVersions.get(i));
            if (dseWorkloads.get(i) != null)
                host.setDseWorkload(dseWorkloads.get(i));
            if (dseGraphEnabled.get(i) != null)
                host.setDseGraphEnabled(dseGraphEnabled.get(i));

            if (metadataEnabled && factory != null && allTokens.get(i) != null)
                tokenMap.put(host, allTokens.get(i));

            if (isNew && !isInitialConnection)
                cluster.triggerOnAdd(host);
        }

        // Removes all those that seems to have been removed (since we lost the control connection)
        Set foundHostsSet = new HashSet(foundHosts);
        for (Host host : cluster.metadata.allHosts())
            if (!host.getSocketAddress().equals(connection.address) && !foundHostsSet.contains(host.getSocketAddress()))
                cluster.removeHost(host, isInitialConnection);

        if (metadataEnabled && factory != null && !tokenMap.isEmpty())
            cluster.metadata.rebuildTokenMap(factory, tokenMap);

    }

    /**
     * Refresh the partition map for all tables.
     *
     * @param connection the connection object
     * @param cluster the cluster manager
     * @throws ConnectionException
     * @throws BusyConnectionException
     * @throws ExecutionException
     * @throws InterruptedException
     */
    private static void refreshPartitionMap(Connection connection, Cluster.Manager cluster)
            throws ConnectionException, BusyConnectionException, ExecutionException, InterruptedException {


        boolean partitionMetadataEnabled = cluster.configuration.getQueryOptions().isMetadataEnabled() &&
            cluster.requiresPartitionMap();

        DefaultResultSetFuture partitionsFuture =
            new DefaultResultSetFuture(null, cluster.protocolVersion(), new Requests.Query(SELECT_PARTITIONS));
        connection.write(partitionsFuture);

        // Refresh partition metadata (table-specific partition splits).
        if (partitionMetadataEnabled) {
            Map tableSplits = new HashMap();

            // Prepare the host map to look up host by the inet address.
            Map hostMap = new HashMap();
            for (Host host : cluster.metadata.allHosts()) {
                hostMap.put(host.getAddress(), host);
            }

            for (Row row : partitionsFuture.get()) {
                QualifiedTableName tableId = new QualifiedTableName(row.getString("keyspace_name"),
                                                                    row.getString("table_name"));

                TableSplitMetadata tableSplitMetadata = tableSplits.get(tableId);
                if (tableSplitMetadata == null) {
                    tableSplitMetadata = new TableSplitMetadata();
                    tableSplits.put(tableId, tableSplitMetadata);
                }

                Map replicaAddresses = row.getMap("replica_addresses",
                                                                       InetAddress.class,
                                                                       String.class);

                List hosts = new Vector();
                for (Map.Entry entry : replicaAddresses.entrySet()) {
                    Host host = hostMap.get(entry.getKey());
                    if (host == null) {
                        // Ignore tables in system keyspaces because they are hosted in master not tserver.
                        if (!KeyspaceMetadata.isSystem(tableId.getKeyspaceName())) {
                            logger.debug("Host " + entry.getKey() + " not found in cluster metadata for table " +
                                tableId.toString());
                        }
                        continue;
                    }
                    // Put the leader at the beginning and the rest after.
                    String role = entry.getValue();
                    if (role.equals("LEADER")) {
                        hosts.add(0, host);
                    } else if (role.equals("FOLLOWER") || role.equals("READ_REPLICA")) {
                        hosts.add(host);
                    }
                }
                int startKey = getKey(row.getBytes("start_key"));
                int endKey = getKey(row.getBytes("end_key"));
                tableSplitMetadata.getPartitionMap().put(startKey, new PartitionMetadata(startKey, endKey, hosts));
            }

            // Set the new partition map in the cluster metadata.
            cluster.metadata.setTableSplits(tableSplits);
        }
    }

    /**
     * Extracts an unsigned 16-bit number from a {@code ByteBuffer}.
     */
    private static int getKey(ByteBuffer bb) {
        int key = (bb.remaining() == 0) ? 0 : bb.getShort();
        // Flip the negative values back to positive.
        return (key >= 0) ? key : key + 0x10000;
    }

    private static Set toTokens(Token.Factory factory, Set tokensStr) {
        Set tokens = new LinkedHashSet(tokensStr.size());
        for (String tokenStr : tokensStr) {
            tokens.add(factory.fromString(tokenStr));
        }
        return tokens;
    }

    private static boolean isValidPeer(Row peerRow, boolean logIfInvalid) {
        boolean isValid = peerRow.getColumnDefinitions().contains("rpc_address")
                && !peerRow.isNull("rpc_address");
        if (EXTENDED_PEER_CHECK) {
            isValid &= peerRow.getColumnDefinitions().contains("host_id")
                    && !peerRow.isNull("host_id")
                    && peerRow.getColumnDefinitions().contains("data_center")
                    && !peerRow.isNull("data_center")
                    && peerRow.getColumnDefinitions().contains("rack")
                    && !peerRow.isNull("rack")
                    && peerRow.getColumnDefinitions().contains("tokens")
                    && !peerRow.isNull("tokens");
        }
        if (!isValid && logIfInvalid)
            logger.warn("Found invalid row in system.peers: {}. " +
                    "This is likely a gossip or snitch issue, this host will be ignored.", formatInvalidPeer(peerRow));
        return isValid;
    }

    // Custom formatting to avoid spamming the logs if 'tokens' is present and contains a gazillion tokens
    private static String formatInvalidPeer(Row peerRow) {
        StringBuilder sb = new StringBuilder("[peer=" + peerRow.getInet("peer"));
        formatMissingOrNullColumn(peerRow, "rpc_address", sb);
        if (EXTENDED_PEER_CHECK) {
            formatMissingOrNullColumn(peerRow, "host_id", sb);
            formatMissingOrNullColumn(peerRow, "data_center", sb);
            formatMissingOrNullColumn(peerRow, "rack", sb);
            formatMissingOrNullColumn(peerRow, "tokens", sb);
        }
        sb.append("]");
        return sb.toString();
    }

    private static void formatMissingOrNullColumn(Row peerRow, String columnName, StringBuilder sb) {
        if (!peerRow.getColumnDefinitions().contains(columnName))
            sb.append(", missing ").append(columnName);
        else if (peerRow.isNull(columnName))
            sb.append(", ").append(columnName).append("=null");
    }

    static boolean waitForSchemaAgreement(Connection connection, Cluster.Manager cluster) throws ConnectionException, BusyConnectionException, ExecutionException, InterruptedException {
        long start = System.nanoTime();
        long elapsed = 0;
        int maxSchemaAgreementWaitSeconds = cluster.configuration.getProtocolOptions().getMaxSchemaAgreementWaitSeconds();
        while (elapsed < maxSchemaAgreementWaitSeconds * 1000) {

            if (checkSchemaAgreement(connection, cluster))
                return true;

            // let's not flood the node too much
            Thread.sleep(200);

            elapsed = Cluster.timeSince(start, TimeUnit.MILLISECONDS);
        }

        return false;
    }

    private static boolean checkSchemaAgreement(Connection connection, Cluster.Manager cluster) throws InterruptedException, ExecutionException {
        DefaultResultSetFuture peersFuture = new DefaultResultSetFuture(null, cluster.protocolVersion(), new Requests.Query(SELECT_SCHEMA_PEERS));
        DefaultResultSetFuture localFuture = new DefaultResultSetFuture(null, cluster.protocolVersion(), new Requests.Query(SELECT_SCHEMA_LOCAL));
        connection.write(peersFuture);
        connection.write(localFuture);

        Set versions = new HashSet();

        Row localRow = localFuture.get().one();
        if (localRow != null && !localRow.isNull("schema_version"))
            versions.add(localRow.getUUID("schema_version"));

        for (Row row : peersFuture.get()) {

            InetSocketAddress addr = rpcAddressForPeerHost(row, connection.address, cluster);
            if (addr == null || row.isNull("schema_version"))
                continue;

            Host peer = cluster.metadata.getHost(addr);
            if (peer != null && peer.isUp())
                versions.add(row.getUUID("schema_version"));
        }
        logger.debug("Checking for schema agreement: versions are {}", versions);
        return versions.size() <= 1;
    }

    boolean checkSchemaAgreement() throws ConnectionException, BusyConnectionException, InterruptedException, ExecutionException {
        Connection connection = connectionRef.get();
        return connection != null &&
                !connection.isClosed() &&
                checkSchemaAgreement(connection, cluster);
    }

    boolean isOpen() {
        Connection c = connectionRef.get();
        return c != null && !c.isClosed();
    }

    public void onUp(Host host) {
    }

    public void onAdd(Host host) {
    }

    public void onDown(Host host) {
        onHostGone(host);
    }

    public void onRemove(Host host) {
        onHostGone(host);
    }

    private void onHostGone(Host host) {
        Connection current = connectionRef.get();

        if (current != null && current.address.equals(host.getSocketAddress())) {
            logger.debug("[Control connection] {} is down/removed and it was the control host, triggering reconnect",
                    current.address);
            if (!current.isClosed())
                current.closeAsync().force();
            backgroundReconnect(0);
        }
    }

    @Override
    public void onConnectionDefunct(Connection connection) {
        if (connection == connectionRef.get())
            backgroundReconnect(0);
    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy