// com.aerospike.client.cluster.Cluster (Maven / Gradle / Ivy)
// Artifact: aerospike-client-jdk8 (all versions and documentation are listed on the artifact page)
/*
* Copyright 2012-2023 Aerospike, Inc.
*
* Portions may be licensed to Aerospike, Inc. under one or more contributor
* license agreements WHICH ARE COMPATIBLE WITH THE APACHE LICENSE, VERSION 2.0.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package com.aerospike.client.cluster;
import java.io.Closeable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.concurrent.ConcurrentLinkedDeque;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.atomic.AtomicReferenceArray;
import com.aerospike.client.AerospikeClient;
import com.aerospike.client.AerospikeException;
import com.aerospike.client.Host;
import com.aerospike.client.Log;
import com.aerospike.client.ResultCode;
import com.aerospike.client.admin.AdminCommand;
import com.aerospike.client.async.EventLoop;
import com.aerospike.client.async.EventLoopStats;
import com.aerospike.client.async.EventLoops;
import com.aerospike.client.async.EventState;
import com.aerospike.client.async.Monitor;
import com.aerospike.client.async.NettyTlsContext;
import com.aerospike.client.async.NioEventLoops;
import com.aerospike.client.cluster.Node.AsyncPool;
import com.aerospike.client.command.Buffer;
import com.aerospike.client.listener.ClusterStatsListener;
import com.aerospike.client.metrics.MetricsListener;
import com.aerospike.client.metrics.MetricsPolicy;
import com.aerospike.client.metrics.MetricsWriter;
import com.aerospike.client.policy.AuthMode;
import com.aerospike.client.policy.ClientPolicy;
import com.aerospike.client.policy.TCPKeepAlive;
import com.aerospike.client.policy.TlsPolicy;
import com.aerospike.client.util.ThreadLocalData;
import com.aerospike.client.util.Util;
public class Cluster implements Runnable, Closeable {
// Client back pointer.
public final AerospikeClient client;
// Expected cluster name.
protected final String clusterName;
// Initial host nodes specified by user. Replaced wholesale (never mutated in place).
private volatile Host[] seeds;
// All host aliases for all nodes in cluster, keyed by alias host.
// Only accessed within cluster tend thread.
protected final HashMap<Host,Node> aliases;
// Map of active nodes in cluster, keyed by node name.
// Only accessed within cluster tend thread.
protected final HashMap<String,Node> nodesMap;
// Active nodes in cluster. Replaced wholesale (never mutated in place).
private volatile Node[] nodes;
// Hints for best node for a partition, keyed by namespace.
public volatile HashMap<String,Partitions> partitionMap;
// IP translations.
protected final Map<String,String> ipMap;
// TLS connection policy.
public final TlsPolicy tlsPolicy;
// Netty TLS context.
public final NettyTlsContext nettyTlsContext;
// Authentication mode.
public final AuthMode authMode;
// User name in UTF-8 encoded bytes.
protected final byte[] user;
// Password in UTF-8 encoded bytes.
private byte[] password;
// Password in hashed format in bytes.
private byte[] passwordHash;
// Random node index.
private final AtomicInteger nodeIndex;
// Random partition replica index.
final AtomicInteger replicaIndex;
// Count of connections in recover queue.
private final AtomicInteger recoverCount;
// Thread-safe queue of sync connections to be recovered.
private final ConcurrentLinkedDeque<ConnectionRecover> recoverQueue;
// Thread pool used in synchronous batch, scan and query commands.
private final ExecutorService threadPool;
// TCP keep-alive configuration. Only used in native netty epoll library.
public final TCPKeepAlive keepAlive;
// Optional event loops for async mode.
public final EventLoops eventLoops;
// Extra event loop state for this cluster.
public final EventState[] eventState;
// Maximum socket idle to validate connections in transactions.
private final long maxSocketIdleNanosTran;
// Maximum socket idle to trim peak connections to min connections.
private final long maxSocketIdleNanosTrim;
// Minimum sync connections per node.
protected final int minConnsPerNode;
// Maximum sync connections per node.
protected final int maxConnsPerNode;
// Minimum async connections per node.
protected final int asyncMinConnsPerNode;
// Maximum async connections per node.
protected final int asyncMaxConnsPerNode;
// Sync connection pools per node.
protected final int connPoolsPerNode;
// Max errors per node per errorRateWindow.
int maxErrorRate;
// Number of tend iterations defining window for maxErrorRate.
int errorRateWindow;
// Initial connection timeout.
public final int connectTimeout;
// Login timeout.
public final int loginTimeout;
// Cluster close timeout.
public final int closeTimeout;
// Rack id.
public final int[] rackIds;
// Count of add node failures in the most recent cluster tend iteration.
// NOTE(review): tend() adds to this counter with "+=" rather than assigning it,
// so the value looks cumulative - confirm which meaning is intended.
private volatile int invalidNodeCount;
// Interval in milliseconds between cluster tends.
private final int tendInterval;
// Cluster tend counter
private int tendCount;
// Has cluster instance been closed.
private AtomicBoolean closed;
// Tend thread variables.
private Thread tendThread;
protected volatile boolean tendValid;
// Is threadPool shared with other client instances?
private final boolean sharedThreadPool;
// Should use "services-alternate" instead of "services" in info request?
protected final boolean useServicesAlternate;
// Request server rack ids.
final boolean rackAware;
// Verify clusterName if populated.
public final boolean validateClusterName;
// Is authentication enabled
public final boolean authEnabled;
// Does cluster support query by partition.
// Recomputed whenever the nodes array is replaced.
public boolean hasPartitionQuery;
// NOTE(review): not referenced in the visible part of this file; presumably
// tracks async shutdown completion - confirm against close().
private boolean asyncComplete;
// True while metrics collection is switched on (see enableMetrics/disableMetrics).
public boolean metricsEnabled;
// Policy supplied to the most recent enableMetrics() call.
MetricsPolicy metricsPolicy;
// Listener receiving metrics callbacks; set by enableMetrics().
private volatile MetricsListener metricsListener;
// NOTE(review): the three counters below are not updated in the visible part
// of this file; presumably they feed metrics snapshots - confirm.
private final AtomicLong retryCount = new AtomicLong();
private final AtomicLong tranCount = new AtomicLong();
private final AtomicLong delayQueueTimeoutCount = new AtomicLong();
public Cluster(AerospikeClient client, ClientPolicy policy, Host[] hosts) {
this.client = client;
this.clusterName = policy.clusterName;
this.validateClusterName = policy.validateClusterName;
this.tlsPolicy = policy.tlsPolicy;
this.authMode = policy.authMode;
// Default TLS names when TLS enabled.
if (tlsPolicy != null) {
boolean useClusterName = clusterName != null && clusterName.length() > 0;
for (int i = 0; i < hosts.length; i++) {
Host host = hosts[i];
if (host.tlsName == null) {
String tlsName = useClusterName ? clusterName : host.name;
hosts[i] = new Host(host.name, tlsName, host.port);
}
}
}
else {
if (authMode == AuthMode.EXTERNAL || authMode == AuthMode.PKI) {
throw new AerospikeException("TLS is required for authentication mode: " + authMode);
}
}
this.seeds = hosts;
if (policy.authMode == AuthMode.PKI) {
this.authEnabled = true;
this.user = null;
}
else if (policy.user != null && policy.user.length() > 0) {
this.authEnabled = true;
this.user = Buffer.stringToUtf8(policy.user);
// Only store clear text password if external authentication is used.
if (authMode != AuthMode.INTERNAL) {
this.password = Buffer.stringToUtf8(policy.password);
}
String pass = policy.password;
if (pass == null)
{
pass = "";
}
pass = AdminCommand.hashPassword(pass);
this.passwordHash = Buffer.stringToUtf8(pass);
}
else {
this.authEnabled = false;
this.user = null;
}
if (policy.maxSocketIdle < 0) {
throw new AerospikeException("Invalid maxSocketIdle: " + policy.maxSocketIdle);
}
if (policy.maxSocketIdle == 0) {
maxSocketIdleNanosTran = 0;
maxSocketIdleNanosTrim = TimeUnit.SECONDS.toNanos(55);
}
else {
maxSocketIdleNanosTran = TimeUnit.SECONDS.toNanos(policy.maxSocketIdle);
maxSocketIdleNanosTrim = maxSocketIdleNanosTran;
}
minConnsPerNode = policy.minConnsPerNode;
maxConnsPerNode = policy.maxConnsPerNode;
if (minConnsPerNode > maxConnsPerNode) {
throw new AerospikeException("Invalid connection range: " + minConnsPerNode + " - " + maxConnsPerNode);
}
asyncMinConnsPerNode = policy.asyncMinConnsPerNode;
asyncMaxConnsPerNode = (policy.asyncMaxConnsPerNode >= 0)? policy.asyncMaxConnsPerNode : policy.maxConnsPerNode;
if (asyncMinConnsPerNode > asyncMaxConnsPerNode) {
throw new AerospikeException("Invalid async connection range: " + asyncMinConnsPerNode + " - " + asyncMaxConnsPerNode);
}
connPoolsPerNode = policy.connPoolsPerNode;
maxErrorRate = policy.maxErrorRate;
errorRateWindow = policy.errorRateWindow;
connectTimeout = policy.timeout;
loginTimeout = policy.loginTimeout;
closeTimeout = policy.closeTimeout;
tendInterval = policy.tendInterval;
ipMap = policy.ipMap;
keepAlive = policy.keepAlive;
if (policy.threadPool == null) {
threadPool = Executors.newCachedThreadPool(new ThreadDaemonFactory());
}
else {
threadPool = policy.threadPool;
}
sharedThreadPool = policy.sharedThreadPool;
useServicesAlternate = policy.useServicesAlternate;
rackAware = policy.rackAware;
if (policy.rackIds != null && policy.rackIds.size() > 0) {
List list = policy.rackIds;
int max = list.size();
rackIds = new int[max];
for (int i = 0; i < max; i++) {
rackIds[i] = list.get(i);
}
}
else {
rackIds = new int[] {policy.rackId};
}
aliases = new HashMap();
nodesMap = new HashMap();
nodes = new Node[0];
partitionMap = new HashMap();
nodeIndex = new AtomicInteger();
replicaIndex = new AtomicInteger();
recoverCount = new AtomicInteger();
recoverQueue = new ConcurrentLinkedDeque();
closed = new AtomicBoolean();
eventLoops = policy.eventLoops;
if (eventLoops != null) {
EventLoop[] loops = eventLoops.getArray();
if (asyncMaxConnsPerNode < loops.length) {
throw new AerospikeException("asyncMaxConnsPerNode " + asyncMaxConnsPerNode +
" must be >= event loop count " + loops.length);
}
eventState = new EventState[loops.length];
for (int i = 0; i < loops.length; i++) {
eventState[i] = loops[i].createState();
}
if (policy.tlsPolicy != null) {
if (eventLoops instanceof NioEventLoops) {
throw new AerospikeException("TLS not supported in direct NIO event loops");
}
if (policy.tlsPolicy.nettyContext != null) {
nettyTlsContext = policy.tlsPolicy.nettyContext;
}
else {
nettyTlsContext = new NettyTlsContext(policy.tlsPolicy);
}
}
else {
nettyTlsContext = null;
}
}
else {
eventState = null;
nettyTlsContext = null;
}
if (policy.forceSingleNode) {
// Communicate with the first seed node only.
// Do not run cluster tend thread.
try {
forceSingleNode();
}
catch (Throwable e) {
close();
throw e;
}
}
else {
initTendThread(policy.failIfNotConnected);
}
}
/**
 * Bind the cluster to the first seed only. The tend thread object is created
 * but never started, and every partition replica slot is pointed at the seed node.
 *
 * @throws AerospikeException if the first seed cannot be validated
 */
public void forceSingleNode() {
    // Initialize tendThread, but do not start it.
    tendValid = true;
    tendThread = new Thread(this);

    // Validate first seed.
    Host seed = seeds[0];
    NodeValidator nv = new NodeValidator();
    Node node;

    try {
        node = nv.seedNode(this, seed, null);
    }
    catch (Throwable e) {
        throw new AerospikeException("Seed " + seed + " failed: " + e.getMessage(), e);
    }

    // seedNode() callers elsewhere in this class treat null as "no usable node";
    // fail with a descriptive error instead of a NullPointerException.
    if (node == null) {
        throw new AerospikeException("Seed " + seed + " failed: no node returned");
    }

    node.createMinConnections();

    // Add seed node to nodes.
    HashMap<String,Node> nodesToAdd = new HashMap<>(1);
    nodesToAdd.put(node.getName(), node);
    addNodes(nodesToAdd);

    // Initialize partitionMaps.
    Peers peers = new Peers(nodes.length + 16);
    node.refreshPartitions(peers);

    // Set partition maps for all namespaces to point to same node.
    for (Partitions partitions : partitionMap.values()) {
        for (AtomicReferenceArray<Node> nodeArray : partitions.replicas) {
            int max = nodeArray.length();

            for (int i = 0; i < max; i++) {
                nodeArray.set(i, node);
            }
        }
    }
}
/**
 * Run the initial tend, register every discovered node as an additional seed
 * and start the background tend thread (a daemon thread named "tend").
 *
 * @param failIfNotConnected when true, a failed initial connection raises an exception
 */
public void initTendThread(boolean failIfNotConnected) {
    // Tend cluster until all nodes identified.
    waitTillStabilized(failIfNotConnected);

    if (Log.debugEnabled()) {
        for (Host host : seeds) {
            Log.debug("Add seed " + host);
        }
    }

    // Add other nodes as seeds, if they don't already exist.
    // Take one snapshot of the node array so sizing and iteration agree.
    Node[] nodeArray = nodes;
    List<Host> seedsToAdd = new ArrayList<>(nodeArray.length);

    for (Node node : nodeArray) {
        Host host = node.getHost();

        if (! findSeed(host)) {
            seedsToAdd.add(host);
        }
    }

    if (! seedsToAdd.isEmpty()) {
        addSeeds(seedsToAdd.toArray(new Host[seedsToAdd.size()]));
    }

    // Run cluster tend thread.
    tendValid = true;
    tendThread = new Thread(this);
    tendThread.setName("tend");
    tendThread.setDaemon(true);
    tendThread.start();
}
/**
 * Append hosts to the seed list by publishing a new, larger array.
 * The existing array is never modified.
 *
 * @param hosts seeds to append; duplicates are not filtered here
 */
public final void addSeeds(Host[] hosts) {
    // Must copy array reference once so that sizing and copying use the same array.
    Host[] current = seeds;
    Host[] seedArray = new Host[current.length + hosts.length];

    // Add existing seeds.
    System.arraycopy(current, 0, seedArray, 0, current.length);

    // Add new seeds.
    int count = current.length;

    for (Host host : hosts) {
        if (Log.debugEnabled()) {
            Log.debug("Add seed " + host);
        }
        seedArray[count++] = host;
    }

    // Replace seeds with copy.
    seeds = seedArray;
}
/**
 * Report whether the given host is already present in the seed list.
 *
 * @param search host to look for (compared with {@code equals})
 * @return true when an equal seed exists
 */
private final boolean findSeed(Host search) {
    final Host[] known = seeds;
    int i = 0;

    while (i < known.length) {
        if (known[i].equals(search)) {
            return true;
        }
        i++;
    }
    return false;
}
/**
 * Run a single initial tend iteration before returning control to the caller.
 * If no node was found afterwards, either throw or log a warning, depending
 * on {@code failIfNotConnected}.
 */
private final void waitTillStabilized(boolean failIfNotConnected) {
    // One tend call is enough: partition maps are requested in the same
    // iteration in which the nodes are added.
    tend(failIfNotConnected, true);

    if (nodes.length != 0) {
        return;
    }

    final String message = "Cluster seed(s) failed";

    if (! failIfNotConnected) {
        Log.warn(message);
        return;
    }
    throw new AerospikeException(message);
}
/**
 * Tend thread main loop: tend the cluster, then sleep for the tend interval,
 * until {@code tendValid} is cleared. Failures of a single iteration are
 * logged as warnings and do not stop the loop.
 */
@Override
public final void run() {
    while (tendValid) {
        // Tend cluster.
        try {
            tend(false, false);
        }
        catch (Throwable e) {
            if (Log.warnEnabled()) {
                Log.warn("Cluster tend failed: " + Util.getErrorMessage(e));
            }
        }
        // Sleep between polling intervals.
        Util.sleep(tendInterval);
    }
}
/**
 * Check health of all nodes in the cluster.
 * <p>
 * One iteration seeds the cluster when no nodes are known, otherwise refreshes
 * every known node, applies node additions and removals, refreshes partition
 * and rack data for nodes that flagged a change, and then runs the periodic
 * jobs (connection balancing, error window reset, metrics snapshot, recover queue).
 *
 * @param failIfNotConnected passed to seeding; also gates the init abort below
 * @param isInit true only for the initial tend performed during construction
 */
private final void tend(boolean failIfNotConnected, boolean isInit) {
// All node additions/deletions are performed in tend thread.
// Initialize tend iteration node statistics.
Peers peers = new Peers(nodes.length + 16);
// Clear node reference counts.
for (Node node : nodes) {
node.referenceCount = 0;
node.partitionChanged = false;
node.rebalanceChanged = false;
}
// If active nodes don't exist, seed cluster.
if (nodes.length == 0) {
seedNode(peers, failIfNotConnected);
// Abort cluster init if all peers of the seed are not reachable and failIfNotConnected is true.
if (isInit && failIfNotConnected && nodes.length == 1 && peers.getInvalidCount() > 0) {
peers.clusterInitError();
}
}
else {
// Refresh all known nodes.
for (Node node : nodes) {
node.refresh(peers);
}
// Refresh peers when necessary.
if (peers.genChanged) {
// Refresh peers for all nodes that responded the first time even if only one node's peers changed.
peers.refreshCount = 0;
for (Node node : nodes) {
node.refreshPeers(peers);
}
// Handle nodes changes determined from refreshes.
findNodesToRemove(peers);
// Remove nodes in a batch.
if (peers.removeList.size() > 0) {
removeNodes(peers.removeList);
}
}
// Add peer nodes to cluster.
if (peers.nodes.size() > 0) {
addNodes(peers.nodes);
refreshPeers(peers);
}
}
// NOTE(review): this accumulates across iterations, while the field comment
// describes a per-iteration count - confirm which is intended.
invalidNodeCount += peers.getInvalidCount();
// Refresh partition map when necessary.
for (Node node : nodes) {
if (node.partitionChanged) {
node.refreshPartitions(peers);
}
if (node.rebalanceChanged) {
node.refreshRacks();
}
}
tendCount++;
// Balance connections every 30 tend iterations.
if (tendCount % 30 == 0) {
for (Node node : nodes) {
node.balanceConnections();
}
if (eventState != null) {
// Async pools are balanced by a task submitted to each event loop.
for (EventState es : eventState) {
final EventLoop eventLoop = es.eventLoop;
eventLoop.execute(new Runnable() {
public void run() {
try {
// Read the current node array when the task runs, not when it was queued.
final Node[] nodeArray = nodes;
for (Node node : nodeArray) {
node.balanceAsyncConnections(eventLoop);
}
}
catch (Throwable e) {
if (Log.warnEnabled()) {
Log.warn("balanceAsyncConnections failed: " + Util.getErrorMessage(e));
}
}
}
});
}
}
}
// Reset connection error window for all nodes every errorRateWindow tend iterations.
// NOTE(review): an errorRateWindow of zero would make the modulo below throw
// ArithmeticException - confirm the value is validated elsewhere.
if (maxErrorRate > 0 && tendCount % errorRateWindow == 0) {
for (Node node : nodes) {
node.resetErrorRate();
}
}
// Emit a metrics snapshot every metricsPolicy.interval tend iterations.
if (metricsEnabled && (tendCount % metricsPolicy.interval) == 0) {
metricsListener.onSnapshot(this);
}
// Drain or re-queue connections waiting for recovery.
processRecoverQueue();
}
/**
 * Try each seed in order until one yields a usable node, then add that node
 * and its peers to the cluster.
 *
 * @param peers              tend iteration state; failed seeds are recorded on it
 * @param failIfNotConnected when true, per-seed failures are collected and reported
 *                           in a single exception instead of being logged
 * @return true when a seed (or the validator's fallback node) was added,
 *         false when nothing could be added and failIfNotConnected is false
 * @throws AerospikeException.Connection when a seed declares a TLS name without a
 *         client TLS policy, or when all seeds fail and failIfNotConnected is true
 */
private final boolean seedNode(Peers peers, boolean failIfNotConnected) {
    // Must copy array reference for copy on write semantics to work.
    Host[] seedArray = seeds;
    Throwable[] exceptions = null;
    NodeValidator nv = new NodeValidator();

    for (int i = 0; i < seedArray.length; i++) {
        Host seed = seedArray[i];

        try {
            Node node = nv.seedNode(this, seed, peers);

            if (node != null) {
                addSeedAndPeers(node, peers);
                return true;
            }
        }
        catch (Throwable e) {
            peers.fail(seed);

            if (seed.tlsName != null && tlsPolicy == null) {
                // Fail immediately for known configuration errors like this.
                throw new AerospikeException.Connection("Seed host tlsName '" + seed.tlsName +
                    "' defined but client tlsPolicy not enabled", e);
            }

            // Store exception and try next seed.
            if (failIfNotConnected) {
                if (exceptions == null) {
                    // Allocate Throwable[]: an Exception[] would raise
                    // ArrayStoreException when the caught Throwable is an Error.
                    exceptions = new Throwable[seedArray.length];
                }
                exceptions[i] = e;
            }
            else {
                if (Log.warnEnabled()) {
                    Log.warn("Seed " + seed + " failed: " + Util.getErrorMessage(e));
                }
            }
        }
    }

    // No seeds valid. Use fallback node if it exists.
    if (nv.fallback != null) {
        // When a fallback is used, peers refreshCount is reset to zero.
        // refreshCount should always be one at this point.
        peers.refreshCount = 1;
        addSeedAndPeers(nv.fallback, peers);
        return true;
    }

    if (failIfNotConnected) {
        StringBuilder sb = new StringBuilder(500);
        sb.append("Failed to connect to ["+ seedArray.length +"] host(s): ");
        sb.append(System.lineSeparator());

        for (int i = 0; i < seedArray.length; i++) {
            sb.append(seedArray[i]);
            sb.append(' ');

            Throwable ex = exceptions == null ? null : exceptions[i];

            if (ex != null) {
                sb.append(ex.getMessage());
                sb.append(System.lineSeparator());
            }
        }
        throw new AerospikeException.Connection(sb.toString());
    }
    return false;
}
/**
 * Install a validated seed node together with the peers discovered so far,
 * replacing any previous node map content, then keep refreshing peers while
 * new ones are pending.
 */
private void addSeedAndPeers(Node seed, Peers peers) {
    seed.createMinConnections();

    // Start from a clean node map before registering the seed and its peers.
    nodesMap.clear();
    addNodes(seed, peers);

    if (! peers.nodes.isEmpty()) {
        refreshPeers(peers);
    }
}
/**
 * Repeatedly refresh the peers of newly found nodes until a pass
 * discovers no further nodes. Nodes found by a pass are added to the cluster
 * before the next pass starts.
 */
private void refreshPeers(Peers peers) {
    boolean foundMore;

    do {
        // Snapshot the pending peer nodes, then reset the pending set so that
        // the refresh calls below can report newly discovered peers in it.
        final Node[] pending = new Node[peers.nodes.size()];
        int filled = 0;

        for (Node peer : peers.nodes.values()) {
            pending[filled] = peer;
            filled++;
        }
        peers.nodes.clear();

        // Refreshing peers of peers retrieves each node's peersCount, which is
        // used when refreshing partitions. It may also surface more peers.
        for (Node peer : pending) {
            peer.refreshPeers(peers);
        }

        foundMore = peers.nodes.size() > 0;

        if (foundMore) {
            // Add new peer nodes to cluster.
            addNodes(peers.nodes);
        }
    } while (foundMore);
}
/**
 * Create a node from validator results and open its minimum connections.
 *
 * @param nv validator holding the data gathered for the node
 * @return the newly created node
 */
protected Node createNode(NodeValidator nv) {
    final Node created = new Node(this, nv);
    created.createMinConnections();
    return created;
}
/**
 * Collect the nodes that should leave the cluster into {@code peers.removeList}.
 * A node is selected when it is inactive, when no node answered the refresh and
 * it has at least 5 failures, or when it is unreferenced by other nodes and is
 * either failing or owns no partition.
 */
private final void findNodesToRemove(Peers peers) {
    final int refreshed = peers.refreshCount;
    final ArrayList doomed = peers.removeList;

    for (Node candidate : nodes) {
        boolean remove;

        if (! candidate.isActive()) {
            // Inactive nodes must be removed.
            remove = true;
        }
        else if (refreshed == 0 && candidate.failures >= 5) {
            // All node info requests failed and this node had 5 consecutive failures.
            // If no nodes are left, seeds will be tried in next cluster tend iteration.
            remove = true;
        }
        else if (nodes.length > 1 && refreshed >= 1 && candidate.referenceCount == 0) {
            // Node is not referenced by other nodes. Remove it when it is not
            // responding, or when it responds but has no partitions mapped to it.
            remove = candidate.failures != 0 || ! findNodeInPartitionMap(candidate);
        }
        else {
            remove = false;
        }

        if (remove) {
            doomed.add(candidate);
        }
    }
}
/**
 * Report whether any replica slot of any namespace currently points at the node.
 *
 * @param filter node to look for
 * @return true when the node owns at least one partition replica slot
 */
private final boolean findNodeInPartitionMap(Node filter) {
    for (Partitions partitions : partitionMap.values()) {
        for (AtomicReferenceArray<Node> nodeArray : partitions.replicas) {
            int max = nodeArray.length();

            for (int i = 0; i < max; i++) {
                // Use reference equality for performance.
                if (nodeArray.get(i) == filter) {
                    return true;
                }
            }
        }
    }
    return false;
}
/**
 * Publish a node array made of the seed followed by all pending peers.
 * The previous node array content is not carried over.
 */
private void addNodes(Node seed, Peers peers) {
    // Build the complete array first so the published array is swapped only once.
    final Node[] merged = new Node[1 + peers.nodes.size()];

    // Seed goes first.
    merged[0] = seed;
    addNode(seed);

    // Peers follow.
    int next = 1;

    for (Node peer : peers.nodes.values()) {
        merged[next] = peer;
        next++;
        addNode(peer);
    }

    hasPartitionQuery = Cluster.supportsPartitionQuery(merged);

    // Publish the new array.
    nodes = merged;
}
/**
 * Add nodes by publishing a new array that holds the current nodes followed
 * by the new ones. The existing array is never modified.
 *
 * @param nodesToAdd nodes to append, keyed by node name
 */
private final void addNodes(HashMap<String,Node> nodesToAdd) {
    // Add all nodes at once to avoid copying entire array multiple times.
    // Read the node array once so sizing and copying use the same array.
    Node[] current = nodes;
    Node[] nodeArray = new Node[current.length + nodesToAdd.size()];

    // Add existing nodes.
    System.arraycopy(current, 0, nodeArray, 0, current.length);

    // Add new nodes.
    int count = current.length;

    for (Node node : nodesToAdd.values()) {
        nodeArray[count++] = node;
        addNode(node);
    }

    hasPartitionQuery = Cluster.supportsPartitionQuery(nodeArray);

    // Replace nodes with copy.
    nodes = nodeArray;
}
/**
 * Register a node in the name map and register each of its aliases in the
 * alias map. Both maps are documented as tend-thread only, so no locking is used.
 */
private final void addNode(Node node) {
    if (Log.infoEnabled()) {
        Log.info("Add node " + node);
    }

    final String key = node.getName();
    nodesMap.put(key, node);

    // Every alias of the node resolves to the node itself.
    for (Host alias : node.aliases) {
        aliases.put(alias, node);
    }
}
/**
 * Remove nodes from the cluster: unregister names and aliases, give the
 * metrics listener a chance to record each node, close each node and finally
 * publish a node array without them.
 *
 * @param nodesToRemove nodes selected for removal
 */
private final void removeNodes(List<Node> nodesToRemove) {
    // There is no need to delete nodes from partitionWriteMap because the nodes
    // have already been set to inactive. Further connection requests will result
    // in an exception and a different node will be tried.

    // Cleanup node resources.
    for (Node node : nodesToRemove) {
        // Remove node from map.
        nodesMap.remove(node.getName());

        // Remove node's aliases from cluster alias set.
        // Aliases are only used in tend thread, so synchronization is not necessary.
        for (Host alias : node.aliases) {
            aliases.remove(alias);
        }

        if (metricsEnabled) {
            // Flush node metrics before removal. A listener failure must not
            // prevent the node from being closed.
            try {
                metricsListener.onNodeClose(node);
            }
            catch (Throwable e) {
                if (Log.warnEnabled()) {
                    Log.warn("Write metrics failed on " + node + ": " + Util.getErrorMessage(e));
                }
            }
        }
        node.close();
    }

    // Remove all nodes at once to avoid copying entire array multiple times.
    removeNodesCopy(nodesToRemove);
}
/**
 * Remove nodes by publishing a new array that holds only the surviving nodes.
 * The existing array is never modified.
 *
 * @param nodesToRemove nodes that must not appear in the new array
 */
private final void removeNodesCopy(List<Node> nodesToRemove) {
    // Read the node array once so sizing and filtering use the same array.
    Node[] current = nodes;

    // Nodes are expected to be marked for removal only through references taken
    // from the nodes array, so every entry of nodesToRemove should be found.
    int expected = current.length - nodesToRemove.size();
    List<Node> kept = new ArrayList<>(Math.max(expected, 0));

    // Add nodes that are not in remove list.
    for (Node node : current) {
        if (findNode(node, nodesToRemove)) {
            if (Log.infoEnabled()) {
                Log.info("Remove node " + node);
            }
        }
        else {
            kept.add(node);
        }
    }

    // Do sanity check to make sure assumptions are correct. Collecting into a
    // list keeps a mismatch in either direction from overflowing a fixed array.
    if (kept.size() != expected) {
        if (Log.warnEnabled()) {
            Log.warn("Node remove mismatch. Expected " + expected + " Received " + kept.size());
        }
    }

    Node[] nodeArray = kept.toArray(new Node[kept.size()]);
    hasPartitionQuery = Cluster.supportsPartitionQuery(nodeArray);

    // Replace nodes with copy.
    nodes = nodeArray;
}
/**
 * Report whether the list contains a node equal to the given one.
 *
 * @param search   node to look for
 * @param nodeList nodes to scan; each element is compared with {@code equals}
 * @return true when an equal node is present
 */
private final static boolean findNode(Node search, List<Node> nodeList) {
    for (Node node : nodeList) {
        if (node.equals(search)) {
            return true;
        }
    }
    return false;
}
/**
 * Report whether the cluster is usable: tending is valid and at least one
 * node is active with fewer than 5 consecutive tend failures.
 */
public final boolean isConnected() {
    // Work on one snapshot of the published node array.
    final Node[] snapshot = nodes;

    if (snapshot.length == 0 || ! tendValid) {
        return false;
    }

    // Nodes exist, but they may not be responding. One healthy node is enough.
    for (int i = 0; i < snapshot.length; i++) {
        final Node candidate = snapshot[i];

        if (candidate.active && candidate.failures < 5) {
            return true;
        }
    }
    return false;
}
/**
 * Return an active node, starting at a rotating index and scanning the node
 * array at most once.
 *
 * @return the first active node found
 * @throws AerospikeException.InvalidNode when no active node exists
 */
public final Node getRandomNode() throws AerospikeException.InvalidNode {
    // Work on one snapshot of the published node array.
    final Node[] snapshot = nodes;
    final int size = snapshot.length;

    if (size > 0) {
        final int start = Math.abs(nodeIndex.getAndIncrement() % size);

        for (int step = 0; step < size; step++) {
            final Node candidate = snapshot[(start + step) % size];

            if (candidate.isActive()) {
                return candidate;
            }
        }
    }
    throw new AerospikeException.InvalidNode("Cluster is empty");
}
/**
 * Return the currently published node array. The array itself is returned,
 * not a copy.
 */
public final Node[] getNodes() {
    // A single read of the volatile field yields a consistent array.
    return nodes;
}
/**
 * Return the currently published node array, failing when it is empty.
 *
 * @return non-empty node array
 * @throws AerospikeException with SERVER_NOT_AVAILABLE when there are no nodes
 */
public final Node[] validateNodes() {
    // Work on one snapshot of the published node array.
    final Node[] snapshot = nodes;

    if (snapshot.length != 0) {
        return snapshot;
    }
    throw new AerospikeException(ResultCode.SERVER_NOT_AVAILABLE, "Cluster is empty");
}
/**
 * Look up a node by name.
 *
 * @param nodeName name to search for
 * @return the matching node
 * @throws AerospikeException.InvalidNode when no node has that name
 */
public final Node getNode(String nodeName) throws AerospikeException.InvalidNode {
    final Node match = findNode(nodeName);

    if (match != null) {
        return match;
    }
    throw new AerospikeException.InvalidNode("Invalid node name: " + nodeName);
}
/**
 * Scan the published node array for a node with the given name.
 *
 * @param nodeName name to search for
 * @return the first node whose name equals {@code nodeName}, or null
 */
protected final Node findNode(String nodeName) {
    // Work on one snapshot of the published node array.
    final Node[] snapshot = nodes;

    for (int i = 0; i < snapshot.length; i++) {
        final Node candidate = snapshot[i];

        if (candidate.getName().equals(nodeName)) {
            return candidate;
        }
    }
    return null;
}
/**
 * Report whether a connection last used at {@code lastUsed} (a
 * {@code System.nanoTime()} value) is still within the transaction idle limit.
 * A limit of zero disables the check.
 */
public final boolean isConnCurrentTran(long lastUsed) {
    if (maxSocketIdleNanosTran == 0) {
        return true;
    }
    final long idleNanos = System.nanoTime() - lastUsed;
    return idleNanos <= maxSocketIdleNanosTran;
}
/**
 * Report whether a connection last used at {@code lastUsed} (a
 * {@code System.nanoTime()} value) is still within the trim idle limit.
 */
public final boolean isConnCurrentTrim(long lastUsed) {
    final long idleNanos = System.nanoTime() - lastUsed;
    return idleNanos <= maxSocketIdleNanosTrim;
}
/**
 * Queue a connection for deferred recovery, unless it is already complete.
 * When 10000 connections are already queued, the connection is aborted instead.
 * <p>
 * Background: many cloud providers encounter performance problems when the
 * client closes a socket while the server still has data left to write, so the
 * connection is parked on a queue and closed later.
 */
public final void recoverConnection(ConnectionRecover cs) {
    if (cs.isComplete()) {
        return;
    }

    // Reserve a slot first; give it back when the queue is already full.
    final int queuedBefore = recoverCount.getAndIncrement();

    if (queuedBefore >= 10000) {
        recoverCount.getAndDecrement();
        cs.abort();
        return;
    }
    recoverQueue.offerLast(cs);
}
/**
 * Make one pass over the recover queue: entries whose drain reports success
 * are dropped and the counter is decremented, others are re-queued at the tail.
 * The pass stops after the entry that was last when the pass started.
 */
private void processRecoverQueue() {
    final byte[] scratch = ThreadLocalData.getBuffer();
    final ConnectionRecover stopAt = recoverQueue.peekLast();

    for (ConnectionRecover cs = recoverQueue.pollFirst(); cs != null; cs = recoverQueue.pollFirst()) {
        if (cs.drain(scratch)) {
            recoverCount.getAndDecrement();
        }
        else {
            recoverQueue.offerLast(cs);
        }

        // Entries re-queued during this pass sit behind the marker; leave them
        // for the next pass.
        if (cs == stopAt) {
            break;
        }
    }
}
/**
 * Switch metrics on with the given policy. If metrics are already on, the
 * current listener is told to disable first. When the policy carries no
 * listener, a {@code MetricsWriter} on {@code policy.reportDir} is used.
 *
 * @param policy metrics configuration
 */
public final void enableMetrics(MetricsPolicy policy) {
    if (metricsEnabled) {
        this.metricsListener.onDisable(this);
    }

    final MetricsListener configured = policy.listener;
    final MetricsListener active = (configured != null) ? configured : new MetricsWriter(policy.reportDir);

    this.metricsListener = active;
    this.metricsPolicy = policy;

    // Enable metrics on every node of the current snapshot.
    for (Node node : nodes) {
        node.enableMetrics(policy);
    }

    active.onEnable(this, policy);
    metricsEnabled = true;
}
/**
 * Switch metrics off and notify the listener. Does nothing when metrics are
 * not enabled.
 */
public final void disableMetrics() {
    if (! metricsEnabled) {
        return;
    }
    metricsEnabled = false;
    metricsListener.onDisable(this);
}
/**
 * Return the configured event loops as an array.
 *
 * @return event loop array, or null when no event loops were configured
 */
public EventLoop[] getEventLoopArray() {
    return (eventLoops != null) ? eventLoops.getArray() : null;
}
/**
 * Collect cluster statistics synchronously.
 * <p>
 * Sync statistics are read directly. When event loops are configured, async
 * statistics are gathered by a task submitted to every event loop and this
 * method waits for all of them, unless it is itself called from an event loop
 * thread, in which case the values are read directly without waiting.
 *
 * @return statistics for all nodes and, when configured, all event loops
 */
public final ClusterStats getStats() {
// Get sync statistics.
final Node[] nodeArray = nodes;
NodeStats[] nodeStats = new NodeStats[nodeArray.length];
int count = 0;
for (Node node : nodeArray) {
nodeStats[count++] = new NodeStats(node);
}
// Get async statistics.
EventLoopStats[] eventLoopStats = null;
if (eventLoops != null) {
EventLoop[] eventLoopArray = eventLoops.getArray();
for (EventLoop eventLoop : eventLoopArray) {
if (eventLoop.inEventLoop()) {
// We can't block in an eventloop thread, so use non-blocking approximate statistics.
// The statistics might be less accurate because cross-thread references are made
// without a lock for the pool's queue size and other counters.
eventLoopStats = new EventLoopStats[eventLoopArray.length];
for (int i = 0; i < eventLoopArray.length; i++) {
eventLoopStats[i] = new EventLoopStats(eventLoopArray[i]);
}
for (int i = 0; i < nodeArray.length; i++) {
nodeStats[i].async = nodeArray[i].getAsyncConnectionStats();
}
return new ClusterStats(this, nodeStats, eventLoopStats);
}
}
// Result slots are indexed by event loop index; each task fills only its own column.
final EventLoopStats[] loopStats = new EventLoopStats[eventLoopArray.length];
final ConnectionStats[][] connStats = new ConnectionStats[nodeArray.length][eventLoopArray.length];
// Countdown of event loops that still have to report.
final AtomicInteger eventLoopCount = new AtomicInteger(eventLoopArray.length);
final Monitor monitor = new Monitor();
for (EventLoop eventLoop : eventLoopArray) {
eventLoop.execute(new Runnable() {
public void run() {
int index = eventLoop.getIndex();
loopStats[index] = new EventLoopStats(eventLoop);
for (int i = 0; i < nodeArray.length; i++) {
AsyncPool pool = nodeArray[i].getAsyncPool(index);
int inPool = pool.queue.size();
// In-use connections are the pool total minus those idle in the queue.
connStats[i][index] = new ConnectionStats(pool.total - inPool, inPool, pool.opened, pool.closed);
}
// The last event loop to finish releases the waiting caller.
// NOTE(review): if a task throws before this point the caller is never
// notified - confirm that is acceptable.
if (eventLoopCount.decrementAndGet() == 0) {
monitor.notifyComplete();
}
}
});
}
// Not running in eventloop thread, so it's ok to wait for results.
monitor.waitTillComplete();
eventLoopStats = loopStats;
// Sum the per event loop connection statistics for each node.
for (int i = 0; i < nodeArray.length; i++) {
int inUse = 0;
int inPool = 0;
int opened = 0;
int closed = 0;
for (EventLoop eventLoop : eventLoopArray) {
ConnectionStats cs = connStats[i][eventLoop.getIndex()];
inUse += cs.inUse;
inPool += cs.inPool;
opened += cs.opened;
closed += cs.closed;
}
nodeStats[i].async = new ConnectionStats(inUse, inPool, opened, closed);
}
}
return new ClusterStats(this, nodeStats, eventLoopStats);
}
/**
 * Collect cluster statistics asynchronously and deliver them to the listener.
 * <p>
 * Without event loops the listener is called immediately with sync statistics
 * only. With event loops, a task runs on every event loop (directly when the
 * caller is already on that loop) and the last task to finish aggregates the
 * results and calls {@code listener.onSuccess}.
 *
 * @param listener receives the statistics, or the failure raised while
 *                 setting up the collection
 */
public final void getStats(ClusterStatsListener listener) {
try {
// Get sync statistics.
final Node[] nodeArray = nodes;
NodeStats[] nodeStats = new NodeStats[nodeArray.length];
int count = 0;
for (Node node : nodeArray) {
nodeStats[count++] = new NodeStats(node);
}
if (eventLoops == null) {
try {
listener.onSuccess(new ClusterStats(this, nodeStats, null));
}
catch (Throwable e) {
// Exceptions thrown by the listener's success callback are ignored here,
// so they are not reported back through onFailure().
}
return;
}
// Get async statistics.
EventLoop[] eventLoopArray = eventLoops.getArray();
// Result slots are indexed by event loop index; each task fills only its own column.
final EventLoopStats[] loopStats = new EventLoopStats[eventLoopArray.length];
final ConnectionStats[][] connStats = new ConnectionStats[nodeArray.length][eventLoopArray.length];
// Countdown of event loops that still have to report.
final AtomicInteger eventLoopCount = new AtomicInteger(eventLoopArray.length);
final Cluster cluster = this;
for (EventLoop eventLoop : eventLoopArray) {
Runnable fetch = new Runnable() {
public void run() {
int index = eventLoop.getIndex();
loopStats[index] = new EventLoopStats(eventLoop);
for (int i = 0; i < nodeArray.length; i++) {
AsyncPool pool = nodeArray[i].getAsyncPool(index);
int inPool = pool.queue.size();
// In-use connections are the pool total minus those idle in the queue.
connStats[i][index] = new ConnectionStats(pool.total - inPool, inPool, pool.opened, pool.closed);
}
if (eventLoopCount.decrementAndGet() == 0) {
// All eventloops reported. Pass results to listener.
for (int i = 0; i < nodeArray.length; i++) {
int inUse = 0;
int inPool = 0;
int opened = 0;
int closed = 0;
// This loop variable shadows the captured outer eventLoop.
for (EventLoop eventLoop : eventLoopArray) {
ConnectionStats cs = connStats[i][eventLoop.getIndex()];
inUse += cs.inUse;
inPool += cs.inPool;
opened += cs.opened;
closed += cs.closed;
}
nodeStats[i].async = new ConnectionStats(inUse, inPool, opened, closed);
}
try {
listener.onSuccess(new ClusterStats(cluster, nodeStats, loopStats));
}
catch (Throwable e) {
// Exceptions thrown by the listener's success callback are ignored here.
}
}
}
};
// Run inline when already on the target event loop, otherwise hand the task to it.
if (eventLoop.inEventLoop()) {
fetch.run();
}
else {
eventLoop.execute(fetch);
}
}
}
catch (AerospikeException ae) {
listener.onFailure(ae);
}
catch (Throwable e) {
// Wrap anything else so the listener always receives an AerospikeException.
listener.onFailure(new AerospikeException(e));
}
}
/**
 * Interrupt the cluster tend thread.
 */
public final void interruptTendSleep() {
    // Breaking tendThread out of sleep() lets node refreshes be performed sooner.
    this.tendThread.interrupt();
}
/**
 * Log the partition map at info level.
 *
 * One line is written per non-null node in the form
 * {@code namespace,replicaIndex,slotIndex,node}, where replicaIndex is the position
 * in the namespace's replicas array and slotIndex is the position inside that
 * replica's node array. Null slots are skipped.
 */
public final void printPartitionMap() {
    for (Entry<String,Partitions> entry : partitionMap.entrySet()) {
        String namespace = entry.getKey();
        Partitions partitions = entry.getValue();
        AtomicReferenceArray<Node>[] replicas = partitions.replicas;

        for (int i = 0; i < replicas.length; i++) {
            AtomicReferenceArray<Node> nodeArray = replicas[i];
            int max = nodeArray.length();

            for (int j = 0; j < max; j++) {
                Node node = nodeArray.get(j);

                if (node != null) {
                    Log.info(namespace + ',' + i + ',' + j + ',' + node);
                }
            }
        }
    }
}
/**
 * Update the stored credentials when the given user is the user held by this cluster.
 * Nothing is changed when this cluster has no user or the user bytes differ.
 *
 * @param user			user whose password changed
 * @param password		clear text password; stored only when authMode is not INTERNAL
 * @param passwordHash	password hash; always stored for a matching user
 */
public void changePassword(byte[] user, byte[] password, byte[] passwordHash) {
    boolean sameUser = (this.user != null) && Arrays.equals(user, this.user);

    if (! sameUser) {
        return;
    }

    this.passwordHash = passwordHash;

    // The clear text password is only kept when external authentication is used.
    if (authMode != AuthMode.INTERNAL) {
        this.password = password;
    }
}
/**
 * Set the maximum number of errors allowed within the configurable window,
 * applied to all nodes.
 *
 * maxErrorRate is intentionally not volatile for performance reasons, so
 * other threads observe the new value only through cache coherency.
 *
 * @param rate	new maximum error rate
 */
public final void setMaxErrorRate(int rate) {
    maxErrorRate = rate;
}
/**
 * Set the number of cluster tend iterations that make up the window used
 * for maxErrorRate.
 *
 * errorRateWindow is intentionally not volatile for performance reasons, so
 * other threads observe the new value only through cache coherency.
 *
 * @param window	new window size in tend iterations
 */
public final void setErrorRateWindow(int window) {
    errorRateWindow = window;
}
/**
 * Return true only when the array is non-empty and every node reports
 * hasPartitionQuery(). Checking stops at the first node that does not.
 */
private static boolean supportsPartitionQuery(Node[] nodes) {
    // An empty node array never counts as supporting partition queries.
    boolean supported = nodes.length > 0;

    for (int i = 0; supported && i < nodes.length; i++) {
        supported = nodes[i].hasPartitionQuery();
    }
    return supported;
}
/**
 * Return the thread pool held by this cluster.
 */
public final ExecutorService getThreadPool() {
    return this.threadPool;
}
/**
 * Return the cluster name held by this cluster. May be null.
 */
public String getClusterName() {
    return this.clusterName;
}
/**
 * Return true when the validateClusterName flag is set and a non-empty
 * cluster name is present.
 */
public boolean validateClusterName() {
    if (! validateClusterName) {
        return false;
    }
    return clusterName != null && ! clusterName.isEmpty();
}
/**
 * Return the user bytes held by this cluster. The internal array is returned, not a copy.
 */
public final byte[] getUser() {
    return this.user;
}
/**
 * Return the password bytes held by this cluster. The internal array is returned, not a copy.
 */
public final byte[] getPassword() {
    return this.password;
}
/**
 * Return the password hash bytes held by this cluster. The internal array is returned, not a copy.
 */
public final byte[] getPasswordHash() {
    return this.passwordHash;
}
/**
 * Return the current value of the tendValid flag. close() sets this flag to false.
 */
public final boolean isActive() {
    return this.tendValid;
}
/**
 * Add one to the transaction count. Does nothing unless metrics are enabled.
 */
public final void addTran() {
    if (! metricsEnabled) {
        return;
    }
    tranCount.incrementAndGet();
}
/**
 * Return the transaction count. The count is cumulative; it is not reset at
 * each metrics interval.
 */
public final long getTranCount() {
    return this.tranCount.get();
}
/**
 * Add one to the transaction retry count. A single transaction may be
 * retried more than once.
 */
public final void addRetry() {
    retryCount.incrementAndGet();
}
/**
 * Add the given amount to the transaction retry count. A single transaction
 * may be retried more than once.
 *
 * @param count	number of retries to add
 */
public final void addRetries(int count) {
    retryCount.addAndGet(count);
}
/**
 * Return the transaction retry count. The count is cumulative; it is not
 * reset at each metrics interval.
 */
public final long getRetryCount() {
    return this.retryCount.get();
}
/**
 * Add one to the async delay queue timeout count.
 */
public final void addDelayQueueTimeout() {
    delayQueueTimeoutCount.incrementAndGet();
}
/**
 * Return async delay queue timeout count.
 */
public final long getDelayQueueTimeoutCount() {
    return delayQueueTimeoutCount.get();
}
/**
 * Return the active thread count of the thread pool. This thread pool is used in
 * sync batch/query/scan commands. Zero is returned when the pool is not a
 * ThreadPoolExecutor.
 */
public final int getThreadsInUse() {
    if (! (threadPool instanceof ThreadPoolExecutor)) {
        return 0;
    }
    return ((ThreadPoolExecutor)threadPool).getActiveCount();
}
/**
 * Return the size of the connection recoverQueue. That queue holds connections that
 * have timed out and must be drained before they can go back to a connection pool.
 * It is only used when {@link com.aerospike.client.policy.Policy#timeoutDelay} is true.
 *
 * recoverQueue is a linked list whose size() is expensive to compute, so the value
 * comes from a separate counter that tracks the queue size.
 */
public final int getRecoverQueueSize() {
    return this.recoverCount.get();
}
/**
 * Return the number of add node failures seen in the most recent cluster tend iteration.
 */
public final int getInvalidNodeCount() {
    return this.invalidNodeCount;
}
/**
 * Close the cluster. Only the first call has any effect.
 *
 * The tend thread is flagged to stop and interrupted, the thread pool is shut
 * down unless it is shared, and metrics are disabled (a failure there is only
 * logged). Node connections are then closed: directly when eventLoops is null,
 * otherwise through a task submitted to every event loop.
 *
 * For event loops that still have pending commands, closeTimeout decides what happens:
 * negative closes immediately, zero re-checks every 200ms with no deadline, and a
 * positive value re-checks every 200ms until that many milliseconds have elapsed.
 *
 * When called from a thread that is not an event loop thread, this method blocks
 * until all event loops have closed their connections.
 */
public void close() {
if (! closed.compareAndSet(false, true)) {
// close() has already been called.
return;
}
// Stop cluster tend thread.
tendValid = false;
tendThread.interrupt();
if (! sharedThreadPool) {
// Shutdown synchronous thread pool.
threadPool.shutdown();
}
try {
disableMetrics();
}
catch (Throwable e) {
// A metrics failure must not prevent the connections from being closed.
Log.warn("DisableMetrics failed: " + Util.getErrorMessage(e));
}
if (eventLoops == null) {
// Close synchronous node connections.
Node[] nodeArray = nodes;
for (Node node : nodeArray) {
node.closeSyncConnections();
}
}
else {
// Send cluster close notification to async event loops.
// A deadline is only computed for a positive closeTimeout (milliseconds).
final long deadline = (closeTimeout > 0)? System.nanoTime() + TimeUnit.MILLISECONDS.toNanos(closeTimeout) : 0L;
// Number of event loops that have not closed yet.
final AtomicInteger eventLoopCount = new AtomicInteger(eventState.length);
final AtomicBoolean closedWithPending = new AtomicBoolean();
boolean inEventLoop = false;
// Send close node notification to async event loops.
for (final EventState state : eventState) {
if (state.eventLoop.inEventLoop()) {
inEventLoop = true;
}
state.eventLoop.execute(new Runnable() {
public void run() {
if (state.closed) {
// Cluster's event loop connections are already closed.
return;
}
if (state.pending > 0) {
// Cluster has pending commands.
// Keep polling while closeTimeout is zero (no deadline) or the deadline has not passed.
// A negative closeTimeout skips the wait entirely.
if (closeTimeout >= 0 && (closeTimeout == 0 || deadline - System.nanoTime() > 0)) {
// Check again in 200ms.
state.eventLoop.schedule(this, 200, TimeUnit.MILLISECONDS);
return;
}
closedWithPending.set(true);
}
// Cluster's event loop connections can now be closed.
closeEventLoop(eventLoopCount, state);
}
});
}
// Deadlock would occur if we wait from an event loop thread.
// Only wait when not in event loop thread.
if (! inEventLoop) {
waitAsyncComplete();
}
// NOTE(review): when called from an event loop thread this check runs without waiting,
// so the flag may not have been set yet by the close tasks.
if (closedWithPending.get()) {
Log.warn("Cluster closed with pending async commands");
}
}
}
/**
 * Close this cluster's connections for a single event loop and mark that event
 * loop's state as closed. When the last event loop has reported, also close the
 * synchronous node connections and signal completion to a thread blocked in
 * waitAsyncComplete().
 *
 * Invoked by close() from a task running on the state's event loop.
 *
 * @param eventLoopCount	number of event loops that have not closed yet
 * @param state				state of the event loop being closed
 */
private final void closeEventLoop(AtomicInteger eventLoopCount, EventState state) {
    // Prevent future cluster commands on this event loop.
    state.closed = true;

    // Close asynchronous node connections for single event loop.
    Node[] nodeArray = nodes;

    for (Node node : nodeArray) {
        node.closeAsyncConnections(state.index);
    }

    if (eventLoopCount.decrementAndGet() == 0) {
        // All event loops have reported.
        // Close synchronous node connections.
        for (Node node : nodeArray) {
            node.closeSyncConnections();
        }
        notifyAsyncComplete();
    }
}
/**
 * Block the calling thread until asyncComplete has been set.
 *
 * An interrupt does not end the wait. It is remembered, and the thread's interrupt
 * status is restored before returning so the caller can still observe it.
 */
private synchronized void waitAsyncComplete() {
    boolean interrupted = false;

    try {
        // Loop on the condition so a wakeup without asyncComplete set goes back to waiting.
        while (! asyncComplete) {
            try {
                wait();
            }
            catch (InterruptedException ie) {
                // Keep waiting for completion, but do not lose the interrupt.
                interrupted = true;
            }
        }
    }
    finally {
        if (interrupted) {
            Thread.currentThread().interrupt();
        }
    }
}
/**
 * Set asyncComplete and wake a thread waiting on this cluster's monitor.
 */
private synchronized void notifyAsyncComplete() {
    this.asyncComplete = true;
    notify();
}
}