com.datastax.driver.core.ReplicationStrategy Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of dse-java-driver-core Show documentation
Show all versions of dse-java-driver-core Show documentation
A driver for DataStax Enterprise (DSE)
and Apache Cassandra 1.2+ clusters that works exclusively with the
Cassandra Query Language version 3 (CQL3) and Cassandra's binary protocol,
supporting DSE-specific features such as geospatial types, DSE Graph and DSE authentication.
/*
* Copyright DataStax, Inc.
*
* This software can be used solely with DataStax Enterprise. Please consult the license at
* http://www.datastax.com/terms/datastax-dse-driver-license-terms
*/
package com.datastax.driver.core;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.*;
/*
* Computes the token->replica set association, given the token ring and the token->primary host map.
*
* Note: it's not an interface mainly because we don't want to expose it.
*/
abstract class ReplicationStrategy {
private static final Logger logger = LoggerFactory.getLogger(ReplicationStrategy.class);
/**
 * Builds the replication strategy described by a keyspace's replication options.
 *
 * <p>The {@code "class"} option selects the implementation by substring match:
 * {@code SimpleStrategy} reads the {@code "replication_factor"} option, while
 * {@code NetworkTopologyStrategy} treats every option other than {@code "class"} as a
 * datacenter name mapped to that datacenter's replication factor.
 *
 * @param replicationOptions the replication options, keyed by option name.
 * @return the matching strategy, or {@code null} when the {@code "class"} option is missing,
 *     the strategy class is not one of the two supported ones, {@code SimpleStrategy} has no
 *     {@code "replication_factor"}, or a replication factor is not a valid integer.
 */
static ReplicationStrategy create(Map<String, String> replicationOptions) {
    String strategyClass = replicationOptions.get("class");
    if (strategyClass == null)
        return null;
    try {
        if (strategyClass.contains("SimpleStrategy")) {
            String repFactorString = replicationOptions.get("replication_factor");
            return repFactorString == null ? null : new SimpleStrategy(Integer.parseInt(repFactorString));
        } else if (strategyClass.contains("NetworkTopologyStrategy")) {
            // Every option except "class" is a datacenter name -> replication factor pair
            Map<String, Integer> dcRfs = new HashMap<String, Integer>();
            for (Map.Entry<String, String> entry : replicationOptions.entrySet()) {
                if (entry.getKey().equals("class"))
                    continue;
                dcRfs.put(entry.getKey(), Integer.parseInt(entry.getValue()));
            }
            return new NetworkTopologyStrategy(dcRfs);
        } else {
            // We might want to support OldNetworkTopologyStrategy, though not sure anyone is still using that
            return null;
        }
    } catch (NumberFormatException e) {
        // Cassandra wouldn't let that pass in the first place so this really should never happen
        logger.error("Failed to parse replication options: " + replicationOptions, e);
        return null;
    }
}
abstract Map> computeTokenToReplicaMap(String keyspaceName, Map tokenToPrimary, List ring);
/**
 * Returns the token at position {@code i} of the ring, wrapping around to the start of the
 * list when {@code i} runs past its end.
 *
 * @param i the position to read; expected to be non-negative (a negative value would yield a
 *     negative index).
 * @param ring the tokens of the ring; must not be empty, since the index is taken modulo its size.
 * @return the token at index {@code i % ring.size()}.
 */
private static Token getTokenWrapping(int i, List<Token> ring) {
    return ring.get(i % ring.size());
}
static class SimpleStrategy extends ReplicationStrategy {
private final int replicationFactor;
private SimpleStrategy(int replicationFactor) {
this.replicationFactor = replicationFactor;
}
@Override
Map> computeTokenToReplicaMap(String keyspaceName, Map tokenToPrimary, List ring) {
int rf = Math.min(replicationFactor, ring.size());
Map> replicaMap = new HashMap>(tokenToPrimary.size());
for (int i = 0; i < ring.size(); i++) {
// Consecutive sections of the ring can assigned to the same host
Set replicas = new LinkedHashSet();
for (int j = 0; j < ring.size() && replicas.size() < rf; j++)
replicas.add(tokenToPrimary.get(getTokenWrapping(i + j, ring)));
replicaMap.put(ring.get(i), ImmutableSet.copyOf(replicas));
}
return replicaMap;
}
@Override
public boolean equals(Object o) {
if (this == o)
return true;
if (o == null || getClass() != o.getClass())
return false;
SimpleStrategy that = (SimpleStrategy) o;
return replicationFactor == that.replicationFactor;
}
@Override
public int hashCode() {
return replicationFactor;
}
}
static class NetworkTopologyStrategy extends ReplicationStrategy {
private static final Logger logger = LoggerFactory.getLogger(NetworkTopologyStrategy.class);
private final Map replicationFactors;
private NetworkTopologyStrategy(Map replicationFactors) {
this.replicationFactors = replicationFactors;
}
@Override
Map> computeTokenToReplicaMap(String keyspaceName, Map tokenToPrimary, List ring) {
logger.debug("Computing token to replica map for keyspace: {}.", keyspaceName);
// Track how long it takes to compute the token to replica map
long startTime = System.currentTimeMillis();
// This is essentially a copy of org.apache.cassandra.locator.NetworkTopologyStrategy
Map> racks = getRacksInDcs(tokenToPrimary.values());
Map> replicaMap = new HashMap>(tokenToPrimary.size());
Map dcHostCount = Maps.newHashMapWithExpectedSize(replicationFactors.size());
Set warnedDcs = Sets.newHashSetWithExpectedSize(replicationFactors.size());
// find maximum number of nodes in each DC
for (Host host : Sets.newHashSet(tokenToPrimary.values())) {
String dc = host.getDatacenter();
if (dcHostCount.get(dc) == null) {
dcHostCount.put(dc, 0);
}
dcHostCount.put(dc, dcHostCount.get(dc) + 1);
}
for (int i = 0; i < ring.size(); i++) {
Map> allDcReplicas = new HashMap>();
Map> seenRacks = new HashMap>();
Map> skippedDcEndpoints = new HashMap>();
for (String dc : replicationFactors.keySet()) {
allDcReplicas.put(dc, new HashSet());
seenRacks.put(dc, new HashSet());
skippedDcEndpoints.put(dc, new LinkedHashSet()); // preserve order
}
// Preserve order - primary replica will be first
Set replicas = new LinkedHashSet();
for (int j = 0; j < ring.size() && !allDone(allDcReplicas, dcHostCount); j++) {
Host h = tokenToPrimary.get(getTokenWrapping(i + j, ring));
String dc = h.getDatacenter();
if (dc == null || !allDcReplicas.containsKey(dc))
continue;
Integer rf = replicationFactors.get(dc);
Set dcReplicas = allDcReplicas.get(dc);
if (rf == null || dcReplicas.size() >= rf)
continue;
String rack = h.getRack();
// Check if we already visited all racks in dc
if (rack == null || seenRacks.get(dc).size() == racks.get(dc).size()) {
replicas.add(h);
dcReplicas.add(h);
} else {
// Is this a new rack?
if (seenRacks.get(dc).contains(rack)) {
skippedDcEndpoints.get(dc).add(h);
} else {
replicas.add(h);
dcReplicas.add(h);
seenRacks.get(dc).add(rack);
// If we've run out of distinct racks, add the nodes skipped so far
if (seenRacks.get(dc).size() == racks.get(dc).size()) {
Iterator skippedIt = skippedDcEndpoints.get(dc).iterator();
while (skippedIt.hasNext() && dcReplicas.size() < rf) {
Host nextSkipped = skippedIt.next();
replicas.add(nextSkipped);
dcReplicas.add(nextSkipped);
}
}
}
}
}
// If we haven't found enough replicas after a whole trip around the ring, this probably
// means that the replication factors are broken.
// Warn the user because that leads to quadratic performance of this method (JAVA-702).
for (Map.Entry> entry : allDcReplicas.entrySet()) {
String dcName = entry.getKey();
int expectedFactor = replicationFactors.get(dcName);
int achievedFactor = entry.getValue().size();
if (achievedFactor < expectedFactor && !warnedDcs.contains(dcName)) {
logger.warn("Error while computing token map for keyspace {} with datacenter {}: "
+ "could not achieve replication factor {} (found {} replicas only), "
+ "check your keyspace replication settings.",
keyspaceName, dcName, expectedFactor, achievedFactor);
// only warn once per DC
warnedDcs.add(dcName);
}
}
replicaMap.put(ring.get(i), ImmutableSet.copyOf(replicas));
}
long duration = System.currentTimeMillis() - startTime;
logger.debug("Token to replica map computation for keyspace {} completed in {} milliseconds",
keyspaceName, duration);
return replicaMap;
}
private boolean allDone(Map> map, Map dcHostCount) {
for (Map.Entry> entry : map.entrySet()) {
String dc = entry.getKey();
int dcCount = dcHostCount.get(dc) == null ? 0 : dcHostCount.get(dc);
if (entry.getValue().size() < Math.min(replicationFactors.get(dc), dcCount))
return false;
}
return true;
}
private Map> getRacksInDcs(Iterable hosts) {
Map> result = new HashMap>();
for (Host host : hosts) {
Set racks = result.get(host.getDatacenter());
if (racks == null) {
racks = new HashSet();
result.put(host.getDatacenter(), racks);
}
racks.add(host.getRack());
}
return result;
}
@Override
public boolean equals(Object o) {
if (this == o)
return true;
if (o == null || getClass() != o.getClass())
return false;
NetworkTopologyStrategy that = (NetworkTopologyStrategy) o;
return replicationFactors.equals(that.replicationFactors);
}
@Override
public int hashCode() {
return replicationFactors.hashCode();
}
}
}