package org.infinispan.distribution.ch.impl;
import org.infinispan.commons.hash.Hash;
import org.infinispan.commons.marshall.AbstractExternalizer;
import org.infinispan.commons.util.Util;
import org.infinispan.distribution.ch.ConsistentHashFactory;
import org.infinispan.globalstate.ScopedPersistentState;
import org.infinispan.marshall.core.Ids;
import org.infinispan.remoting.transport.Address;
import org.infinispan.util.logging.Log;
import org.infinispan.util.logging.LogFactory;
import java.io.ObjectInput;
import java.io.ObjectOutput;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
/**
* A common assumption about consistent hashing is that, given a particular key
* and the same topology, it yields the same consistent hash value no matter
* which cache the key is stored in. However, that's not exactly the case in
* Infinispan.
*
* In order to optimise the number of segments moved on join/leave,
* Infinispan uses a consistent hash that depends on the previous consistent
* hash. Given two caches, even if they contain exactly the same keyset, it's
* very easy for the consistent hash history to differ, e.g. if 2 nodes join
* you might see two separate topology changes in one cache and a single
* topology change in the other. The reason is that each node has to send a
* {@link org.infinispan.topology.CacheTopologyControlCommand} for each cache
* it wants to join, and Infinispan can and does batch cache topology changes.
* For example, if a rebalance is in progress, joins are queued and sent in
* one go when the rebalance has finished.
*
* This {@link org.infinispan.distribution.ch.ConsistentHashFactory} implementation avoids the issues
* mentioned above and guarantees that multiple caches with the same members will
* have the same consistent hash.
*
* It has a drawback compared to {@link org.infinispan.distribution.ch.impl.DefaultConsistentHashFactory} though:
* it can potentially move a lot more segments during a rebalance than
* strictly necessary because it's not taking advantage of the optimisation
* mentioned above.
*
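* For illustration only, a minimal sketch of selecting this factory programmatically;
* it assumes the {@code org.infinispan.configuration.cache.ConfigurationBuilder} fluent
* API, which is not part of this class:
*
* <pre>{@code
* ConfigurationBuilder cb = new ConfigurationBuilder();
* cb.clustering().cacheMode(CacheMode.DIST_SYNC)
*    .hash()
*       .consistentHashFactory(new SyncConsistentHashFactory())
*       .numOwners(2)
*       .numSegments(256);
* }</pre>
*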
* @author Dan Berindei
* @since 5.2
*/
public class SyncConsistentHashFactory implements ConsistentHashFactory<DefaultConsistentHash> {
private static final Log log = LogFactory.getLog(SyncConsistentHashFactory.class);
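// Caps used by the builder: a node may receive roughly 10% more owned segments (5% more
// primary-owned segments) than its capacity-proportional share before the capacity-aware
// passes stop assigning segments to it.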
public static final float OWNED_SEGMENTS_ALLOWED_VARIATION = 1.10f;
public static final float PRIMARY_SEGMENTS_ALLOWED_VARIATION = 1.05f;
@Override
public DefaultConsistentHash create(Hash hashFunction, int numOwners, int numSegments, List<Address> members,
Map<Address, Float> capacityFactors) {
checkCapacityFactors(members, capacityFactors);
Builder builder = createBuilder(hashFunction, numOwners, numSegments, members, capacityFactors);
builder.populateOwners(numSegments);
builder.copyOwners();
return new DefaultConsistentHash(hashFunction, numOwners, numSegments, members, capacityFactors, builder.segmentOwners);
}
@Override
public DefaultConsistentHash fromPersistentState(ScopedPersistentState state) {
String consistentHashClass = state.getProperty("consistentHash");
if (!DefaultConsistentHash.class.getName().equals(consistentHashClass))
throw log.persistentConsistentHashMismatch(this.getClass().getName(), consistentHashClass);
return new DefaultConsistentHash(state);
}
protected Builder createBuilder(Hash hashFunction, int numOwners, int numSegments, List<Address> members, Map<Address, Float> capacityFactors) {
return new Builder(hashFunction, numOwners, numSegments, members, capacityFactors);
}
protected void checkCapacityFactors(List<Address> members, Map<Address, Float> capacityFactors) {
if (capacityFactors != null) {
float totalCapacity = 0;
for (Address node : members) {
Float capacityFactor = capacityFactors.get(node);
if (capacityFactor == null || capacityFactor < 0)
throw new IllegalArgumentException("Invalid capacity factor for node " + node);
totalCapacity += capacityFactor;
}
if (totalCapacity == 0)
throw new IllegalArgumentException("There must be at least one node with a non-zero capacity factor");
}
}
@Override
public DefaultConsistentHash updateMembers(DefaultConsistentHash baseCH, List<Address> newMembers,
Map<Address, Float> actualCapacityFactors) {
checkCapacityFactors(newMembers, actualCapacityFactors);
// The ConsistentHashFactory contract says we should return the same instance if we're not making changes
boolean sameCapacityFactors = actualCapacityFactors == null ? baseCH.getCapacityFactors() == null :
actualCapacityFactors.equals(baseCH.getCapacityFactors());
if (newMembers.equals(baseCH.getMembers()) && sameCapacityFactors)
return baseCH;
int numSegments = baseCH.getNumSegments();
int numOwners = baseCH.getNumOwners();
// We assume leavers are far fewer than members, so it makes sense to check for leavers
Set<Address> leavers = new HashSet<>(baseCH.getMembers());
leavers.removeAll(newMembers);
// Create a new "balanced" CH in case we need to allocate new owners for segments with 0 owners
DefaultConsistentHash rebalancedCH = null;
// Remove leavers
List<Address>[] newSegmentOwners = new List[numSegments];
for (int i = 0; i < numSegments; i++) {
List<Address> owners = new ArrayList<>(baseCH.locateOwnersForSegment(i));
owners.removeAll(leavers);
if (!owners.isEmpty()) {
newSegmentOwners[i] = owners;
} else {
// this segment has 0 owners, fix it
if (rebalancedCH == null) {
rebalancedCH = create(baseCH.getHashFunction(), numOwners, numSegments, newMembers, actualCapacityFactors);
}
newSegmentOwners[i] = rebalancedCH.locateOwnersForSegment(i);
}
}
return new DefaultConsistentHash(baseCH.getHashFunction(), numOwners, numSegments, newMembers,
actualCapacityFactors, newSegmentOwners);
}
@Override
public DefaultConsistentHash rebalance(DefaultConsistentHash baseCH) {
DefaultConsistentHash rebalancedCH = create(baseCH.getHashFunction(), baseCH.getNumOwners(),
baseCH.getNumSegments(), baseCH.getMembers(), baseCH.getCapacityFactors());
// the ConsistentHashFactory contract says we should return the same instance if we're not making changes
if (rebalancedCH.equals(baseCH))
return baseCH;
return rebalancedCH;
}
@Override
public DefaultConsistentHash union(DefaultConsistentHash ch1, DefaultConsistentHash ch2) {
return ch1.union(ch2);
}
protected static class Builder {
protected final Hash hashFunction;
protected final int numOwners;
protected final Map<Address, Float> capacityFactors;
protected final int actualNumOwners;
protected final int numSegments;
protected final List<Address> sortedMembers;
protected final int segmentSize;
protected final List<Address>[] segmentOwners;
protected final OwnershipStatistics stats;
protected boolean ignoreMaxSegments;
protected Builder(Hash hashFunction, int numOwners, int numSegments, List<Address> members,
Map<Address, Float> capacityFactors) {
this.hashFunction = hashFunction;
this.numSegments = numSegments;
this.numOwners = numOwners;
this.actualNumOwners = Math.min(numOwners, members.size());
this.sortedMembers = sort(members, capacityFactors);
this.capacityFactors = populateCapacityFactors(capacityFactors, sortedMembers);
this.segmentSize = Util.getSegmentSize(numSegments);
this.segmentOwners = new List[numSegments];
for (int i = 0; i < numSegments; i++) {
segmentOwners[i] = new ArrayList<>(actualNumOwners);
}
stats = new OwnershipStatistics(members);
}
private Map<Address, Float> populateCapacityFactors(Map<Address, Float> capacityFactors, List<Address> sortedMembers) {
if (capacityFactors != null)
return capacityFactors;
Map<Address, Float> realCapacityFactors = new HashMap<>();
for (Address member : sortedMembers) {
realCapacityFactors.put(member, 1.0f);
}
return realCapacityFactors;
}
protected void addOwnerNoCheck(int segment, Address owner) {
segmentOwners[segment].add(owner);
stats.incOwned(owner);
if (segmentOwners[segment].size() == 1) {
stats.incPrimaryOwned(owner);
}
}
protected float computeTotalCapacity() {
if (capacityFactors == null)
return sortedMembers.size();
float totalCapacity = 0;
for (Address member : sortedMembers) {
Float capacityFactor = capacityFactors.get(member);
totalCapacity += capacityFactor;
}
return totalCapacity;
}
protected List<Address> sort(List<Address> members, final Map<Address, Float> capacityFactors) {
ArrayList<Address> result = new ArrayList<>(members);
Collections.sort(result, new Comparator<Address>() {
@Override
public int compare(Address o1, Address o2) {
// Sort descending by capacity factor and ascending by address (UUID)
int capacityComparison = capacityFactors != null ? capacityFactors.get(o1).compareTo(capacityFactors.get(o2)) : 0;
return capacityComparison != 0 ? -capacityComparison : o1.compareTo(o2);
}
});
return result;
}
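/**
* Fills in the remaining owners of each segment by copying owners from the following segments
* (wrapping around): a first pass respects the per-node segment caps, a second pass ignores
* them so that segments reach {@code actualNumOwners} owners whenever enough eligible nodes
* exist.
*/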
protected void copyOwners() {
ignoreMaxSegments = false;
doCopyOwners();
ignoreMaxSegments = true;
doCopyOwners();
}
protected void doCopyOwners() {
// The primary owners have been already assigned (and sometimes backup owners as well).
// For each segment with not enough owners, add the owners from the previous segments.
for (int segment = 0; segment < numSegments; segment++) {
List<Address> owners = segmentOwners[segment];
int additionalOwnersSegment = nextSegment(segment);
while (canAddOwners(owners) && additionalOwnersSegment != segment) {
List<Address> additionalOwners = segmentOwners[additionalOwnersSegment];
for (Address additionalOwner : additionalOwners) {
addBackupOwner(segment, additionalOwner);
if (!canAddOwners(owners))
break;
}
additionalOwnersSegment = nextSegment(additionalOwnersSegment);
}
}
}
protected boolean canAddOwners(List<Address> owners) {
return owners.size() < actualNumOwners;
}
protected int nextSegment(int segment) {
if (segment == numSegments - 1)
return 0;
return segment + 1;
}
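/**
* Assigns primary owners via consistent hashing with virtual nodes: in each pass every member
* hashes to one segment ({@code computeSegment}) and claims it if the segment is still
* unowned and the member is below its primary-segment cap; passes with increasing virtual
* node indices repeat until every segment has a primary owner. A second phase then tries to
* give at least one segment to members that still own nothing, which can happen when there
* are fewer segments than members.
*/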
protected void populateOwners(int numSegments) {
int virtualNode = 0;
// Loop until we have already assigned a primary owner to each segment
do {
for (Address member : sortedMembers) {
int segment = computeSegment(member, virtualNode);
addPrimaryOwner(segment, member);
}
virtualNode++;
} while (stats.sumPrimaryOwned() < numSegments);
// If there are too few segments, some members may not have any segments at this point
// Loop until we have assigned at least one segment to each member
// or until we have assigned numOwners owners to all segments
virtualNode = 0;
boolean membersWithZeroSegments = false;
do {
for (Address member : sortedMembers) {
if (stats.getOwned(member) > 0)
continue;
membersWithZeroSegments = true;
int segment = computeSegment(member, virtualNode);
addBackupOwner(segment, member);
}
virtualNode++;
} while (membersWithZeroSegments && stats.sumOwned() < numSegments);
}
private int computeSegment(Address member, int virtualNode) {
// Add the virtual node count after applying MurmurHash on the node's hashCode
// to make up for badly spread test addresses.
int virtualNodeHash = normalizedHash(hashFunction, member.hashCode());
if (virtualNode != 0) {
virtualNodeHash = normalizedHash(hashFunction, virtualNodeHash + virtualNode);
}
return virtualNodeHash / segmentSize;
}
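/**
* Computes how many segment copies {@code node} is expected to own: proportional to its
* capacity factor, capped at {@code numSegments} per node and floored at 1 (0 for nodes
* with a zero capacity factor). For example, with {@code numSegments = 60},
* {@code numCopies = 2} and capacity factors 2/1/1, the expected shares are 60/30/30,
* summing to {@code numCopies * numSegments = 120}.
*/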
protected double computeExpectedSegmentsForNode(Address node, int numCopies) {
Float nodeCapacityFactor = capacityFactors.get(node);
if (nodeCapacityFactor == 0)
return 0;
double remainingCapacity = computeTotalCapacity();
double remainingCopies = numCopies * numSegments;
for (Address a : sortedMembers) {
float capacityFactor = capacityFactors.get(a);
double nodeSegments = capacityFactor / remainingCapacity * remainingCopies;
if (nodeSegments > numSegments) {
nodeSegments = numSegments;
remainingCapacity -= capacityFactor;
remainingCopies -= nodeSegments;
if (node.equals(a))
return nodeSegments;
} else {
// All the nodes from now on will have less than numSegments segments, so we can stop the iteration
if (!node.equals(a)) {
nodeSegments = nodeCapacityFactor / remainingCapacity * remainingCopies;
}
return Math.max(nodeSegments, 1);
}
}
throw new IllegalStateException("The nodes collection does not include " + node);
}
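/**
* Makes {@code candidate} the primary owner of {@code segment} if the segment has no owner yet
* and the candidate is still below its primary-segment cap; returns {@code true} if it was added.
*/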
protected boolean addPrimaryOwner(int segment, Address candidate) {
List<Address> owners = segmentOwners[segment];
if (owners.isEmpty()) {
double expectedSegments = computeExpectedSegmentsForNode(candidate, 1);
long maxSegments = Math.round(Math.ceil(expectedSegments) * PRIMARY_SEGMENTS_ALLOWED_VARIATION);
if (stats.getPrimaryOwned(candidate) < maxSegments) {
addOwnerNoCheck(segment, candidate);
return true;
}
}
return false;
}
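/**
* Adds {@code candidate} as an additional owner of {@code segment} if the segment still needs
* owners and does not already contain it. Unless {@code ignoreMaxSegments} is set, the
* candidate must also be below its owned-segment cap; in the unrestricted pass only nodes
* with a non-zero capacity factor are eligible. Returns {@code true} if it was added.
*/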
protected boolean addBackupOwner(int segment, Address candidate) {
List<Address> owners = segmentOwners[segment];
if (owners.size() < actualNumOwners && !owners.contains(candidate)) {
if (!ignoreMaxSegments) {
double expectedSegments = computeExpectedSegmentsForNode(candidate, actualNumOwners);
long maxSegments = Math.round(Math.ceil(expectedSegments) * OWNED_SEGMENTS_ALLOWED_VARIATION);
if (stats.getOwned(candidate) < maxSegments) {
addOwnerNoCheck(segment, candidate);
return true;
}
} else {
if (!capacityFactors.get(candidate).equals(0f)) {
addOwnerNoCheck(segment, candidate);
return true;
}
}
}
return false;
}
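/**
* Applies the hash function to {@code hashcode} and clears the sign bit so the result falls
* within {@code [0, Integer.MAX_VALUE]}.
*/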
protected int normalizedHash(Hash hashFunction, int hashcode) {
return hashFunction.hash(hashcode) & Integer.MAX_VALUE;
}
}
@Override
public boolean equals(Object other) {
return other != null && other.getClass() == getClass();
}
@Override
public int hashCode() {
return -10007;
}
public static class Externalizer extends AbstractExternalizer<SyncConsistentHashFactory> {
@Override
public void writeObject(ObjectOutput output, SyncConsistentHashFactory chf) {
}
@Override
@SuppressWarnings("unchecked")
public SyncConsistentHashFactory readObject(ObjectInput unmarshaller) {
return new SyncConsistentHashFactory();
}
@Override
public Integer getId() {
return Ids.SYNC_CONSISTENT_HASH_FACTORY;
}
@Override
public Set<Class<? extends SyncConsistentHashFactory>> getTypeClasses() {
return Collections.<Class<? extends SyncConsistentHashFactory>>singleton(SyncConsistentHashFactory.class);
}
}
}