
// hudson.util.ConsistentHash (from hudson-core) — source listing; HTML navigation text removed.
/*******************************************************************************
*
* Copyright (c) 2004-2009, Oracle Corporation
*
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
*
*
*
*
*******************************************************************************/
package hudson.util;
import ch.ethz.ssh2.crypto.digest.MD5;

import hudson.util.Iterators.DuplicateFilterIterator;

import java.io.UnsupportedEncodingException;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Set;
/**
* Consistent hash.
*
*
* This implementation is concurrency safe; additions and removals are serialized, but lookups
* can be performed concurrently even while modifications are in progress.
*
*
* Since the typical hash functions we use in {@link Object#hashCode()} aren't random enough to
* evenly populate the 2^32 ring space, we only ask the user to give us
* an injective function to a string,
* and then we use MD5 to create a random enough distribution.
*
*
* This consistent hash implementation is consistent both to the addition/removal of Ts, as well
* as to the increase/decrease of the replicas.
*
*
* See http://en.wikipedia.org/wiki/Consistent_hashing for references, and
* http://weblogs.java.net/blog/tomwhite/archive/2007/11/consistent_hash.html is probably a reasonable depiction.
* If we trust his experiments, creating 100 replicas will reduce the stddev to 10% of the mean for 10 nodes.
*
* @author Kohsuke Kawaguchi
* @since 1.302
*/
public class ConsistentHash<T> {
    /**
     * All the items in the hash, mapped to their replica points on the ring.
     */
    private final Map<T,Point[]> items = new HashMap<T,Point[]>();

    /** Number of replica points created per node when none is specified. */
    private final int defaultReplication;

    /** Maps a node to a unique string, which is then rehashed with MD5. */
    private final Hash<? super T> hash;

    /**
     * Immutable lookup table. A freshly-built instance is atomically published
     * on every mutation, which is what makes lock-free concurrent lookups safe.
     */
    private volatile Table table;

    /**
     * One replica point on the 2^32 ring: the ring position and the node that owns it.
     * Used for remembering the computed MD5 hash, since it's a bit expensive to
     * compute it all over again.
     */
    private static final class Point implements Comparable<Point> {
        final int hash;
        final Object item; // really T

        private Point(int hash, Object item) {
            this.hash = hash;
            this.item = item;
        }

        public int compareTo(Point that) {
            // compare explicitly rather than subtracting, to avoid int overflow
            if (this.hash < that.hash)  return -1;
            if (this.hash == that.hash) return 0;
            return 1;
        }
    }

    /**
     * Snapshot of the ring, sorted by hash value, with parallel arrays for
     * binary search. Instances are immutable once constructed.
     */
    private final class Table {
        private final int[] hash;     // sorted ring positions
        private final Object[] owner; // owner[i] owns position hash[i]; really T[]

        private Table() {
            // flatten every node's replica points into one sorted array
            int n = countAllPoints();
            Point[] allPoints = new Point[n];
            int p = 0;
            for (Point[] v : items.values()) {
                System.arraycopy(v, 0, allPoints, p, v.length);
                p += v.length;
            }
            Arrays.sort(allPoints);

            hash = new int[allPoints.length];
            owner = new Object[allPoints.length];
            for (int i = 0; i < allPoints.length; i++) {
                hash[i] = allPoints[i].hash;
                owner[i] = allPoints[i].item;
            }
        }

        /**
         * Returns the node that owns the given query point,
         * or null if the ring is empty.
         */
        @SuppressWarnings("unchecked")
        T lookup(int queryPoint) {
            int i = index(queryPoint);
            if (i < 0)  return null;
            return (T) owner[i];
        }

        /**
         * Returns a consistent stream of all the nodes, starting the search
         * from the given query point.
         *
         * <p>
         * This is a permutation of all the nodes, where nodes with more replicas
         * are more likely to show up early on.
         */
        @SuppressWarnings("unchecked")
        Iterator<T> list(int queryPoint) {
            final int start = index(queryPoint);
            // walk the ring from 'start', skipping nodes already returned
            // (inlined equivalent of Iterators.DuplicateFilterIterator)
            return new Iterator<T>() {
                private final Set<Object> seen = new HashSet<Object>();
                private int pos = 0;
                private T fetched; // look-ahead slot; null when not yet fetched

                public boolean hasNext() {
                    while (fetched == null && pos < owner.length) {
                        Object candidate = owner[(start + pos++) % owner.length];
                        if (seen.add(candidate))
                            fetched = (T) candidate;
                    }
                    return fetched != null;
                }

                public T next() {
                    if (!hasNext())  throw new NoSuchElementException();
                    T r = fetched;
                    fetched = null;
                    return r;
                }

                public void remove() {
                    throw new UnsupportedOperationException();
                }
            };
        }

        /**
         * Finds the index of the first ring point at or after the query point,
         * wrapping around to index 0 when the query is past the last point.
         *
         * @return -1 if the ring is empty.
         */
        private int index(int queryPoint) {
            if (hash.length == 0)  return -1;
            int idx = Arrays.binarySearch(hash, queryPoint);
            if (idx < 0) {
                idx = -idx - 1;     // binarySearch returned -(insertion point)-1
                idx %= hash.length; // wrap around the ring
            }
            return idx;
        }
    }

    /**
     * Hashes an item to a string.
     *
     * <p>
     * By default, {@link ConsistentHash} uses {@link Object#toString()} on 'T' to
     * obtain the hash, but that behavior can be changed by providing
     * a {@link Hash} implementation.
     *
     * <p>
     * This hash function need not produce a very uniform distribution, as the
     * output is rehashed with MD5. But it does need to make sure it doesn't
     * produce the same value for two different 'T's (and that's why this returns
     * String, not the usual int.)
     */
    public interface Hash<T> {
        /**
         * @param t
         *      The object to be hashed. Never null.
         * @return
         *      The hash value.
         */
        String hash(T t);
    }

    private static final Hash<Object> DEFAULT_HASH = new Hash<Object>() {
        public String hash(Object o) {
            return o.toString();
        }
    };

    public ConsistentHash() {
        this(DEFAULT_HASH);
    }

    public ConsistentHash(int defaultReplication) {
        this(DEFAULT_HASH, defaultReplication);
    }

    public ConsistentHash(Hash<? super T> hash) {
        this(hash, 100);
    }

    public ConsistentHash(Hash<? super T> hash, int defaultReplication) {
        this.hash = hash;
        this.defaultReplication = defaultReplication;
        this.table = new Table(); // initial empty table
    }

    /**
     * Counts the total number of replica points currently on the ring.
     */
    public int countAllPoints() {
        int r = 0;
        for (Point[] v : items.values())
            r += v.length;
        return r;
    }

    /**
     * Adds a new node with the default number of replicas.
     */
    public void add(T node) {
        add(node, defaultReplication);
    }

    /**
     * Calls {@link #add(Object)} with all the arguments.
     */
    public void addAll(T... nodes) {
        for (T node : nodes)
            add(node);
    }

    /**
     * Calls {@link #add(Object)} with all the arguments.
     */
    public void addAll(Collection<? extends T> nodes) {
        for (T node : nodes)
            add(node);
    }

    /**
     * Removes the node entirely. This is the same as {@code add(node,0)}
     */
    public void remove(T node) {
        add(node, 0);
    }

    /**
     * Adds a new node with the given number of replicas
     * (or removes it entirely when {@code replica==0}).
     *
     * <p>
     * This is the only function that manipulates {@link #items}.
     */
    public synchronized void add(T node, int replica) {
        if (replica == 0) {
            items.remove(node);
        } else {
            Point[] points = new Point[replica];
            String seed = hash.hash(node);
            for (int i = 0; i < replica; i++)
                points[i] = new Point(md5(seed + ':' + i), node);
            items.put(node, points);
        }
        refreshTable();
    }

    /**
     * Rebuilds the lookup table from {@link #items} and atomically publishes it.
     */
    private synchronized void refreshTable() {
        table = new Table();
    }

    /**
     * Hashes a string into a well-distributed 32-bit ring position via MD5.
     */
    private int md5(String s) {
        try {
            MessageDigest md5 = MessageDigest.getInstance("MD5");
            byte[] digest = md5.digest(s.getBytes("UTF-8"));
            // fold the 16-byte digest into 4 bytes
            for (int i = 0; i < 4; i++)
                digest[i] ^= digest[i+4] + digest[i+8] + digest[i+12];
            return (b2i(digest[0]) << 24) | (b2i(digest[1]) << 16) | (b2i(digest[2]) << 8) | b2i(digest[3]);
        } catch (NoSuchAlgorithmException e) {
            throw new Error(e); // MD5 is a mandatory algorithm on every JRE
        } catch (UnsupportedEncodingException e) {
            throw new Error(e); // UTF-8 is a mandatory charset on every JRE
        }
    }

    /**
     * unsigned byte->int.
     */
    private int b2i(byte b) {
        return ((int) b) & 0xFF;
    }

    /**
     * Looks up a consistent hash with the given data point.
     *
     * <p>
     * The whole point of this class is that if the same query point is given,
     * it's likely to return the same result even when other nodes are added/removed,
     * or the # of replicas for the given node is changed.
     *
     * @return
     *      null if the consistent hash is empty. Otherwise always non-null.
     */
    public T lookup(int queryPoint) {
        return table.lookup(queryPoint);
    }

    /**
     * Takes a string, hashes it with MD5, then calls {@link #lookup(int)}.
     */
    public T lookup(String queryPoint) {
        return lookup(md5(queryPoint));
    }

    /**
     * Creates a permutation of all the nodes for the given data point.
     *
     * <p>
     * The returned permutation is consistent, in the sense that a small change
     * to the consistent hash (like addition/removal/change of replicas) only
     * creates a small change in the permutation.
     *
     * <p>
     * Nodes with more replicas are more likely to show up early in the list.
     */
    public Iterable<T> list(final int queryPoint) {
        return new Iterable<T>() {
            public Iterator<T> iterator() {
                return table.list(queryPoint);
            }
        };
    }

    /**
     * Takes a string, hashes it with MD5, then calls {@link #list(int)}.
     */
    public Iterable<T> list(String queryPoint) {
        return list(md5(queryPoint));
    }
}